use some unsafe...

This commit is contained in:
Magnus Ulimoen 2021-02-10 19:29:26 +01:00
parent 8a6dc60edf
commit 02175d1734
1 changed file with 17 additions and 3 deletions

View File

@@ -433,6 +433,14 @@ pub(crate) fn diff_op_2d_sliceable_y_simd<const M: usize, const N: usize, const
let prev = &prev[..N * ny]; let prev = &prev[..N * ny];
let fut = &mut fut[..M * ny]; let fut = &mut fut[..M * ny];
/*
let prevcol = {
let prev_ptr = prev.as_ptr();
move |i: usize| -> &[Float] {
unsafe { std::slice::from_raw_parts(prev_ptr.add(i * ny), ny) }
}
};
*/
let prevcol = |i: usize| -> &[Float] { &prev[i * ny..(i + 1) * ny] }; let prevcol = |i: usize| -> &[Float] { &prev[i * ny..(i + 1) * ny] };
for (&bl, fut) in matrix.iter_rows().zip(fut.chunks_exact_mut(ny)) { for (&bl, fut) in matrix.iter_rows().zip(fut.chunks_exact_mut(ny)) {
@@ -468,9 +476,16 @@ pub(crate) fn diff_op_2d_sliceable_y_simd<const M: usize, const N: usize, const
let half_diag_width = (D - 1) / 2; let half_diag_width = (D - 1) / 2;
assert!(half_diag_width <= M); assert!(half_diag_width <= M);
let prevcol = |i: usize| -> &[Float] { &prev[i * ny..(i + 1) * ny] }; let prevcol = {
let prev_ptr = prev.as_ptr();
move |i: usize| -> &[Float] {
unsafe { std::slice::from_raw_parts(prev_ptr.add(i * ny), ny) }
}
};
//let prevcol = |i: usize| -> &[Float] { &prev[i * ny..(i + 1) * ny] };
for (fut, ifut) in futmid.chunks_exact_mut(ny).zip(M..nx - M) { for (fut, ifut) in futmid.chunks_exact_mut(ny).zip(M..nx - M) {
let mut fut = fut.chunks_exact_mut(SimdT::lanes()); let mut fut = fut.array_chunks_mut::<{ SimdT::lanes() }>();
for (j, fut) in fut.by_ref().enumerate() { for (j, fut) in fut.by_ref().enumerate() {
let index_to_simd = let index_to_simd =
|i| SimdT::from_slice_unaligned(&prevcol(i)[SimdT::lanes() * j..]); |i| SimdT::from_slice_unaligned(&prevcol(i)[SimdT::lanes() * j..]);
@@ -481,7 +496,6 @@ pub(crate) fn diff_op_2d_sliceable_y_simd<const M: usize, const N: usize, const
} }
f *= idx; f *= idx;
{ {
// puts simd along stride 1, j never goes past end of slice
f.write_to_slice_unaligned(fut); f.write_to_slice_unaligned(fut);
} }
} }