Elide bounds check in diffxi

array_windows.skip did not elide bounds checks as it should. If
the slice is instead offset by the skipped amount, we get the
same behaviour, but this aids the compiler enough to elide them.
The two changed lines allow SIMD optimisations, giving an
impressive two-thirds reduction in the number of instructions
in the benchmark.
This commit is contained in:
Magnus Ulimoen 2021-03-25 17:23:01 +01:00
parent a81ba0365d
commit 76f5291131
1 changed file with 1 addition and 2 deletions

View File

@ -142,9 +142,8 @@ pub(crate) fn diff_op_1d_slice<const M: usize, const N: usize, const D: usize>(
// based on the block size // based on the block size
let window_elems_to_skip = M - ((D - 1) / 2); let window_elems_to_skip = M - ((D - 1) / 2);
for (window, f) in prev for (window, f) in prev[window_elems_to_skip..]
.array_windows::<D>() .array_windows::<D>()
.skip(window_elems_to_skip)
.zip(fut.array_chunks_mut::<1>()) .zip(fut.array_chunks_mut::<1>())
{ {
let fut = ColVector::<_, 1>::map_to_col_mut(f); let fut = ColVector::<_, 1>::map_to_col_mut(f);