Elide bounds check in diffxi
`array_windows().skip(n)` did not elide bounds checks as it should. If the slice is instead offset by the skipped amount before the windows are taken, we get the same behaviour, but this aids the compiler enough to elide the checks. The two changed lines allow SIMD optimisations, giving an impressive two-thirds reduction in instructions in the benchmark.
This commit is contained in:
parent
a81ba0365d
commit
76f5291131
|
@ -142,9 +142,8 @@ pub(crate) fn diff_op_1d_slice<const M: usize, const N: usize, const D: usize>(
|
||||||
// based on the block size
|
// based on the block size
|
||||||
let window_elems_to_skip = M - ((D - 1) / 2);
|
let window_elems_to_skip = M - ((D - 1) / 2);
|
||||||
|
|
||||||
for (window, f) in prev
|
for (window, f) in prev[window_elems_to_skip..]
|
||||||
.array_windows::<D>()
|
.array_windows::<D>()
|
||||||
.skip(window_elems_to_skip)
|
|
||||||
.zip(fut.array_chunks_mut::<1>())
|
.zip(fut.array_chunks_mut::<1>())
|
||||||
{
|
{
|
||||||
let fut = ColVector::<_, 1>::map_to_col_mut(f);
|
let fut = ColVector::<_, 1>::map_to_col_mut(f);
|
||||||
|
|
Loading…
Reference in New Issue