From a7660281c83af9b4d1ad6db0df097369f238a95a Mon Sep 17 00:00:00 2001 From: Magnus Ulimoen Date: Fri, 29 Jan 2021 08:32:26 +0100 Subject: [PATCH] add inline to remove magic fix --- sbp/src/operators/algos.rs | 10 ++++++++-- sbp/src/operators/traditional4.rs | 5 ----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sbp/src/operators/algos.rs b/sbp/src/operators/algos.rs index 88c92e7..115b322 100644 --- a/sbp/src/operators/algos.rs +++ b/sbp/src/operators/algos.rs @@ -60,11 +60,11 @@ pub(crate) mod constmatrix { pub const fn new(data: [[T; N]; M]) -> Self { Self { data } } - #[inline] + #[inline(always)] pub const fn nrows(&self) -> usize { M } - #[inline] + #[inline(always)] pub const fn ncols(&self) -> usize { N } @@ -95,12 +95,15 @@ pub(crate) mod constmatrix { } } } + #[inline(always)] pub fn iter(&self) -> impl Iterator { self.data.iter().flatten() } + #[inline(always)] pub fn iter_mut(&mut self) -> impl Iterator { self.data.iter_mut().flatten() } + #[inline(always)] pub fn iter_rows( &self, ) -> impl ExactSizeIterator + DoubleEndedIterator { @@ -122,9 +125,11 @@ pub(crate) mod constmatrix { } impl ColVector { + #[inline(always)] pub fn map_to_col(slice: &[T; N]) -> &ColVector { unsafe { std::mem::transmute::<&[T; N], &Self>(slice) } } + #[inline(always)] pub fn map_to_col_mut(slice: &mut [T; N]) -> &mut ColVector { unsafe { std::mem::transmute::<&mut [T; N], &mut Self>(slice) } } @@ -143,6 +148,7 @@ pub(crate) mod constmatrix { where for<'f> T: core::ops::MulAssign<&'f T>, { + #[inline(always)] fn mul_assign(&mut self, other: &T) { self.iter_mut().for_each(|x| *x *= other) } diff --git a/sbp/src/operators/traditional4.rs b/sbp/src/operators/traditional4.rs index 352a92b..37519e2 100644 --- a/sbp/src/operators/traditional4.rs +++ b/sbp/src/operators/traditional4.rs @@ -90,11 +90,6 @@ impl SbpOperator1d for SBP4 { } fn diff_op_row_local(prev: ndarray::ArrayView2, mut fut: ndarray::ArrayViewMut2) { - // Magic two lines that prevents or enables optimisation - // (doubles instructions when not included) - let mut flipmatrix = SBP4::BLOCK_MATRIX; - flipmatrix *= &-1.0; - for (p, mut f) in prev .axis_iter(ndarray::Axis(0)) .zip(fut.axis_iter_mut(ndarray::Axis(0)))