From 36293e75e61ee2834fd009555974afc103844f82 Mon Sep 17 00:00:00 2001 From: Magnus Ulimoen Date: Thu, 28 Jan 2021 21:59:13 +0100 Subject: [PATCH] 10% instr reduction with fast_ intr --- sbp/src/operators/algos.rs | 53 ++++++++++++++++++++++--------- sbp/src/operators/traditional4.rs | 4 ++- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/sbp/src/operators/algos.rs b/sbp/src/operators/algos.rs index 3fd83bf..26686e0 100644 --- a/sbp/src/operators/algos.rs +++ b/sbp/src/operators/algos.rs @@ -59,9 +59,11 @@ pub(crate) mod constmatrix { pub const fn new(data: [[T; N]; M]) -> Self { Self { data } } + #[inline] pub const fn nrows(&self) -> usize { M } + #[inline] pub const fn ncols(&self) -> usize { N } @@ -82,12 +84,13 @@ pub(crate) mod constmatrix { T: Default + core::ops::AddAssign, for<'f> &'f T: std::ops::Mul, { - *out = Default::default(); for i in 0..M { for j in 0..P { + let mut t = T::default(); for k in 0..N { - out[(i, j)] += &self[(i, k)] * &other[(k, j)]; + t += &self[(i, k)] * &other[(k, j)]; } + out[(i, j)] = t; } } } @@ -148,6 +151,34 @@ pub(crate) mod constmatrix { } } + use super::Float; + impl Matrix { + pub fn matmul_fast( + &self, + other: &Matrix, + ) -> Matrix { + let mut out = Matrix::default(); + self.matmul_into_fast(other, &mut out); + out + } + pub fn matmul_into_fast( + &self, + other: &Matrix, + out: &mut Matrix, + ) { + use core::intrinsics::{fadd_fast, fmul_fast}; + for i in 0..M { + for j in 0..P { + let mut t = 0.0; + for k in 0..N { + t = unsafe { fadd_fast(t, fmul_fast(self[(i, k)], other[(k, j)])) } + } + out[(i, j)] = t; + } + } + } + } + #[cfg(test)] mod tests { use super::{super::*, *}; @@ -240,8 +271,8 @@ pub(crate) fn diff_op_1d_matrix( #[inline(always)] pub(crate) fn diff_op_1d_slice_matrix( block: &Matrix, + endblock: &Matrix, diag: &RowVector, - symmetry: Symmetry, optype: OperatorType, prev: &[Float], fut: &mut [Float], @@ -262,10 +293,10 @@ pub(crate) fn diff_op_1d_slice_matrix::map_to_col((&prev[0..N]).try_into().unwrap()); + let prev = ColVector::<_, N>::map_to_col(prev.array_windows::().nth(0).unwrap()); let fut = ColVector::<_, M>::map_to_col_mut((&mut fut[0..M]).try_into().unwrap()); - block.matmul_into(prev, fut); + block.matmul_into_fast(prev, fut); *fut *= &idx; } @@ -283,24 +314,16 @@ pub(crate) fn diff_op_1d_slice_matrix::map_to_col_mut(f); let prev = ColVector::<_, D>::map_to_col(window); - diag.matmul_into(prev, fut); + diag.matmul_into_fast(prev, fut); *fut *= &idx; } - let flipped = { - let mut flipped = block.flip(); - if symmetry != Symmetry::Symmetric { - flipped *= &-1.0; - } - flipped - }; - { let prev = prev.array_windows::().last().unwrap(); let prev = ColVector::<_, N>::map_to_col(prev); let fut = ColVector::<_, M>::map_to_col_mut((&mut fut[nx - M..]).try_into().unwrap()); - flipped.matmul_into(prev, fut); + endblock.matmul_into_fast(prev, fut); *fut *= &idx; } } diff --git a/sbp/src/operators/traditional4.rs b/sbp/src/operators/traditional4.rs index c29944d..41f3c03 100644 --- a/sbp/src/operators/traditional4.rs +++ b/sbp/src/operators/traditional4.rs @@ -83,14 +83,16 @@ impl SbpOperator1d for SBP4 { } fn diff_op_row_local(prev: ndarray::ArrayView2, mut fut: ndarray::ArrayViewMut2) { + let mut flipmatrix = SBP4::BLOCK_MATRIX.flip(); + flipmatrix *= &-1.0; for (p, mut f) in prev .axis_iter(ndarray::Axis(0)) .zip(fut.axis_iter_mut(ndarray::Axis(0))) { super::diff_op_1d_slice_matrix( &SBP4::BLOCK_MATRIX, + &flipmatrix, &SBP4::DIAG_MATRIX, - super::Symmetry::AntiSymmetric, super::OperatorType::Normal, p.as_slice().unwrap(), f.as_slice_mut().unwrap(),