From 4ae5c02bb1a7246fe814b9e8f3436e0d4a3402d3 Mon Sep 17 00:00:00 2001 From: Magnus Ulimoen Date: Tue, 23 Mar 2021 19:21:38 +0100 Subject: [PATCH] Replace FastFloat with mul_add --- multigrid/Cargo.toml | 2 +- sbp/Cargo.toml | 1 - sbp/src/operators/algos.rs | 70 ++++++++------------------------------ webfront/Cargo.toml | 2 +- 4 files changed, 16 insertions(+), 59 deletions(-) diff --git a/multigrid/Cargo.toml b/multigrid/Cargo.toml index 238ad7c..0370fd0 100644 --- a/multigrid/Cargo.toml +++ b/multigrid/Cargo.toml @@ -6,7 +6,7 @@ edition = "2018" [dependencies] -sbp = { path = "../sbp", features = ["serde1", "fast-float"] } +sbp = { path = "../sbp", features = ["serde1"] } euler = { path = "../euler", features = ["serde1"] } hdf5 = "0.7.0" integrate = { path = "../utils/integrate" } diff --git a/sbp/Cargo.toml b/sbp/Cargo.toml index 4e85fe5..5fd3a30 100644 --- a/sbp/Cargo.toml +++ b/sbp/Cargo.toml @@ -17,7 +17,6 @@ constmatrix = { path = "../utils/constmatrix" } [features] # Use f32 as precision, default is f64 f32 = ["float/f32"] -fast-float = ["float/fast-float"] sparse = ["sprs"] serde1 = ["serde", "ndarray/serde"] diff --git a/sbp/src/operators/algos.rs b/sbp/src/operators/algos.rs index b075447..5d0896e 100644 --- a/sbp/src/operators/algos.rs +++ b/sbp/src/operators/algos.rs @@ -4,9 +4,6 @@ use num_traits::Zero; pub(crate) use constmatrix::{ColVector, Matrix, RowVector}; -#[cfg(feature = "fast-float")] -use float::FastFloat; - #[derive(Clone, Debug, PartialEq)] pub(crate) struct DiagonalMatrix { pub start: [Float; B], @@ -105,17 +102,14 @@ pub(crate) fn diff_op_1d_slice( prev: &[Float], fut: &mut [Float], ) { - #[cfg(feature = "fast-float")] - let (matrix, prev, fut) = { - use std::mem::transmute; - unsafe { - ( - transmute::<_, &BlockMatrix>(matrix), - transmute::<_, &[FastFloat]>(prev), - transmute::<_, &mut [FastFloat]>(fut), - ) - } - }; + #[inline(never)] + fn dedup_matmul( + c: &mut ColVector, + a: &Matrix, + b: &ColVector, + ) { + c.matmul_float_into(a, b) + } assert_eq!(prev.len(), fut.len()); let nx = prev.len(); @@ -130,8 +124,6 @@ pub(crate) fn diff_op_1d_slice( 1.0 / (nx - 1) as Float }; let idx = 1.0 / dx; - #[cfg(feature = "fast-float")] - let idx = FastFloat::from(idx); // Help aliasing analysis let (futb1, fut) = fut.split_at_mut(M); @@ -142,7 +134,7 @@ pub(crate) fn diff_op_1d_slice( let prev = ColVector::<_, N>::map_to_col(prev.array_windows::().next().unwrap()); let fut = ColVector::<_, M>::map_to_col_mut(futb1.try_into().unwrap()); - fut.matmul_into(&matrix.start, prev); + dedup_matmul(fut, &matrix.start, prev); *fut *= idx; } @@ -158,7 +150,7 @@ pub(crate) fn diff_op_1d_slice( let fut = ColVector::<_, 1>::map_to_col_mut(f); let prev = ColVector::<_, D>::map_to_col(window); - fut.matmul_into(&matrix.diag, prev); + fut.matmul_float_into(&matrix.diag, prev); *fut *= idx; } @@ -167,7 +159,7 @@ pub(crate) fn diff_op_1d_slice( let prev = ColVector::<_, N>::map_to_col(prev); let fut = ColVector::<_, M>::map_to_col_mut(futb2.try_into().unwrap()); - fut.matmul_into(&matrix.end, prev); + dedup_matmul(fut, &matrix.end, prev); *fut *= idx; } } @@ -199,19 +191,6 @@ pub(crate) fn diff_op_2d_fallback, mut fut: ArrayViewMut2, ) { - /* Does not increase the perf... - #[cfg(feature = "fast-float")] - let (matrix, prev, mut fut) = unsafe { - ( - std::mem::transmute::<_, &BlockMatrix>(matrix), - std::mem::transmute::<_, ArrayView2>(prev), - std::mem::transmute::<_, ArrayViewMut2>(fut), - ) - }; - #[cfg(not(feature = "fast-float"))] - let mut fut = fut; - */ - assert_eq!(prev.shape(), fut.shape()); let nx = prev.shape()[1]; let ny = prev.shape()[0]; @@ -287,19 +266,6 @@ pub(crate) fn diff_op_2d_sliceable_y, mut fut: ArrayViewMut2, ) { - /* Does not increase the perf... - #[cfg(feature = "fast-float")] - let (matrix, prev, mut fut) = unsafe { - ( - std::mem::transmute::<_, &BlockMatrix>(matrix), - std::mem::transmute::<_, ArrayView2>(prev), - std::mem::transmute::<_, ArrayViewMut2>(fut), - ) - }; - #[cfg(not(feature = "fast-float"))] - let mut fut = fut; - */ - assert_eq!(prev.shape(), fut.shape()); let nx = prev.shape()[1]; let ny = prev.shape()[0]; @@ -733,17 +699,9 @@ fn dotproduct<'a>( u: impl IntoIterator, v: impl IntoIterator, ) -> Float { - u.into_iter().zip(v.into_iter()).fold(0.0, |acc, (&u, &v)| { - #[cfg(feature = "fast-float")] - { - // We do not care about the order of multiplication nor addition - (FastFloat::from(acc) + FastFloat::from(u) * FastFloat::from(v)).into() - } - #[cfg(not(feature = "fast-float"))] - { - acc + u * v - } - }) + u.into_iter() + .zip(v.into_iter()) + .fold(0.0, |acc, (&u, &v)| Float::mul_add(u, v, acc)) } #[cfg(feature = "sparse")] diff --git a/webfront/Cargo.toml b/webfront/Cargo.toml index 43baf83..7d80dc0 100644 --- a/webfront/Cargo.toml +++ b/webfront/Cargo.toml @@ -11,7 +11,7 @@ crate-type = ["cdylib"] wasm-bindgen = "0.2.63" console_error_panic_hook = "0.1.6" wee_alloc = "0.4.5" -sbp = { path = "../sbp", features = ["f32", "fast-float"] } +sbp = { path = "../sbp", features = ["f32"] } ndarray = "0.14.0" euler = { path = "../euler" } maxwell = { path = "../maxwell" }