use generalised diff_op_row for Upwind4
This commit is contained in:
		| @@ -1,98 +1,16 @@ | ||||
| use super::{SbpOperator1d, SbpOperator2d, UpwindOperator1d, UpwindOperator2d}; | ||||
| use super::{diff_op_row, SbpOperator1d, SbpOperator2d, UpwindOperator1d, UpwindOperator2d}; | ||||
| use crate::Float; | ||||
| use ndarray::{ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2, Axis}; | ||||
|  | ||||
| #[derive(Debug, Copy, Clone)] | ||||
| pub struct Upwind4; | ||||
|  | ||||
| /// Simdtype used in diff_simd_col and diff_simd_row | ||||
| /// Simdtype used in diff_simd_col | ||||
| #[cfg(feature = "f32")] | ||||
| type SimdT = packed_simd::f32x8; | ||||
| #[cfg(not(feature = "f32"))] | ||||
| type SimdT = packed_simd::f64x8; | ||||
|  | ||||
| macro_rules! diff_simd_row_7_47 { | ||||
|     ($name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => { | ||||
|         #[inline(never)] | ||||
|         fn $name(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) { | ||||
|             assert_eq!(prev.shape(), fut.shape()); | ||||
|             assert!(prev.len_of(Axis(1)) >= 2 * $BLOCK.len()); | ||||
|             assert!(prev.len() >= SimdT::lanes()); | ||||
|             // The prev and fut array must have contiguous last dimension | ||||
|             assert_eq!(prev.strides()[1], 1); | ||||
|             assert_eq!(fut.strides()[1], 1); | ||||
|  | ||||
|             let nx = prev.len_of(Axis(1)); | ||||
|             let dx = 1.0 / (nx - 1) as Float; | ||||
|             let idx = 1.0 / dx; | ||||
|  | ||||
|             for j in 0..prev.len_of(Axis(0)) { | ||||
|                 use std::slice; | ||||
|                 let prev = unsafe { slice::from_raw_parts(prev.uget((j, 0)) as *const Float, nx) }; | ||||
|                 let fut = | ||||
|                     unsafe { slice::from_raw_parts_mut(fut.uget_mut((j, 0)) as *mut Float, nx) }; | ||||
|  | ||||
|                 let first_elems = unsafe { SimdT::from_slice_unaligned_unchecked(prev) }; | ||||
|                 let block = { | ||||
|                     let bl = $BLOCK; | ||||
|                     [ | ||||
|                         SimdT::new( | ||||
|                             bl[0][0], bl[0][1], bl[0][2], bl[0][3], bl[0][4], bl[0][5], bl[0][6], | ||||
|                             0.0, | ||||
|                         ), | ||||
|                         SimdT::new( | ||||
|                             bl[1][0], bl[1][1], bl[1][2], bl[1][3], bl[1][4], bl[1][5], bl[1][6], | ||||
|                             0.0, | ||||
|                         ), | ||||
|                         SimdT::new( | ||||
|                             bl[2][0], bl[2][1], bl[2][2], bl[2][3], bl[2][4], bl[2][5], bl[2][6], | ||||
|                             0.0, | ||||
|                         ), | ||||
|                         SimdT::new( | ||||
|                             bl[3][0], bl[3][1], bl[3][2], bl[3][3], bl[3][4], bl[3][5], bl[3][6], | ||||
|                             0.0, | ||||
|                         ), | ||||
|                     ] | ||||
|                 }; | ||||
|                 fut[0] = idx * (block[0] * first_elems).sum(); | ||||
|                 fut[1] = idx * (block[1] * first_elems).sum(); | ||||
|                 fut[2] = idx * (block[2] * first_elems).sum(); | ||||
|                 fut[3] = idx * (block[3] * first_elems).sum(); | ||||
|  | ||||
|                 let diag = { | ||||
|                     let diag = $DIAG; | ||||
|                     SimdT::new( | ||||
|                         diag[0], diag[1], diag[2], diag[3], diag[4], diag[5], diag[6], 0.0, | ||||
|                     ) | ||||
|                 }; | ||||
|                 for i in 4..nx - 4 { | ||||
|                     unsafe { | ||||
|                         let prev = SimdT::from_slice_unaligned_unchecked(&prev[i - 3..]); | ||||
|                         *fut.get_unchecked_mut(i) = idx * (prev * diag).sum(); | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 let last_elems = unsafe { SimdT::from_slice_unaligned_unchecked(&prev[nx - 8..]) } | ||||
|                     .shuffle1_dyn([7, 6, 5, 4, 3, 2, 1, 0].into()); | ||||
|                 if $symmetric { | ||||
|                     fut[nx - 4] = idx * (block[3] * last_elems).sum(); | ||||
|                     fut[nx - 3] = idx * (block[2] * last_elems).sum(); | ||||
|                     fut[nx - 2] = idx * (block[1] * last_elems).sum(); | ||||
|                     fut[nx - 1] = idx * (block[0] * last_elems).sum(); | ||||
|                 } else { | ||||
|                     fut[nx - 4] = -idx * (block[3] * last_elems).sum(); | ||||
|                     fut[nx - 3] = -idx * (block[2] * last_elems).sum(); | ||||
|                     fut[nx - 2] = -idx * (block[1] * last_elems).sum(); | ||||
|                     fut[nx - 1] = -idx * (block[0] * last_elems).sum(); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
| } | ||||
|  | ||||
| diff_simd_row_7_47!(diff_simd_row, Upwind4::BLOCK, Upwind4::DIAG, false); | ||||
| diff_simd_row_7_47!(diss_simd_row, Upwind4::DISS_BLOCK, Upwind4::DISS_DIAG, true); | ||||
|  | ||||
| macro_rules! diff_simd_col_7_47 { | ||||
|     ($name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => { | ||||
|         #[inline(never)] | ||||
| @@ -291,7 +209,7 @@ impl<SBP: SbpOperator1d> SbpOperator2d for (&SBP, &Upwind4) { | ||||
|  | ||||
|         match (prev.strides(), fut.strides()) { | ||||
|             ([_, 1], [_, 1]) => { | ||||
|                 diff_simd_row(prev, fut); | ||||
|                 diff_op_row(Upwind4::BLOCK, Upwind4::DIAG, false, false)(prev, fut); | ||||
|             } | ||||
|             ([1, _], [1, _]) if prev.len_of(Axis(0)) % SimdT::lanes() == 0 => { | ||||
|                 diff_simd_col(prev, fut); | ||||
| @@ -412,7 +330,7 @@ impl<UO: UpwindOperator1d> UpwindOperator2d for (&UO, &Upwind4) { | ||||
|  | ||||
|         match (prev.strides(), fut.strides()) { | ||||
|             ([_, 1], [_, 1]) => { | ||||
|                 diss_simd_row(prev, fut); | ||||
|                 diff_op_row(Upwind4::DISS_BLOCK, Upwind4::DISS_DIAG, true, false)(prev, fut); | ||||
|             } | ||||
|             ([1, _], [1, _]) if prev.len_of(Axis(0)) % SimdT::lanes() == 0 => { | ||||
|                 diss_simd_col(prev, fut); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Magnus Ulimoen
					Magnus Ulimoen