use default trait methods

This commit is contained in:
Magnus Ulimoen 2020-04-08 23:07:14 +02:00
parent 60f9a89e65
commit c3a40d81ee
5 changed files with 293 additions and 354 deletions

View File

@ -3,68 +3,82 @@
use crate::Float; use crate::Float;
use ndarray::{ArrayView2, ArrayViewMut2}; use ndarray::{ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2};
pub trait SbpOperator: Send + Sync { pub trait SbpOperator: Send + Sync {
fn diffxi(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>); fn diff1d(prev: ArrayView1<Float>, fut: ArrayViewMut1<Float>);
fn diffeta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>); fn diffxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) {
assert_eq!(prev.shape(), fut.shape());
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diff1d(r0, r1);
}
}
fn diffeta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>) {
Self::diffxi(prev.reversed_axes(), fut.reversed_axes())
}
fn h() -> &'static [Float]; fn h() -> &'static [Float];
} }
pub trait UpwindOperator: SbpOperator { pub trait UpwindOperator: SbpOperator {
fn dissxi(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>); fn diss1d(prev: ArrayView1<Float>, fut: ArrayViewMut1<Float>);
fn disseta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>); fn dissxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) {
assert_eq!(prev.shape(), fut.shape());
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diss1d(r0, r1);
}
}
fn disseta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>) {
Self::dissxi(prev.reversed_axes(), fut.reversed_axes())
}
} }
#[macro_export] #[macro_export]
macro_rules! diff_op_1d { macro_rules! diff_op_1d {
($self: ty, $name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => { ($name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => {
impl $self { fn $name(prev: ArrayView1<Float>, mut fut: ArrayViewMut1<Float>) {
fn $name(prev: ArrayView1<Float>, mut fut: ArrayViewMut1<Float>) { assert_eq!(prev.shape(), fut.shape());
assert_eq!(prev.shape(), fut.shape()); let nx = prev.shape()[0];
let nx = prev.shape()[0]; assert!(nx >= 2 * $BLOCK.len());
assert!(nx >= 2 * $BLOCK.len());
let dx = 1.0 / (nx - 1) as Float; let dx = 1.0 / (nx - 1) as Float;
let idx = 1.0 / dx; let idx = 1.0 / dx;
let block = ::ndarray::arr2($BLOCK); let block = ::ndarray::arr2($BLOCK);
let diag = ::ndarray::arr1($DIAG); let diag = ::ndarray::arr1($DIAG);
let first_elems = prev.slice(::ndarray::s!(..block.len_of(::ndarray::Axis(1)))); let first_elems = prev.slice(::ndarray::s!(..block.len_of(::ndarray::Axis(1))));
for (bl, f) in block.outer_iter().zip(&mut fut) { for (bl, f) in block.outer_iter().zip(&mut fut) {
let diff = first_elems.dot(&bl); let diff = first_elems.dot(&bl);
*f = diff * idx; *f = diff * idx;
} }
// The window needs to be aligned to the diagonal elements, // The window needs to be aligned to the diagonal elements,
// based on the block size // based on the block size
let window_elems_to_skip = let window_elems_to_skip =
block.len_of(::ndarray::Axis(0)) - ((diag.len() - 1) / 2); block.len_of(::ndarray::Axis(0)) - ((diag.len() - 1) / 2);
for (window, f) in prev for (window, f) in prev
.windows(diag.len()) .windows(diag.len())
.into_iter() .into_iter()
.skip(window_elems_to_skip) .skip(window_elems_to_skip)
.zip(fut.iter_mut().skip(block.len_of(::ndarray::Axis(0)))) .zip(fut.iter_mut().skip(block.len_of(::ndarray::Axis(0))))
.take(nx - 2 * block.len_of(::ndarray::Axis(0))) .take(nx - 2 * block.len_of(::ndarray::Axis(0)))
{ {
let diff = diag.dot(&window); let diff = diag.dot(&window);
*f = diff * idx; *f = diff * idx;
} }
let last_elems = prev.slice(::ndarray::s!(nx - block.len_of(::ndarray::Axis(1))..;-1)); let last_elems = prev.slice(::ndarray::s!(nx - block.len_of(::ndarray::Axis(1))..;-1));
for (bl, f) in block.outer_iter() for (bl, f) in block.outer_iter()
.zip(&mut fut.slice_mut(s![nx - block.len_of(::ndarray::Axis(0))..;-1])) .zip(&mut fut.slice_mut(s![nx - block.len_of(::ndarray::Axis(0))..;-1]))
{ {
let diff = if $symmetric { let diff = if $symmetric {
bl.dot(&last_elems) bl.dot(&last_elems)
} else { } else {
-bl.dot(&last_elems) -bl.dot(&last_elems)
}; };
*f = diff * idx; *f = diff * idx;
}
} }
} }
}; };

View File

@ -1,12 +1,12 @@
use super::SbpOperator; use super::SbpOperator;
use crate::diff_op_1d; use crate::diff_op_1d;
use crate::Float; use crate::Float;
use ndarray::{s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2}; use ndarray::{s, ArrayView1, ArrayViewMut1};
#[derive(Debug)] #[derive(Debug)]
pub struct SBP4 {} pub struct SBP4 {}
diff_op_1d!(SBP4, diff_1d, SBP4::BLOCK, SBP4::DIAG, false); diff_op_1d!(diff_1d, SBP4::BLOCK, SBP4::DIAG, false);
impl SBP4 { impl SBP4 {
#[rustfmt::skip] #[rustfmt::skip]
@ -27,18 +27,8 @@ impl SBP4 {
} }
impl SbpOperator for SBP4 { impl SbpOperator for SBP4 {
fn diffxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) { fn diff1d(prev: ArrayView1<Float>, fut: ArrayViewMut1<Float>) {
assert_eq!(prev.shape(), fut.shape()); diff_1d(prev, fut)
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diff_1d(r0, r1);
}
}
fn diffeta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>) {
// transpose then use diffxi
Self::diffxi(prev.reversed_axes(), fut.reversed_axes());
} }
fn h() -> &'static [Float] { fn h() -> &'static [Float] {

View File

@ -1,12 +1,12 @@
use super::SbpOperator; use super::SbpOperator;
use crate::diff_op_1d; use crate::diff_op_1d;
use crate::Float; use crate::Float;
use ndarray::{s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2}; use ndarray::{s, ArrayView1, ArrayViewMut1};
#[derive(Debug)] #[derive(Debug)]
pub struct SBP8 {} pub struct SBP8 {}
diff_op_1d!(SBP8, diff_1d, SBP8::BLOCK, SBP8::DIAG, false); diff_op_1d!(diff_1d, SBP8::BLOCK, SBP8::DIAG, false);
impl SBP8 { impl SBP8 {
#[rustfmt::skip] #[rustfmt::skip]
@ -31,18 +31,8 @@ impl SBP8 {
} }
impl SbpOperator for SBP8 { impl SbpOperator for SBP8 {
fn diffxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) { fn diff1d(prev: ArrayView1<Float>, fut: ArrayViewMut1<Float>) {
assert_eq!(prev.shape(), fut.shape()); diff_1d(prev, fut)
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diff_1d(r0, r1);
}
}
fn diffeta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>) {
// transpose then use diffxi
Self::diffxi(prev.reversed_axes(), fut.reversed_axes());
} }
fn h() -> &'static [Float] { fn h() -> &'static [Float] {

View File

@ -12,251 +12,232 @@ type SimdT = packed_simd::f32x8;
#[cfg(not(feature = "f32"))] #[cfg(not(feature = "f32"))]
type SimdT = packed_simd::f64x8; type SimdT = packed_simd::f64x8;
diff_op_1d!(Upwind4, diff_1d, Upwind4::BLOCK, Upwind4::DIAG, false); diff_op_1d!(diff_1d, Upwind4::BLOCK, Upwind4::DIAG, false);
diff_op_1d!( diff_op_1d!(diss_1d, Upwind4::DISS_BLOCK, Upwind4::DISS_DIAG, true);
Upwind4,
diss_1d,
Upwind4::DISS_BLOCK,
Upwind4::DISS_DIAG,
true
);
macro_rules! diff_simd_row_7_47 { macro_rules! diff_simd_row_7_47 {
($self: ident, $name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => { ($name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => {
impl $self { #[inline(never)]
#[inline(never)] fn $name(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) {
fn $name(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) { assert_eq!(prev.shape(), fut.shape());
assert_eq!(prev.shape(), fut.shape()); assert!(prev.len_of(Axis(1)) >= 2 * $BLOCK.len());
assert!(prev.len_of(Axis(1)) >= 2 * $BLOCK.len()); assert!(prev.len() >= SimdT::lanes());
assert!(prev.len() >= SimdT::lanes()); // The prev and fut array must have contiguous last dimension
// The prev and fut array must have contiguous last dimension assert_eq!(prev.strides()[1], 1);
assert_eq!(prev.strides()[1], 1); assert_eq!(fut.strides()[1], 1);
assert_eq!(fut.strides()[1], 1);
let nx = prev.len_of(Axis(1)); let nx = prev.len_of(Axis(1));
let dx = 1.0 / (nx - 1) as Float; let dx = 1.0 / (nx - 1) as Float;
let idx = 1.0 / dx; let idx = 1.0 / dx;
for j in 0..prev.len_of(Axis(0)) { for j in 0..prev.len_of(Axis(0)) {
use std::slice; use std::slice;
let prev = let prev = unsafe { slice::from_raw_parts(prev.uget((j, 0)) as *const Float, nx) };
unsafe { slice::from_raw_parts(prev.uget((j, 0)) as *const Float, nx) }; let fut =
let fut = unsafe { unsafe { slice::from_raw_parts_mut(fut.uget_mut((j, 0)) as *mut Float, nx) };
slice::from_raw_parts_mut(fut.uget_mut((j, 0)) as *mut Float, nx)
};
let first_elems = unsafe { SimdT::from_slice_unaligned_unchecked(prev) }; let first_elems = unsafe { SimdT::from_slice_unaligned_unchecked(prev) };
let block = { let block = {
let bl = $BLOCK; let bl = $BLOCK;
[ [
SimdT::new(
bl[0][0], bl[0][1], bl[0][2], bl[0][3], bl[0][4], bl[0][5],
bl[0][6], 0.0,
),
SimdT::new(
bl[1][0], bl[1][1], bl[1][2], bl[1][3], bl[1][4], bl[1][5],
bl[1][6], 0.0,
),
SimdT::new(
bl[2][0], bl[2][1], bl[2][2], bl[2][3], bl[2][4], bl[2][5],
bl[2][6], 0.0,
),
SimdT::new(
bl[3][0], bl[3][1], bl[3][2], bl[3][3], bl[3][4], bl[3][5],
bl[3][6], 0.0,
),
]
};
fut[0] = idx * (block[0] * first_elems).sum();
fut[1] = idx * (block[1] * first_elems).sum();
fut[2] = idx * (block[2] * first_elems).sum();
fut[3] = idx * (block[3] * first_elems).sum();
let diag = {
let diag = $DIAG;
SimdT::new( SimdT::new(
diag[0], diag[1], diag[2], diag[3], diag[4], diag[5], diag[6], 0.0, bl[0][0], bl[0][1], bl[0][2], bl[0][3], bl[0][4], bl[0][5], bl[0][6],
) 0.0,
}; ),
for i in 4..nx - 4 { SimdT::new(
unsafe { bl[1][0], bl[1][1], bl[1][2], bl[1][3], bl[1][4], bl[1][5], bl[1][6],
let prev = SimdT::from_slice_unaligned_unchecked(&prev[i - 3..]); 0.0,
*fut.get_unchecked_mut(i) = idx * (prev * diag).sum(); ),
} SimdT::new(
} bl[2][0], bl[2][1], bl[2][2], bl[2][3], bl[2][4], bl[2][5], bl[2][6],
0.0,
),
SimdT::new(
bl[3][0], bl[3][1], bl[3][2], bl[3][3], bl[3][4], bl[3][5], bl[3][6],
0.0,
),
]
};
fut[0] = idx * (block[0] * first_elems).sum();
fut[1] = idx * (block[1] * first_elems).sum();
fut[2] = idx * (block[2] * first_elems).sum();
fut[3] = idx * (block[3] * first_elems).sum();
let last_elems = let diag = {
unsafe { SimdT::from_slice_unaligned_unchecked(&prev[nx - 8..]) } let diag = $DIAG;
.shuffle1_dyn([7, 6, 5, 4, 3, 2, 1, 0].into()); SimdT::new(
if $symmetric { diag[0], diag[1], diag[2], diag[3], diag[4], diag[5], diag[6], 0.0,
fut[nx - 4] = idx * (block[3] * last_elems).sum(); )
fut[nx - 3] = idx * (block[2] * last_elems).sum(); };
fut[nx - 2] = idx * (block[1] * last_elems).sum(); for i in 4..nx - 4 {
fut[nx - 1] = idx * (block[0] * last_elems).sum(); unsafe {
} else { let prev = SimdT::from_slice_unaligned_unchecked(&prev[i - 3..]);
fut[nx - 4] = -idx * (block[3] * last_elems).sum(); *fut.get_unchecked_mut(i) = idx * (prev * diag).sum();
fut[nx - 3] = -idx * (block[2] * last_elems).sum();
fut[nx - 2] = -idx * (block[1] * last_elems).sum();
fut[nx - 1] = -idx * (block[0] * last_elems).sum();
} }
} }
let last_elems = unsafe { SimdT::from_slice_unaligned_unchecked(&prev[nx - 8..]) }
.shuffle1_dyn([7, 6, 5, 4, 3, 2, 1, 0].into());
if $symmetric {
fut[nx - 4] = idx * (block[3] * last_elems).sum();
fut[nx - 3] = idx * (block[2] * last_elems).sum();
fut[nx - 2] = idx * (block[1] * last_elems).sum();
fut[nx - 1] = idx * (block[0] * last_elems).sum();
} else {
fut[nx - 4] = -idx * (block[3] * last_elems).sum();
fut[nx - 3] = -idx * (block[2] * last_elems).sum();
fut[nx - 2] = -idx * (block[1] * last_elems).sum();
fut[nx - 1] = -idx * (block[0] * last_elems).sum();
}
} }
} }
}; };
} }
diff_simd_row_7_47!(Upwind4, diff_simd_row, Upwind4::BLOCK, Upwind4::DIAG, false); diff_simd_row_7_47!(diff_simd_row, Upwind4::BLOCK, Upwind4::DIAG, false);
diff_simd_row_7_47!( diff_simd_row_7_47!(diss_simd_row, Upwind4::DISS_BLOCK, Upwind4::DISS_DIAG, true);
Upwind4,
diss_simd_row,
Upwind4::DISS_BLOCK,
Upwind4::DISS_DIAG,
true
);
macro_rules! diff_simd_col_7_47 { macro_rules! diff_simd_col_7_47 {
($self: ident, $name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => { ($name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => {
impl $self { #[inline(never)]
#[inline(never)] fn $name(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) {
fn $name(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) { use std::slice;
use std::slice; assert_eq!(prev.shape(), fut.shape());
assert_eq!(prev.shape(), fut.shape()); assert_eq!(prev.stride_of(Axis(0)), 1);
assert_eq!(prev.stride_of(Axis(0)), 1); assert_eq!(fut.stride_of(Axis(0)), 1);
assert_eq!(fut.stride_of(Axis(0)), 1); let ny = prev.len_of(Axis(0));
let ny = prev.len_of(Axis(0)); let nx = prev.len_of(Axis(1));
let nx = prev.len_of(Axis(1)); assert!(nx >= 2 * $BLOCK.len());
assert!(nx >= 2 * $BLOCK.len()); assert!(ny >= SimdT::lanes());
assert!(ny >= SimdT::lanes()); assert!(ny % SimdT::lanes() == 0);
assert!(ny % SimdT::lanes() == 0);
let dx = 1.0 / (nx - 1) as Float; let dx = 1.0 / (nx - 1) as Float;
let idx = 1.0 / dx; let idx = 1.0 / dx;
for j in (0..ny).step_by(SimdT::lanes()) { for j in (0..ny).step_by(SimdT::lanes()) {
let a = unsafe { let a = unsafe {
[ [
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 0)) as *const Float, prev.uget((j, 0)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 1)) as *const Float, prev.uget((j, 1)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 2)) as *const Float, prev.uget((j, 2)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 3)) as *const Float, prev.uget((j, 3)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 4)) as *const Float, prev.uget((j, 4)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 5)) as *const Float, prev.uget((j, 5)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 6)) as *const Float, prev.uget((j, 6)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
] ]
}; };
for (i, bl) in $BLOCK.iter().enumerate() { for (i, bl) in $BLOCK.iter().enumerate() {
let b = idx let b = idx
* (a[0] * bl[0] * (a[0] * bl[0]
+ a[1] * bl[1] + a[1] * bl[1]
+ a[2] * bl[2] + a[2] * bl[2]
+ a[3] * bl[3] + a[3] * bl[3]
+ a[4] * bl[4] + a[4] * bl[4]
+ a[5] * bl[5] + a[5] * bl[5]
+ a[6] * bl[6]); + a[6] * bl[6]);
unsafe { unsafe {
b.write_to_slice_unaligned(slice::from_raw_parts_mut( b.write_to_slice_unaligned(slice::from_raw_parts_mut(
fut.uget_mut((j, i)) as *mut Float, fut.uget_mut((j, i)) as *mut Float,
SimdT::lanes(), SimdT::lanes(),
)); ));
}
} }
}
let mut a = a; let mut a = a;
for i in $BLOCK.len()..nx - $BLOCK.len() { for i in $BLOCK.len()..nx - $BLOCK.len() {
// Push a onto circular buffer // Push a onto circular buffer
a = [a[1], a[2], a[3], a[4], a[5], a[6], unsafe { a = [a[1], a[2], a[3], a[4], a[5], a[6], unsafe {
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, i + 3)) as *const Float, prev.uget((j, i + 3)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)) ))
}]; }];
let b = idx let b = idx
* (a[0] * $DIAG[0] * (a[0] * $DIAG[0]
+ a[1] * $DIAG[1] + a[1] * $DIAG[1]
+ a[2] * $DIAG[2] + a[2] * $DIAG[2]
+ a[3] * $DIAG[3] + a[3] * $DIAG[3]
+ a[4] * $DIAG[4] + a[4] * $DIAG[4]
+ a[5] * $DIAG[5] + a[5] * $DIAG[5]
+ a[6] * $DIAG[6]); + a[6] * $DIAG[6]);
unsafe { unsafe {
b.write_to_slice_unaligned(slice::from_raw_parts_mut( b.write_to_slice_unaligned(slice::from_raw_parts_mut(
fut.uget_mut((j, i)) as *mut Float, fut.uget_mut((j, i)) as *mut Float,
SimdT::lanes(), SimdT::lanes(),
)); ));
}
} }
}
let a = unsafe { let a = unsafe {
[ [
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 1)) as *const Float, prev.uget((j, nx - 1)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 2)) as *const Float, prev.uget((j, nx - 2)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 3)) as *const Float, prev.uget((j, nx - 3)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 4)) as *const Float, prev.uget((j, nx - 4)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 5)) as *const Float, prev.uget((j, nx - 5)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 6)) as *const Float, prev.uget((j, nx - 6)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
SimdT::from_slice_unaligned(slice::from_raw_parts( SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 7)) as *const Float, prev.uget((j, nx - 7)) as *const Float,
SimdT::lanes(), SimdT::lanes(),
)), )),
] ]
}; };
for (i, bl) in $BLOCK.iter().enumerate() { for (i, bl) in $BLOCK.iter().enumerate() {
let idx = if $symmetric { idx } else { -idx }; let idx = if $symmetric { idx } else { -idx };
let b = idx let b = idx
* (a[0] * bl[0] * (a[0] * bl[0]
+ a[1] * bl[1] + a[1] * bl[1]
+ a[2] * bl[2] + a[2] * bl[2]
+ a[3] * bl[3] + a[3] * bl[3]
+ a[4] * bl[4] + a[4] * bl[4]
+ a[5] * bl[5] + a[5] * bl[5]
+ a[6] * bl[6]); + a[6] * bl[6]);
unsafe { unsafe {
b.write_to_slice_unaligned(slice::from_raw_parts_mut( b.write_to_slice_unaligned(slice::from_raw_parts_mut(
fut.uget_mut((j, nx - 1 - i)) as *mut Float, fut.uget_mut((j, nx - 1 - i)) as *mut Float,
SimdT::lanes(), SimdT::lanes(),
)); ));
}
} }
} }
} }
@ -264,14 +245,8 @@ macro_rules! diff_simd_col_7_47 {
}; };
} }
diff_simd_col_7_47!(Upwind4, diff_simd_col, Upwind4::BLOCK, Upwind4::DIAG, false); diff_simd_col_7_47!(diff_simd_col, Upwind4::BLOCK, Upwind4::DIAG, false);
diff_simd_col_7_47!( diff_simd_col_7_47!(diss_simd_col, Upwind4::DISS_BLOCK, Upwind4::DISS_DIAG, true);
Upwind4,
diss_simd_col,
Upwind4::DISS_BLOCK,
Upwind4::DISS_DIAG,
true
);
impl Upwind4 { impl Upwind4 {
#[rustfmt::skip] #[rustfmt::skip]
@ -305,32 +280,30 @@ impl Upwind4 {
} }
impl SbpOperator for Upwind4 { impl SbpOperator for Upwind4 {
fn diff1d(prev: ArrayView1<Float>, fut: ArrayViewMut1<Float>) {
diff_1d(prev, fut)
}
fn diffxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) { fn diffxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) {
assert_eq!(prev.shape(), fut.shape()); assert_eq!(prev.shape(), fut.shape());
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len()); assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
match (prev.strides(), fut.strides()) { match (prev.strides(), fut.strides()) {
([_, 1], [_, 1]) => { ([_, 1], [_, 1]) => {
Self::diff_simd_row(prev, fut); diff_simd_row(prev, fut);
} }
([1, _], [1, _]) if prev.len_of(Axis(0)) % SimdT::lanes() == 0 => { ([1, _], [1, _]) if prev.len_of(Axis(0)) % SimdT::lanes() == 0 => {
Self::diff_simd_col(prev, fut); diff_simd_col(prev, fut);
} }
([_, _], [_, _]) => { ([_, _], [_, _]) => {
// Fallback, work row by row // Fallback, work row by row
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) { for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diff_1d(r0, r1); Self::diff1d(r0, r1);
} }
} }
_ => unreachable!("Should only be two elements in the strides vectors"), _ => unreachable!("Should only be two elements in the strides vectors"),
} }
} }
fn diffeta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>) {
// transpose then use diffxi
Self::diffxi(prev.reversed_axes(), fut.reversed_axes());
}
fn h() -> &'static [Float] { fn h() -> &'static [Float] {
Self::HBLOCK Self::HBLOCK
} }
@ -350,7 +323,7 @@ fn upwind4_test() {
target[i] = 1.0; target[i] = 1.0;
} }
res.fill(0.0); res.fill(0.0);
Upwind4::diff_1d(source.view(), res.view_mut()); Upwind4::diff1d(source.view(), res.view_mut());
approx::assert_abs_diff_eq!(&res, &target, epsilon = 1e-4); approx::assert_abs_diff_eq!(&res, &target, epsilon = 1e-4);
{ {
let source = source.to_owned().insert_axis(ndarray::Axis(0)); let source = source.to_owned().insert_axis(ndarray::Axis(0));
@ -376,7 +349,7 @@ fn upwind4_test() {
target[i] = 2.0 * x; target[i] = 2.0 * x;
} }
res.fill(0.0); res.fill(0.0);
Upwind4::diff_1d(source.view(), res.view_mut()); Upwind4::diff1d(source.view(), res.view_mut());
approx::assert_abs_diff_eq!(&res, &target, epsilon = 1e-4); approx::assert_abs_diff_eq!(&res, &target, epsilon = 1e-4);
{ {
let source = source.to_owned().insert_axis(ndarray::Axis(0)); let source = source.to_owned().insert_axis(ndarray::Axis(0));
@ -402,7 +375,7 @@ fn upwind4_test() {
target[i] = 3.0 * x * x; target[i] = 3.0 * x * x;
} }
res.fill(0.0); res.fill(0.0);
Upwind4::diff_1d(source.view(), res.view_mut()); Upwind4::diff1d(source.view(), res.view_mut());
approx::assert_abs_diff_eq!(&res, &target, epsilon = 1e-2); approx::assert_abs_diff_eq!(&res, &target, epsilon = 1e-2);
{ {
@ -425,31 +398,29 @@ fn upwind4_test() {
} }
impl UpwindOperator for Upwind4 { impl UpwindOperator for Upwind4 {
fn diss1d(prev: ArrayView1<Float>, fut: ArrayViewMut1<Float>) {
diss_1d(prev, fut)
}
fn dissxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) { fn dissxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) {
assert_eq!(prev.shape(), fut.shape()); assert_eq!(prev.shape(), fut.shape());
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len()); assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
match (prev.strides(), fut.strides()) { match (prev.strides(), fut.strides()) {
([_, 1], [_, 1]) => { ([_, 1], [_, 1]) => {
Self::diss_simd_row(prev, fut); diss_simd_row(prev, fut);
} }
([1, _], [1, _]) if prev.len_of(Axis(0)) % SimdT::lanes() == 0 => { ([1, _], [1, _]) if prev.len_of(Axis(0)) % SimdT::lanes() == 0 => {
Self::diss_simd_col(prev, fut); diss_simd_col(prev, fut);
} }
([_, _], [_, _]) => { ([_, _], [_, _]) => {
// Fallback, work row by row // Fallback, work row by row
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) { for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diss_1d(r0, r1); Self::diss1d(r0, r1);
} }
} }
_ => unreachable!("Should only be two elements in the strides vectors"), _ => unreachable!("Should only be two elements in the strides vectors"),
} }
} }
fn disseta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>) {
// diffeta = transpose then use dissxi
Self::dissxi(prev.reversed_axes(), fut.reversed_axes());
}
} }
#[test] #[test]

View File

@ -1,19 +1,13 @@
use super::{SbpOperator, UpwindOperator}; use super::{SbpOperator, UpwindOperator};
use crate::diff_op_1d; use crate::diff_op_1d;
use crate::Float; use crate::Float;
use ndarray::{s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2}; use ndarray::{s, ArrayView1, ArrayViewMut1};
#[derive(Debug)] #[derive(Debug)]
pub struct Upwind9 {} pub struct Upwind9 {}
diff_op_1d!(Upwind9, diff_1d, Upwind9::BLOCK, Upwind9::DIAG, false); diff_op_1d!(diff_1d, Upwind9::BLOCK, Upwind9::DIAG, false);
diff_op_1d!( diff_op_1d!(diss_1d, Upwind9::DISS_BLOCK, Upwind9::DISS_DIAG, true);
Upwind9,
diss_1d,
Upwind9::DISS_BLOCK,
Upwind9::DISS_DIAG,
true
);
impl Upwind9 { impl Upwind9 {
#[rustfmt::skip] #[rustfmt::skip]
@ -55,18 +49,8 @@ impl Upwind9 {
} }
impl SbpOperator for Upwind9 { impl SbpOperator for Upwind9 {
fn diffxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) { fn diff1d(prev: ArrayView1<Float>, fut: ArrayViewMut1<Float>) {
assert_eq!(prev.shape(), fut.shape()); diff_1d(prev, fut)
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diff_1d(r0, r1);
}
}
fn diffeta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>) {
// transpose then use diffxi
Self::diffxi(prev.reversed_axes(), fut.reversed_axes());
} }
fn h() -> &'static [Float] { fn h() -> &'static [Float] {
@ -75,18 +59,8 @@ impl SbpOperator for Upwind9 {
} }
impl UpwindOperator for Upwind9 { impl UpwindOperator for Upwind9 {
fn dissxi(prev: ArrayView2<Float>, mut fut: ArrayViewMut2<Float>) { fn diss1d(prev: ArrayView1<Float>, fut: ArrayViewMut1<Float>) {
assert_eq!(prev.shape(), fut.shape()); diss_1d(prev, fut)
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diss_1d(r0, r1);
}
}
fn disseta(prev: ArrayView2<Float>, fut: ArrayViewMut2<Float>) {
// diffeta = transpose then use dissxi
Self::dissxi(prev.reversed_axes(), fut.reversed_axes());
} }
} }