upwind operators
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
use super::SbpOperator;
|
||||
use super::{SbpOperator, UpwindOperator};
|
||||
use ndarray::{arr1, arr2, s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2};
|
||||
|
||||
/// Simdtype used in diffeta_simd
|
||||
@@ -23,6 +23,55 @@ impl Upwind4 {
|
||||
[ 3.0 / 298.0, 16.0 / 149.0, -227.0 / 298.0, 0.0, 126.0 / 149.0, -36.0 / 149.0, 6.0 / 149.0],
|
||||
];
|
||||
|
||||
const DISS_BLOCK: [[f32; 7]; 4] = [
|
||||
[
|
||||
-3.0 / 49.0,
|
||||
9.0 / 49.0,
|
||||
-9.0 / 49.0,
|
||||
3.0 / 49.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
[
|
||||
3.0 / 61.0,
|
||||
-11.0 / 61.0,
|
||||
15.0 / 61.0,
|
||||
-9.0 / 61.0,
|
||||
2.0 / 61.0,
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
[
|
||||
-3.0 / 41.0,
|
||||
15.0 / 41.0,
|
||||
-29.0 / 41.0,
|
||||
27.0 / 41.0,
|
||||
-12.0 / 41.0,
|
||||
2.0 / 41.0,
|
||||
0.0,
|
||||
],
|
||||
[
|
||||
3.0 / 149.0,
|
||||
-27.0 / 149.0,
|
||||
81.0 / 149.0,
|
||||
-117.0 / 149.0,
|
||||
90.0 / 149.0,
|
||||
-36.0 / 149.0,
|
||||
6.0 / 149.0,
|
||||
],
|
||||
];
|
||||
|
||||
const DISS_DIAG: [f32; 7] = [
|
||||
1.0 / 24.0,
|
||||
-1.0 / 4.0,
|
||||
5.0 / 8.0,
|
||||
-5.0 / 6.0,
|
||||
5.0 / 8.0,
|
||||
-1.0 / 4.0,
|
||||
1.0 / 24.0,
|
||||
];
|
||||
|
||||
#[inline(never)]
|
||||
fn diff_simd(prev: &[f32], fut: &mut [f32]) {
|
||||
use packed_simd::{f32x8, u32x8};
|
||||
@@ -235,6 +284,218 @@ impl Upwind4 {
|
||||
*f = diff * idx;
|
||||
}
|
||||
}
|
||||
#[inline(never)]
|
||||
fn diss_simd(prev: &[f32], fut: &mut [f32]) {
|
||||
use packed_simd::{f32x8, u32x8};
|
||||
assert_eq!(prev.len(), fut.len());
|
||||
assert!(prev.len() >= 2 * Self::DISS_BLOCK.len());
|
||||
let nx = prev.len();
|
||||
let dx = 1.0 / (nx - 1) as f32;
|
||||
let idx = 1.0 / dx;
|
||||
|
||||
let first_elems = unsafe { f32x8::from_slice_unaligned_unchecked(prev) };
|
||||
let block = [
|
||||
f32x8::new(
|
||||
Self::DISS_BLOCK[0][0],
|
||||
Self::DISS_BLOCK[0][1],
|
||||
Self::DISS_BLOCK[0][2],
|
||||
Self::DISS_BLOCK[0][3],
|
||||
Self::DISS_BLOCK[0][4],
|
||||
Self::DISS_BLOCK[0][5],
|
||||
Self::DISS_BLOCK[0][6],
|
||||
0.0,
|
||||
),
|
||||
f32x8::new(
|
||||
Self::DISS_BLOCK[1][0],
|
||||
Self::DISS_BLOCK[1][1],
|
||||
Self::DISS_BLOCK[1][2],
|
||||
Self::DISS_BLOCK[1][3],
|
||||
Self::DISS_BLOCK[1][4],
|
||||
Self::DISS_BLOCK[1][5],
|
||||
Self::DISS_BLOCK[1][6],
|
||||
0.0,
|
||||
),
|
||||
f32x8::new(
|
||||
Self::DISS_BLOCK[2][0],
|
||||
Self::DISS_BLOCK[2][1],
|
||||
Self::DISS_BLOCK[2][2],
|
||||
Self::DISS_BLOCK[2][3],
|
||||
Self::DISS_BLOCK[2][4],
|
||||
Self::DISS_BLOCK[2][5],
|
||||
Self::DISS_BLOCK[2][6],
|
||||
0.0,
|
||||
),
|
||||
f32x8::new(
|
||||
Self::DISS_BLOCK[3][0],
|
||||
Self::DISS_BLOCK[3][1],
|
||||
Self::DISS_BLOCK[3][2],
|
||||
Self::DISS_BLOCK[3][3],
|
||||
Self::DISS_BLOCK[3][4],
|
||||
Self::DISS_BLOCK[3][5],
|
||||
Self::DISS_BLOCK[3][6],
|
||||
0.0,
|
||||
),
|
||||
];
|
||||
unsafe {
|
||||
*fut.get_unchecked_mut(0) = idx * (block[0] * first_elems).sum();
|
||||
*fut.get_unchecked_mut(1) = idx * (block[1] * first_elems).sum();
|
||||
*fut.get_unchecked_mut(2) = idx * (block[2] * first_elems).sum();
|
||||
*fut.get_unchecked_mut(3) = idx * (block[3] * first_elems).sum()
|
||||
};
|
||||
|
||||
let diag = f32x8::new(
|
||||
Self::DISS_DIAG[0],
|
||||
Self::DISS_DIAG[1],
|
||||
Self::DISS_DIAG[2],
|
||||
Self::DISS_DIAG[3],
|
||||
Self::DISS_DIAG[4],
|
||||
Self::DISS_DIAG[5],
|
||||
Self::DISS_DIAG[6],
|
||||
0.0,
|
||||
);
|
||||
for (f, p) in fut
|
||||
.iter_mut()
|
||||
.skip(block.len())
|
||||
.zip(
|
||||
prev.windows(f32x8::lanes())
|
||||
.map(f32x8::from_slice_unaligned)
|
||||
.skip(1),
|
||||
)
|
||||
.take(nx - 2 * block.len())
|
||||
{
|
||||
*f = idx * (p * diag).sum();
|
||||
}
|
||||
|
||||
let last_elems = unsafe { f32x8::from_slice_unaligned_unchecked(&prev[nx - 8..]) }
|
||||
.shuffle1_dyn(u32x8::new(7, 6, 5, 4, 3, 2, 1, 0));
|
||||
unsafe {
|
||||
*fut.get_unchecked_mut(nx - 4) = idx * (block[3] * last_elems).sum();
|
||||
*fut.get_unchecked_mut(nx - 3) = idx * (block[2] * last_elems).sum();
|
||||
*fut.get_unchecked_mut(nx - 2) = idx * (block[1] * last_elems).sum();
|
||||
*fut.get_unchecked_mut(nx - 1) = idx * (block[0] * last_elems).sum();
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn disseta_simd(prev: &[f32], fut: &mut [f32], nx: usize, ny: usize) {
|
||||
assert!(ny >= 2 * Self::DISS_BLOCK.len());
|
||||
assert!(nx >= SimdT::lanes());
|
||||
assert!(nx % SimdT::lanes() == 0);
|
||||
assert_eq!(prev.len(), fut.len());
|
||||
assert_eq!(prev.len(), nx * ny);
|
||||
|
||||
let dy = 1.0 / (ny - 1) as f32;
|
||||
let idy = 1.0 / dy;
|
||||
|
||||
for j in (0..nx).step_by(SimdT::lanes()) {
|
||||
let a = [
|
||||
SimdT::from_slice_unaligned(&prev[0 * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[1 * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[2 * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[3 * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[4 * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[5 * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[6 * nx + j..]),
|
||||
];
|
||||
|
||||
for (i, bl) in Self::DISS_BLOCK.iter().enumerate() {
|
||||
let b = idy
|
||||
* (a[0] * bl[0]
|
||||
+ a[1] * bl[1]
|
||||
+ a[2] * bl[2]
|
||||
+ a[3] * bl[3]
|
||||
+ a[4] * bl[4]
|
||||
+ a[5] * bl[5]
|
||||
+ a[6] * bl[6]);
|
||||
b.write_to_slice_unaligned(&mut fut[i * nx + j..]);
|
||||
}
|
||||
|
||||
let mut a = a;
|
||||
for i in Self::DISS_BLOCK.len()..ny - Self::DISS_BLOCK.len() {
|
||||
// Push a onto circular buffer
|
||||
a = [
|
||||
a[1],
|
||||
a[2],
|
||||
a[3],
|
||||
a[4],
|
||||
a[5],
|
||||
a[6],
|
||||
SimdT::from_slice_unaligned(&prev[nx * (i + 3) + j..]),
|
||||
];
|
||||
let b = idy
|
||||
* (a[0] * Self::DISS_DIAG[0]
|
||||
+ a[1] * Self::DISS_DIAG[1]
|
||||
+ a[2] * Self::DISS_DIAG[2]
|
||||
+ a[3] * Self::DISS_DIAG[3]
|
||||
+ a[4] * Self::DISS_DIAG[4]
|
||||
+ a[5] * Self::DISS_DIAG[5]
|
||||
+ a[6] * Self::DISS_DIAG[6]);
|
||||
b.write_to_slice_unaligned(&mut fut[nx * i + j..]);
|
||||
}
|
||||
|
||||
let a = [
|
||||
SimdT::from_slice_unaligned(&prev[(ny - 1) * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[(ny - 2) * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[(ny - 3) * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[(ny - 4) * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[(ny - 5) * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[(ny - 6) * nx + j..]),
|
||||
SimdT::from_slice_unaligned(&prev[(ny - 7) * nx + j..]),
|
||||
];
|
||||
|
||||
for (i, bl) in Self::DISS_BLOCK.iter().enumerate() {
|
||||
let b = idy
|
||||
* (a[0] * bl[0]
|
||||
+ a[1] * bl[1]
|
||||
+ a[2] * bl[2]
|
||||
+ a[3] * bl[3]
|
||||
+ a[4] * bl[4]
|
||||
+ a[5] * bl[5]
|
||||
+ a[6] * bl[6]);
|
||||
b.write_to_slice_unaligned(&mut fut[(ny - 1 - i) * nx + j..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn diss(prev: ArrayView1<f32>, mut fut: ArrayViewMut1<f32>) {
|
||||
assert_eq!(prev.shape(), fut.shape());
|
||||
let nx = prev.shape()[0];
|
||||
assert!(nx >= 2 * Self::DISS_BLOCK.len());
|
||||
|
||||
if let (Some(p), Some(f)) = (prev.as_slice(), fut.as_slice_mut()) {
|
||||
Self::diss_simd(p, f);
|
||||
return;
|
||||
}
|
||||
|
||||
let dx = 1.0 / (nx - 1) as f32;
|
||||
let idx = 1.0 / dx;
|
||||
|
||||
let diag = arr1(&Self::DISS_DIAG);
|
||||
let block = arr2(&Self::DISS_BLOCK);
|
||||
|
||||
let first_elems = prev.slice(s!(..7));
|
||||
for (bl, f) in block.outer_iter().zip(&mut fut) {
|
||||
let diff = first_elems.dot(&bl);
|
||||
*f = diff * idx;
|
||||
}
|
||||
|
||||
for (window, f) in prev
|
||||
.windows(diag.len())
|
||||
.into_iter()
|
||||
.skip(1)
|
||||
.zip(fut.iter_mut().skip(4))
|
||||
.take(nx - 8)
|
||||
{
|
||||
let diff = diag.dot(&window);
|
||||
*f = diff * idx;
|
||||
}
|
||||
|
||||
let last_elems = prev.slice(s!(nx - 7..;-1));
|
||||
for (bl, f) in block.outer_iter().zip(&mut fut.slice_mut(s![nx - 4..;-1])) {
|
||||
let diff = bl.dot(&last_elems);
|
||||
*f = diff * idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SbpOperator for Upwind4 {
|
||||
@@ -353,3 +614,28 @@ fn upwind4_test() {
|
||||
approx::assert_abs_diff_eq!(&res.to_owned(), &target.to_owned(), epsilon = 1e-2);
|
||||
}
|
||||
}
|
||||
|
||||
impl UpwindOperator for Upwind4 {
|
||||
fn dissxi(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
|
||||
assert_eq!(prev.shape(), fut.shape());
|
||||
assert!(prev.shape()[1] >= 2 * Self::DISS_BLOCK.len());
|
||||
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
|
||||
Self::diss(r0, r1)
|
||||
}
|
||||
}
|
||||
|
||||
fn disseta(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
|
||||
assert_eq!(prev.shape(), fut.shape());
|
||||
assert!(prev.shape()[0] >= 2 * Self::DISS_BLOCK.len());
|
||||
let nx = prev.shape()[1];
|
||||
let ny = prev.shape()[0];
|
||||
if nx >= SimdT::lanes() && nx % SimdT::lanes() == 0 {
|
||||
if let (Some(p), Some(f)) = (prev.as_slice(), fut.as_slice_mut()) {
|
||||
Self::disseta_simd(p, f, nx, ny);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// diffeta = transpose then use diffxi
|
||||
Self::dissxi(prev.reversed_axes(), fut.reversed_axes());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user