use workspaces

This commit is contained in:
Magnus Ulimoen
2020-01-30 18:28:22 +01:00
parent 4ac1ad0d2c
commit 264b483aef
23 changed files with 185 additions and 169 deletions

View File

@@ -0,0 +1,45 @@
use super::SbpOperator;
use crate::diff_op_1d;
use ndarray::{s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2};
/// Marker type for the `SBP4` operator.
///
/// The struct has no fields; everything is exposed through the associated
/// constants below and the associated functions of its `SbpOperator` impl.
pub struct SBP4 {}
// Instantiates the crate-level `diff_op_1d!` macro (defined outside this file)
// with the boundary block and interior stencil declared below. Presumably this
// is what provides `SBP4::diff_1d`, which `diffxi` calls for every lane.
// NOTE(review): the meaning of the trailing `false` flag is defined by the
// macro -- confirm against its definition.
diff_op_1d!(SBP4, diff_1d, SBP4::BLOCK, SBP4::DIAG, false);
impl SBP4 {
// Four values returned verbatim by `SbpOperator::h`.
// NOTE(review): presumably the boundary entries of the quadrature (norm)
// matrix of the scheme -- confirm against the operator's reference.
#[rustfmt::skip]
const HBLOCK: &'static [f32] = &[
17.0 / 48.0, 59.0 / 48.0, 43.0 / 48.0, 49.0 / 48.0,
];
// Five-point stencil handed to `diff_op_1d!`; the coefficients are
// antisymmetric around the zero centre entry.
#[rustfmt::skip]
const DIAG: &'static [f32] = &[
1.0 / 12.0, -2.0 / 3.0, 0.0, 2.0 / 3.0, -1.0 / 12.0,
];
// Four rows of six coefficients handed to `diff_op_1d!` as the boundary
// block. `diffxi` requires at least `2 * BLOCK.len()` (= 8) points along the
// differentiated axis.
#[rustfmt::skip]
const BLOCK: &'static [[f32; 6]] = &[
[-1.41176470588235e+00, 1.73529411764706e+00, -2.35294117647059e-01, -8.82352941176471e-02, 0.00000000000000e+00, 0.00000000000000e+00],
[-5.00000000000000e-01, 0.00000000000000e+00, 5.00000000000000e-01, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[9.30232558139535e-02, -6.86046511627907e-01, 0.00000000000000e+00, 6.86046511627907e-01, -9.30232558139535e-02, 0.00000000000000e+00],
[3.06122448979592e-02, 0.00000000000000e+00, -6.02040816326531e-01, 0.00000000000000e+00, 6.53061224489796e-01, -8.16326530612245e-02],
];
}
impl SbpOperator for SBP4 {
fn diffxi(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
assert_eq!(prev.shape(), fut.shape());
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diff_1d(r0, r1);
}
}
fn diffeta(prev: ArrayView2<f32>, fut: ArrayViewMut2<f32>) {
// transpose then use diffxi
Self::diffxi(prev.reversed_axes(), fut.reversed_axes());
}
fn h() -> &'static [f32] {
Self::HBLOCK
}
}

View File

@@ -0,0 +1,49 @@
use super::SbpOperator;
use crate::diff_op_1d;
use ndarray::{s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2};
/// Marker type for the `SBP8` operator.
///
/// The struct has no fields; everything is exposed through the associated
/// constants below and the associated functions of its `SbpOperator` impl.
pub struct SBP8 {}
// Instantiates the crate-level `diff_op_1d!` macro (defined outside this file)
// with the boundary block and interior stencil declared below. Presumably this
// is what provides `SBP8::diff_1d`, which `diffxi` calls for every lane.
// NOTE(review): the meaning of the trailing `false` flag is defined by the
// macro -- confirm against its definition.
diff_op_1d!(SBP8, diff_1d, SBP8::BLOCK, SBP8::DIAG, false);
impl SBP8 {
// Eight values returned verbatim by `SbpOperator::h`.
// NOTE(review): presumably the boundary entries of the quadrature (norm)
// matrix of the scheme -- confirm against the operator's reference.
#[rustfmt::skip]
const HBLOCK: &'static [f32] = &[
2.94890676177879e-01, 1.52572062389771e+00, 2.57452876984127e-01, 1.79811370149912e+00, 4.12708057760141e-01, 1.27848462301587e+00, 9.23295579805997e-01, 1.00933386085916e+00
];
// Nine-point stencil handed to `diff_op_1d!`; the coefficients are
// antisymmetric around the zero centre entry.
#[rustfmt::skip]
const DIAG: &'static [f32] = &[
3.57142857142857e-03, -3.80952380952381e-02, 2.00000000000000e-01, -8.00000000000000e-01, -0.00000000000000e+00, 8.00000000000000e-01, -2.00000000000000e-01, 3.80952380952381e-02, -3.57142857142857e-03
];
// Eight rows of twelve coefficients handed to `diff_op_1d!` as the boundary
// block. `diffxi` requires at least `2 * BLOCK.len()` (= 16) points along the
// differentiated axis.
#[rustfmt::skip]
const BLOCK: &'static [[f32; 12]] = &[
[-1.69554360443190e+00, 2.24741246341404e+00, -3.38931922601500e-02, -7.81028168126749e-01, 2.54881486107905e-02, 3.43865227388873e-01, -8.62858162633335e-02, -2.00150583315761e-02, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[-4.34378988266985e-01, 0.00000000000000e+00, 9.18511925072956e-02, 4.94008626807984e-01, -2.46151762937235e-02, -1.86759403432935e-01, 5.27267838475813e-02, 7.16696483080115e-03, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[3.88218088704253e-02, -5.44329744454984e-01, 0.00000000000000e+00, 3.89516189693211e-01, 1.36433486528546e-01, 1.03290582800845e-01, -1.79720579323281e-01, 5.59882558852296e-02, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[1.28088632226564e-01, -4.19172130036008e-01, -5.57707021445779e-02, 0.00000000000000e+00, 1.24714160903055e-01, 2.81285212519100e-01, -3.94470423942641e-02, -1.96981310738430e-02, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[-1.82119472519009e-02, 9.09986646154550e-02, -8.51090570277506e-02, -5.43362886365301e-01, 0.00000000000000e+00, 6.37392455438558e-01, -1.02950081118829e-01, 2.98964956216039e-02, -8.65364391190110e-03, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[-7.93147196245203e-02, 2.22875323171502e-01, -2.07999824391436e-02, -3.95611167748401e-01, -2.05756876210586e-01, 0.00000000000000e+00, 5.45876519966127e-01, -9.42727926638298e-02, 2.97971812952850e-02, -2.79348574643297e-03, 0.00000000000000e+00, 0.00000000000000e+00],
[2.75587615266177e-02, -8.71295642560637e-02, 5.01135077563584e-02, 7.68229253600969e-02, 4.60181213406519e-02, -7.55873581663580e-01, 0.00000000000000e+00, 8.21713248844682e-01, -2.16615355227872e-01, 4.12600676624518e-02, -3.86813134335486e-03, 0.00000000000000e+00],
[5.84767272160451e-03, -1.08336661209337e-02, -1.42810403117803e-02, 3.50919361287023e-02, -1.22244235731112e-02, 1.19411743193552e-01, -7.51668243727123e-01, 0.00000000000000e+00, 7.92601963555477e-01, -1.98150490888869e-01, 3.77429506454989e-02, -3.53840162301552e-03],
];
}
impl SbpOperator for SBP8 {
fn diffxi(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
assert_eq!(prev.shape(), fut.shape());
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diff_1d(r0, r1);
}
}
fn diffeta(prev: ArrayView2<f32>, fut: ArrayViewMut2<f32>) {
// transpose then use diffxi
Self::diffxi(prev.reversed_axes(), fut.reversed_axes());
}
fn h() -> &'static [f32] {
Self::HBLOCK
}
}

View File

@@ -0,0 +1,455 @@
use super::{SbpOperator, UpwindOperator};
use crate::diff_op_1d;
use ndarray::{s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2, Axis};
/// Marker type for the `Upwind4` operator.
///
/// The struct has no fields; everything is exposed through associated
/// constants and functions (`SbpOperator` / `UpwindOperator` impls plus the
/// macro-generated kernels).
pub struct Upwind4 {}
/// SIMD vector type used by the column kernels generated by
/// `diff_simd_col_7_47!`; its lane count also gates the column fast path in
/// `diffxi` / `dissxi`.
type SimdT = packed_simd::f32x8;
// The two `diff_op_1d!` instantiations below come from a crate-level macro
// defined outside this file. Presumably they provide `Upwind4::diff_1d` and
// `Upwind4::diss_1d`, which are used as the row-by-row fallbacks.
// NOTE(review): the last argument (`false` / `true`) is passed in the same
// position as the `$symmetric` flag of the SIMD macros in this file --
// confirm it has the same meaning in `diff_op_1d!`.
diff_op_1d!(Upwind4, diff_1d, Upwind4::BLOCK, Upwind4::DIAG, false);
diff_op_1d!(
Upwind4,
diss_1d,
Upwind4::DISS_BLOCK,
Upwind4::DISS_DIAG,
true
);
// Generates `$self::$name(prev, fut)`: a SIMD kernel that applies a stencil
// along axis 1 of `prev`, one row (index along axis 0) at a time, writing the
// result into `fut`.
//
// * `$BLOCK`     - boundary rows; the kernel reads rows 0..=3 and columns
//                  0..=6 of it (a 4x7 block), padding each row with a zero in
//                  the eighth SIMD lane.
// * `$DIAG`      - interior stencil; entries 0..=6 are read and padded with a
//                  zero in the eighth lane.
// * `$symmetric` - if `false`, the results at the far boundary are negated.
//
// Every output is scaled by `idx = 1 / dx`, where `dx = 1 / (nx - 1)` and `nx`
// is the length of axis 1.
//
// The generated function panics if the shapes differ, if axis 1 is shorter
// than `2 * $BLOCK.len()`, or if axis 1 of either array does not have stride 1.
macro_rules! diff_simd_row_7_47 {
($self: ident, $name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => {
impl $self {
#[inline(never)]
fn $name(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
use packed_simd::{f32x8, u32x8};
assert_eq!(prev.shape(), fut.shape());
assert!(prev.len_of(Axis(1)) >= 2 * $BLOCK.len());
// NOTE(review): `prev.len()` is the total element count, not the row
// length. The row-length bound (`nx >= 8`) needed by the unchecked loads
// below follows from the previous assertion only when `$BLOCK` has at
// least four rows, which holds for both instantiations below.
assert!(prev.len() >= f32x8::lanes());
// The prev and fut array must have contiguous last dimension
assert_eq!(prev.strides()[1], 1);
assert_eq!(fut.strides()[1], 1);
let nx = prev.len_of(Axis(1));
let dx = 1.0 / (nx - 1) as f32;
let idx = 1.0 / dx;
for j in 0..prev.len_of(Axis(0)) {
use std::slice;
// SAFETY: `j` is below the length of axis 0 and column 0 exists, so
// `(j, 0)` is in bounds; axis 1 has stride 1 (asserted above), so the
// `nx` elements of row `j` are contiguous in memory.
let prev =
unsafe { slice::from_raw_parts(prev.uget((j, 0)) as *const f32, nx) };
// SAFETY: same argument as for `prev`; `fut` has the same shape and its
// axis 1 stride was asserted to be 1.
let fut =
unsafe { slice::from_raw_parts_mut(fut.uget_mut((j, 0)) as *mut f32, nx) };
// SAFETY: relies on the row holding at least 8 elements (see the note on
// the assertions above).
let first_elems = unsafe { f32x8::from_slice_unaligned_unchecked(prev) };
// The four boundary rows, each widened from 7 coefficients to 8 lanes.
let block = {
let bl = $BLOCK;
[
f32x8::new(
bl[0][0], bl[0][1], bl[0][2], bl[0][3], bl[0][4], bl[0][5],
bl[0][6], 0.0,
),
f32x8::new(
bl[1][0], bl[1][1], bl[1][2], bl[1][3], bl[1][4], bl[1][5],
bl[1][6], 0.0,
),
f32x8::new(
bl[2][0], bl[2][1], bl[2][2], bl[2][3], bl[2][4], bl[2][5],
bl[2][6], 0.0,
),
f32x8::new(
bl[3][0], bl[3][1], bl[3][2], bl[3][3], bl[3][4], bl[3][5],
bl[3][6], 0.0,
),
]
};
// Near boundary: each of the first four outputs is a dot product of one
// block row with the first eight inputs.
fut[0] = idx * (block[0] * first_elems).sum();
fut[1] = idx * (block[1] * first_elems).sum();
fut[2] = idx * (block[2] * first_elems).sum();
fut[3] = idx * (block[3] * first_elems).sum();
let diag = {
let diag = $DIAG;
f32x8::new(
diag[0], diag[1], diag[2], diag[3], diag[4], diag[5], diag[6], 0.0,
)
};
// Interior: output `i` (for `i` in `4..nx - 4`) uses the 8-wide window
// starting at input `i - 3`; `skip(1)` drops the window starting at 0 so
// the first window used starts at input 1 and pairs with output 4.
for (f, p) in fut
.iter_mut()
.skip(block.len())
.zip(
prev.windows(f32x8::lanes())
.map(f32x8::from_slice_unaligned)
.skip(1),
)
.take(nx - 2 * block.len())
{
*f = idx * (p * diag).sum();
}
// Far boundary: load the last eight inputs and reverse the lanes so the
// same block rows can be reused mirrored.
// SAFETY: `nx >= 8` (see above), so `nx - 8..` is a valid 8-element tail.
let last_elems =
unsafe { f32x8::from_slice_unaligned_unchecked(&prev[nx - 8..]) }
.shuffle1_dyn(u32x8::new(7, 6, 5, 4, 3, 2, 1, 0));
if $symmetric {
fut[nx - 4] = idx * (block[3] * last_elems).sum();
fut[nx - 3] = idx * (block[2] * last_elems).sum();
fut[nx - 2] = idx * (block[1] * last_elems).sum();
fut[nx - 1] = idx * (block[0] * last_elems).sum();
} else {
fut[nx - 4] = -idx * (block[3] * last_elems).sum();
fut[nx - 3] = -idx * (block[2] * last_elems).sum();
fut[nx - 2] = -idx * (block[1] * last_elems).sum();
fut[nx - 1] = -idx * (block[0] * last_elems).sum();
}
}
}
}
};
}
// Row kernel for the difference operator (far boundary negated).
diff_simd_row_7_47!(Upwind4, diff_simd_row, Upwind4::BLOCK, Upwind4::DIAG, false);
// Row kernel for the dissipation operator (far boundary not negated).
diff_simd_row_7_47!(
Upwind4,
diss_simd_row,
Upwind4::DISS_BLOCK,
Upwind4::DISS_DIAG,
true
);
macro_rules! diff_simd_col_7_47 {
($self: ident, $name: ident, $BLOCK: expr, $DIAG: expr, $symmetric: expr) => {
impl $self {
#[inline(never)]
fn $name(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
use std::slice;
assert_eq!(prev.shape(), fut.shape());
assert_eq!(prev.stride_of(Axis(0)), 1);
assert_eq!(prev.stride_of(Axis(0)), 1);
let ny = prev.len_of(Axis(0));
let nx = prev.len_of(Axis(1));
assert!(nx >= 2 * $BLOCK.len());
assert!(ny >= SimdT::lanes());
assert!(ny % SimdT::lanes() == 0);
let dx = 1.0 / (nx - 1) as f32;
let idx = 1.0 / dx;
for j in (0..ny).step_by(SimdT::lanes()) {
let a = unsafe {
[
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 0)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 1)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 2)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 3)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 4)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 5)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, 6)) as *const f32,
SimdT::lanes(),
)),
]
};
for (i, bl) in $BLOCK.iter().enumerate() {
let b = idx
* (a[0] * bl[0]
+ a[1] * bl[1]
+ a[2] * bl[2]
+ a[3] * bl[3]
+ a[4] * bl[4]
+ a[5] * bl[5]
+ a[6] * bl[6]);
unsafe {
b.write_to_slice_unaligned(slice::from_raw_parts_mut(
fut.uget_mut((j, i)) as *mut f32,
SimdT::lanes(),
));
}
}
let mut a = a;
for i in $BLOCK.len()..nx - $BLOCK.len() {
// Push a onto circular buffer
a = [a[1], a[2], a[3], a[4], a[5], a[6], unsafe {
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, i + 3)) as *const f32,
SimdT::lanes(),
))
}];
let b = idx
* (a[0] * $DIAG[0]
+ a[1] * $DIAG[1]
+ a[2] * $DIAG[2]
+ a[3] * $DIAG[3]
+ a[4] * $DIAG[4]
+ a[5] * $DIAG[5]
+ a[6] * $DIAG[6]);
unsafe {
b.write_to_slice_unaligned(slice::from_raw_parts_mut(
fut.uget_mut((j, i)) as *mut f32,
SimdT::lanes(),
));
}
}
let a = unsafe {
[
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 1)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 2)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 3)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 4)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 5)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 6)) as *const f32,
SimdT::lanes(),
)),
SimdT::from_slice_unaligned(slice::from_raw_parts(
prev.uget((j, nx - 7)) as *const f32,
SimdT::lanes(),
)),
]
};
for (i, bl) in $BLOCK.iter().enumerate() {
let idx = if $symmetric { idx } else { -idx };
let b = idx
* (a[0] * bl[0]
+ a[1] * bl[1]
+ a[2] * bl[2]
+ a[3] * bl[3]
+ a[4] * bl[4]
+ a[5] * bl[5]
+ a[6] * bl[6]);
unsafe {
b.write_to_slice_unaligned(slice::from_raw_parts_mut(
fut.uget_mut((j, nx - 1 - i)) as *mut f32,
SimdT::lanes(),
));
}
}
}
}
}
};
}
diff_simd_col_7_47!(Upwind4, diff_simd_col, Upwind4::BLOCK, Upwind4::DIAG, false);
diff_simd_col_7_47!(
Upwind4,
diss_simd_col,
Upwind4::DISS_BLOCK,
Upwind4::DISS_DIAG,
true
);
// Coefficient tables for `Upwind4`. `BLOCK`/`DIAG` feed the difference
// kernels (`diff_1d`, `diff_simd_row`, `diff_simd_col`); `DISS_BLOCK`/
// `DISS_DIAG` feed the dissipation kernels (`diss_1d`, `diss_simd_row`,
// `diss_simd_col`).
impl Upwind4 {
// Four values returned verbatim by `SbpOperator::h`.
// NOTE(review): presumably the boundary entries of the quadrature (norm)
// matrix of the scheme -- confirm against the operator's reference.
#[rustfmt::skip]
const HBLOCK: &'static [f32] = &[
49.0 / 144.0, 61.0 / 48.0, 41.0 / 48.0, 149.0 / 144.0
];
// Seven-point interior stencil of the difference operator; antisymmetric
// around the zero centre entry.
#[rustfmt::skip]
const DIAG: &'static [f32] = &[
-1.0 / 24.0, 1.0 / 4.0, -7.0 / 8.0, 0.0, 7.0 / 8.0, -1.0 / 4.0, 1.0 / 24.0
];
// Boundary block of the difference operator: four rows of seven
// coefficients. `diffxi` and `dissxi` require at least `2 * BLOCK.len()`
// (= 8) points along the differentiated axis.
#[rustfmt::skip]
const BLOCK: &'static [[f32; 7]] = &[
[ -72.0 / 49.0, 187.0 / 98.0, -20.0 / 49.0, -3.0 / 98.0, 0.0, 0.0, 0.0],
[-187.0 / 366.0, 0.0, 69.0 / 122.0, -16.0 / 183.0, 2.0 / 61.0, 0.0, 0.0],
[ 20.0 / 123.0, -69.0 / 82.0, 0.0, 227.0 / 246.0, -12.0 / 41.0, 2.0 / 41.0, 0.0],
[ 3.0 / 298.0, 16.0 / 149.0, -227.0 / 298.0, 0.0, 126.0 / 149.0, -36.0 / 149.0, 6.0 / 149.0],
];
// Boundary block of the dissipation operator: four rows of seven
// coefficients.
#[rustfmt::skip]
const DISS_BLOCK: &'static [[f32; 7]; 4] = &[
[-3.0 / 49.0, 9.0 / 49.0, -9.0 / 49.0, 3.0 / 49.0, 0.0, 0.0, 0.0],
[ 3.0 / 61.0, -11.0 / 61.0, 15.0 / 61.0, -9.0 / 61.0, 2.0 / 61.0, 0.0, 0.0],
[-3.0 / 41.0, 15.0 / 41.0, -29.0 / 41.0, 27.0 / 41.0, -12.0 / 41.0, 2.0 / 41.0, 0.0],
[3.0 / 149.0, -27.0 / 149.0, 81.0 / 149.0, -117.0 / 149.0, 90.0 / 149.0, -36.0 / 149.0, 6.0 / 149.0],
];
// Seven-point interior stencil of the dissipation operator; symmetric around
// the centre entry.
#[rustfmt::skip]
const DISS_DIAG: &'static [f32; 7] = &[
1.0 / 24.0, -1.0 / 4.0, 5.0 / 8.0, -5.0 / 6.0, 5.0 / 8.0, -1.0 / 4.0, 1.0 / 24.0
];
}
impl SbpOperator for Upwind4 {
fn diffxi(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
assert_eq!(prev.shape(), fut.shape());
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
match (prev.strides(), fut.strides()) {
([_, 1], [_, 1]) => {
Self::diff_simd_row(prev, fut);
}
([1, _], [1, _]) if prev.len_of(Axis(0)) % SimdT::lanes() == 0 => {
Self::diff_simd_col(prev, fut);
}
([_, _], [_, _]) => {
// Fallback, work row by row
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diff_1d(r0, r1);
}
}
_ => unreachable!("Should only be two elements in the strides vectors"),
}
}
fn diffeta(prev: ArrayView2<f32>, fut: ArrayViewMut2<f32>) {
// transpose then use diffxi
Self::diffxi(prev.reversed_axes(), fut.reversed_axes());
}
fn h() -> &'static [f32] {
Self::HBLOCK
}
}
/// Checks `Upwind4` against the exact derivatives of `x`, `x^2` and `x^3`
/// sampled on 20 equispaced points of `[0, 1]`, through three entry points:
/// `diff_1d`, `diffxi` on a single-row array, and `diffeta` on an array with
/// eight identical columns.
#[test]
fn upwind4_test() {
    /// Samples `f` and its derivative `df` on `nx` points and compares the
    /// numerical derivative with `df`. `eps_1d` is the tolerance for
    /// `diff_1d`; both 2D entry points are checked with `1e-2`.
    fn check(nx: usize, f: impl Fn(f32) -> f32, df: impl Fn(f32) -> f32, eps_1d: f32) {
        use ndarray::prelude::*;

        let dx = 1.0 / (nx - 1) as f32;
        let source: Array1<f32> = Array1::from_shape_fn(nx, |i: usize| f(i as f32 * dx));
        let target: Array1<f32> = Array1::from_shape_fn(nx, |i: usize| df(i as f32 * dx));

        // Plain 1D operator.
        let mut res: Array1<f32> = Array1::zeros(nx);
        Upwind4::diff_1d(source.view(), res.view_mut());
        approx::assert_abs_diff_eq!(&res, &target, epsilon = eps_1d);

        // Single row: data laid out along axis 1.
        {
            let source_row = source.clone().insert_axis(ndarray::Axis(0));
            let target_row = target.clone().insert_axis(ndarray::Axis(0));
            let mut res_row = Array1::<f32>::zeros(nx).insert_axis(ndarray::Axis(0));
            Upwind4::diffxi(source_row.view(), res_row.view_mut());
            approx::assert_abs_diff_eq!(&res_row, &target_row, epsilon = 1e-2);
        }

        // Eight identical columns: data laid out along axis 0.
        {
            let source_cols = Array2::from_shape_fn((nx, 8), |(i, _)| source[i]);
            let target_cols = Array2::from_shape_fn((nx, 8), |(i, _)| target[i]);
            let mut res_cols = Array2::zeros((nx, 8));
            Upwind4::diffeta(source_cols.view(), res_cols.view_mut());
            approx::assert_abs_diff_eq!(&res_cols, &target_cols, epsilon = 1e-2);
        }
    }

    let nx = 20;
    check(nx, |x| x, |_| 1.0, 1e-4);
    check(nx, |x| x * x, |x| 2.0 * x, 1e-4);
    check(nx, |x| x * x * x, |x| 3.0 * x * x, 1e-2);
}
impl UpwindOperator for Upwind4 {
    /// Applies the dissipation operator along axis 1, choosing a kernel from
    /// the memory layout of both views:
    ///
    /// * axis 1 has stride 1 in both: `diss_simd_row`;
    /// * axis 0 has stride 1 in both and its length is a multiple of
    ///   `SimdT::lanes()`: `diss_simd_col`;
    /// * anything else: `diss_1d` applied lane by lane along axis 0.
    ///
    /// # Panics
    /// Panics if the two arrays differ in shape, or if axis 1 holds fewer
    /// than `2 * BLOCK.len()` points.
    fn dissxi(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
        /// Implementation selected for the current memory layout.
        enum Kernel {
            SimdRow,
            SimdCol,
            RowByRow,
        }

        assert_eq!(prev.shape(), fut.shape());
        // NOTE(review): the bound uses `BLOCK` rather than `DISS_BLOCK`; both
        // tables have four rows, so the check is equivalent today.
        assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());

        // Decide first, run afterwards: the stride slices only borrow the
        // views, while the kernels take them by value.
        let whole_simd_columns = prev.len_of(Axis(0)) % SimdT::lanes() == 0;
        let kernel = match (prev.strides(), fut.strides()) {
            ([_, 1], [_, 1]) => Kernel::SimdRow,
            ([1, _], [1, _]) if whole_simd_columns => Kernel::SimdCol,
            ([_, _], [_, _]) => Kernel::RowByRow,
            _ => unreachable!("Should only be two elements in the strides vectors"),
        };

        match kernel {
            Kernel::SimdRow => Self::diss_simd_row(prev, fut),
            Kernel::SimdCol => Self::diss_simd_col(prev, fut),
            Kernel::RowByRow => prev
                .outer_iter()
                .zip(fut.outer_iter_mut())
                .for_each(|(src_lane, dst_lane)| {
                    Self::diss_1d(src_lane, dst_lane);
                }),
        }
    }

    /// Applies the dissipation operator along axis 0 by swapping the axes of
    /// both views and delegating to [`Self::dissxi`].
    fn disseta(prev: ArrayView2<f32>, fut: ArrayViewMut2<f32>) {
        let prev_t = prev.reversed_axes();
        let fut_t = fut.reversed_axes();
        Self::dissxi(prev_t, fut_t);
    }
}

View File

@@ -0,0 +1,89 @@
use super::{SbpOperator, UpwindOperator};
use crate::diff_op_1d;
use ndarray::{s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2};
/// Marker type for the `Upwind9` operator.
///
/// The struct has no fields; everything is exposed through the associated
/// constants below and the associated functions of its `SbpOperator` /
/// `UpwindOperator` impls.
pub struct Upwind9 {}
// The two `diff_op_1d!` instantiations below come from a crate-level macro
// defined outside this file. Presumably they provide `Upwind9::diff_1d` and
// `Upwind9::diss_1d`, which `diffxi` / `dissxi` call for every lane.
// NOTE(review): the meaning of the trailing `false` / `true` flag is defined
// by the macro -- confirm against its definition.
diff_op_1d!(Upwind9, diff_1d, Upwind9::BLOCK, Upwind9::DIAG, false);
diff_op_1d!(
Upwind9,
diss_1d,
Upwind9::DISS_BLOCK,
Upwind9::DISS_DIAG,
true
);
impl Upwind9 {
// Eight values returned verbatim by `SbpOperator::h`.
// NOTE(review): presumably the boundary entries of the quadrature (norm)
// matrix of the scheme -- confirm against the operator's reference.
#[rustfmt::skip]
const HBLOCK: &'static [f32] = &[
1070017.0/3628800.0, 5537111.0/3628800.0, 103613.0/403200.0, 261115.0/145152.0, 298951.0/725760.0, 515677.0/403200.0, 3349879.0/3628800.0, 3662753.0/3628800.0
];
// Eleven-point interior stencil of the difference operator; antisymmetric
// around the zero centre entry.
#[rustfmt::skip]
const DIAG: &'static [f32] = &[
-1.0/1260.0, 5.0/504.0, -5.0/84.0, 5.0/21.0, -5.0/6.0, 0.0, 5.0/6.0, -5.0/21.0, 5.0/84.0, -5.0/504.0, 1.0/1260.0,
];
// Boundary block of the difference operator: eight rows of thirteen
// coefficients. `diffxi` and `dissxi` require at least `2 * BLOCK.len()`
// (= 16) points along the differentiated axis.
#[rustfmt::skip]
const BLOCK: &'static [[f32; 13]] = &[
[-1.69567399396458e+00, 2.29023358159400e+00, -2.16473500425698e-01, -5.05879766354449e-01, -1.01161106778154e-01, 2.59147072064383e-01, 1.93922119400659e-02, -4.95844980755642e-02, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[-4.42575354959737e-01, 0.00000000000000e+00, 1.91582959381899e-01, 2.82222626681305e-01, 1.12083989713257e-01, -1.51334868892111e-01, -2.23600502721044e-02, 3.03806983474913e-02, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[2.48392603571843e-01, -1.13758367065272e+00, 0.00000000000000e+00, 1.95334726810969e+00, -1.58879011773212e+00, 3.93797129320378e-01, 2.52140821030291e-01, -1.21304033647356e-01, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[8.29213105268236e-02, -2.39388470313226e-01, -2.79038666398460e-01, 0.00000000000000e+00, 3.43018053395471e-01, 1.10370852514749e-01, 1.72029988649808e-03, -2.00445645303789e-02, 4.41184918522490e-04, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[7.24159504343116e-02, -4.15199475743626e-01, 9.91181694804303e-01, -1.49802407438608e+00, 0.00000000000000e+00, 1.30188867830442e+00, -6.03535071819214e-01, 1.73429775718218e-01, -2.40842144699299e-02, 1.92673715759439e-03, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[-5.97470838462221e-02, 1.80551858630298e-01, -7.91241454636765e-02, -1.55240829877729e-01, -4.19298775383066e-01, 0.00000000000000e+00, 6.42287612546289e-01, -1.48833147569152e-01, 4.65407609802260e-02, -7.75679349670433e-03, 6.20543479736347e-04, 0.00000000000000e+00, 0.00000000000000e+00],
[-6.19425252179959e-03, 3.69595678895333e-02, -7.01892820620398e-02, -3.35233082197107e-03, 2.69304373763091e-01, -8.89857974743355e-01, 0.00000000000000e+00, 8.66656645522330e-01, -2.57919763669076e-01, 6.44799409172690e-02, -1.07466568195448e-02, 8.59732545563586e-04, 0.00000000000000e+00],
[1.44853491014330e-02, -4.59275574977554e-02, 3.08833474560615e-02, 3.57240610228828e-02, -7.07760049349999e-02, 1.88587240076292e-01, -7.92626447113877e-01, 0.00000000000000e+00, 8.25608497215073e-01, -2.35888142061449e-01, 5.89720355153623e-02, -9.82867258589373e-03, 7.86293806871498e-04],
];
// Boundary block of the dissipation operator: eight rows of thirteen
// coefficients.
#[rustfmt::skip]
const DISS_BLOCK: &'static [[f32; 13]] = &[
[-3.99020778658945e-04, 2.05394169917502e-03, -4.24493243399805e-03, 4.38126393542801e-03, -2.18883813216888e-03, 2.98565988131608e-04, 1.38484104084115e-04, -3.94643819928825e-05, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[3.96913216138553e-04, -2.28230530115522e-03, 5.43069719436758e-03, -6.81086901935894e-03, 4.69064759201504e-03, -1.61429862514855e-03, 1.62083873811316e-04, 2.71310693302277e-05, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[-4.87084939816571e-03, 3.22464611075207e-02, -9.06094757860846e-02, 1.39830191253413e-01, -1.27675500367419e-01, 6.87310321912961e-02, -2.00917702215270e-02, 2.43991122096699e-03, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[7.18155125886276e-04, -5.77715378536685e-03, 1.99749582302141e-02, -3.87940986951101e-02, 4.62756436981388e-02, -3.46770570075288e-02, 1.59058082995305e-02, -4.06744078428648e-03, 4.41184918522490e-04, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[-1.56687484682703e-03, 1.73758484693946e-02, -7.96515646886111e-02, 2.02094401829054e-01, -3.16098733124618e-01, 3.17999240131250e-01, -2.06522928911140e-01, 8.37112455598470e-02, -1.92673715759439e-02, 1.92673715759439e-03, 0.00000000000000e+00, 0.00000000000000e+00, 0.00000000000000e+00],
[6.88352254356072e-05, -1.92595810396278e-03, 1.38098624496279e-02, -4.87746083763075e-02, 1.02417890394006e-01, -1.38292226669620e-01, 1.23829022892659e-01, -7.34723830823462e-02, 2.79244565881356e-02, -6.20543479736347e-03, 6.20543479736347e-04, 0.00000000000000e+00, 0.00000000000000e+00],
[4.42345367100640e-05, 2.67913080025652e-04, -5.59301314813691e-03, 3.09954862110834e-02, -9.21529346596015e-02, 1.71559035817103e-01, -2.12738289547735e-01, 1.79835101537893e-01, -1.03167905467630e-01, 3.86879645503614e-02, -8.59732545563586e-03, 8.59732545563586e-04, 0.00000000000000e+00],
[-1.15289127131636e-05, 4.10149803795578e-05, 6.21188131452618e-04, -7.24912245235322e-03, 3.41622279353287e-02, -9.30972311856124e-02, 1.64473506705108e-01, -1.98013074867399e-01, 1.65121699443015e-01, -9.43552568245798e-02, 3.53832213092174e-02, -7.86293806871498e-03, 7.86293806871498e-04]
];
// Eleven-point interior stencil of the dissipation operator; symmetric
// around the centre entry.
#[rustfmt::skip]
const DISS_DIAG: &'static [f32] = &[
1.0/1260.0, -1.0/126.0, 1.0/28.0, -2.0/21.0, 1.0/6.0, -1.0/5.0, 1.0/6.0, -2.0/21.0, 1.0/28.0, -1.0/126.0, 1.0/1260.0,
];
}
impl SbpOperator for Upwind9 {
fn diffxi(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
assert_eq!(prev.shape(), fut.shape());
assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());
for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
Self::diff_1d(r0, r1);
}
}
fn diffeta(prev: ArrayView2<f32>, fut: ArrayViewMut2<f32>) {
// transpose then use diffxi
Self::diffxi(prev.reversed_axes(), fut.reversed_axes());
}
fn h() -> &'static [f32] {
Self::HBLOCK
}
}
impl UpwindOperator for Upwind9 {
    /// Applies the dissipation operator along axis 1 by running `diss_1d` on
    /// every lane of axis 0 of `prev`, writing into the matching lane of
    /// `fut`.
    ///
    /// # Panics
    /// Panics if the two arrays differ in shape, or if axis 1 holds fewer
    /// than `2 * BLOCK.len()` points.
    fn dissxi(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
        assert_eq!(prev.shape(), fut.shape());
        // NOTE(review): the bound uses `BLOCK` rather than `DISS_BLOCK`; both
        // tables have eight rows, so the check is equivalent today.
        assert!(prev.shape()[1] >= 2 * Self::BLOCK.len());

        prev.outer_iter()
            .zip(fut.outer_iter_mut())
            .for_each(|(src_lane, dst_lane)| {
                Self::diss_1d(src_lane, dst_lane);
            });
    }

    /// Applies the dissipation operator along axis 0 by swapping the axes of
    /// both views and delegating to [`Self::dissxi`].
    fn disseta(prev: ArrayView2<f32>, fut: ArrayViewMut2<f32>) {
        let prev_t = prev.reversed_axes();
        let fut_t = fut.reversed_axes();
        Self::dissxi(prev_t, fut_t);
    }
}