upwind operators
This commit is contained in:
		
							
								
								
									
										12
									
								
								src/lib.rs
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								src/lib.rs
									
									
									
									
									
								
							@@ -47,6 +47,18 @@ impl Universe {
 | 
			
		||||
        self.sys.0.set_gaussian(x0, y0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Using artifical dissipation with the upwind operator
 | 
			
		||||
    pub fn advance_upwind(&mut self, dt: f32) {
 | 
			
		||||
        Field::advance_upwind::<operators::Upwind4>(
 | 
			
		||||
            &self.sys.0,
 | 
			
		||||
            &mut self.sys.1,
 | 
			
		||||
            dt,
 | 
			
		||||
            &self.grid,
 | 
			
		||||
            Some(&mut self.wb),
 | 
			
		||||
        );
 | 
			
		||||
        std::mem::swap(&mut self.sys.0, &mut self.sys.1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn advance(&mut self, dt: f32) {
 | 
			
		||||
        Field::advance::<operators::Upwind4>(
 | 
			
		||||
            &self.sys.0,
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										163
									
								
								src/maxwell.rs
									
									
									
									
									
								
							
							
						
						
									
										163
									
								
								src/maxwell.rs
									
									
									
									
									
								
							@@ -1,4 +1,4 @@
 | 
			
		||||
use super::operators::SbpOperator;
 | 
			
		||||
use super::operators::{SbpOperator, UpwindOperator};
 | 
			
		||||
use super::Grid;
 | 
			
		||||
use ndarray::prelude::*;
 | 
			
		||||
use ndarray::{azip, Zip};
 | 
			
		||||
@@ -97,6 +97,62 @@ impl Field {
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub(crate) fn advance_upwind<UO>(
 | 
			
		||||
        &self,
 | 
			
		||||
        fut: &mut Self,
 | 
			
		||||
        dt: f32,
 | 
			
		||||
        grid: &Grid<UO>,
 | 
			
		||||
        work_buffers: Option<&mut WorkBuffers>,
 | 
			
		||||
    ) where
 | 
			
		||||
        UO: UpwindOperator,
 | 
			
		||||
    {
 | 
			
		||||
        assert_eq!(self.0.shape(), fut.0.shape());
 | 
			
		||||
 | 
			
		||||
        let mut wb: WorkBuffers;
 | 
			
		||||
        let (y, k, tmp) = if let Some(x) = work_buffers {
 | 
			
		||||
            (&mut x.y, &mut x.buf, &mut x.tmp)
 | 
			
		||||
        } else {
 | 
			
		||||
            wb = WorkBuffers::new(self.nx(), self.ny());
 | 
			
		||||
            (&mut wb.y, &mut wb.buf, &mut wb.tmp)
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let boundaries = BoundaryTerms {
 | 
			
		||||
            north: Boundary::This,
 | 
			
		||||
            south: Boundary::This,
 | 
			
		||||
            west: Boundary::This,
 | 
			
		||||
            east: Boundary::This,
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        for i in 0..4 {
 | 
			
		||||
            // y = y0 + c*kn
 | 
			
		||||
            y.assign(&self);
 | 
			
		||||
            match i {
 | 
			
		||||
                0 => {}
 | 
			
		||||
                1 | 2 => {
 | 
			
		||||
                    y.scaled_add(1.0 / 2.0 * dt, &k[i - 1]);
 | 
			
		||||
                }
 | 
			
		||||
                3 => {
 | 
			
		||||
                    y.scaled_add(dt, &k[i - 1]);
 | 
			
		||||
                }
 | 
			
		||||
                _ => {
 | 
			
		||||
                    unreachable!();
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
 | 
			
		||||
            RHS_upwind(&mut k[i], &y, grid, &boundaries, tmp);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Zip::from(&mut fut.0)
 | 
			
		||||
            .and(&self.0)
 | 
			
		||||
            .and(&*k[0])
 | 
			
		||||
            .and(&*k[1])
 | 
			
		||||
            .and(&*k[2])
 | 
			
		||||
            .and(&*k[3])
 | 
			
		||||
            .apply(|y1, &y0, &k1, &k2, &k3, &k4| {
 | 
			
		||||
                *y1 = y0 + dt / 6.0 * (k1 + 2.0 * k2 + 2.0 * k3 + k4)
 | 
			
		||||
            });
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Solving (Au)_x + (Bu)_y
 | 
			
		||||
    /// with:
 | 
			
		||||
    ///        A               B
 | 
			
		||||
@@ -186,6 +242,25 @@ fn RHS<SBP: SbpOperator>(
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[allow(non_snake_case)]
 | 
			
		||||
fn RHS_upwind<UO: UpwindOperator>(
 | 
			
		||||
    k: &mut Field,
 | 
			
		||||
    y: &Field,
 | 
			
		||||
    grid: &Grid<UO>,
 | 
			
		||||
    boundaries: &BoundaryTerms,
 | 
			
		||||
    tmp: &mut (Array2<f32>, Array2<f32>, Array2<f32>, Array2<f32>),
 | 
			
		||||
) {
 | 
			
		||||
    fluxes(k, y, grid, tmp);
 | 
			
		||||
    dissipation(k, y, grid, tmp);
 | 
			
		||||
 | 
			
		||||
    SAT_characteristics(k, y, grid, boundaries);
 | 
			
		||||
 | 
			
		||||
    azip!((k in &mut k.0,
 | 
			
		||||
                    &detj in &grid.detj.broadcast((3, y.ny(), y.nx())).unwrap()) {
 | 
			
		||||
        *k /= detj;
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn fluxes<SBP: SbpOperator>(
 | 
			
		||||
    k: &mut Field,
 | 
			
		||||
    y: &Field,
 | 
			
		||||
@@ -260,6 +335,92 @@ fn fluxes<SBP: SbpOperator>(
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn dissipation<UO: UpwindOperator>(
 | 
			
		||||
    k: &mut Field,
 | 
			
		||||
    y: &Field,
 | 
			
		||||
    grid: &Grid<UO>,
 | 
			
		||||
    tmp: &mut (Array2<f32>, Array2<f32>, Array2<f32>, Array2<f32>),
 | 
			
		||||
) {
 | 
			
		||||
    // ex component
 | 
			
		||||
    {
 | 
			
		||||
        ndarray::azip!((a in &mut tmp.0,
 | 
			
		||||
                        &kx in &grid.detj_dxi_dx,
 | 
			
		||||
                        &ky in &grid.detj_dxi_dy,
 | 
			
		||||
                        &ex in &y.ex(),
 | 
			
		||||
                        &ey in &y.ey()) {
 | 
			
		||||
            let r = f32::hypot(kx, ky);
 | 
			
		||||
            *a = ky*ky/r * ex + -kx*ky/r*ey;
 | 
			
		||||
        });
 | 
			
		||||
        UO::dissxi(tmp.0.view(), tmp.1.view_mut());
 | 
			
		||||
 | 
			
		||||
        ndarray::azip!((b in &mut tmp.2,
 | 
			
		||||
                    &kx in &grid.detj_deta_dx,
 | 
			
		||||
                    &ky in &grid.detj_deta_dy,
 | 
			
		||||
                    &ex in &y.ex(),
 | 
			
		||||
                    &ey in &y.ey()) {
 | 
			
		||||
            let r = f32::hypot(kx, ky);
 | 
			
		||||
            *b = ky*ky/r * ex + -kx*ky/r*ey;
 | 
			
		||||
        });
 | 
			
		||||
        UO::disseta(tmp.2.view(), tmp.3.view_mut());
 | 
			
		||||
 | 
			
		||||
        ndarray::azip!((flux in &mut k.ex_mut(), &ax in &tmp.1, &by in &tmp.3)
 | 
			
		||||
            *flux += ax + by
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // hz component
 | 
			
		||||
    {
 | 
			
		||||
        ndarray::azip!((a in &mut tmp.0,
 | 
			
		||||
                        &kx in &grid.detj_dxi_dx,
 | 
			
		||||
                        &ky in &grid.detj_dxi_dy,
 | 
			
		||||
                        &hz in &y.hz()) {
 | 
			
		||||
            let r = f32::hypot(kx, ky);
 | 
			
		||||
            *a = r * hz;
 | 
			
		||||
        });
 | 
			
		||||
        UO::dissxi(tmp.0.view(), tmp.1.view_mut());
 | 
			
		||||
 | 
			
		||||
        ndarray::azip!((b in &mut tmp.2,
 | 
			
		||||
                        &kx in &grid.detj_deta_dx,
 | 
			
		||||
                        &ky in &grid.detj_deta_dy,
 | 
			
		||||
                        &hz in &y.hz()) {
 | 
			
		||||
            let r = f32::hypot(kx, ky);
 | 
			
		||||
            *b = r * hz;
 | 
			
		||||
        });
 | 
			
		||||
        UO::disseta(tmp.2.view(), tmp.3.view_mut());
 | 
			
		||||
 | 
			
		||||
        ndarray::azip!((flux in &mut k.hz_mut(), &ax in &tmp.1, &by in &tmp.3)
 | 
			
		||||
            *flux += ax + by
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // ey
 | 
			
		||||
    {
 | 
			
		||||
        ndarray::azip!((a in &mut tmp.0,
 | 
			
		||||
                        &kx in &grid.detj_dxi_dx,
 | 
			
		||||
                        &ky in &grid.detj_dxi_dy,
 | 
			
		||||
                        &ex in &y.ex(),
 | 
			
		||||
                        &ey in &y.ey()) {
 | 
			
		||||
            let r = f32::hypot(kx, ky);
 | 
			
		||||
            *a = -kx*ky/r * ex + kx*kx/r*ey;
 | 
			
		||||
        });
 | 
			
		||||
        UO::dissxi(tmp.0.view(), tmp.1.view_mut());
 | 
			
		||||
 | 
			
		||||
        ndarray::azip!((b in &mut tmp.2,
 | 
			
		||||
                    &kx in &grid.detj_deta_dx,
 | 
			
		||||
                    &ky in &grid.detj_deta_dy,
 | 
			
		||||
                    &ex in &y.ex(),
 | 
			
		||||
                    &ey in &y.ey()) {
 | 
			
		||||
            let r = f32::hypot(kx, ky);
 | 
			
		||||
            *b = -kx*ky/r * ex + kx*kx/r*ey;
 | 
			
		||||
        });
 | 
			
		||||
        UO::disseta(tmp.2.view(), tmp.3.view_mut());
 | 
			
		||||
 | 
			
		||||
        ndarray::azip!((flux in &mut k.hz_mut(), &ax in &tmp.1, &by in &tmp.3)
 | 
			
		||||
            *flux += ax + by
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug)]
 | 
			
		||||
pub enum Boundary {
 | 
			
		||||
    This,
 | 
			
		||||
 
 | 
			
		||||
@@ -6,5 +6,10 @@ pub trait SbpOperator {
 | 
			
		||||
    fn h() -> &'static [f32];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub trait UpwindOperator: SbpOperator {
 | 
			
		||||
    fn dissxi(prev: ArrayView2<f32>, fut: ArrayViewMut2<f32>);
 | 
			
		||||
    fn disseta(prev: ArrayView2<f32>, fut: ArrayViewMut2<f32>);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
mod upwind4;
 | 
			
		||||
pub use upwind4::Upwind4;
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,4 @@
 | 
			
		||||
use super::SbpOperator;
 | 
			
		||||
use super::{SbpOperator, UpwindOperator};
 | 
			
		||||
use ndarray::{arr1, arr2, s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2};
 | 
			
		||||
 | 
			
		||||
/// Simdtype used in diffeta_simd
 | 
			
		||||
@@ -23,6 +23,55 @@ impl Upwind4 {
 | 
			
		||||
        [   3.0 / 298.0, 16.0 / 149.0, -227.0 / 298.0,           0.0, 126.0 / 149.0, -36.0 / 149.0, 6.0 / 149.0],
 | 
			
		||||
    ];
 | 
			
		||||
 | 
			
		||||
    /// Boundary stencils of the dissipation operator: row `i` holds the
    /// weights applied to the first seven points to produce output point `i`.
    /// The same rows are applied to the reversed tail of the input for the
    /// last four points.
    #[rustfmt::skip]
    const DISS_BLOCK: [[f32; 7]; 4] = [
        [ -3.0 / 49.0,    9.0 / 49.0,  -9.0 / 49.0,     3.0 / 49.0,          0.0,           0.0,         0.0],
        [  3.0 / 61.0,  -11.0 / 61.0,  15.0 / 61.0,    -9.0 / 61.0,   2.0 / 61.0,           0.0,         0.0],
        [ -3.0 / 41.0,   15.0 / 41.0, -29.0 / 41.0,    27.0 / 41.0, -12.0 / 41.0,    2.0 / 41.0,         0.0],
        [ 3.0 / 149.0, -27.0 / 149.0, 81.0 / 149.0, -117.0 / 149.0, 90.0 / 149.0, -36.0 / 149.0, 6.0 / 149.0],
    ];
 | 
			
		||||
 | 
			
		||||
    /// Interior stencil of the dissipation operator, applied to a window of
    /// seven consecutive points.
    #[rustfmt::skip]
    const DISS_DIAG: [f32; 7] = [
        1.0 / 24.0, -1.0 / 4.0, 5.0 / 8.0, -5.0 / 6.0, 5.0 / 8.0, -1.0 / 4.0, 1.0 / 24.0,
    ];
 | 
			
		||||
 | 
			
		||||
    #[inline(never)]
 | 
			
		||||
    fn diff_simd(prev: &[f32], fut: &mut [f32]) {
 | 
			
		||||
        use packed_simd::{f32x8, u32x8};
 | 
			
		||||
@@ -235,6 +284,218 @@ impl Upwind4 {
 | 
			
		||||
            *f = diff * idx;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    #[inline(never)]
 | 
			
		||||
    fn diss_simd(prev: &[f32], fut: &mut [f32]) {
 | 
			
		||||
        use packed_simd::{f32x8, u32x8};
 | 
			
		||||
        assert_eq!(prev.len(), fut.len());
 | 
			
		||||
        assert!(prev.len() >= 2 * Self::DISS_BLOCK.len());
 | 
			
		||||
        let nx = prev.len();
 | 
			
		||||
        let dx = 1.0 / (nx - 1) as f32;
 | 
			
		||||
        let idx = 1.0 / dx;
 | 
			
		||||
 | 
			
		||||
        let first_elems = unsafe { f32x8::from_slice_unaligned_unchecked(prev) };
 | 
			
		||||
        let block = [
 | 
			
		||||
            f32x8::new(
 | 
			
		||||
                Self::DISS_BLOCK[0][0],
 | 
			
		||||
                Self::DISS_BLOCK[0][1],
 | 
			
		||||
                Self::DISS_BLOCK[0][2],
 | 
			
		||||
                Self::DISS_BLOCK[0][3],
 | 
			
		||||
                Self::DISS_BLOCK[0][4],
 | 
			
		||||
                Self::DISS_BLOCK[0][5],
 | 
			
		||||
                Self::DISS_BLOCK[0][6],
 | 
			
		||||
                0.0,
 | 
			
		||||
            ),
 | 
			
		||||
            f32x8::new(
 | 
			
		||||
                Self::DISS_BLOCK[1][0],
 | 
			
		||||
                Self::DISS_BLOCK[1][1],
 | 
			
		||||
                Self::DISS_BLOCK[1][2],
 | 
			
		||||
                Self::DISS_BLOCK[1][3],
 | 
			
		||||
                Self::DISS_BLOCK[1][4],
 | 
			
		||||
                Self::DISS_BLOCK[1][5],
 | 
			
		||||
                Self::DISS_BLOCK[1][6],
 | 
			
		||||
                0.0,
 | 
			
		||||
            ),
 | 
			
		||||
            f32x8::new(
 | 
			
		||||
                Self::DISS_BLOCK[2][0],
 | 
			
		||||
                Self::DISS_BLOCK[2][1],
 | 
			
		||||
                Self::DISS_BLOCK[2][2],
 | 
			
		||||
                Self::DISS_BLOCK[2][3],
 | 
			
		||||
                Self::DISS_BLOCK[2][4],
 | 
			
		||||
                Self::DISS_BLOCK[2][5],
 | 
			
		||||
                Self::DISS_BLOCK[2][6],
 | 
			
		||||
                0.0,
 | 
			
		||||
            ),
 | 
			
		||||
            f32x8::new(
 | 
			
		||||
                Self::DISS_BLOCK[3][0],
 | 
			
		||||
                Self::DISS_BLOCK[3][1],
 | 
			
		||||
                Self::DISS_BLOCK[3][2],
 | 
			
		||||
                Self::DISS_BLOCK[3][3],
 | 
			
		||||
                Self::DISS_BLOCK[3][4],
 | 
			
		||||
                Self::DISS_BLOCK[3][5],
 | 
			
		||||
                Self::DISS_BLOCK[3][6],
 | 
			
		||||
                0.0,
 | 
			
		||||
            ),
 | 
			
		||||
        ];
 | 
			
		||||
        unsafe {
 | 
			
		||||
            *fut.get_unchecked_mut(0) = idx * (block[0] * first_elems).sum();
 | 
			
		||||
            *fut.get_unchecked_mut(1) = idx * (block[1] * first_elems).sum();
 | 
			
		||||
            *fut.get_unchecked_mut(2) = idx * (block[2] * first_elems).sum();
 | 
			
		||||
            *fut.get_unchecked_mut(3) = idx * (block[3] * first_elems).sum()
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let diag = f32x8::new(
 | 
			
		||||
            Self::DISS_DIAG[0],
 | 
			
		||||
            Self::DISS_DIAG[1],
 | 
			
		||||
            Self::DISS_DIAG[2],
 | 
			
		||||
            Self::DISS_DIAG[3],
 | 
			
		||||
            Self::DISS_DIAG[4],
 | 
			
		||||
            Self::DISS_DIAG[5],
 | 
			
		||||
            Self::DISS_DIAG[6],
 | 
			
		||||
            0.0,
 | 
			
		||||
        );
 | 
			
		||||
        for (f, p) in fut
 | 
			
		||||
            .iter_mut()
 | 
			
		||||
            .skip(block.len())
 | 
			
		||||
            .zip(
 | 
			
		||||
                prev.windows(f32x8::lanes())
 | 
			
		||||
                    .map(f32x8::from_slice_unaligned)
 | 
			
		||||
                    .skip(1),
 | 
			
		||||
            )
 | 
			
		||||
            .take(nx - 2 * block.len())
 | 
			
		||||
        {
 | 
			
		||||
            *f = idx * (p * diag).sum();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let last_elems = unsafe { f32x8::from_slice_unaligned_unchecked(&prev[nx - 8..]) }
 | 
			
		||||
            .shuffle1_dyn(u32x8::new(7, 6, 5, 4, 3, 2, 1, 0));
 | 
			
		||||
        unsafe {
 | 
			
		||||
            *fut.get_unchecked_mut(nx - 4) = idx * (block[3] * last_elems).sum();
 | 
			
		||||
            *fut.get_unchecked_mut(nx - 3) = idx * (block[2] * last_elems).sum();
 | 
			
		||||
            *fut.get_unchecked_mut(nx - 2) = idx * (block[1] * last_elems).sum();
 | 
			
		||||
            *fut.get_unchecked_mut(nx - 1) = idx * (block[0] * last_elems).sum();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[inline(never)]
 | 
			
		||||
    fn disseta_simd(prev: &[f32], fut: &mut [f32], nx: usize, ny: usize) {
 | 
			
		||||
        assert!(ny >= 2 * Self::DISS_BLOCK.len());
 | 
			
		||||
        assert!(nx >= SimdT::lanes());
 | 
			
		||||
        assert!(nx % SimdT::lanes() == 0);
 | 
			
		||||
        assert_eq!(prev.len(), fut.len());
 | 
			
		||||
        assert_eq!(prev.len(), nx * ny);
 | 
			
		||||
 | 
			
		||||
        let dy = 1.0 / (ny - 1) as f32;
 | 
			
		||||
        let idy = 1.0 / dy;
 | 
			
		||||
 | 
			
		||||
        for j in (0..nx).step_by(SimdT::lanes()) {
 | 
			
		||||
            let a = [
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[0 * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[1 * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[2 * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[3 * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[4 * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[5 * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[6 * nx + j..]),
 | 
			
		||||
            ];
 | 
			
		||||
 | 
			
		||||
            for (i, bl) in Self::DISS_BLOCK.iter().enumerate() {
 | 
			
		||||
                let b = idy
 | 
			
		||||
                    * (a[0] * bl[0]
 | 
			
		||||
                        + a[1] * bl[1]
 | 
			
		||||
                        + a[2] * bl[2]
 | 
			
		||||
                        + a[3] * bl[3]
 | 
			
		||||
                        + a[4] * bl[4]
 | 
			
		||||
                        + a[5] * bl[5]
 | 
			
		||||
                        + a[6] * bl[6]);
 | 
			
		||||
                b.write_to_slice_unaligned(&mut fut[i * nx + j..]);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            let mut a = a;
 | 
			
		||||
            for i in Self::DISS_BLOCK.len()..ny - Self::DISS_BLOCK.len() {
 | 
			
		||||
                // Push a onto circular buffer
 | 
			
		||||
                a = [
 | 
			
		||||
                    a[1],
 | 
			
		||||
                    a[2],
 | 
			
		||||
                    a[3],
 | 
			
		||||
                    a[4],
 | 
			
		||||
                    a[5],
 | 
			
		||||
                    a[6],
 | 
			
		||||
                    SimdT::from_slice_unaligned(&prev[nx * (i + 3) + j..]),
 | 
			
		||||
                ];
 | 
			
		||||
                let b = idy
 | 
			
		||||
                    * (a[0] * Self::DISS_DIAG[0]
 | 
			
		||||
                        + a[1] * Self::DISS_DIAG[1]
 | 
			
		||||
                        + a[2] * Self::DISS_DIAG[2]
 | 
			
		||||
                        + a[3] * Self::DISS_DIAG[3]
 | 
			
		||||
                        + a[4] * Self::DISS_DIAG[4]
 | 
			
		||||
                        + a[5] * Self::DISS_DIAG[5]
 | 
			
		||||
                        + a[6] * Self::DISS_DIAG[6]);
 | 
			
		||||
                b.write_to_slice_unaligned(&mut fut[nx * i + j..]);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            let a = [
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[(ny - 1) * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[(ny - 2) * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[(ny - 3) * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[(ny - 4) * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[(ny - 5) * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[(ny - 6) * nx + j..]),
 | 
			
		||||
                SimdT::from_slice_unaligned(&prev[(ny - 7) * nx + j..]),
 | 
			
		||||
            ];
 | 
			
		||||
 | 
			
		||||
            for (i, bl) in Self::DISS_BLOCK.iter().enumerate() {
 | 
			
		||||
                let b = idy
 | 
			
		||||
                    * (a[0] * bl[0]
 | 
			
		||||
                        + a[1] * bl[1]
 | 
			
		||||
                        + a[2] * bl[2]
 | 
			
		||||
                        + a[3] * bl[3]
 | 
			
		||||
                        + a[4] * bl[4]
 | 
			
		||||
                        + a[5] * bl[5]
 | 
			
		||||
                        + a[6] * bl[6]);
 | 
			
		||||
                b.write_to_slice_unaligned(&mut fut[(ny - 1 - i) * nx + j..]);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn diss(prev: ArrayView1<f32>, mut fut: ArrayViewMut1<f32>) {
 | 
			
		||||
        assert_eq!(prev.shape(), fut.shape());
 | 
			
		||||
        let nx = prev.shape()[0];
 | 
			
		||||
        assert!(nx >= 2 * Self::DISS_BLOCK.len());
 | 
			
		||||
 | 
			
		||||
        if let (Some(p), Some(f)) = (prev.as_slice(), fut.as_slice_mut()) {
 | 
			
		||||
            Self::diss_simd(p, f);
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let dx = 1.0 / (nx - 1) as f32;
 | 
			
		||||
        let idx = 1.0 / dx;
 | 
			
		||||
 | 
			
		||||
        let diag = arr1(&Self::DISS_DIAG);
 | 
			
		||||
        let block = arr2(&Self::DISS_BLOCK);
 | 
			
		||||
 | 
			
		||||
        let first_elems = prev.slice(s!(..7));
 | 
			
		||||
        for (bl, f) in block.outer_iter().zip(&mut fut) {
 | 
			
		||||
            let diff = first_elems.dot(&bl);
 | 
			
		||||
            *f = diff * idx;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for (window, f) in prev
 | 
			
		||||
            .windows(diag.len())
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .skip(1)
 | 
			
		||||
            .zip(fut.iter_mut().skip(4))
 | 
			
		||||
            .take(nx - 8)
 | 
			
		||||
        {
 | 
			
		||||
            let diff = diag.dot(&window);
 | 
			
		||||
            *f = diff * idx;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let last_elems = prev.slice(s!(nx - 7..;-1));
 | 
			
		||||
        for (bl, f) in block.outer_iter().zip(&mut fut.slice_mut(s![nx - 4..;-1])) {
 | 
			
		||||
            let diff = bl.dot(&last_elems);
 | 
			
		||||
            *f = diff * idx;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl SbpOperator for Upwind4 {
 | 
			
		||||
@@ -353,3 +614,28 @@ fn upwind4_test() {
 | 
			
		||||
        approx::assert_abs_diff_eq!(&res.to_owned(), &target.to_owned(), epsilon = 1e-2);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl UpwindOperator for Upwind4 {
 | 
			
		||||
    fn dissxi(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
 | 
			
		||||
        assert_eq!(prev.shape(), fut.shape());
 | 
			
		||||
        assert!(prev.shape()[1] >= 2 * Self::DISS_BLOCK.len());
 | 
			
		||||
        for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
 | 
			
		||||
            Self::diss(r0, r1)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn disseta(prev: ArrayView2<f32>, mut fut: ArrayViewMut2<f32>) {
 | 
			
		||||
        assert_eq!(prev.shape(), fut.shape());
 | 
			
		||||
        assert!(prev.shape()[0] >= 2 * Self::DISS_BLOCK.len());
 | 
			
		||||
        let nx = prev.shape()[1];
 | 
			
		||||
        let ny = prev.shape()[0];
 | 
			
		||||
        if nx >= SimdT::lanes() && nx % SimdT::lanes() == 0 {
 | 
			
		||||
            if let (Some(p), Some(f)) = (prev.as_slice(), fut.as_slice_mut()) {
 | 
			
		||||
                Self::disseta_simd(p, f, nx, ny);
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        // diffeta = transpose then use diffxi
 | 
			
		||||
        Self::dissxi(prev.reversed_axes(), fut.reversed_axes());
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user