Add an upwind-dissipation time step for the Maxwell solver.

Summary of the changes in this patch:
  * main.js: the render loop calls `universe.advance_upwind(dt/2)` twice per
    frame instead of `universe.advance(dt/2)`.
  * src/lib.rs: new `Universe::advance_upwind`, which steps `sys.0` into
    `sys.1` and then swaps the two buffers.
  * src/maxwell.rs: new `Field::advance_upwind` (four stages, combined as
    dt/6 * (k1 + 2*k2 + 2*k3 + k4)), `RHS_upwind` (fluxes + dissipation +
    SAT_characteristics, then division by `grid.detj`) and `dissipation`,
    which adds `dissxi`/`disseta` terms to the ex, hz and ey components.
  * src/operators.rs: new `UpwindOperator` trait (`dissxi`, `disseta`).
  * src/operators/upwind4.rs: `DISS_BLOCK`/`DISS_DIAG` stencils, the
    `diss_simd`, `disseta_simd` and `diss` kernels, and
    `impl UpwindOperator for Upwind4`.

NOTE(review): generic argument lists (for example after
`Field::advance_upwind::`, or on `Array2`) look like they were stripped from
this copy of the patch; they are reproduced as found -- confirm against the
repository before applying.

diff --git a/main.js b/main.js
index cc88d30..5b079f6 100644
--- a/main.js
+++ b/main.js
@@ -243,8 +243,8 @@ import { Universe, default as init, set_panic_hook as setPanicHook } from "./max
         gl.drawElements(gl.TRIANGLES, vertexCount, type, offset);
     }
 
-    universe.advance(dt/2);
-    universe.advance(dt/2);
+    universe.advance_upwind(dt/2);
+    universe.advance_upwind(dt/2);
 
     window.requestAnimationFrame(drawMe);
 }
diff --git a/src/lib.rs b/src/lib.rs
index 390a6fc..c45475d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -47,6 +47,18 @@ impl Universe {
         self.sys.0.set_gaussian(x0, y0);
     }
 
+    /// Using artificial dissipation with the upwind operator
+    pub fn advance_upwind(&mut self, dt: f32) {
+        Field::advance_upwind::(
+            &self.sys.0,
+            &mut self.sys.1,
+            dt,
+            &self.grid,
+            Some(&mut self.wb),
+        );
+        std::mem::swap(&mut self.sys.0, &mut self.sys.1);
+    }
+
     pub fn advance(&mut self, dt: f32) {
         Field::advance::(
             &self.sys.0,
diff --git a/src/maxwell.rs b/src/maxwell.rs
index 4bc79e1..f2e3174 100644
--- a/src/maxwell.rs
+++ b/src/maxwell.rs
@@ -1,4 +1,4 @@
-use super::operators::SbpOperator;
+use super::operators::{SbpOperator, UpwindOperator};
 use super::Grid;
 use ndarray::prelude::*;
 use ndarray::{azip, Zip};
@@ -97,6 +97,62 @@ impl Field {
         }
     }
 
+    pub(crate) fn advance_upwind(
+        &self,
+        fut: &mut Self,
+        dt: f32,
+        grid: &Grid,
+        work_buffers: Option<&mut WorkBuffers>,
+    ) where
+        UO: UpwindOperator,
+    {
+        assert_eq!(self.0.shape(), fut.0.shape());
+
+        let mut wb: WorkBuffers;
+        let (y, k, tmp) = if let Some(x) = work_buffers {
+            (&mut x.y, &mut x.buf, &mut x.tmp)
+        } else {
+            wb = WorkBuffers::new(self.nx(), self.ny());
+            (&mut wb.y, &mut wb.buf, &mut wb.tmp)
+        };
+
+        let boundaries = BoundaryTerms {
+            north: Boundary::This,
+            south: Boundary::This,
+            west: Boundary::This,
+            east: Boundary::This,
+        };
+
+        for i in 0..4 {
+            // y = y0 + c*kn
+            y.assign(&self);
+            match i {
+                0 => {}
+                1 | 2 => {
+                    y.scaled_add(1.0 / 2.0 * dt, &k[i - 1]);
+                }
+                3 => {
+                    y.scaled_add(dt, &k[i - 1]);
+                }
+                _ => {
+                    unreachable!();
+                }
+            };
+
+            RHS_upwind(&mut k[i], &y, grid, &boundaries, tmp);
+        }
+
+        Zip::from(&mut fut.0)
+            .and(&self.0)
+            .and(&*k[0])
+            .and(&*k[1])
+            .and(&*k[2])
+            .and(&*k[3])
+            .apply(|y1, &y0, &k1, &k2, &k3, &k4| {
+                *y1 = y0 + dt / 6.0 * (k1 + 2.0 * k2 + 2.0 * k3 + k4)
+            });
+    }
+
     /// Solving (Au)_x + (Bu)_y
     /// with:
     /// A B
@@ -186,6 +242,25 @@ fn RHS(
     });
 }
 
+#[allow(non_snake_case)]
+fn RHS_upwind(
+    k: &mut Field,
+    y: &Field,
+    grid: &Grid,
+    boundaries: &BoundaryTerms,
+    tmp: &mut (Array2, Array2, Array2, Array2),
+) {
+    fluxes(k, y, grid, tmp);
+    dissipation(k, y, grid, tmp);
+
+    SAT_characteristics(k, y, grid, boundaries);
+
+    azip!((k in &mut k.0,
+           &detj in &grid.detj.broadcast((3, y.ny(), y.nx())).unwrap()) {
+        *k /= detj;
+    });
+}
+
 fn fluxes(
     k: &mut Field,
     y: &Field,
@@ -260,6 +335,92 @@ fn fluxes(
     }
 }
 
+fn dissipation(
+    k: &mut Field,
+    y: &Field,
+    grid: &Grid,
+    tmp: &mut (Array2, Array2, Array2, Array2),
+) {
+    // ex component
+    {
+        ndarray::azip!((a in &mut tmp.0,
+                        &kx in &grid.detj_dxi_dx,
+                        &ky in &grid.detj_dxi_dy,
+                        &ex in &y.ex(),
+                        &ey in &y.ey()) {
+            let r = f32::hypot(kx, ky);
+            *a = ky*ky/r * ex + -kx*ky/r*ey;
+        });
+        UO::dissxi(tmp.0.view(), tmp.1.view_mut());
+
+        ndarray::azip!((b in &mut tmp.2,
+                        &kx in &grid.detj_deta_dx,
+                        &ky in &grid.detj_deta_dy,
+                        &ex in &y.ex(),
+                        &ey in &y.ey()) {
+            let r = f32::hypot(kx, ky);
+            *b = ky*ky/r * ex + -kx*ky/r*ey;
+        });
+        UO::disseta(tmp.2.view(), tmp.3.view_mut());
+
+        ndarray::azip!((flux in &mut k.ex_mut(), &ax in &tmp.1, &by in &tmp.3)
+            *flux += ax + by
+        );
+    }
+
+    // hz component
+    {
+        ndarray::azip!((a in &mut tmp.0,
+                        &kx in &grid.detj_dxi_dx,
+                        &ky in &grid.detj_dxi_dy,
+                        &hz in &y.hz()) {
+            let r = f32::hypot(kx, ky);
+            *a = r * hz;
+        });
+        UO::dissxi(tmp.0.view(), tmp.1.view_mut());
+
+        ndarray::azip!((b in &mut tmp.2,
+                        &kx in &grid.detj_deta_dx,
+                        &ky in &grid.detj_deta_dy,
+                        &hz in &y.hz()) {
+            let r = f32::hypot(kx, ky);
+            *b = r * hz;
+        });
+        UO::disseta(tmp.2.view(), tmp.3.view_mut());
+
+        ndarray::azip!((flux in &mut k.hz_mut(), &ax in &tmp.1, &by in &tmp.3)
+            *flux += ax + by
+        );
+    }
+
+    // ey component
+    {
+        ndarray::azip!((a in &mut tmp.0,
+                        &kx in &grid.detj_dxi_dx,
+                        &ky in &grid.detj_dxi_dy,
+                        &ex in &y.ex(),
+                        &ey in &y.ey()) {
+            let r = f32::hypot(kx, ky);
+            *a = -kx*ky/r * ex + kx*kx/r*ey;
+        });
+        UO::dissxi(tmp.0.view(), tmp.1.view_mut());
+
+        ndarray::azip!((b in &mut tmp.2,
+                        &kx in &grid.detj_deta_dx,
+                        &ky in &grid.detj_deta_dy,
+                        &ex in &y.ex(),
+                        &ey in &y.ey()) {
+            let r = f32::hypot(kx, ky);
+            *b = -kx*ky/r * ex + kx*kx/r*ey;
+        });
+        UO::disseta(tmp.2.view(), tmp.3.view_mut());
+
+        ndarray::azip!((flux in &mut k.ey_mut(), &ax in &tmp.1, &by in &tmp.3)
+            *flux += ax + by
+        );
+    }
+}
+
 #[derive(Clone, Debug)]
 pub enum Boundary {
     This,
diff --git a/src/operators.rs b/src/operators.rs
index 6cf45fa..0b734ab 100644
--- a/src/operators.rs
+++ b/src/operators.rs
@@ -6,5 +6,10 @@ pub trait SbpOperator {
     fn h() -> &'static [f32];
 }
 
+pub trait UpwindOperator: SbpOperator {
+    fn dissxi(prev: ArrayView2, fut: ArrayViewMut2);
+    fn disseta(prev: ArrayView2, fut: ArrayViewMut2);
+}
+
 mod upwind4;
 pub use upwind4::Upwind4;
diff --git a/src/operators/upwind4.rs b/src/operators/upwind4.rs
index bf286fe..3bea5f0 100644
--- a/src/operators/upwind4.rs
+++ b/src/operators/upwind4.rs
@@ -1,4 +1,4 @@
-use super::SbpOperator;
+use super::{SbpOperator, UpwindOperator};
 use ndarray::{arr1, arr2, s, ArrayView1, ArrayView2, ArrayViewMut1, ArrayViewMut2};
 
 /// Simdtype used in diffeta_simd
@@ -23,6 +23,55 @@ impl Upwind4 {
         [ 3.0 / 298.0, 16.0 / 149.0, -227.0 / 298.0, 0.0, 126.0 / 149.0, -36.0 / 149.0, 6.0 / 149.0],
     ];
 
+    const DISS_BLOCK: [[f32; 7]; 4] = [
+        [
+            -3.0 / 49.0,
+            9.0 / 49.0,
+            -9.0 / 49.0,
+            3.0 / 49.0,
+            0.0,
+            0.0,
+            0.0,
+        ],
+        [
+            3.0 / 61.0,
+            -11.0 / 61.0,
+            15.0 / 61.0,
+            -9.0 / 61.0,
+            2.0 / 61.0,
+            0.0,
+            0.0,
+        ],
+        [
+            -3.0 / 41.0,
+            15.0 / 41.0,
+            -29.0 / 41.0,
+            27.0 / 41.0,
+            -12.0 / 41.0,
+            2.0 / 41.0,
+            0.0,
+        ],
+        [
+            3.0 / 149.0,
+            -27.0 / 149.0,
+            81.0 / 149.0,
+            -117.0 / 149.0,
+            90.0 / 149.0,
+            -36.0 / 149.0,
+            6.0 / 149.0,
+        ],
+    ];
+
+    const DISS_DIAG: [f32; 7] = [
+        1.0 / 24.0,
+        -1.0 / 4.0,
+        5.0 / 8.0,
+        -5.0 / 6.0,
+        5.0 / 8.0,
+        -1.0 / 4.0,
+        1.0 / 24.0,
+    ];
+
     #[inline(never)]
     fn diff_simd(prev: &[f32], fut: &mut [f32]) {
         use packed_simd::{f32x8, u32x8};
@@ -235,6 +284,218 @@ impl Upwind4 {
             *f = diff * idx;
         }
     }
+    #[inline(never)]
+    fn diss_simd(prev: &[f32], fut: &mut [f32]) {
+        use packed_simd::{f32x8, u32x8};
+        assert_eq!(prev.len(), fut.len());
+        assert!(prev.len() >= 2 * Self::DISS_BLOCK.len());
+        let nx = prev.len();
+        let dx = 1.0 / (nx - 1) as f32;
+        let idx = 1.0 / dx;
+
+        let first_elems = unsafe { f32x8::from_slice_unaligned_unchecked(prev) };
+        let block = [
+            f32x8::new(
+                Self::DISS_BLOCK[0][0],
+                Self::DISS_BLOCK[0][1],
+                Self::DISS_BLOCK[0][2],
+                Self::DISS_BLOCK[0][3],
+                Self::DISS_BLOCK[0][4],
+                Self::DISS_BLOCK[0][5],
+                Self::DISS_BLOCK[0][6],
+                0.0,
+            ),
+            f32x8::new(
+                Self::DISS_BLOCK[1][0],
+                Self::DISS_BLOCK[1][1],
+                Self::DISS_BLOCK[1][2],
+                Self::DISS_BLOCK[1][3],
+                Self::DISS_BLOCK[1][4],
+                Self::DISS_BLOCK[1][5],
+                Self::DISS_BLOCK[1][6],
+                0.0,
+            ),
+            f32x8::new(
+                Self::DISS_BLOCK[2][0],
+                Self::DISS_BLOCK[2][1],
+                Self::DISS_BLOCK[2][2],
+                Self::DISS_BLOCK[2][3],
+                Self::DISS_BLOCK[2][4],
+                Self::DISS_BLOCK[2][5],
+                Self::DISS_BLOCK[2][6],
+                0.0,
+            ),
+            f32x8::new(
+                Self::DISS_BLOCK[3][0],
+                Self::DISS_BLOCK[3][1],
+                Self::DISS_BLOCK[3][2],
+                Self::DISS_BLOCK[3][3],
+                Self::DISS_BLOCK[3][4],
+                Self::DISS_BLOCK[3][5],
+                Self::DISS_BLOCK[3][6],
+                0.0,
+            ),
+        ];
+        unsafe {
+            *fut.get_unchecked_mut(0) = idx * (block[0] * first_elems).sum();
+            *fut.get_unchecked_mut(1) = idx * (block[1] * first_elems).sum();
+            *fut.get_unchecked_mut(2) = idx * (block[2] * first_elems).sum();
+            *fut.get_unchecked_mut(3) = idx * (block[3] * first_elems).sum()
+        };
+
+        let diag = f32x8::new(
+            Self::DISS_DIAG[0],
+            Self::DISS_DIAG[1],
+            Self::DISS_DIAG[2],
+            Self::DISS_DIAG[3],
+            Self::DISS_DIAG[4],
+            Self::DISS_DIAG[5],
+            Self::DISS_DIAG[6],
+            0.0,
+        );
+        for (f, p) in fut
+            .iter_mut()
+            .skip(block.len())
+            .zip(
+                prev.windows(f32x8::lanes())
+                    .map(f32x8::from_slice_unaligned)
+                    .skip(1),
+            )
+            .take(nx - 2 * block.len())
+        {
+            *f = idx * (p * diag).sum();
+        }
+
+        let last_elems = unsafe { f32x8::from_slice_unaligned_unchecked(&prev[nx - 8..]) }
+            .shuffle1_dyn(u32x8::new(7, 6, 5, 4, 3, 2, 1, 0));
+        unsafe {
+            *fut.get_unchecked_mut(nx - 4) = idx * (block[3] * last_elems).sum();
+            *fut.get_unchecked_mut(nx - 3) = idx * (block[2] * last_elems).sum();
+            *fut.get_unchecked_mut(nx - 2) = idx * (block[1] * last_elems).sum();
+            *fut.get_unchecked_mut(nx - 1) = idx * (block[0] * last_elems).sum();
+        }
+    }
+
+    #[inline(never)]
+    fn disseta_simd(prev: &[f32], fut: &mut [f32], nx: usize, ny: usize) {
+        assert!(ny >= 2 * Self::DISS_BLOCK.len());
+        assert!(nx >= SimdT::lanes());
+        assert!(nx % SimdT::lanes() == 0);
+        assert_eq!(prev.len(), fut.len());
+        assert_eq!(prev.len(), nx * ny);
+
+        let dy = 1.0 / (ny - 1) as f32;
+        let idy = 1.0 / dy;
+
+        for j in (0..nx).step_by(SimdT::lanes()) {
+            let a = [
+                SimdT::from_slice_unaligned(&prev[0 * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[1 * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[2 * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[3 * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[4 * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[5 * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[6 * nx + j..]),
+            ];
+
+            for (i, bl) in Self::DISS_BLOCK.iter().enumerate() {
+                let b = idy
+                    * (a[0] * bl[0]
+                        + a[1] * bl[1]
+                        + a[2] * bl[2]
+                        + a[3] * bl[3]
+                        + a[4] * bl[4]
+                        + a[5] * bl[5]
+                        + a[6] * bl[6]);
+                b.write_to_slice_unaligned(&mut fut[i * nx + j..]);
+            }
+
+            let mut a = a;
+            for i in Self::DISS_BLOCK.len()..ny - Self::DISS_BLOCK.len() {
+                // Push a onto circular buffer
+                a = [
+                    a[1],
+                    a[2],
+                    a[3],
+                    a[4],
+                    a[5],
+                    a[6],
+                    SimdT::from_slice_unaligned(&prev[nx * (i + 3) + j..]),
+                ];
+                let b = idy
+                    * (a[0] * Self::DISS_DIAG[0]
+                        + a[1] * Self::DISS_DIAG[1]
+                        + a[2] * Self::DISS_DIAG[2]
+                        + a[3] * Self::DISS_DIAG[3]
+                        + a[4] * Self::DISS_DIAG[4]
+                        + a[5] * Self::DISS_DIAG[5]
+                        + a[6] * Self::DISS_DIAG[6]);
+                b.write_to_slice_unaligned(&mut fut[nx * i + j..]);
+            }
+
+            let a = [
+                SimdT::from_slice_unaligned(&prev[(ny - 1) * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[(ny - 2) * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[(ny - 3) * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[(ny - 4) * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[(ny - 5) * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[(ny - 6) * nx + j..]),
+                SimdT::from_slice_unaligned(&prev[(ny - 7) * nx + j..]),
+            ];
+
+            for (i, bl) in Self::DISS_BLOCK.iter().enumerate() {
+                let b = idy
+                    * (a[0] * bl[0]
+                        + a[1] * bl[1]
+                        + a[2] * bl[2]
+                        + a[3] * bl[3]
+                        + a[4] * bl[4]
+                        + a[5] * bl[5]
+                        + a[6] * bl[6]);
+                b.write_to_slice_unaligned(&mut fut[(ny - 1 - i) * nx + j..]);
+            }
+        }
+    }
+
+    fn diss(prev: ArrayView1, mut fut: ArrayViewMut1) {
+        assert_eq!(prev.shape(), fut.shape());
+        let nx = prev.shape()[0];
+        assert!(nx >= 2 * Self::DISS_BLOCK.len());
+
+        if let (Some(p), Some(f)) = (prev.as_slice(), fut.as_slice_mut()) {
+            Self::diss_simd(p, f);
+            return;
+        }
+
+        let dx = 1.0 / (nx - 1) as f32;
+        let idx = 1.0 / dx;
+
+        let diag = arr1(&Self::DISS_DIAG);
+        let block = arr2(&Self::DISS_BLOCK);
+
+        let first_elems = prev.slice(s!(..7));
+        for (bl, f) in block.outer_iter().zip(&mut fut) {
+            let diff = first_elems.dot(&bl);
+            *f = diff * idx;
+        }
+
+        for (window, f) in prev
+            .windows(diag.len())
+            .into_iter()
+            .skip(1)
+            .zip(fut.iter_mut().skip(4))
+            .take(nx - 8)
+        {
+            let diff = diag.dot(&window);
+            *f = diff * idx;
+        }
+
+        let last_elems = prev.slice(s!(nx - 7..;-1));
+        for (bl, f) in block.outer_iter().zip(&mut fut.slice_mut(s![nx - 4..;-1])) {
+            let diff = bl.dot(&last_elems);
+            *f = diff * idx;
+        }
+    }
 }
 
 impl SbpOperator for Upwind4 {
@@ -353,3 +614,28 @@ fn upwind4_test() {
         approx::assert_abs_diff_eq!(&res.to_owned(), &target.to_owned(), epsilon = 1e-2);
     }
 }
+
+impl UpwindOperator for Upwind4 {
+    fn dissxi(prev: ArrayView2, mut fut: ArrayViewMut2) {
+        assert_eq!(prev.shape(), fut.shape());
+        assert!(prev.shape()[1] >= 2 * Self::DISS_BLOCK.len());
+        for (r0, r1) in prev.outer_iter().zip(fut.outer_iter_mut()) {
+            Self::diss(r0, r1)
+        }
+    }
+
+    fn disseta(prev: ArrayView2, mut fut: ArrayViewMut2) {
+        assert_eq!(prev.shape(), fut.shape());
+        assert!(prev.shape()[0] >= 2 * Self::DISS_BLOCK.len());
+        let nx = prev.shape()[1];
+        let ny = prev.shape()[0];
+        if nx >= SimdT::lanes() && nx % SimdT::lanes() == 0 {
+            if let (Some(p), Some(f)) = (prev.as_slice(), fut.as_slice_mut()) {
+                Self::disseta_simd(p, f, nx, ny);
+                return;
+            }
+        }
+        // disseta = transpose then use dissxi
+        Self::dissxi(prev.reversed_axes(), fut.reversed_axes());
+    }
+}