Use assignment (`=`) instead of add-assignment (`+=`) when writing results in diffx/diffy
This commit is contained in:
parent
9bba3531cd
commit
e0f3ed3fd8
|
@ -79,21 +79,19 @@ impl System {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// hz = -ey_x + ex_y
|
||||||
|
let tmp = &mut k[i].0;
|
||||||
|
SBP::diffx(y.2.view(), tmp.view_mut());
|
||||||
|
SBP::diffy(y.0.view(), k[i].1.view_mut());
|
||||||
|
k[i].1.scaled_add(-1.0, tmp);
|
||||||
|
|
||||||
// ex = hz_y
|
// ex = hz_y
|
||||||
k[i].0.fill(0.0);
|
|
||||||
SBP::diffy(y.1.view(), k[i].0.view_mut());
|
SBP::diffy(y.1.view(), k[i].0.view_mut());
|
||||||
|
|
||||||
// ey = -hz_x
|
// ey = -hz_x
|
||||||
k[i].2.fill(0.0);
|
|
||||||
SBP::diffx(y.1.view(), k[i].2.view_mut());
|
SBP::diffx(y.1.view(), k[i].2.view_mut());
|
||||||
k[i].2.mapv_inplace(|v| -v);
|
k[i].2.mapv_inplace(|v| -v);
|
||||||
|
|
||||||
// hz = -ey_x + ex_y
|
|
||||||
k[i].1.fill(0.0);
|
|
||||||
SBP::diffx(y.2.view(), k[i].1.view_mut());
|
|
||||||
k[i].1.mapv_inplace(|v| -v);
|
|
||||||
SBP::diffy(y.0.view(), k[i].1.view_mut());
|
|
||||||
|
|
||||||
// Boundary conditions (SAT)
|
// Boundary conditions (SAT)
|
||||||
let ny = y.0.shape()[0];
|
let ny = y.0.shape()[0];
|
||||||
let nx = y.0.shape()[1];
|
let nx = y.0.shape()[1];
|
||||||
|
|
|
@ -105,10 +105,10 @@ impl Upwind4 {
|
||||||
),
|
),
|
||||||
];
|
];
|
||||||
unsafe {
|
unsafe {
|
||||||
*fut.get_unchecked_mut(0) += idx * (block[0] * first_elems).sum();
|
*fut.get_unchecked_mut(0) = idx * (block[0] * first_elems).sum();
|
||||||
*fut.get_unchecked_mut(1) += idx * (block[1] * first_elems).sum();
|
*fut.get_unchecked_mut(1) = idx * (block[1] * first_elems).sum();
|
||||||
*fut.get_unchecked_mut(2) += idx * (block[2] * first_elems).sum();
|
*fut.get_unchecked_mut(2) = idx * (block[2] * first_elems).sum();
|
||||||
*fut.get_unchecked_mut(3) += idx * (block[3] * first_elems).sum()
|
*fut.get_unchecked_mut(3) = idx * (block[3] * first_elems).sum()
|
||||||
};
|
};
|
||||||
|
|
||||||
let diag = f32x8::new(
|
let diag = f32x8::new(
|
||||||
|
@ -131,16 +131,16 @@ impl Upwind4 {
|
||||||
)
|
)
|
||||||
.take(nx - 2 * block.len())
|
.take(nx - 2 * block.len())
|
||||||
{
|
{
|
||||||
*f += idx * (p * diag).sum();
|
*f = idx * (p * diag).sum();
|
||||||
}
|
}
|
||||||
|
|
||||||
let last_elems = unsafe { f32x8::from_slice_unaligned_unchecked(&prev[nx - 8..]) }
|
let last_elems = unsafe { f32x8::from_slice_unaligned_unchecked(&prev[nx - 8..]) }
|
||||||
.shuffle1_dyn(u32x8::new(7, 6, 5, 4, 3, 2, 1, 0));
|
.shuffle1_dyn(u32x8::new(7, 6, 5, 4, 3, 2, 1, 0));
|
||||||
unsafe {
|
unsafe {
|
||||||
*fut.get_unchecked_mut(nx - 4) += -idx * (block[3] * last_elems).sum();
|
*fut.get_unchecked_mut(nx - 4) = -idx * (block[3] * last_elems).sum();
|
||||||
*fut.get_unchecked_mut(nx - 3) += -idx * (block[2] * last_elems).sum();
|
*fut.get_unchecked_mut(nx - 3) = -idx * (block[2] * last_elems).sum();
|
||||||
*fut.get_unchecked_mut(nx - 2) += -idx * (block[1] * last_elems).sum();
|
*fut.get_unchecked_mut(nx - 2) = -idx * (block[1] * last_elems).sum();
|
||||||
*fut.get_unchecked_mut(nx - 1) += -idx * (block[0] * last_elems).sum();
|
*fut.get_unchecked_mut(nx - 1) = -idx * (block[0] * last_elems).sum();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -167,8 +167,7 @@ impl Upwind4 {
|
||||||
];
|
];
|
||||||
|
|
||||||
for (i, bl) in Self::BLOCK.iter().enumerate() {
|
for (i, bl) in Self::BLOCK.iter().enumerate() {
|
||||||
let mut b = f32x4::from_slice_unaligned(&fut[i * nx + j..]);
|
let b = idy
|
||||||
b += idy
|
|
||||||
* (a[0] * bl[0]
|
* (a[0] * bl[0]
|
||||||
+ a[1] * bl[1]
|
+ a[1] * bl[1]
|
||||||
+ a[2] * bl[2]
|
+ a[2] * bl[2]
|
||||||
|
@ -191,8 +190,7 @@ impl Upwind4 {
|
||||||
a[6],
|
a[6],
|
||||||
f32x4::from_slice_unaligned(&prev[nx * (i + 3) + j..]),
|
f32x4::from_slice_unaligned(&prev[nx * (i + 3) + j..]),
|
||||||
];
|
];
|
||||||
let mut b = f32x4::from_slice_unaligned(&fut[nx * i + j..]);
|
let b = idy
|
||||||
b += idy
|
|
||||||
* (a[0] * Self::DIAG[0]
|
* (a[0] * Self::DIAG[0]
|
||||||
+ a[1] * Self::DIAG[1]
|
+ a[1] * Self::DIAG[1]
|
||||||
+ a[2] * Self::DIAG[2]
|
+ a[2] * Self::DIAG[2]
|
||||||
|
@ -214,8 +212,7 @@ impl Upwind4 {
|
||||||
];
|
];
|
||||||
|
|
||||||
for (i, bl) in Self::BLOCK.iter().enumerate() {
|
for (i, bl) in Self::BLOCK.iter().enumerate() {
|
||||||
let mut b = f32x4::from_slice_unaligned(&fut[(ny - 1 - i) * nx + j..]);
|
let b = -idy
|
||||||
b += -idy
|
|
||||||
* (a[0] * bl[0]
|
* (a[0] * bl[0]
|
||||||
+ a[1] * bl[1]
|
+ a[1] * bl[1]
|
||||||
+ a[2] * bl[2]
|
+ a[2] * bl[2]
|
||||||
|
@ -246,7 +243,7 @@ impl Upwind4 {
|
||||||
let first_elems = prev.slice(s!(..7));
|
let first_elems = prev.slice(s!(..7));
|
||||||
for (bl, f) in block.outer_iter().zip(&mut fut) {
|
for (bl, f) in block.outer_iter().zip(&mut fut) {
|
||||||
let diff = first_elems.dot(&bl);
|
let diff = first_elems.dot(&bl);
|
||||||
*f += diff * idx;
|
*f = diff * idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (window, f) in prev
|
for (window, f) in prev
|
||||||
|
@ -257,13 +254,13 @@ impl Upwind4 {
|
||||||
.take(nx - 8)
|
.take(nx - 8)
|
||||||
{
|
{
|
||||||
let diff = diag.dot(&window);
|
let diff = diag.dot(&window);
|
||||||
*f += diff * idx;
|
*f = diff * idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
let last_elems = prev.slice(s!(nx - 7..;-1));
|
let last_elems = prev.slice(s!(nx - 7..;-1));
|
||||||
for (bl, f) in block.outer_iter().zip(&mut fut.slice_mut(s![nx - 4..;-1])) {
|
for (bl, f) in block.outer_iter().zip(&mut fut.slice_mut(s![nx - 4..;-1])) {
|
||||||
let diff = -bl.dot(&last_elems);
|
let diff = -bl.dot(&last_elems);
|
||||||
*f += diff * idx;
|
*f = diff * idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue