Document what compiler is doing for diffxi
This commit is contained in:
		@@ -123,10 +123,15 @@ impl<const M: usize, const P: usize> Matrix<Float, M, P> {
 | 
			
		||||
    ) {
 | 
			
		||||
        for i in 0..M {
 | 
			
		||||
            for j in 0..P {
 | 
			
		||||
                let mut t = 0.0;
 | 
			
		||||
                for k in 0..N {
 | 
			
		||||
                // Slightly cheaper to do first computation separately
 | 
			
		||||
                // rather than store zero and issue all ops as fma
 | 
			
		||||
                let mut t = if N == 0 {
 | 
			
		||||
                    0.0
 | 
			
		||||
                } else {
 | 
			
		||||
                    lhs[(i, 0)] * rhs[(0, j)]
 | 
			
		||||
                };
 | 
			
		||||
                for k in 1..N {
 | 
			
		||||
                    t = Float::mul_add(lhs[(i, k)], rhs[(k, j)], t);
 | 
			
		||||
                    // t = t + lhs[(i, k)] * rhs[(k, j)];
 | 
			
		||||
                }
 | 
			
		||||
                self[(i, j)] = t;
 | 
			
		||||
            }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user