_mm512_mask_reduce_add_ps
Classification
AVX-512, Arithmetic, CPUID Test: AVX512F
Header File
Synopsis
float _mm512_mask_reduce_add_ps(__mmask16 k, __m512 a);
Description
Reduce the packed single-precision (32-bit) floating-point elements in "a" by addition using mask "k". Returns the sum of all active elements in "a".
Operation
DEFINE REDUCE_ADD(src, len) {
IF len == 2
RETURN src[31:0] + src[63:32]
FI
len := len / 2
FOR j:= 0 to (len-1)
i := j*32
src[i+31:i] := src[i+31:i] + src[i+32*len+31:i+32*len]
ENDFOR
RETURN REDUCE_ADD(src[32*len-1:0], len)
}
tmp := a
FOR j := 0 to 15
i := j*32
IF k[j]
tmp[i+31:i] := a[i+31:i]
ELSE
tmp[i+31:i] := 0
FI
ENDFOR
dst[31:0] := REDUCE_ADD(tmp, 16)