_mm256_reduce_add_epi8(__m256i a);
// Sum `len` packed 8-bit elements of src by repeatedly folding the upper
// half of the elements onto the lower half, then recursing on the lower half.
DEFINE REDUCE_ADD(src, len) {
    IF len == 2
        // Base case: add the two remaining 8-bit elements.
        RETURN src[7:0] + src[15:8]
    FI
    len := len / 2
    FOR j := 0 to (len-1)
        i := j*8
        // Element j of the upper half is the 8-bit slice starting at bit
        // i+8*len; the slice must be 8 bits wide to match src[i+7:i].
        src[i+7:i] := src[i+7:i] + src[i+8*len+7:i+8*len]
    ENDFOR
    RETURN REDUCE_ADD(src[8*len-1:0], len)
}
// Reduce all 32 8-bit elements of a; the result is written to dst[7:0].
dst[7:0] := REDUCE_ADD(a, 32)