_mm256_mask_unpacklo_epi8
Classification
AVX-512, Miscellaneous, CPUID Test: AVX512BW + AVX512VL
Header File
immintrin.h
Instruction
VPUNPCKLBW ymm {k}, ymm, ymm
Synopsis
__m256i _mm256_mask_unpacklo_epi8(__m256i src, __mmask32 k, __m256i a, __m256i b);
Description
Unpack and interleave 8-bit integers from the low half of each 128-bit lane in "a" and "b", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation
DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
	dst[7:0] := src1[7:0] 
	dst[15:8] := src2[7:0] 
	dst[23:16] := src1[15:8] 
	dst[31:24] := src2[15:8] 
	dst[39:32] := src1[23:16] 
	dst[47:40] := src2[23:16] 
	dst[55:48] := src1[31:24] 
	dst[63:56] := src2[31:24] 
	dst[71:64] := src1[39:32]
	dst[79:72] := src2[39:32] 
	dst[87:80] := src1[47:40] 
	dst[95:88] := src2[47:40] 
	dst[103:96] := src1[55:48] 
	dst[111:104] := src2[55:48] 
	dst[119:112] := src1[63:56] 
	dst[127:120] := src2[63:56] 
	RETURN dst[127:0]
}
tmp_dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
tmp_dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128])
FOR j := 0 to 31
	i := j*8
	IF k[j]
		dst[i+7:i] := tmp_dst[i+7:i]
	ELSE
		dst[i+7:i] := src[i+7:i]
	FI
ENDFOR
dst[MAX:256] := 0