_mm512_mask_permutex_pd
Classification
AVX-512, Swizzle, CPUID Test: AVX512F
Header File
#include <immintrin.h>
Instruction
VPERMPD zmm {k}, zmm, imm8
Synopsis
__m512d _mm512_mask_permutex_pd(__m512d src, __mmask8 k, __m512d a, const int imm8);
Description
Shuffle double-precision (64-bit) floating-point elements in "a" within 256-bit lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation
DEFINE SELECT4(src, control) {
CASE(control[1:0]) OF
0: tmp[63:0] := src[63:0]
1: tmp[63:0] := src[127:64]
2: tmp[63:0] := src[191:128]
3: tmp[63:0] := src[255:192]
ESAC
RETURN tmp[63:0]
}
tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
tmp_dst[319:256] := SELECT4(a[511:256], imm8[1:0])
tmp_dst[383:320] := SELECT4(a[511:256], imm8[3:2])
tmp_dst[447:384] := SELECT4(a[511:256], imm8[5:4])
tmp_dst[511:448] := SELECT4(a[511:256], imm8[7:6])
FOR j := 0 to 7
i := j*64
IF k[j]
dst[i+63:i] := tmp_dst[i+63:i]
ELSE
dst[i+63:i] := src[i+63:i]
FI
ENDFOR
dst[MAX:512] := 0