_mm256_mask_permutex_pd
Classification
AVX-512, Miscellaneous, CPUID Test: AVX512F
Header File
immintrin.h
Instruction
VPERMPD ymm {k}, ymm, imm8
Synopsis
 _mm256_mask_permutex_pd(__m256d src, __mmask8 k, __m256d a, int imm8);
Description
Shuffle double-precision (64-bit) floating-point elements in "a" across lanes using the control in "imm8", and store the results in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set).
Operation
DEFINE SELECT4(src, control) {
	CASE(control[1:0]) OF
	0:	tmp[63:0] := src[63:0]
	1:	tmp[63:0] := src[127:64]
	2:	tmp[63:0] := src[191:128]
	3:	tmp[63:0] := src[255:192]
	ESAC
	RETURN tmp[63:0]
}
tmp_dst[63:0] := SELECT4(a[255:0], imm8[1:0])
tmp_dst[127:64] := SELECT4(a[255:0], imm8[3:2])
tmp_dst[191:128] := SELECT4(a[255:0], imm8[5:4])
tmp_dst[255:192] := SELECT4(a[255:0], imm8[7:6])
FOR j := 0 to 3
	i := j*64
	IF k[j]
		dst[i+63:i] := tmp_dst[i+63:i]
	ELSE
		dst[i+63:i] := src[i+63:i]
	FI
ENDFOR
dst[MAX:256] := 0