_mm512_mask_extload_ps
Classification
KNC, Load, CPUID Test: KNCNI
Header File
#include <immintrin.h>
Instruction
VMOVAPS zmm {k}, m512
Synopsis
__m512 _mm512_mask_extload_ps(__m512 src, __mmask16 k, void const * mt, _MM_UPCONV_PS_ENUM conv, _MM_BROADCAST32_ENUM bc, int hint);
Description
Depending on "bc", loads 1, 4, or 16 elements of type and size determined by "conv" from memory address "mt" and converts all elements to single-precision (32-bit) floating-point elements. When fewer than 16 elements are loaded, they are broadcast (repeated) to fill all 16 element positions. The results are stored in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.
Operation
addr := MEM[mt]
FOR j := 0 to 15
i := j*32
IF k[j]
CASE bc OF
_MM_BROADCAST32_NONE:
CASE conv OF
_MM_UPCONV_PS_NONE:
n := j*32
dst[i+31:i] := addr[n+31:n]
_MM_UPCONV_PS_FLOAT16:
n := j*16
dst[i+31:i] := Convert_FP16_To_FP32(addr[n+15:n])
_MM_UPCONV_PS_UINT8:
n := j*8
dst[i+31:i] := Convert_UInt8_To_FP32(addr[n+7:n])
_MM_UPCONV_PS_SINT8:
n := j*8
dst[i+31:i] := Convert_Int8_To_FP32(addr[n+7:n])
_MM_UPCONV_PS_UINT16:
n := j*16
dst[i+31:i] := Convert_UInt16_To_FP32(addr[n+15:n])
_MM_UPCONV_PS_SINT16:
n := j*16
dst[i+31:i] := Convert_Int16_To_FP32(addr[n+15:n])
ESAC
_MM_BROADCAST_1X16:
CASE conv OF
_MM_UPCONV_PS_NONE:
n := j*32
dst[i+31:i] := addr[31:0]
_MM_UPCONV_PS_FLOAT16:
n := j*16
dst[i+31:i] := Convert_FP16_To_FP32(addr[15:0])
_MM_UPCONV_PS_UINT8:
n := j*8
dst[i+31:i] := Convert_UInt8_To_FP32(addr[7:0])
_MM_UPCONV_PS_SINT8:
n := j*8
dst[i+31:i] := Convert_Int8_To_FP32(addr[7:0])
_MM_UPCONV_PS_UINT16:
n := j*16
dst[i+31:i] := Convert_UInt16_To_FP32(addr[15:0])
_MM_UPCONV_PS_SINT16:
n := j*16
dst[i+31:i] := Convert_Int16_To_FP32(addr[15:0])
ESAC
_MM_BROADCAST_4X16:
mod := j%4
CASE conv OF
_MM_UPCONV_PS_NONE:
n := mod*32
dst[i+31:i] := addr[n+31:n]
_MM_UPCONV_PS_FLOAT16:
n := mod*16
dst[i+31:i] := Convert_FP16_To_FP32(addr[n+15:n])
_MM_UPCONV_PS_UINT8:
n := mod*8
dst[i+31:i] := Convert_UInt8_To_FP32(addr[n+7:n])
_MM_UPCONV_PS_SINT8:
n := mod*8
dst[i+31:i] := Convert_Int8_To_FP32(addr[n+7:n])
_MM_UPCONV_PS_UINT16:
n := mod*16
dst[i+31:i] := Convert_UInt16_To_FP32(addr[n+15:n])
_MM_UPCONV_PS_SINT16:
n := mod*16
dst[i+31:i] := Convert_Int16_To_FP32(addr[n+15:n])
ESAC
ESAC
ELSE
dst[i+31:i] := src[i+31:i]
FI
ENDFOR
dst[MAX:512] := 0