_mm512_mask_extloadunpackhi_ps
Classification
KNC, Load, CPUID Test: KNCNI
Header File
Instruction
VLOADUNPACKHPS zmm {k}, m512
Synopsis
__m512 _mm512_mask_extloadunpackhi_ps(__m512 src, __mmask16 k, void const * mt, _MM_UPCONV_PS_ENUM conv, int hint);
Description
Loads the high-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).
Operation
DEFINE UPCONVERT(addr, offset, conv) {
CASE conv OF
_MM_UPCONV_PS_NONE:
RETURN MEM[addr + 4*offset]
_MM_UPCONV_PS_FLOAT16:
RETURN Convert_FP16_To_FP32(MEM[addr + 2*offset])
_MM_UPCONV_PS_UINT8:
RETURN Convert_UInt8_To_FP32(MEM[addr + offset])
_MM_UPCONV_PS_SINT8:
RETURN Convert_Int8_To_FP32(MEM[addr + offset])
_MM_UPCONV_PS_UINT16:
RETURN Convert_UInt16_To_FP32(MEM[addr + 2*offset])
_MM_UPCONV_PS_SINT16:
RETURN Convert_Int16_To_FP32(MEM[addr + 2*offset])
ESAC
}
DEFINE UPCONVERTSIZE(conv) {
CASE conv OF
_MM_UPCONV_PS_NONE:
RETURN 4
_MM_UPCONV_PS_FLOAT16:
RETURN 2
_MM_UPCONV_PS_UINT8:
RETURN 1
_MM_UPCONV_PS_SINT8:
RETURN 1
_MM_UPCONV_PS_UINT16:
RETURN 2
_MM_UPCONV_PS_SINT16:
RETURN 2
ESAC
}
dst[511:0] := src[511:0]
loadOffset := 0
foundNext64BytesBoundary := false
upSize := UPCONVERTSIZE(conv)
addr := mt-64
FOR j := 0 to 15
IF k[j]
IF foundNext64BytesBoundary == false
IF ((addr + (loadOffset + 1)*upSize) % 64) == 0
foundNext64BytesBoundary := true
FI
ELSE
i := j*32
dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
FI
loadOffset := loadOffset + 1
FI
ENDFOR
dst[MAX:512] := 0