_mm512_mask_extloadunpackhi_ps
Classification
KNC, Load, CPUID Test: KNCNI
Header File
immintrin.h
Instruction
VLOADUNPACKHPS zmm {k}, m512
Synopsis
__m512 _mm512_mask_extloadunpackhi_ps(__m512 src, __mmask16 k, void const * mt, _MM_UPCONV_PS_ENUM conv, int hint);
Description
Loads the high-64-byte-aligned portion of the byte/word/doubleword stream starting at element-aligned address mt-64, up-converted depending on the value of "conv", and expanded into packed single-precision (32-bit) floating-point elements in "dst". The initial values of "dst" are copied from "src". Only those converted doublewords that occur at or after the first 64-byte-aligned address following (mt-64) are loaded. Elements in the resulting vector that do not map to those doublewords are taken from "src". "hint" indicates to the processor whether the loaded data is non-temporal. Elements are copied to "dst" according to element selector "k" (elements are skipped when the corresponding mask bit is not set).
Operation
// UPCONVERT: reads element number "offset" of the packed source stream that
// begins at "addr" and returns it converted to a 32-bit floating-point value.
// The byte stride of the stream is the size of the source element type
// selected by "convertTo": 4 bytes for no conversion, 2 bytes for FP16 and the
// 16-bit integers, 1 byte for the 8-bit integers.
DEFINE UPCONVERT(addr, offset, convertTo) {
	CASE convertTo OF
	_MM_UPCONV_PS_NONE:
		RETURN MEM[addr + 4*offset]
	_MM_UPCONV_PS_FLOAT16:
		// FP16 source elements are 2 bytes wide, so the stride is 2, not 4.
		RETURN Convert_FP16_To_FP32(MEM[addr + 2*offset])
	_MM_UPCONV_PS_UINT8:
		RETURN Convert_UInt8_To_FP32(MEM[addr + offset])
	_MM_UPCONV_PS_SINT8:
		RETURN Convert_Int8_To_FP32(MEM[addr + offset])
	_MM_UPCONV_PS_UINT16:
		RETURN Convert_UInt16_To_FP32(MEM[addr + 2*offset])
	_MM_UPCONV_PS_SINT16:
		RETURN Convert_Int16_To_FP32(MEM[addr + 2*offset])
	ESAC
}
// UPCONVERTSIZE: returns the size in bytes of one source element in memory
// for the conversion selected by "convertTo". These values match the byte
// strides used by UPCONVERT.
DEFINE UPCONVERTSIZE(convertTo) {
	CASE convertTo OF
	_MM_UPCONV_PS_NONE:
		RETURN 4
	_MM_UPCONV_PS_FLOAT16:
		RETURN 2
	_MM_UPCONV_PS_UINT8:
		RETURN 1
	_MM_UPCONV_PS_SINT8:
		RETURN 1
	_MM_UPCONV_PS_UINT16:
		RETURN 2
	_MM_UPCONV_PS_SINT16:
		RETURN 2
	ESAC
}
// Start from "src": any lane that is masked off, or whose stream element lies
// before the 64-byte boundary, keeps its "src" value.
dst[511:0] := src[511:0]
// loadOffset counts stream elements consumed so far (one per set mask bit).
loadOffset := 0
foundNext64BytesBoundary := false
upSize := UPCONVERTSIZE(conv)
addr := mt-64
FOR j := 0 to 15
	IF k[j]
		IF foundNext64BytesBoundary == false
			// The stream element for this lane is skipped. Check whether the
			// NEXT element starts on a 64-byte boundary; the sum is
			// parenthesized so the modulo applies to the full address, not
			// only to the (loadOffset + 1)*upSize term.
			IF ((addr + (loadOffset + 1)*upSize) % 64) == 0
				foundNext64BytesBoundary := true
			FI
		ELSE
			// Past the boundary: expand the next stream element into lane j.
			i := j*32
			dst[i+31:i] := UPCONVERT(addr, loadOffset, conv)
		FI
		loadOffset := loadOffset + 1
	FI
ENDFOR
dst[MAX:512] := 0