_mm512_mask_extload_ps
Classification
KNC, Load, CPUID Test: KNCNI
Header File
immintrin.h
Instruction
VMOVAPS zmm {k}, m512
Synopsis
__m512 _mm512_mask_extload_ps(__m512 src, __mmask16 k, void const * mt, _MM_UPCONV_PS_ENUM conv, _MM_BROADCAST32_ENUM bc, int hint);
Description
Depending on "bc", loads 1, 4, or 16 elements of type and size determined by "conv" from memory address "mt", converts them to single-precision (32-bit) floating-point elements, and broadcasts them as needed to fill all 16 lanes (a single element is replicated to every lane; a group of 4 elements is repeated 4 times). The results are stored in "dst" using writemask "k" (elements are copied from "src" when the corresponding mask bit is not set). "hint" indicates to the processor whether the data is non-temporal.
Operation
addr := MEM[mt]
FOR j := 0 to 15
	i := j*32
	IF k[j]
		CASE bc OF
		_MM_BROADCAST32_NONE:
			CASE conv OF
			_MM_UPCONV_PS_NONE:
				n	 := j*32
				dst[i+31:i] := addr[n+31:n]
			_MM_UPCONV_PS_FLOAT16:
				n	 := j*16
				dst[i+31:i] := Convert_FP16_To_FP32(addr[n+15:n])
			_MM_UPCONV_PS_UINT8:
				n	 := j*8
				dst[i+31:i] := Convert_UInt8_To_FP32(addr[n+7:n])
			_MM_UPCONV_PS_SINT8:
				n	 := j*8
				dst[i+31:i] := Convert_Int8_To_FP32(addr[n+7:n])
			_MM_UPCONV_PS_UINT16:
				n	 := j*16
				dst[i+31:i] := Convert_UInt16_To_FP32(addr[n+15:n])
			_MM_UPCONV_PS_SINT16:
				n	 := j*16
				dst[i+31:i] := Convert_Int16_To_FP32(addr[n+15:n])
			ESAC
		_MM_BROADCAST_1X16:
			CASE conv OF
			_MM_UPCONV_PS_NONE:
				dst[i+31:i] := addr[31:0]
			_MM_UPCONV_PS_FLOAT16:
				dst[i+31:i] := Convert_FP16_To_FP32(addr[15:0])
			_MM_UPCONV_PS_UINT8:
				dst[i+31:i] := Convert_UInt8_To_FP32(addr[7:0])
			_MM_UPCONV_PS_SINT8:
				dst[i+31:i] := Convert_Int8_To_FP32(addr[7:0])
			_MM_UPCONV_PS_UINT16:
				dst[i+31:i] := Convert_UInt16_To_FP32(addr[15:0])
			_MM_UPCONV_PS_SINT16:
				dst[i+31:i] := Convert_Int16_To_FP32(addr[15:0])
			ESAC
		_MM_BROADCAST_4X16:
			mod := j%4
			CASE conv OF
			_MM_UPCONV_PS_NONE:
				n := mod*32
				dst[i+31:i] := addr[n+31:n]
			_MM_UPCONV_PS_FLOAT16:
				n := mod*16
				dst[i+31:i] := Convert_FP16_To_FP32(addr[n+15:n])
			_MM_UPCONV_PS_UINT8:
				n := mod*8
				dst[i+31:i] := Convert_UInt8_To_FP32(addr[n+7:n])
			_MM_UPCONV_PS_SINT8:
				n := mod*8
				dst[i+31:i] := Convert_Int8_To_FP32(addr[n+7:n])
			_MM_UPCONV_PS_UINT16:
				n := mod*16
				dst[i+31:i] := Convert_UInt16_To_FP32(addr[n+15:n])
			_MM_UPCONV_PS_SINT16:
				n := mod*16
				dst[i+31:i] := Convert_Int16_To_FP32(addr[n+15:n])
			ESAC
		ESAC
	ELSE
		dst[i+31:i] := src[i+31:i]
	FI
ENDFOR
dst[MAX:512] := 0