_mm_mask_roundscale_round_sh
Classification
AVX-512, Miscellaneous, CPUID Test: AVX512_FP16
Header File
immintrin.h
Instruction
VRNDSCALESH xmm {k}, xmm, xmm {sae}, imm8
Synopsis
__m128h _mm_mask_roundscale_round_sh(__m128h src, __mmask8 k, __m128h a, __m128h b, int imm8, const int sae);
Description
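Round the lower half-precision (16-bit) floating-point element in "b" to the number of fraction bits specified by "imm8[7:4]", store the result in the lower element of "dst" using writemask "k" (the element is copied from "src" when mask bit 0 is not set), and copy the upper 7 packed elements from "a" to the upper elements of "dst". Rounding is done according to the "imm8[2:0]" parameter, which can be one of: _MM_FROUND_TO_NEAREST_INT (round to nearest), _MM_FROUND_TO_NEG_INF (round down), _MM_FROUND_TO_POS_INF (round up), _MM_FROUND_TO_ZERO (truncate), or _MM_FROUND_CUR_DIRECTION (use MXCSR.RC). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the "sae" parameter.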
Operation
DEFINE RoundScaleFP16(src.fp16, imm8[7:0]) {
	m.fp16 := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
	tmp.fp16 := POW(FP16(2.0), -m) * ROUND(POW(FP16(2.0), m) * src.fp16, imm8[3:0])
	RETURN tmp.fp16
}
IF k[0]
	dst.fp16[0] := RoundScaleFP16(b.fp16[0], imm8)
ELSE
	dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0