$(DDOC $(DDOC_BLANKLINE ) $(DDOC_BLANKLINE ) $(SPEC_S Vector Extensions, $(DDOC_BLANKLINE ) $(HEADERNAV_TOC $(HEADERNAV_SUBITEMS core_simd, $(D core.simd), $(HEADERNAV_ITEM properties, Properties) $(HEADERNAV_ITEM conversions, Conversions) $(HEADERNAV_ITEM accessing_individual_elems, Accessing Individual Vector Elements) $(HEADERNAV_ITEM conditional_compilation, Conditional Compilation) ) $(HEADERNAV_SUBITEMS x86_64_vec, X86 And X86$(UNDERSCORE )64 Vector Extension Implementation, $(HEADERNAV_ITEM vector_op_intrinsics, Vector Operation Intrinsics) ) ) $(DDOC_BLANKLINE ) $(P CPUs often support specialized vector types and vector operations (a.k.a. $(I media instructions)). Vector types are a fixed array of floating or integer types, and vector operations operate simultaneously on them.) $(DDOC_BLANKLINE ) $(P Specialized $(GLINK2 type, Vector) types provide access to them.) $(DDOC_BLANKLINE ) $(P The $(GLINK2 type, VectorBaseType) must be a $(DDSUBLINK spec/arrays, static-arrays, Static Array). The $(GNAME VectorElementType) is the unqualified element type of the static array. The dimension of the static array is the number of elements in the vector. ) $(DDOC_BLANKLINE ) $(IMPLEMENTATION_DEFINED Which vector types are supported depends on the target. The implementation is expected to only support the vector types and operations that are implemented in the target's hardware. ) $(DDOC_BLANKLINE ) $(RATIONALE Emulating unsupported vector types and operations can exhibit such poor performance that the user is likely better off selecting a different algorithm than relying on emulation.) $(DDOC_BLANKLINE ) $(BEST_PRACTICE Use the declarations in $(LINK2 $(ROOT_DIR )phobos/core_simd.html, $(D core.simd)) instead of the language $(GLINK2 type, Vector) grammar. ) $(DDOC_BLANKLINE )

$(LNAME2 core_simd, $(D core.simd))

$(DDOC_BLANKLINE ) $(P Vector types and operations are introduced by importing $(LINK2 $(ROOT_DIR )phobos/core_simd.html, $(D core.simd)):) $(DDOC_BLANKLINE ) $(D_CODE $(D_KEYWORD import) core.simd; ) $(DDOC_BLANKLINE ) $(IMPLEMENTATION_DEFINED These types and operations will be the ones defined for the architecture the compiler is targeting. If a particular CPU family has varying support for vector types, an additional runtime check may be necessary. The compiler does not emit runtime checks; those must be done by the programmer. ) $(DDOC_BLANKLINE ) $(IMPLEMENTATION_DEFINED Depending on the target architecture, compiler flags may be required to activate support for SIMD types. ) $(DDOC_BLANKLINE ) $(P The types defined will all follow the naming convention:) $(DDOC_BLANKLINE ) $(GRAMMAR_INFORMATIVE $(I typeNN) ) where $(I type) is the vector element type and $(I NN) is the number of those elements in the vector type. The type names will not be keywords. $(DDOC_BLANKLINE )

$(LNAME2 properties, Properties)

$(DDOC_BLANKLINE ) $(P Vector types have the property:) $(DDOC_BLANKLINE ) $(TABLE2 Vector Type Properties, Property, Description $(TROW .array, Returns static array representation) ) $(DDOC_BLANKLINE ) $(P Vectors support the following properties based on the vector element type. The value produced is that of a vector of the same type with each element set to the value corresponding to the property value for the element type.) $(DDOC_BLANKLINE ) $(TABLE2 Integral Vector Type Properties, Property, Description $(TROW .min, minimum value) $(TROW .max, maximum value) ) $(DDOC_BLANKLINE ) $(TABLE2 Floating Point Vector Type Properties, Property, Description $(TROW .epsilon, smallest increment to the value 1) $(TROW .infinity, infinity value) $(TROW .max, largest representable value that is not infinity) $(TROW .min_normal, smallest representable normalized value that is not 0) $(TROW .nan, NaN value) ) $(DDOC_BLANKLINE )

$(LNAME2 conversions, Conversions)

$(DDOC_BLANKLINE ) $(P Vector types of the same size (number_of_elements * size_of_element) can be implicitly converted among each other; this is done as a reinterpret cast (a type paint). Vector types can be cast to their $(GLINK2 type, VectorBaseType).) $(DDOC_BLANKLINE ) $(P Integers and floating point values can be implicitly converted to their vector equivalents:) $(DDOC_BLANKLINE ) $(D_CODE int4 v = 7; v = 3 + v; $(D_COMMENT // add 3 to each element in v )) $(DDOC_BLANKLINE )

$(LNAME2 accessing_individual_elems, Accessing Individual Vector Elements)

$(DDOC_BLANKLINE ) $(P Individual vector elements cannot be accessed directly, but can be when the vector is converted to an array type:) $(DDOC_BLANKLINE ) $(D_CODE int4 v; ($(D_KEYWORD cast)($(D_KEYWORD int)*)&v)[3] = 2; $(D_COMMENT // set 4th element of the 4 int vector )($(D_KEYWORD cast)($(D_KEYWORD int)[4])v)[3] = 2; $(D_COMMENT // set 4th element of the 4 int vector )v.array[3] = 2; $(D_COMMENT // set 4th element of the 4 int vector )v.ptr[3] = 2; $(D_COMMENT // set 4th element of the 4 int vector )) $(DDOC_BLANKLINE ) $(DDOC_BLANKLINE )

$(LNAME2 conditional_compilation, Conditional Compilation)

$(DDOC_BLANKLINE ) $(P If vector extensions are implemented, the $(DDSUBLINK spec/version, PredefinedVersions, version identifier) $(D D_SIMD) is set.) $(DDOC_BLANKLINE ) $(P Whether a type exists or not can be tested at compile time with an $(DDSUBLINK spec/expression, IsExpression, $(I IsExpression)): ) $(DDOC_BLANKLINE ) $(D_CODE $(D_KEYWORD static) $(D_KEYWORD if) ($(D_KEYWORD is)(typeNN)) ... yes, it $(D_KEYWORD is) supported ... $(D_KEYWORD else) ... nope, use workaround ... ) $(DDOC_BLANKLINE ) $(P Whether a particular operation on a type is supported can be tested at compile time with: ) $(DDOC_BLANKLINE ) $(D_CODE float4 a,b; $(D_KEYWORD static) $(D_KEYWORD if) ($(D_KEYWORD __traits)(compiles, a+b)) ... yes, add $(D_KEYWORD is) supported $(D_KEYWORD for) float4 ... $(D_KEYWORD else) ... nope, use workaround ... ) $(DDOC_BLANKLINE ) $(P For runtime testing to see if certain vector instructions are available, see the functions in $(LINK2 $(ROOT_DIR )phobos/core_cpuid.html, core.cpuid). ) $(DDOC_BLANKLINE ) $(P A typical workaround for unsupported vector operations would be to use array operations instead:) $(DDOC_BLANKLINE ) $(D_CODE float4 a,b; $(D_KEYWORD static) $(D_KEYWORD if) ($(D_KEYWORD __traits)(compiles, a/b)) c = a / b; $(D_KEYWORD else) c[] = a[] / b[]; ) $(DDOC_BLANKLINE )

$(LNAME2 x86_64_vec, X86 And X86$(UNDERSCORE )64 Vector Extension Implementation)

$(DDOC_BLANKLINE ) $(IMPLEMENTATION_DEFINED $(DDOC_BLANKLINE ) $(P The following describes the specific implementation of the vector types for the X86 and X86$(UNDERSCORE )64 architectures. ) $(DDOC_BLANKLINE ) $(P The vector extensions are currently implemented for the OS X 32 bit target, and all 64 bit targets.) $(DDOC_BLANKLINE ) $(P $(LINK2 $(ROOT_DIR )phobos/core_simd.html, $(D core.simd)) defines the following types: ) $(DDOC_BLANKLINE ) $(TABLE2 Vector Types, Type Name, Description, gcc Equivalent $(TROW void16, 16 bytes of untyped data, $(I no equivalent)) $(TROW byte16, 16 $(D byte)s, $(D signed char __attribute__((vector_size(16))))) $(TROW ubyte16, 16 $(D ubyte)s, $(D unsigned char __attribute__((vector_size(16))))) $(TROW short8, 8 $(D short)s, $(D short __attribute__((vector_size(16))))) $(TROW ushort8, 8 $(D ushort)s, $(D unsigned short __attribute__((vector_size(16))))) $(TROW int4, 4 $(D int)s, $(D int __attribute__((vector_size(16))))) $(TROW uint4, 4 $(D uint)s, $(D unsigned __attribute__((vector_size(16))))) $(TROW long2, 2 $(D long)s, $(D long __attribute__((vector_size(16))))) $(TROW ulong2, 2 $(D ulong)s, $(D unsigned long __attribute__((vector_size(16))))) $(TROW float4, 4 $(D float)s, $(D float __attribute__((vector_size(16))))) $(TROW double2, 2 $(D double)s, $(D double __attribute__((vector_size(16))))) $(TROW void32, 32 bytes of untyped data, $(I no equivalent)) $(TROW byte32, 32 $(D byte)s, $(D signed char __attribute__((vector_size(32))))) $(TROW ubyte32, 32 $(D ubyte)s, $(D unsigned char __attribute__((vector_size(32))))) $(TROW short16, 16 $(D short)s, $(D short __attribute__((vector_size(32))))) $(TROW ushort16, 16 $(D ushort)s, $(D unsigned short __attribute__((vector_size(32))))) $(TROW int8, 8 $(D int)s, $(D int __attribute__((vector_size(32))))) $(TROW uint8, 8 $(D uint)s, $(D unsigned __attribute__((vector_size(32))))) $(TROW long4, 4 $(D long)s, $(D long __attribute__((vector_size(32))))) $(TROW ulong4, 4 $(D ulong)s, $(D unsigned long 
__attribute__((vector_size(32))))) $(TROW float8, 8 $(D float)s, $(D float __attribute__((vector_size(32))))) $(TROW double4, 4 $(D double)s, $(D double __attribute__((vector_size(32))))) ) $(DDOC_BLANKLINE ) $(NOTE for 32 bit gcc and clang, it's $(D long long) instead of $(D long).) $(DDOC_BLANKLINE ) $(TABLE2 Supported 128-bit Vector Operators, Operator,void16,byte16,ubyte16,short8,ushort8,int4,uint4,long2,ulong2,float4,double2 $(TROW =,$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW +,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW -,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW *,$(NDASH ),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES )) $(TROW /,$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES )) $(TROW $(CODE_AMP ),$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW |,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW $(D ^),$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW +=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW -=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW *=,$(NDASH ),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES )) $(TROW /=,$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES )) $(TROW $(CODE_AMP )=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES 
),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW |=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW $(D ^=),$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW ==,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW !=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW <, $(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW <=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW >=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW >, $(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW $(I unary)$(D ~),$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW $(I unary)+,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW $(I unary)-,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) ) $(DDOC_BLANKLINE ) $(TABLE2 Supported 256-bit Vector Operators, Operator,void32,byte32,ubyte32,short16,ushort16,int8,uint8,long4,ulong4,float8,double4 $(TROW =,$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW +,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW -,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW *,$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH 
),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES )) $(TROW /,$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES )) $(TROW $(CODE_AMP ),$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW |,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW $(D ^),$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW +=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW -=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW *=,$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES )) $(TROW /=,$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(NDASH ),$(TIMES ),$(TIMES )) $(TROW $(CODE_AMP )=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW |=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW $(D ^=),$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW ==,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW !=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW <, $(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW <=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW >=,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES 
),$(TIMES )) $(TROW >, $(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW $(I unary)$(D ~),$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(NDASH ),$(NDASH )) $(TROW $(I unary)+,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) $(TROW $(I unary)-,$(NDASH ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES ),$(TIMES )) ) $(DDOC_BLANKLINE ) $(P Operators not listed are not supported at all.) $(DDOC_BLANKLINE ) ) $(DDOC_BLANKLINE )

$(LNAME2 vector_op_intrinsics, Vector Operation Intrinsics)

$(DDOC_BLANKLINE ) $(P See $(LINK2 $(ROOT_DIR )phobos/core_simd.html, $(D core.simd)) for the supported intrinsics.) $(SPEC_SUBNAV_PREV_NEXT abi, Application Binary Interface, betterc, Better C) ) )