; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSE4A
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512

; Test codegen for under aligned nontemporal vector stores

; XMM versions.

; Store constant reconstructed from the CHECK immediates below:
; (%rdi) = 0x3FF0000000000000 (1.0), 8(%rdi) = 0x4000000000000000 (2.0).
define void @test_constant_v2f64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_constant_v2f64_align1:
; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $4611686018427387904, %rax # imm = 0x4000000000000000
; CHECK-NEXT: movntiq %rax, 8(%rdi)
; CHECK-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
; CHECK-NEXT: movntiq %rax, (%rdi)
; CHECK-NEXT: retq
  store <2 x double> <double 1.0, double 2.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v4f32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v4f32_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v4f32_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [2.0000004731118679E+0,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v4f32_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v4f32_align1:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v4f32_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movabsq $4647714816524288000, %rax # imm = 0x4080000040400000
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movabsq $4611686019492741120, %rax # imm = 0x400000003F800000
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: retq
  store <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v2i64_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v2i64_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movl $1, %eax
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v2i64_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [4.9406564584124654E-324,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: xorl %eax, %eax
; SSE4A-NEXT: movntiq %rax, (%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v2i64_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movl $1, %eax
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: xorl %eax, %eax
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v2i64_align1:
; AVX: # %bb.0:
; AVX-NEXT: movl $1, %eax
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v2i64_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movl $1, %eax
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: retq
  store <2 x i64> <i64 0, i64 1>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v4i32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v4i32_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movabsq $12884901890, %rax # imm = 0x300000002
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v4i32_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [2.1219957909652723E-314,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v4i32_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movabsq $12884901890, %rax # imm = 0x300000002
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v4i32_align1:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $12884901890, %rax # imm = 0x300000002
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v4i32_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movabsq $12884901890, %rax # imm = 0x300000002
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: retq
  store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v8i16_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v8i16_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movabsq $1970350607106052, %rax # imm = 0x7000600050004
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movabsq $844433520132096, %rax # imm = 0x3000200010000
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v8i16_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [4.1720559249406128E-309,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v8i16_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movabsq $1970350607106052, %rax # imm = 0x7000600050004
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movabsq $844433520132096, %rax # imm = 0x3000200010000
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v8i16_align1:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $1970350607106052, %rax # imm = 0x7000600050004
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movabsq $844433520132096, %rax # imm = 0x3000200010000
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v8i16_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movabsq $1970350607106052, %rax # imm = 0x7000600050004
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movabsq $844433520132096, %rax # imm = 0x3000200010000
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: retq
  store <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v16i8_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v16i8_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movabsq $506097522914230528, %rax # imm = 0x706050403020100
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v16i8_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [7.9499288951273625E-275,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v16i8_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movabsq $506097522914230528, %rax # imm = 0x706050403020100
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v16i8_align1:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movabsq $506097522914230528, %rax # imm = 0x706050403020100
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v16i8_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movabsq $1084818905618843912, %rax # imm = 0xF0E0D0C0B0A0908
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movabsq $506097522914230528, %rax # imm = 0x706050403020100
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: retq
  store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %dst, align 1, !nontemporal !1
  ret void
}

; YMM versions.

define void @test_constant_v4f64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_constant_v4f64_align1:
; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $-4616189618054758400, %rax # imm = 0xBFF0000000000000
; CHECK-NEXT: movntiq %rax, 8(%rdi)
; CHECK-NEXT: movabsq $-4611686018427387904, %rax # imm = 0xC000000000000000
; CHECK-NEXT: movntiq %rax, (%rdi)
; CHECK-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
; CHECK-NEXT: movntiq %rax, 24(%rdi)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movntiq %rax, 16(%rdi)
; CHECK-NEXT: retq
  store <4 x double> <double -2.0, double -1.0, double 0.0, double 1.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v8f32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v8f32_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; SSE2-NEXT: movntiq %rax, 24(%rdi)
; SSE2-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; SSE2-NEXT: movntiq %rax, 16(%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v8f32_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-5.1200036668777466E+2,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v8f32_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; SSE41-NEXT: movntiq %rax, 24(%rdi)
; SSE41-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; SSE41-NEXT: movntiq %rax, 16(%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v8f32_align1:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; AVX-NEXT: movntiq %rax, 24(%rdi)
; AVX-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; AVX-NEXT: movntiq %rax, 16(%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v8f32_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; AVX512-NEXT: movntiq %rax, 24(%rdi)
; AVX512-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; AVX512-NEXT: movntiq %rax, 16(%rdi)
; AVX512-NEXT: retq
  store <8 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v4i64_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v4i64_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movq $-1, %rax
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movq $-3, %rax
; SSE2-NEXT: movntiq %rax, 24(%rdi)
; SSE2-NEXT: movq $-2, %rax
; SSE2-NEXT: movntiq %rax, 16(%rdi)
; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v4i64_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: xorl %eax, %eax
; SSE4A-NEXT: movntiq %rax, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v4i64_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movq $-1, %rax
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movq $-3, %rax
; SSE41-NEXT: movntiq %rax, 24(%rdi)
; SSE41-NEXT: movq $-2, %rax
; SSE41-NEXT: movntiq %rax, 16(%rdi)
; SSE41-NEXT: xorl %eax, %eax
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v4i64_align1:
; AVX: # %bb.0:
; AVX-NEXT: movq $-1, %rax
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movq $-3, %rax
; AVX-NEXT: movntiq %rax, 24(%rdi)
; AVX-NEXT: movq $-2, %rax
; AVX-NEXT: movntiq %rax, 16(%rdi)
; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v4i64_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movq $-1, %rax
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movq $-3, %rax
; AVX512-NEXT: movntiq %rax, 24(%rdi)
; AVX512-NEXT: movq $-2, %rax
; AVX512-NEXT: movntiq %rax, 16(%rdi)
; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: retq
  store <4 x i64> <i64 0, i64 -1, i64 -2, i64 -3>, ptr %dst, align 1, !nontemporal !1
  ret void
}
define void @test_constant_v8i32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v8i32_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; SSE2-NEXT: movntiq %rax, 24(%rdi)
; SSE2-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; SSE2-NEXT: movntiq %rax, 16(%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v8i32_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v8i32_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; SSE41-NEXT: movntiq %rax, 24(%rdi)
; SSE41-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; SSE41-NEXT: movntiq %rax, 16(%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v8i32_align1:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; AVX-NEXT: movntiq %rax, 24(%rdi)
; AVX-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; AVX-NEXT: movntiq %rax, 16(%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v8i32_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA
; AVX512-NEXT: movntiq %rax, 24(%rdi)
; AVX512-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC
; AVX512-NEXT: movntiq %rax, 16(%rdi)
; AVX512-NEXT: retq
  store <8 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v16i16_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v16i16_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; SSE2-NEXT: movntiq %rax, 24(%rdi)
; SSE2-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; SSE2-NEXT: movntiq %rax, 16(%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v16i16_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v16i16_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; SSE41-NEXT: movntiq %rax, 24(%rdi)
; SSE41-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; SSE41-NEXT: movntiq %rax, 16(%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v16i16_align1:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; AVX-NEXT: movntiq %rax, 24(%rdi)
; AVX-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; AVX-NEXT: movntiq %rax, 16(%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v16i16_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4
; AVX512-NEXT: movntiq %rax, 24(%rdi)
; AVX512-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8
; AVX512-NEXT: movntiq %rax, 16(%rdi)
; AVX512-NEXT: retq
  store <16 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v32i8_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v32i8_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; SSE2-NEXT: movntiq %rax, 24(%rdi)
; SSE2-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; SSE2-NEXT: movntiq %rax, 16(%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v32i8_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-3.826728214441238E+279,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.6485712323024388E+202,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v32i8_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; SSE41-NEXT: movntiq %rax, 24(%rdi)
; SSE41-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; SSE41-NEXT: movntiq %rax, 16(%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v32i8_align1:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; AVX-NEXT: movntiq %rax, 24(%rdi)
; AVX-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; AVX-NEXT: movntiq %rax, 16(%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v32i8_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8
; AVX512-NEXT: movntiq %rax, 24(%rdi)
; AVX512-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0
; AVX512-NEXT: movntiq %rax, 16(%rdi)
; AVX512-NEXT: retq
  store <32 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v4f64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v4f64_align16:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; SSE-NEXT: movntps %xmm0, (%rdi)
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT: movntps %xmm0, 16(%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: test_constant_v4f64_align16:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; AVX-NEXT: vmovntps %xmm0, (%rdi)
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v4f64_align16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; AVX512-NEXT: vmovntps %xmm0, (%rdi)
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT: retq
; NOTE(review): elements 0-2 are pinned by the checks above ([-2.0,-1.0] and the
; xorps zero); element 3 is loaded from an unseen constant pool — assumed 1.0 to
; match the align-1 variant of this test. Confirm against the pristine test file.
  store <4 x double> <double -2.0, double -1.0, double 0.0, double 1.0>, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_constant_v8f32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8f32_align16:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; SSE-NEXT: movntps %xmm0, 16(%rdi)
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; SSE-NEXT: movntps %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: test_constant_v8f32_align16:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; AVX-NEXT: vmovntps %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v8f32_align16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; AVX512-NEXT: vmovntps %xmm0, (%rdi)
; AVX512-NEXT: retq
  store <8 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0>, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_constant_v4i64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v4i64_align16:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; SSE-NEXT: movntps %xmm0, 16(%rdi)
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; SSE-NEXT: movntps %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: test_constant_v4i64_align16:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; AVX-NEXT: vmovntps %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v4i64_align16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; AVX512-NEXT: vmovntps %xmm0, (%rdi)
; AVX512-NEXT: retq
  store <4 x i64> <i64 0, i64 -1, i64 -2, i64 -3>, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_constant_v8i32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8i32_align16:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; SSE-NEXT: movntps %xmm0, 16(%rdi)
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; SSE-NEXT: movntps %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX-LABEL: test_constant_v8i32_align16:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; AVX-NEXT: vmovntps %xmm0, 16(%rdi)
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; AVX-NEXT: vmovntps %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v8i32_align16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; AVX512-NEXT: vmovntps %xmm0, (%rdi)
; AVX512-NEXT: retq
AVX512-NEXT: retq store <8 x i32> , ptr %dst, align 16, !nontemporal !1 ret void } define void @test_constant_v16i16_align16(ptr %dst) nounwind { ; SSE-LABEL: test_constant_v16i16_align16: ; SSE: # %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521] ; SSE-NEXT: movntps %xmm0, 16(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529] ; SSE-NEXT: movntps %xmm0, (%rdi) ; SSE-NEXT: retq ; ; AVX-LABEL: test_constant_v16i16_align16: ; AVX: # %bb.0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521] ; AVX-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529] ; AVX-NEXT: vmovntps %xmm0, (%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: test_constant_v16i16_align16: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521] ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529] ; AVX512-NEXT: vmovntps %xmm0, (%rdi) ; AVX512-NEXT: retq store <16 x i16> , ptr %dst, align 16, !nontemporal !1 ret void } define void @test_constant_v32i8_align16(ptr %dst) nounwind { ; SSE-LABEL: test_constant_v32i8_align16: ; SSE: # %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225] ; SSE-NEXT: movntps %xmm0, 16(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241] ; SSE-NEXT: movntps %xmm0, (%rdi) ; SSE-NEXT: retq ; ; AVX-LABEL: test_constant_v32i8_align16: ; AVX: # %bb.0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225] ; AVX-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241] ; AVX-NEXT: vmovntps %xmm0, (%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: test_constant_v32i8_align16: ; AVX512: # 
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX512-NEXT: vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; AVX512-NEXT: vmovntps %xmm0, (%rdi)
; AVX512-NEXT: retq
  store <32 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31>, ptr %dst, align 16, !nontemporal !1
  ret void
}

; ZMM versions.

define void @test_constant_v8f64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_constant_v8f64_align1:
; CHECK: # %bb.0:
; CHECK-NEXT: movabsq $-4616189618054758400, %rax # imm = 0xBFF0000000000000
; CHECK-NEXT: movntiq %rax, 8(%rdi)
; CHECK-NEXT: movabsq $-4611686018427387904, %rax # imm = 0xC000000000000000
; CHECK-NEXT: movntiq %rax, (%rdi)
; CHECK-NEXT: movabsq $4607182418800017408, %rax # imm = 0x3FF0000000000000
; CHECK-NEXT: movntiq %rax, 24(%rdi)
; CHECK-NEXT: movabsq $4613937818241073152, %rax # imm = 0x4008000000000000
; CHECK-NEXT: movntiq %rax, 40(%rdi)
; CHECK-NEXT: movabsq $4611686018427387904, %rax # imm = 0x4000000000000000
; CHECK-NEXT: movntiq %rax, 32(%rdi)
; CHECK-NEXT: movabsq $4617315517961601024, %rax # imm = 0x4014000000000000
; CHECK-NEXT: movntiq %rax, 56(%rdi)
; CHECK-NEXT: movabsq $4616189618054758400, %rax # imm = 0x4010000000000000
; CHECK-NEXT: movntiq %rax, 48(%rdi)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movntiq %rax, 16(%rdi)
; CHECK-NEXT: retq
  store <8 x double> <double -2.0, double -1.0, double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v16f32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v16f32_align1:
; SSE2: # %bb.0:
; SSE2-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; SSE2-NEXT: movntiq %rax, 8(%rdi)
; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE2-NEXT: movntiq %rax, (%rdi)
; SSE2-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; SSE2-NEXT: movntiq %rax, 24(%rdi)
; SSE2-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; SSE2-NEXT: movntiq %rax, 16(%rdi)
; SSE2-NEXT: movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
; SSE2-NEXT: movntiq %rax, 40(%rdi)
; SSE2-NEXT: movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
; SSE2-NEXT: movntiq %rax, 32(%rdi)
; SSE2-NEXT: movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
; SSE2-NEXT: movntiq %rax, 56(%rdi)
; SSE2-NEXT: movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
; SSE2-NEXT: movntiq %rax, 48(%rdi)
; SSE2-NEXT: retq
;
; SSE4A-LABEL: test_constant_v16f32_align1:
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-5.1200036668777466E+2,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.3107209417724609E+5,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-2.0971535092773438E+6,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT: retq
;
; SSE41-LABEL: test_constant_v16f32_align1:
; SSE41: # %bb.0:
; SSE41-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; SSE41-NEXT: movntiq %rax, 8(%rdi)
; SSE41-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE41-NEXT: movntiq %rax, (%rdi)
; SSE41-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; SSE41-NEXT: movntiq %rax, 24(%rdi)
; SSE41-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; SSE41-NEXT: movntiq %rax, 16(%rdi)
; SSE41-NEXT: movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
; SSE41-NEXT: movntiq %rax, 40(%rdi)
; SSE41-NEXT: movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
; SSE41-NEXT: movntiq %rax, 32(%rdi)
; SSE41-NEXT: movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
; SSE41-NEXT: movntiq %rax, 56(%rdi)
; SSE41-NEXT: movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
; SSE41-NEXT: movntiq %rax, 48(%rdi)
; SSE41-NEXT: retq
;
; AVX-LABEL: test_constant_v16f32_align1:
; AVX: # %bb.0:
; AVX-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; AVX-NEXT: movntiq %rax, 8(%rdi)
; AVX-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; AVX-NEXT: movntiq %rax, (%rdi)
; AVX-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; AVX-NEXT: movntiq %rax, 24(%rdi)
; AVX-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; AVX-NEXT: movntiq %rax, 16(%rdi)
; AVX-NEXT: movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
; AVX-NEXT: movntiq %rax, 40(%rdi)
; AVX-NEXT: movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
; AVX-NEXT: movntiq %rax, 32(%rdi)
; AVX-NEXT: movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
; AVX-NEXT: movntiq %rax, 56(%rdi)
; AVX-NEXT: movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
; AVX-NEXT: movntiq %rax, 48(%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: test_constant_v16f32_align1:
; AVX512: # %bb.0:
; AVX512-NEXT: movabsq $-4611686015214551040, %rax # imm = 0xC0000000BF800000
; AVX512-NEXT: movntiq %rax, 8(%rdi)
; AVX512-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; AVX512-NEXT: movntiq %rax, (%rdi)
; AVX512-NEXT: movabsq $-4557642819667230720, %rax # imm = 0xC0C00000C0A00000
; AVX512-NEXT: movntiq %rax, 24(%rdi)
; AVX512-NEXT: movabsq $-4575657218183004160, %rax # imm = 0xC0800000C0400000
; AVX512-NEXT: movntiq %rax, 16(%rdi)
; AVX512-NEXT: movabsq $-4530621221895667712, %rax # imm = 0xC1200000C1100000
; AVX512-NEXT: movntiq %rax, 40(%rdi)
; AVX512-NEXT: movabsq $-4539628421153554432, %rax # imm = 0xC1000000C0E00000
0xC1000000C0E00000
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    movabsq $-4512606823381991424, %rax # imm = 0xC1600000C1500000
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movabsq $-4521614022638829568, %rax # imm = 0xC1400000C1300000
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    retq
; NOTE(review): the vector constant operands of the stores in this file were
; stripped by a text-mangling pass ("store <16 x float> , ptr ..." is invalid
; IR). They are reconstructed below from the CHECK immediates; confirm by
; rerunning utils/update_llc_test_checks.py on the restored file.
  store <16 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0, float -7.0, float -8.0, float -9.0, float -10.0, float -11.0, float -12.0, float -13.0, float -14.0>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v8i64_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v8i64_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $-1, %rax
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movq $-3, %rax
; SSE2-NEXT:    movntiq %rax, 24(%rdi)
; SSE2-NEXT:    movq $-2, %rax
; SSE2-NEXT:    movntiq %rax, 16(%rdi)
; SSE2-NEXT:    movq $-5, %rax
; SSE2-NEXT:    movntiq %rax, 40(%rdi)
; SSE2-NEXT:    movq $-4, %rax
; SSE2-NEXT:    movntiq %rax, 32(%rdi)
; SSE2-NEXT:    movq $-7, %rax
; SSE2-NEXT:    movntiq %rax, 56(%rdi)
; SSE2-NEXT:    movq $-6, %rax
; SSE2-NEXT:    movntiq %rax, 48(%rdi)
; SSE2-NEXT:    xorl %eax, %eax
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4A-LABEL: test_constant_v8i64_align1:
; SSE4A:       # %bb.0:
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT:    xorl %eax, %eax
; SSE4A-NEXT:    movntiq %rax, (%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT:    movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT:    movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT:    movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT:    retq
;
; SSE41-LABEL: test_constant_v8i64_align1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq $-1, %rax
; SSE41-NEXT:    movntiq %rax, 8(%rdi)
; SSE41-NEXT:    movq $-3, %rax
; SSE41-NEXT:    movntiq %rax, 24(%rdi)
; SSE41-NEXT:    movq $-2, %rax
; SSE41-NEXT:    movntiq %rax, 16(%rdi)
; SSE41-NEXT:    movq $-5, %rax
; SSE41-NEXT:    movntiq %rax, 40(%rdi)
; SSE41-NEXT:    movq $-4, %rax
; SSE41-NEXT:    movntiq %rax, 32(%rdi)
; SSE41-NEXT:    movq $-7, %rax
; SSE41-NEXT:    movntiq %rax, 56(%rdi)
; SSE41-NEXT:    movq $-6, %rax
; SSE41-NEXT:    movntiq %rax, 48(%rdi)
; SSE41-NEXT:    xorl %eax, %eax
; SSE41-NEXT:    movntiq %rax, (%rdi)
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_constant_v8i64_align1:
; AVX:       # %bb.0:
; AVX-NEXT:    movq $-1, %rax
; AVX-NEXT:    movntiq %rax, 8(%rdi)
; AVX-NEXT:    movq $-3, %rax
; AVX-NEXT:    movntiq %rax, 24(%rdi)
; AVX-NEXT:    movq $-2, %rax
; AVX-NEXT:    movntiq %rax, 16(%rdi)
; AVX-NEXT:    movq $-5, %rax
; AVX-NEXT:    movntiq %rax, 40(%rdi)
; AVX-NEXT:    movq $-4, %rax
; AVX-NEXT:    movntiq %rax, 32(%rdi)
; AVX-NEXT:    movq $-7, %rax
; AVX-NEXT:    movntiq %rax, 56(%rdi)
; AVX-NEXT:    movq $-6, %rax
; AVX-NEXT:    movntiq %rax, 48(%rdi)
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    movntiq %rax, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8i64_align1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq $-1, %rax
; AVX512-NEXT:    movntiq %rax, 8(%rdi)
; AVX512-NEXT:    movq $-3, %rax
; AVX512-NEXT:    movntiq %rax, 24(%rdi)
; AVX512-NEXT:    movq $-2, %rax
; AVX512-NEXT:    movntiq %rax, 16(%rdi)
; AVX512-NEXT:    movq $-5, %rax
; AVX512-NEXT:    movntiq %rax, 40(%rdi)
; AVX512-NEXT:    movq $-4, %rax
; AVX512-NEXT:    movntiq %rax, 32(%rdi)
; AVX512-NEXT:    movq $-7, %rax
; AVX512-NEXT:    movntiq %rax, 56(%rdi)
; AVX512-NEXT:    movq $-6, %rax
; AVX512-NEXT:    movntiq %rax, 48(%rdi)
; AVX512-NEXT:    xorl %eax, %eax
; AVX512-NEXT:    movntiq %rax, (%rdi)
; AVX512-NEXT:    retq
  store <8 x i64> <i64 0, i64 -1, i64 -2, i64 -3, i64 -4, i64 -5, i64 -6, i64 -7>, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_constant_v16i32_align1(ptr %dst) nounwind {
; SSE2-LABEL: test_constant_v16i32_align1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE
; SSE2-NEXT:    movntiq %rax, 8(%rdi)
; SSE2-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE2-NEXT:    movntiq %rax, (%rdi)
; SSE2-NEXT:    movabsq
$-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA ; SSE2-NEXT: movntiq %rax, 24(%rdi) ; SSE2-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC ; SSE2-NEXT: movntiq %rax, 16(%rdi) ; SSE2-NEXT: movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6 ; SSE2-NEXT: movntiq %rax, 40(%rdi) ; SSE2-NEXT: movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8 ; SSE2-NEXT: movntiq %rax, 32(%rdi) ; SSE2-NEXT: movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2 ; SSE2-NEXT: movntiq %rax, 56(%rdi) ; SSE2-NEXT: movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4 ; SSE2-NEXT: movntiq %rax, 48(%rdi) ; SSE2-NEXT: retq ; ; SSE4A-LABEL: test_constant_v16i32_align1: ; SSE4A: # %bb.0: ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, (%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 40(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, 32(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 56(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, 48(%rdi) ; SSE4A-NEXT: retq ; ; SSE41-LABEL: test_constant_v16i32_align1: ; SSE41: # %bb.0: ; SSE41-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE ; SSE41-NEXT: movntiq %rax, 8(%rdi) ; SSE41-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 ; SSE41-NEXT: movntiq %rax, (%rdi) ; SSE41-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA ; SSE41-NEXT: movntiq %rax, 24(%rdi) ; SSE41-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC ; SSE41-NEXT: movntiq %rax, 16(%rdi) ; SSE41-NEXT: movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6 ; SSE41-NEXT: movntiq %rax, 40(%rdi) ; SSE41-NEXT: 
movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8 ; SSE41-NEXT: movntiq %rax, 32(%rdi) ; SSE41-NEXT: movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2 ; SSE41-NEXT: movntiq %rax, 56(%rdi) ; SSE41-NEXT: movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4 ; SSE41-NEXT: movntiq %rax, 48(%rdi) ; SSE41-NEXT: retq ; ; AVX-LABEL: test_constant_v16i32_align1: ; AVX: # %bb.0: ; AVX-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE ; AVX-NEXT: movntiq %rax, 8(%rdi) ; AVX-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 ; AVX-NEXT: movntiq %rax, (%rdi) ; AVX-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA ; AVX-NEXT: movntiq %rax, 24(%rdi) ; AVX-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC ; AVX-NEXT: movntiq %rax, 16(%rdi) ; AVX-NEXT: movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6 ; AVX-NEXT: movntiq %rax, 40(%rdi) ; AVX-NEXT: movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8 ; AVX-NEXT: movntiq %rax, 32(%rdi) ; AVX-NEXT: movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2 ; AVX-NEXT: movntiq %rax, 56(%rdi) ; AVX-NEXT: movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4 ; AVX-NEXT: movntiq %rax, 48(%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: test_constant_v16i32_align1: ; AVX512: # %bb.0: ; AVX512-NEXT: movabsq $-8589934594, %rax # imm = 0xFFFFFFFDFFFFFFFE ; AVX512-NEXT: movntiq %rax, 8(%rdi) ; AVX512-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 ; AVX512-NEXT: movntiq %rax, (%rdi) ; AVX512-NEXT: movabsq $-25769803782, %rax # imm = 0xFFFFFFF9FFFFFFFA ; AVX512-NEXT: movntiq %rax, 24(%rdi) ; AVX512-NEXT: movabsq $-17179869188, %rax # imm = 0xFFFFFFFBFFFFFFFC ; AVX512-NEXT: movntiq %rax, 16(%rdi) ; AVX512-NEXT: movabsq $-42949672970, %rax # imm = 0xFFFFFFF5FFFFFFF6 ; AVX512-NEXT: movntiq %rax, 40(%rdi) ; AVX512-NEXT: movabsq $-34359738376, %rax # imm = 0xFFFFFFF7FFFFFFF8 ; AVX512-NEXT: movntiq %rax, 32(%rdi) ; AVX512-NEXT: movabsq $-60129542158, %rax # imm = 0xFFFFFFF1FFFFFFF2 ; 
AVX512-NEXT: movntiq %rax, 56(%rdi) ; AVX512-NEXT: movabsq $-51539607564, %rax # imm = 0xFFFFFFF3FFFFFFF4 ; AVX512-NEXT: movntiq %rax, 48(%rdi) ; AVX512-NEXT: retq store <16 x i32> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v32i16_align1(ptr %dst) nounwind { ; SSE2-LABEL: test_constant_v32i16_align1: ; SSE2: # %bb.0: ; SSE2-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC ; SSE2-NEXT: movntiq %rax, 8(%rdi) ; SSE2-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 ; SSE2-NEXT: movntiq %rax, (%rdi) ; SSE2-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 ; SSE2-NEXT: movntiq %rax, 24(%rdi) ; SSE2-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 ; SSE2-NEXT: movntiq %rax, 16(%rdi) ; SSE2-NEXT: movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC ; SSE2-NEXT: movntiq %rax, 40(%rdi) ; SSE2-NEXT: movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0 ; SSE2-NEXT: movntiq %rax, 32(%rdi) ; SSE2-NEXT: movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4 ; SSE2-NEXT: movntiq %rax, 56(%rdi) ; SSE2-NEXT: movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8 ; SSE2-NEXT: movntiq %rax, 48(%rdi) ; SSE2-NEXT: retq ; ; SSE4A-LABEL: test_constant_v32i16_align1: ; SSE4A: # %bb.0: ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, (%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 40(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.6853227412070812E+308,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, 32(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 56(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.2358925997317751E+308,0.0E+0] ; 
SSE4A-NEXT: movntsd %xmm0, 48(%rdi) ; SSE4A-NEXT: retq ; ; SSE41-LABEL: test_constant_v32i16_align1: ; SSE41: # %bb.0: ; SSE41-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC ; SSE41-NEXT: movntiq %rax, 8(%rdi) ; SSE41-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 ; SSE41-NEXT: movntiq %rax, (%rdi) ; SSE41-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 ; SSE41-NEXT: movntiq %rax, 24(%rdi) ; SSE41-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 ; SSE41-NEXT: movntiq %rax, 16(%rdi) ; SSE41-NEXT: movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC ; SSE41-NEXT: movntiq %rax, 40(%rdi) ; SSE41-NEXT: movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0 ; SSE41-NEXT: movntiq %rax, 32(%rdi) ; SSE41-NEXT: movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4 ; SSE41-NEXT: movntiq %rax, 56(%rdi) ; SSE41-NEXT: movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8 ; SSE41-NEXT: movntiq %rax, 48(%rdi) ; SSE41-NEXT: retq ; ; AVX-LABEL: test_constant_v32i16_align1: ; AVX: # %bb.0: ; AVX-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC ; AVX-NEXT: movntiq %rax, 8(%rdi) ; AVX-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 ; AVX-NEXT: movntiq %rax, (%rdi) ; AVX-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 ; AVX-NEXT: movntiq %rax, 24(%rdi) ; AVX-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 ; AVX-NEXT: movntiq %rax, 16(%rdi) ; AVX-NEXT: movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC ; AVX-NEXT: movntiq %rax, 40(%rdi) ; AVX-NEXT: movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0 ; AVX-NEXT: movntiq %rax, 32(%rdi) ; AVX-NEXT: movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4 ; AVX-NEXT: movntiq %rax, 56(%rdi) ; AVX-NEXT: movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8 ; AVX-NEXT: movntiq %rax, 48(%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: test_constant_v32i16_align1: ; 
AVX512: # %bb.0: ; AVX512-NEXT: movabsq $-1688871335362564, %rax # imm = 0xFFF9FFFAFFFBFFFC ; AVX512-NEXT: movntiq %rax, 8(%rdi) ; AVX512-NEXT: movabsq $-562954248454144, %rax # imm = 0xFFFDFFFEFFFF0000 ; AVX512-NEXT: movntiq %rax, (%rdi) ; AVX512-NEXT: movabsq $-3940705509310476, %rax # imm = 0xFFF1FFF2FFF3FFF4 ; AVX512-NEXT: movntiq %rax, 24(%rdi) ; AVX512-NEXT: movabsq $-2814788422336520, %rax # imm = 0xFFF5FFF6FFF7FFF8 ; AVX512-NEXT: movntiq %rax, 16(%rdi) ; AVX512-NEXT: movabsq $-6192539683258388, %rax # imm = 0xFFE9FFEAFFEBFFEC ; AVX512-NEXT: movntiq %rax, 40(%rdi) ; AVX512-NEXT: movabsq $-5066622596284432, %rax # imm = 0xFFEDFFEEFFEFFFF0 ; AVX512-NEXT: movntiq %rax, 32(%rdi) ; AVX512-NEXT: movabsq $-8444373857206300, %rax # imm = 0xFFE1FFE2FFE3FFE4 ; AVX512-NEXT: movntiq %rax, 56(%rdi) ; AVX512-NEXT: movabsq $-7318456770232344, %rax # imm = 0xFFE5FFE6FFE7FFE8 ; AVX512-NEXT: movntiq %rax, 48(%rdi) ; AVX512-NEXT: retq store <32 x i16> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v64i8_align1(ptr %dst) nounwind { ; SSE2-LABEL: test_constant_v64i8_align1: ; SSE2: # %bb.0: ; SSE2-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 ; SSE2-NEXT: movntiq %rax, 8(%rdi) ; SSE2-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 ; SSE2-NEXT: movntiq %rax, (%rdi) ; SSE2-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 ; SSE2-NEXT: movntiq %rax, 24(%rdi) ; SSE2-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 ; SSE2-NEXT: movntiq %rax, 16(%rdi) ; SSE2-NEXT: movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8 ; SSE2-NEXT: movntiq %rax, 40(%rdi) ; SSE2-NEXT: movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0 ; SSE2-NEXT: movntiq %rax, 32(%rdi) ; SSE2-NEXT: movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8 ; SSE2-NEXT: movntiq %rax, 56(%rdi) ; SSE2-NEXT: movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0 ; SSE2-NEXT: movntiq 
%rax, 48(%rdi) ; SSE2-NEXT: retq ; ; SSE4A-LABEL: test_constant_v64i8_align1: ; SSE4A: # %bb.0: ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 8(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-3.826728214441238E+279,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, (%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 24(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.6485712323024388E+202,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, 16(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 40(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-7.1020783099933495E+124,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, 32(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE4A-NEXT: movntsd %xmm0, 56(%rdi) ; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-3.0595730451167367E+47,0.0E+0] ; SSE4A-NEXT: movntsd %xmm0, 48(%rdi) ; SSE4A-NEXT: retq ; ; SSE41-LABEL: test_constant_v64i8_align1: ; SSE41: # %bb.0: ; SSE41-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 ; SSE41-NEXT: movntiq %rax, 8(%rdi) ; SSE41-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 ; SSE41-NEXT: movntiq %rax, (%rdi) ; SSE41-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 ; SSE41-NEXT: movntiq %rax, 24(%rdi) ; SSE41-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 ; SSE41-NEXT: movntiq %rax, 16(%rdi) ; SSE41-NEXT: movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8 ; SSE41-NEXT: movntiq %rax, 40(%rdi) ; SSE41-NEXT: movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0 ; SSE41-NEXT: movntiq %rax, 32(%rdi) ; SSE41-NEXT: movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8 ; SSE41-NEXT: movntiq %rax, 56(%rdi) ; SSE41-NEXT: movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0 ; SSE41-NEXT: movntiq %rax, 48(%rdi) ; SSE41-NEXT: retq ; ; AVX-LABEL: test_constant_v64i8_align1: ; AVX: # %bb.0: ; AVX-NEXT: movabsq $-1012478732780767240, %rax # imm = 
0xF1F2F3F4F5F6F7F8 ; AVX-NEXT: movntiq %rax, 8(%rdi) ; AVX-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 ; AVX-NEXT: movntiq %rax, (%rdi) ; AVX-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 ; AVX-NEXT: movntiq %rax, 24(%rdi) ; AVX-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 ; AVX-NEXT: movntiq %rax, 16(%rdi) ; AVX-NEXT: movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8 ; AVX-NEXT: movntiq %rax, 40(%rdi) ; AVX-NEXT: movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0 ; AVX-NEXT: movntiq %rax, 32(%rdi) ; AVX-NEXT: movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8 ; AVX-NEXT: movntiq %rax, 56(%rdi) ; AVX-NEXT: movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0 ; AVX-NEXT: movntiq %rax, 48(%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: test_constant_v64i8_align1: ; AVX512: # %bb.0: ; AVX512-NEXT: movabsq $-1012478732780767240, %rax # imm = 0xF1F2F3F4F5F6F7F8 ; AVX512-NEXT: movntiq %rax, 8(%rdi) ; AVX512-NEXT: movabsq $-433757350076154112, %rax # imm = 0xF9FAFBFCFDFEFF00 ; AVX512-NEXT: movntiq %rax, (%rdi) ; AVX512-NEXT: movabsq $-2169921498189994008, %rax # imm = 0xE1E2E3E4E5E6E7E8 ; AVX512-NEXT: movntiq %rax, 24(%rdi) ; AVX512-NEXT: movabsq $-1591200115485380624, %rax # imm = 0xE9EAEBECEDEEEFF0 ; AVX512-NEXT: movntiq %rax, 16(%rdi) ; AVX512-NEXT: movabsq $-3327364263599220776, %rax # imm = 0xD1D2D3D4D5D6D7D8 ; AVX512-NEXT: movntiq %rax, 40(%rdi) ; AVX512-NEXT: movabsq $-2748642880894607392, %rax # imm = 0xD9DADBDCDDDEDFE0 ; AVX512-NEXT: movntiq %rax, 32(%rdi) ; AVX512-NEXT: movabsq $-4484807029008447544, %rax # imm = 0xC1C2C3C4C5C6C7C8 ; AVX512-NEXT: movntiq %rax, 56(%rdi) ; AVX512-NEXT: movabsq $-3906085646303834160, %rax # imm = 0xC9CACBCCCDCECFD0 ; AVX512-NEXT: movntiq %rax, 48(%rdi) ; AVX512-NEXT: retq store <64 x i8> , ptr %dst, align 1, !nontemporal !1 ret void } define void @test_constant_v8f64_align16(ptr %dst) nounwind { ; SSE-LABEL: 
test_constant_v8f64_align16: ; SSE: # %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0] ; SSE-NEXT: movntps %xmm0, (%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0] ; SSE-NEXT: movntps %xmm0, 48(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0] ; SSE-NEXT: movntps %xmm0, 32(%rdi) ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE-NEXT: movntps %xmm0, 16(%rdi) ; SSE-NEXT: retq ; ; AVX-LABEL: test_constant_v8f64_align16: ; AVX: # %bb.0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0] ; AVX-NEXT: vmovntps %xmm0, (%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0] ; AVX-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0] ; AVX-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: test_constant_v8f64_align16: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0] ; AVX512-NEXT: vmovntps %xmm0, (%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0] ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0] ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX512-NEXT: retq store <8 x double> , ptr %dst, align 16, !nontemporal !1 ret void } define void @test_constant_v16f32_align16(ptr %dst) nounwind { ; SSE-LABEL: test_constant_v16f32_align16: ; SSE: # %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0] ; SSE-NEXT: movntps %xmm0, 16(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0] ; SSE-NEXT: movntps %xmm0, (%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1] ; SSE-NEXT: movntps %xmm0, 48(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = 
[-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1] ; SSE-NEXT: movntps %xmm0, 32(%rdi) ; SSE-NEXT: retq ; ; AVX-LABEL: test_constant_v16f32_align16: ; AVX: # %bb.0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0] ; AVX-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0] ; AVX-NEXT: vmovntps %xmm0, (%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1] ; AVX-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1] ; AVX-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: test_constant_v16f32_align16: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0] ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0] ; AVX512-NEXT: vmovntps %xmm0, (%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1] ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1] ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX512-NEXT: retq store <16 x float> , ptr %dst, align 16, !nontemporal !1 ret void } define void @test_constant_v8i64_align16(ptr %dst) nounwind { ; SSE-LABEL: test_constant_v8i64_align16: ; SSE: # %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613] ; SSE-NEXT: movntps %xmm0, 16(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255] ; SSE-NEXT: movntps %xmm0, (%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609] ; SSE-NEXT: movntps %xmm0, 48(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611] ; SSE-NEXT: movntps %xmm0, 32(%rdi) ; SSE-NEXT: retq ; ; AVX-LABEL: test_constant_v8i64_align16: ; AVX: # %bb.0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613] ; AVX-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX-NEXT: vmovaps 
{{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255] ; AVX-NEXT: vmovntps %xmm0, (%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609] ; AVX-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611] ; AVX-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: test_constant_v8i64_align16: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613] ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255] ; AVX512-NEXT: vmovntps %xmm0, (%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609] ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611] ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX512-NEXT: retq store <8 x i64> , ptr %dst, align 16, !nontemporal !1 ret void } define void @test_constant_v16i32_align16(ptr %dst) nounwind { ; SSE-LABEL: test_constant_v16i32_align16: ; SSE: # %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289] ; SSE-NEXT: movntps %xmm0, 16(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293] ; SSE-NEXT: movntps %xmm0, (%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281] ; SSE-NEXT: movntps %xmm0, 48(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285] ; SSE-NEXT: movntps %xmm0, 32(%rdi) ; SSE-NEXT: retq ; ; AVX-LABEL: test_constant_v16i32_align16: ; AVX: # %bb.0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289] ; AVX-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293] ; AVX-NEXT: vmovntps %xmm0, (%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281] ; AVX-NEXT: vmovntps %xmm0, 
48(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285] ; AVX-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: test_constant_v16i32_align16: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289] ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293] ; AVX512-NEXT: vmovntps %xmm0, (%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281] ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285] ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX512-NEXT: retq store <16 x i32> , ptr %dst, align 16, !nontemporal !1 ret void } define void @test_constant_v32i16_align16(ptr %dst) nounwind { ; SSE-LABEL: test_constant_v32i16_align16: ; SSE: # %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521] ; SSE-NEXT: movntps %xmm0, 16(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529] ; SSE-NEXT: movntps %xmm0, (%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505] ; SSE-NEXT: movntps %xmm0, 48(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513] ; SSE-NEXT: movntps %xmm0, 32(%rdi) ; SSE-NEXT: retq ; ; AVX-LABEL: test_constant_v32i16_align16: ; AVX: # %bb.0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521] ; AVX-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529] ; AVX-NEXT: vmovntps %xmm0, (%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505] ; AVX-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513] ; AVX-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: 
test_constant_v32i16_align16: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521] ; AVX512-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529] ; AVX512-NEXT: vmovntps %xmm0, (%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505] ; AVX512-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513] ; AVX512-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX512-NEXT: retq store <32 x i16> , ptr %dst, align 16, !nontemporal !1 ret void } define void @test_constant_v64i8_align16(ptr %dst) nounwind { ; SSE-LABEL: test_constant_v64i8_align16: ; SSE: # %bb.0: ; SSE-NEXT: movaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225] ; SSE-NEXT: movntps %xmm0, 16(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241] ; SSE-NEXT: movntps %xmm0, (%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193] ; SSE-NEXT: movntps %xmm0, 48(%rdi) ; SSE-NEXT: movaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209] ; SSE-NEXT: movntps %xmm0, 32(%rdi) ; SSE-NEXT: retq ; ; AVX-LABEL: test_constant_v64i8_align16: ; AVX: # %bb.0: ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225] ; AVX-NEXT: vmovntps %xmm0, 16(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241] ; AVX-NEXT: vmovntps %xmm0, (%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193] ; AVX-NEXT: vmovntps %xmm0, 48(%rdi) ; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209] ; AVX-NEXT: vmovntps %xmm0, 32(%rdi) ; AVX-NEXT: retq ; ; AVX512-LABEL: 
test_constant_v64i8_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
; NOTE(review): store constants below reconstructed from the CHECK immediates
; (the originals were stripped by text mangling); confirm by rerunning
; utils/update_llc_test_checks.py.
  store <64 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31, i8 -32, i8 -33, i8 -34, i8 -35, i8 -36, i8 -37, i8 -38, i8 -39, i8 -40, i8 -41, i8 -42, i8 -43, i8 -44, i8 -45, i8 -46, i8 -47, i8 -48, i8 -49, i8 -50, i8 -51, i8 -52, i8 -53, i8 -54, i8 -55, i8 -56, i8 -57, i8 -58, i8 -59, i8 -60, i8 -61, i8 -62, i8 -63>, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_constant_v8f64_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8f64_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4.0E+0,5.0E+0]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2.0E+0,3.0E+0]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-2.0E+0,-1.0E+0]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v8f64_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [2.0E+0,3.0E+0,4.0E+0,5.0E+0]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [-2.0E+0,-1.0E+0,0.0E+0,1.0E+0]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8f64_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [2.0E+0,3.0E+0,4.0E+0,5.0E+0]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [-2.0E+0,-1.0E+0,0.0E+0,1.0E+0]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <8 x double> <double -2.0, double -1.0, double 0.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0>, ptr %dst, align 32, !nontemporal !1
  ret void
}

define void @test_constant_v16f32_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v16f32_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v16f32_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1,-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0,-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16f32_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [-7.0E+0,-8.0E+0,-9.0E+0,-1.0E+1,-1.1E+1,-1.2E+1,-1.3E+1,-1.4E+1]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0.0E+0,-0.0E+0,-1.0E+0,-2.0E+0,-3.0E+0,-4.0E+0,-5.0E+0,-6.0E+0]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <16 x float> <float 0.0, float -0.0, float -1.0, float -2.0, float -3.0, float -4.0, float -5.0, float -6.0, float -7.0, float -8.0, float -9.0, float -10.0, float -11.0, float -12.0, float -13.0, float -14.0>, ptr %dst, align 32, !nontemporal !1
  ret void
}

define void @test_constant_v8i64_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v8i64_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551610,18446744073709551609]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551612,18446744073709551611]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551614,18446744073709551613]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v8i64_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551612,18446744073709551611,18446744073709551610,18446744073709551609]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551614,18446744073709551613]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v8i64_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551612,18446744073709551611,18446744073709551610,18446744073709551609]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551614,18446744073709551613]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <8 x i64> <i64 0, i64 -1, i64 -2, i64 -3, i64 -4, i64 -5, i64 -6, i64 -7>, ptr %dst, align 32, !nontemporal !1
  ret void
}

define void @test_constant_v16i32_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v16i32_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967284,4294967283,4294967282,4294967281]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967288,4294967287,4294967286,4294967285]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967292,4294967291,4294967290,4294967289]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,4294967295,4294967294,4294967293]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v16i32_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967288,4294967287,4294967286,4294967285,4294967284,4294967283,4294967282,4294967281]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,4294967294,4294967293,4294967292,4294967291,4294967290,4294967289]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v16i32_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967288,4294967287,4294967286,4294967285,4294967284,4294967283,4294967282,4294967281]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,4294967295,4294967294,4294967293,4294967292,4294967291,4294967290,4294967289]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <16 x i32> <i32 0, i32 -1, i32 -2, i32 -3, i32 -4, i32 -5, i32 -6, i32 -7, i32 -8, i32 -9, i32 -10, i32 -11, i32 -12, i32 -13, i32 -14, i32 -15>, ptr %dst, align 32, !nontemporal !1
  ret void
}

define void @test_constant_v32i16_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v32i16_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65512,65511,65510,65509,65508,65507,65506,65505]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65520,65519,65518,65517,65516,65515,65514,65513]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65528,65527,65526,65525,65524,65523,65522,65521]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,65535,65534,65533,65532,65531,65530,65529]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v32i16_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65520,65519,65518,65517,65516,65515,65514,65513,65512,65511,65510,65509,65508,65507,65506,65505]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,65535,65534,65533,65532,65531,65530,65529,65528,65527,65526,65525,65524,65523,65522,65521]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v32i16_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [65520,65519,65518,65517,65516,65515,65514,65513,65512,65511,65510,65509,65508,65507,65506,65505]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,65535,65534,65533,65532,65531,65530,65529,65528,65527,65526,65525,65524,65523,65522,65521]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <32 x i16> <i16 0, i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 -8, i16 -9, i16 -10, i16 -11, i16 -12, i16 -13, i16 -14, i16 -15, i16 -16, i16 -17, i16 -18, i16 -19, i16 -20, i16 -21, i16 -22, i16 -23, i16 -24, i16 -25, i16 -26, i16 -27, i16 -28, i16 -29, i16 -30, i16 -31>, ptr %dst, align 32, !nontemporal !1
  ret void
}

define void @test_constant_v64i8_align32(ptr %dst) nounwind {
; SSE-LABEL: test_constant_v64i8_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209]
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_constant_v64i8_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209,208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241,240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_constant_v64i8_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [224,223,222,221,220,219,218,217,216,215,214,213,212,211,210,209,208,207,206,205,204,203,202,201,200,199,198,197,196,195,194,193]
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241,240,239,238,237,236,235,234,233,232,231,230,229,228,227,226,225]
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <64 x i8> <i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15, i8 -16, i8 -17, i8 -18, i8 -19, i8 -20, i8 -21, i8 -22, i8 -23, i8 -24, i8 -25, i8 -26, i8 -27, i8 -28, i8 -29, i8 -30, i8 -31, i8 -32, i8 -33, i8 -34, i8 -35, i8 -36, i8 -37, i8 -38, i8 -39, i8 -40, i8 -41, i8 -42, i8 -43, i8 -44, i8 -45, i8 -46, i8 -47, i8 -48, i8 -49, i8 -50, i8 -51, i8 -52, i8 -53, i8 -54, i8 -55, i8 -56, i8 -57, i8 -58, i8 -59, i8 -60, i8 -61, i8 -62, i8 -63>, ptr %dst, align 32, !nontemporal !1
  ret void
}

!1 = !{i32 1}