; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK-NO_FP16
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512fp16 | FileCheck %s --check-prefixes=CHECK-WITH_FP16

; Note: More configurations could be checked, but anything that relies on
; software emulation of fp16 generates a ton of assembly code and is not
; particularly interesting.

;----------------------------------------
; i8 input
;----------------------------------------

; Scalar uint8_t to float.
; - Widen i8 to i32 with a zero extension (movzbl).
; - Convert the i32 to float (vcvtsi2ss).
; Both configurations are expected to emit the same sequence.
define float @uint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %f32 = uitofp i8 %int8 to float
  ret float %f32
}

; vector uint8_t to float.
; Same as @uint8ToFloat but with vector types.
; Expected lowering for <16 x i8> -> <16 x float> in both configurations:
; - vpmovzxbd zero-extends the 16 bytes into 32-bit lanes.
; - vcvtdq2ps converts the 32-bit lanes to float.
define <16 x float> @vector_uint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
  %v16f32 = uitofp <16 x i8> %int8 to <16 x float>
  ret <16 x float> %v16f32
}

; uint8_t to half.
;
; If no half support:
; - Go from i8 to i32: zext (movzbl)
; - Convert i32 to float (vcvtsi2ss)
; - Trunc from float to half (vcvtps2ph); the expected code then moves the
;   result through %eax back into the low word of %xmm0 (vmovd + vpinsrw)
;
; Else if half support:
; - Go from i8 to i32: zext (movzbl)
; - Convert i32 to half (vcvtsi2sh)
define half @uint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %f16 = uitofp i8 %int8 to half
  ret half %f16
}

; vector uint8_t to half.
;
; If no half support:
; - Go from i8 to i32: zext
; - Convert i32 to float
; - Trunc from float to half
;
; Else if half support:
; - Go from i8 to i16: zext
; - Convert i16 to half
;
; The difference with the scalar version (uint8ToHalf) is that we use i16
; for the intermediate type when we have half support.
; Expected instructions for <16 x i8> -> <16 x half>:
; - without fp16: vpmovzxbd, vcvtdq2ps, vcvtps2ph.
; - with fp16:    vpmovzxbw, vcvtw2ph.
define <16 x half> @vector_uint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
  %v16f16 = uitofp <16 x i8> %int8 to <16 x half>
  ret <16 x half> %v16f16
}

; Same as uint8_t but with the signed variant.
; I.e., use sext instead of zext.
; Scalar int8_t to float: movsbl (sext to i32), then vcvtsi2ss.
define float @sint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movsbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %f32 = sitofp i8 %int8 to float
  ret float %f32
}

; Vector int8_t to float: vpmovsxbd (sext to i32 lanes), then vcvtdq2ps.
define <16 x float> @vector_sint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
  %v16f32 = sitofp <16 x i8> %int8 to <16 x float>
  ret <16 x float> %v16f32
}

; Scalar int8_t to half.
; - without fp16: movsbl, vcvtsi2ss, vcvtps2ph, then vmovd + vpinsrw.
; - with fp16:    movsbl, vcvtsi2sh.
define half @sint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movsbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %f16 = sitofp i8 %int8 to half
  ret half %f16
}

; Vector int8_t to half.
; - without fp16: vpmovsxbd, vcvtdq2ps, vcvtps2ph.
; - with fp16:    vpmovsxbw, vcvtw2ph.
define <16 x half> @vector_sint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
  %v16f16 = sitofp <16 x i8> %int8 to <16 x half>
  ret <16 x half> %v16f16
}

;----------------------------------------
; i16 input
;----------------------------------------

; Similar lowering as i8, but with i16 as the input type.

; Scalar uint16_t to float: movzwl (zext to i32), then vcvtsi2ss.
define float @uint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzwl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzwl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %f32 = uitofp i16 %int16 to float
  ret float %f32
}

; Vector uint16_t to float: vpmovzxwd (zext to i32 lanes), then vcvtdq2ps.
define <16 x float> @vector_uint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
  %v16f32 = uitofp <16 x i16> %int16 to <16 x float>
  ret <16 x float> %v16f32
}

; Scalar uint16_t to half.
; - without fp16: movzwl, vcvtsi2ss, vcvtps2ph, then vmovd + vpinsrw.
; - with fp16:    movzwl, vcvtsi2sh.
define half @uint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzwl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzwl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %f16 = uitofp i16 %int16 to half
  ret half %f16
}

; Vector uint16_t to half.
; - without fp16: vpmovzxwd, vcvtdq2ps, vcvtps2ph.
; - with fp16:    a single vcvtuw2ph, no widening step.
define <16 x half> @vector_uint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vcvtuw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
  %v16f16 = uitofp <16 x i16> %int16 to <16 x half>
  ret <16 x half> %v16f16
}

; Scalar int16_t to float: movswl (sext to i32), then vcvtsi2ss.
define float @sint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movswl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movswl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %f32 = sitofp i16 %int16 to float
  ret float %f32
}

; Vector int16_t to float: vpmovsxwd (sext to i32 lanes), then vcvtdq2ps.
define <16 x float> @vector_sint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
  %v16f32 = sitofp <16 x i16> %int16 to <16 x float>
  ret <16 x float> %v16f32
}

; Scalar int16_t to half.
; - without fp16: movswl, vcvtsi2ss, vcvtps2ph, then vmovd + vpinsrw.
; - with fp16:    movswl, vcvtsi2sh.
define half @sint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movswl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movswl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %f16 = sitofp i16 %int16 to half
  ret half %f16
}

; Vector int16_t to half.
; - without fp16: vpmovsxwd, vcvtdq2ps, vcvtps2ph.
; - with fp16:    a single vcvtw2ph, no widening step.
define <16 x half> @vector_sint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
  %v16f16 = sitofp <16 x i16> %int16 to <16 x half>
  ret <16 x half> %v16f16
}