// Source listing from llvm-project (61 lines, 2.1 KB).
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s
4
// Two-element float vector; used as the pointee type of `out` in
// test_cvt_pk_f32_bf8 and test_cvt_pk_f32_fp8.
typedef __attribute__((ext_vector_type(2))) float v2f;
6
// CHECK-LABEL: @test_cvt_f32_bf8
// CHECK: call float @llvm.amdgcn.cvt.f32.bf8(i32 %a, i32 0)
//
// Passes `a` and the literal 0 to __builtin_amdgcn_cvt_f32_bf8 and stores the
// result through `out`. The expected IR above shows the intrinsic returning
// float, while `out` points to int, so the store converts float to int.
// The parameter name `a` must stay as-is: the expected IR matches on %a.
// NOTE(review): the literal 0 is presumably a byte selector - confirm against
// the builtin's documentation.
void test_cvt_f32_bf8(global int *out, int a)
{
  const float converted = __builtin_amdgcn_cvt_f32_bf8(a, 0);
  *out = (int)converted;
}
13
// CHECK-LABEL: @test_cvt_f32_fp8
// CHECK: call float @llvm.amdgcn.cvt.f32.fp8(i32 %a, i32 1)
//
// Passes `a` and the literal 1 to __builtin_amdgcn_cvt_f32_fp8 and stores the
// result through `out`. The expected IR above shows the intrinsic returning
// float, while `out` points to int, so the store converts float to int.
// The parameter name `a` must stay as-is: the expected IR matches on %a.
// NOTE(review): the literal 1 is presumably a byte selector - confirm against
// the builtin's documentation.
void test_cvt_f32_fp8(global int *out, int a)
{
  const float converted = __builtin_amdgcn_cvt_f32_fp8(a, 1);
  *out = (int)converted;
}
20
// CHECK-LABEL: @test_cvt_pk_f32_bf8
// CHECK: call <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32 %a, i1 false)
//
// Passes `a` and `false` to __builtin_amdgcn_cvt_pk_f32_bf8 and stores the
// two-element float vector it yields through `out`.
// The parameter name `a` must stay as-is: the expected IR matches on %a.
// NOTE(review): the boolean presumably selects which half of `a` is
// converted - confirm against the builtin's documentation.
void test_cvt_pk_f32_bf8(global v2f *out, int a)
{
  const v2f converted = __builtin_amdgcn_cvt_pk_f32_bf8(a, false);
  *out = converted;
}
27
// CHECK-LABEL: @test_cvt_pk_f32_fp8
// CHECK: call <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32 %a, i1 true)
//
// Passes `a` and `true` to __builtin_amdgcn_cvt_pk_f32_fp8 and stores the
// two-element float vector it yields through `out`.
// The parameter name `a` must stay as-is: the expected IR matches on %a.
// NOTE(review): the boolean presumably selects which half of `a` is
// converted - confirm against the builtin's documentation.
void test_cvt_pk_f32_fp8(global v2f *out, int a)
{
  const v2f converted = __builtin_amdgcn_cvt_pk_f32_fp8(a, true);
  *out = converted;
}
34
// CHECK-LABEL: @test_cvt_pk_bf8_f32
// CHECK: call i32 @llvm.amdgcn.cvt.pk.bf8.f32(float %a, float %b, i32 %old, i1 false)
//
// Passes (a, b, old, false) to __builtin_amdgcn_cvt_pk_bf8_f32 and stores the
// i32 result through `out`. Note the builtin's argument order (a, b, old)
// differs from this function's parameter order (old, a, b).
// Parameter names must stay as-is: the expected IR matches on %a, %b, %old.
// NOTE(review): `old` presumably supplies the bits that the conversion leaves
// untouched, with the boolean choosing the destination half - confirm.
void test_cvt_pk_bf8_f32(global int *out, int old, float a, float b)
{
  const int result = __builtin_amdgcn_cvt_pk_bf8_f32(a, b, old, false);
  *out = result;
}
41
// CHECK-LABEL: @test_cvt_pk_fp8_f32
// CHECK: call i32 @llvm.amdgcn.cvt.pk.fp8.f32(float %a, float %b, i32 %old, i1 true)
//
// Passes (a, b, old, true) to __builtin_amdgcn_cvt_pk_fp8_f32 and stores the
// i32 result through `out`. Note the builtin's argument order (a, b, old)
// differs from this function's parameter order (old, a, b).
// Parameter names must stay as-is: the expected IR matches on %a, %b, %old.
// NOTE(review): `old` presumably supplies the bits that the conversion leaves
// untouched, with the boolean choosing the destination half - confirm.
void test_cvt_pk_fp8_f32(global int *out, int old, float a, float b)
{
  const int result = __builtin_amdgcn_cvt_pk_fp8_f32(a, b, old, true);
  *out = result;
}
48
// CHECK-LABEL: @test_cvt_sr_bf8_f32
// CHECK: call i32 @llvm.amdgcn.cvt.sr.bf8.f32(float %a, i32 %b, i32 %old, i32 2)
//
// Passes (a, b, old, 2) to __builtin_amdgcn_cvt_sr_bf8_f32 and stores the i32
// result through `out`. Note the builtin's argument order (a, b, old) differs
// from this function's parameter order (old, a, b).
// Parameter names must stay as-is: the expected IR matches on %a, %b, %old.
// NOTE(review): the literal 2 is presumably a byte selector within `old` -
// confirm against the builtin's documentation.
void test_cvt_sr_bf8_f32(global int *out, int old, float a, int b)
{
  const int result = __builtin_amdgcn_cvt_sr_bf8_f32(a, b, old, 2);
  *out = result;
}
55
// CHECK-LABEL: @test_cvt_sr_fp8_f32
// CHECK: call i32 @llvm.amdgcn.cvt.sr.fp8.f32(float %a, i32 %b, i32 %old, i32 3)
//
// Passes (a, b, old, 3) to __builtin_amdgcn_cvt_sr_fp8_f32 and stores the i32
// result through `out`. Note the builtin's argument order (a, b, old) differs
// from this function's parameter order (old, a, b).
// Parameter names must stay as-is: the expected IR matches on %a, %b, %old.
// NOTE(review): the literal 3 is presumably a byte selector within `old` -
// confirm against the builtin's documentation.
void test_cvt_sr_fp8_f32(global int *out, int old, float a, int b)
{
  const int result = __builtin_amdgcn_cvt_sr_fp8_f32(a, b, old, 3);
  *out = result;
}
62