// Source: llvm-project (the file viewer reported 261 lines, 11.6 KB).
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s

// Turn on the cl_khr_fp16 extension; the tests that follow declare `half` values.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Short spellings of the unsigned integer types used by the tests in this file.
typedef unsigned long ulong;
typedef unsigned int uint;

// Stores __builtin_amdgcn_div_fixuph(a, b, c) through `out`.
// The emitted IR is expected to contain a call to llvm.amdgcn.div.fixup.f16
// that returns half.
// CHECK-LABEL: @test_div_fixup_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.div.fixup.f16
void test_div_fixup_f16(global half* out, half a, half b, half c)
{
  *out = __builtin_amdgcn_div_fixuph(a, b, c);
}

// Stores __builtin_amdgcn_rcph(a) through `out`.
// The emitted IR is expected to contain a call to llvm.amdgcn.rcp.f16
// that returns half.
// CHECK-LABEL: @test_rcp_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rcp.f16
void test_rcp_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rcph(a);
}

// Stores __builtin_amdgcn_sqrth(a) through `out`.
// The pattern below makes the "amdgcn." component optional, so either the
// target-specific or the generic f16 sqrt intrinsic name is accepted.
// CHECK-LABEL: @test_sqrt_f16
// CHECK: {{.*}}call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16
void test_sqrt_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sqrth(a);
}

// Stores __builtin_amdgcn_rsqh(a) through `out`.
// The emitted IR is expected to contain a call to llvm.amdgcn.rsq.f16
// that returns half.
// CHECK-LABEL: @test_rsq_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rsq.f16
void test_rsq_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rsqh(a);
}

// Stores __builtin_amdgcn_sinh(a) through `out`.
// The emitted IR is expected to contain a call to llvm.amdgcn.sin.f16
// that returns half.
// CHECK-LABEL: @test_sin_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.sin.f16
void test_sin_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sinh(a);
}

// Stores __builtin_amdgcn_cosh(a) through `out`.
// The emitted IR is expected to contain a call to llvm.amdgcn.cos.f16
// that returns half.
// CHECK-LABEL: @test_cos_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.cos.f16
void test_cos_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_cosh(a);
}

// Stores __builtin_amdgcn_ldexph(a, b) through `out`.
// The expected IR first truncates an i32 value, then passes the truncated
// result as the i16 operand of llvm.ldexp.f16.i16 together with %a.
// CHECK-LABEL: @test_ldexp_f16
// CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32
// CHECK: {{.*}}call{{.*}} half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
void test_ldexp_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_ldexph(a, b);
}

// Stores __builtin_amdgcn_frexp_manth(a) through `out`.
// The emitted IR is expected to contain a call to llvm.amdgcn.frexp.mant.f16
// that returns half.
// CHECK-LABEL: @test_frexp_mant_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.frexp.mant.f16
void test_frexp_mant_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_frexp_manth(a);
}

// Stores __builtin_amdgcn_frexp_exph(a) through `out`, which points to short.
// The emitted IR is expected to contain a call to
// llvm.amdgcn.frexp.exp.i16.f16 that returns i16.
// CHECK-LABEL: @test_frexp_exp_f16
// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.frexp.exp.i16.f16
void test_frexp_exp_f16(global short* out, half a)
{
  *out = __builtin_amdgcn_frexp_exph(a);
}

// Stores __builtin_amdgcn_fracth(a) through `out`.
// The emitted IR is expected to contain a call to llvm.amdgcn.fract.f16
// that returns half.
// CHECK-LABEL: @test_fract_f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.fract.f16
void test_fract_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_fracth(a);
}

// Assigns __builtin_amdgcn_classh(a, b) through `out`, which points to half.
// The emitted IR is expected to contain a call to llvm.amdgcn.class.f16
// that returns i1.
// CHECK-LABEL: @test_class_f16
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f16
void test_class_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_classh(a, b);
}

// Stores __builtin_amdgcn_s_memrealtime() through `out`.
// The emitted IR is expected to contain a call to llvm.amdgcn.s.memrealtime()
// that returns i64.
// CHECK-LABEL: @test_s_memrealtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memrealtime()
void test_s_memrealtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memrealtime();
}

// Calls __builtin_amdgcn_s_dcache_wb(), which takes no arguments and whose
// result is not used. The emitted IR is expected to contain a void call to
// llvm.amdgcn.s.dcache.wb().
// CHECK-LABEL: @test_s_dcache_wb()
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.dcache.wb()
void test_s_dcache_wb()
{
  __builtin_amdgcn_s_dcache_wb();
}

// Stores __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false) through `out`.
// The expected IR is a call to llvm.amdgcn.update.dpp.i32 whose first operand
// is poison, whose second operand is %src, and whose remaining operands are
// the constants passed here (0, 0, 0, false).
// CHECK-LABEL: @test_mov_dpp
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
void test_mov_dpp(global int* out, int src)
{
  *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
}

// Stores __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false) through `out`.
// The expected IR is a call to llvm.amdgcn.update.dpp.i32 that receives %arg1,
// %arg2 and the same constants (0, 0, 0, false) in the same order.
// CHECK-LABEL: @test_update_dpp
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
void test_update_dpp(global int* out, int arg1, int arg2)
{
  *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}

// Exercises __builtin_amdgcn_ds_faddf on an address-space-3 float pointer.
// Every call is expected to become an `atomicrmw fadd` with 4-byte alignment.
// The directives below follow the call order in the function body:
//   - two relaxed/system calls, the second with the volatile flag set;
//   - one call per memory order (the consume call is expected to appear as
//     acquire);
//   - one call per named scope (device -> "agent", work-group -> "workgroup",
//     wavefront -> "wavefront", single -> "singlethread"), followed by the
//     scope value 5, for which no syncscope is expected.
// CHECK-LABEL: @test_ds_fadd
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// When __SPIRV__ is defined the pointer is spelled with an explicit
// address_space(3) attribute instead of the `local` qualifier.
#if !defined(__SPIRV__)
void test_ds_faddf(local float *out, float src) {
#else
void test_ds_faddf(__attribute__((address_space(3))) float *out, float src) {
#endif

  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, true);

  // Test all orders.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  // NOTE(review): this call was tagged "invalid", but it repeats the valid
  // seq_cst call above; the expected IR lists seq_cst twice to match.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, 5, false); // invalid scope value; expected IR has no syncscope
}

// Exercises __builtin_amdgcn_ds_fminf on an address-space-3 float pointer.
// Every call is expected to become an `atomicrmw fmin` with 4-byte alignment.
// The directives below follow the call order in the function body:
//   - two calls with literal order 0 and scope 0, the second with the
//     volatile flag set (expected: monotonic, no syncscope);
//   - one call per memory order (the consume call is expected to appear as
//     acquire);
//   - one call per named scope (device -> "agent", work-group -> "workgroup",
//     wavefront -> "wavefront", single -> "singlethread"), followed by the
//     scope value 5, for which no syncscope is expected.
// CHECK-LABEL: @test_ds_fmin
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// When __SPIRV__ is defined the pointer is spelled with an explicit
// address_space(3) attribute instead of the `local` qualifier.
#if !defined(__SPIRV__)
void test_ds_fminf(local float *out, float src) {
#else
void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
#endif
  // Order and scope are passed as the integer literal 0 here.
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, true);

  // Test all orders.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  // NOTE(review): this call was tagged "invalid", but it repeats the valid
  // seq_cst call above; the expected IR lists seq_cst twice to match.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, 5, false); // invalid scope value; expected IR has no syncscope
}

// Exercises __builtin_amdgcn_ds_fmaxf on an address-space-3 float pointer.
// Every call is expected to become an `atomicrmw fmax` with 4-byte alignment.
// The directives below follow the call order in the function body:
//   - two calls with literal order 0 and scope 0, the second with the
//     volatile flag set (expected: monotonic, no syncscope);
//   - one call per memory order (the consume call is expected to appear as
//     acquire);
//   - one call per named scope (device -> "agent", work-group -> "workgroup",
//     wavefront -> "wavefront", single -> "singlethread"), followed by the
//     scope value 5, for which no syncscope is expected.
// CHECK-LABEL: @test_ds_fmax
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
// CHECK: atomicrmw volatile fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src release, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}

// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}

// When __SPIRV__ is defined the pointer is spelled with an explicit
// address_space(3) attribute instead of the `local` qualifier.
#if !defined(__SPIRV__)
void test_ds_fmaxf(local float *out, float src) {
#else
void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
#endif
  // Order and scope are passed as the integer literal 0 here.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, true);

  // Test all orders.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  // NOTE(review): this call was tagged "invalid", but it repeats the valid
  // seq_cst call above; the expected IR lists seq_cst twice to match.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, 5, false); // invalid scope value; expected IR has no syncscope
}

// Stores __builtin_amdgcn_s_memtime() through `out`.
// The emitted IR is expected to contain a call to llvm.amdgcn.s.memtime()
// that returns i64.
// CHECK-LABEL: @test_s_memtime
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
void test_s_memtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memtime();
}

// Stores __builtin_amdgcn_perm(a, b, s) through `out`.
// The expected IR is a call to llvm.amdgcn.perm that receives %a, %b and %s
// as i32 operands in that order and returns i32.
// CHECK-LABEL: @test_perm
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
void test_perm(global uint* out, uint a, uint b, uint s)
{
  *out = __builtin_amdgcn_perm(a, b, s);
}

// Stores __builtin_amdgcn_groupstaticsize() through `out`.
// The emitted IR is expected to contain a call to
// llvm.amdgcn.groupstaticsize() that returns i32.
// CHECK-LABEL: @test_groupstaticsize
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.groupstaticsize()
void test_groupstaticsize(global uint* out)
{
  *out = __builtin_amdgcn_groupstaticsize();
}
