llvm-project

Форк
0
/
builtins-amdgcn-vi.cl 
261 строка · 11.6 Кб
1
// REQUIRES: amdgpu-registered-target
2
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s
3
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
4
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
5
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
6
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
7

8
// The tests in this file take and store `half` values, so the fp16 extension
// is enabled before any of them is declared.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Unsigned aliases used as the output element type (and, for test_perm, the
// operand type) by the memrealtime, memtime, perm and groupstaticsize tests.
typedef unsigned long ulong;
typedef unsigned int  uint;
12

13
// CHECK-LABEL: @test_div_fixup_f16
14
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.div.fixup.f16
15
// Stores __builtin_amdgcn_div_fixuph(a, b, c) through `out`. The FileCheck
// directives above expect this to lower to a call of
// @llvm.amdgcn.div.fixup.f16 returning half.
void test_div_fixup_f16(global half* out, half a, half b, half c)
{
  *out = __builtin_amdgcn_div_fixuph(a, b, c);
}
19

20
// CHECK-LABEL: @test_rcp_f16
21
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rcp.f16
22
// Stores __builtin_amdgcn_rcph(a) through `out`. The FileCheck directives
// above expect a call of @llvm.amdgcn.rcp.f16 returning half.
void test_rcp_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rcph(a);
}
26

27
// CHECK-LABEL: @test_sqrt_f16
28
// CHECK: {{.*}}call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16
29
// Stores __builtin_amdgcn_sqrth(a) through `out`. The FileCheck pattern above
// makes the "amdgcn." component optional, so either @llvm.amdgcn.sqrt.f16 or
// @llvm.sqrt.f16 is accepted as the callee.
void test_sqrt_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sqrth(a);
}
33

34
// CHECK-LABEL: @test_rsq_f16
35
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rsq.f16
36
// Stores __builtin_amdgcn_rsqh(a) through `out`. The FileCheck directives
// above expect a call of @llvm.amdgcn.rsq.f16 returning half.
void test_rsq_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_rsqh(a);
}
40

41
// CHECK-LABEL: @test_sin_f16
42
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.sin.f16
43
// Stores __builtin_amdgcn_sinh(a) through `out`. The builtin name is the
// "sin" builtin with the `h` (half) suffix; the FileCheck directives above
// expect a call of @llvm.amdgcn.sin.f16 returning half.
void test_sin_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_sinh(a);
}
47

48
// CHECK-LABEL: @test_cos_f16
49
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.cos.f16
50
// Stores __builtin_amdgcn_cosh(a) through `out`. The builtin name is the
// "cos" builtin with the `h` (half) suffix; the FileCheck directives above
// expect a call of @llvm.amdgcn.cos.f16 returning half.
void test_cos_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_cosh(a);
}
54

55
// CHECK-LABEL: @test_ldexp_f16
56
// CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32
57
// CHECK: {{.*}}call{{.*}} half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
58
// Stores __builtin_amdgcn_ldexph(a, b) through `out`. `b` is a 32-bit int
// here; the FileCheck directives above expect it to be truncated from i32
// and the truncated value passed as the i16 operand of @llvm.ldexp.f16.i16.
void test_ldexp_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_ldexph(a, b);
}
62

63
// CHECK-LABEL: @test_frexp_mant_f16
64
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.frexp.mant.f16
65
// Stores __builtin_amdgcn_frexp_manth(a) through `out`. The FileCheck
// directives above expect a call of @llvm.amdgcn.frexp.mant.f16 returning
// half.
void test_frexp_mant_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_frexp_manth(a);
}
69

70
// CHECK-LABEL: @test_frexp_exp_f16
71
// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.frexp.exp.i16.f16
72
// Stores __builtin_amdgcn_frexp_exph(a) through `out`, which points at
// `short` rather than `half`. The FileCheck directives above expect a call of
// @llvm.amdgcn.frexp.exp.i16.f16 returning i16.
void test_frexp_exp_f16(global short* out, half a)
{
  *out = __builtin_amdgcn_frexp_exph(a);
}
76

77
// CHECK-LABEL: @test_fract_f16
78
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.fract.f16
79
// Stores __builtin_amdgcn_fracth(a) through `out`. The FileCheck directives
// above expect a call of @llvm.amdgcn.fract.f16 returning half.
void test_fract_f16(global half* out, half a)
{
  *out = __builtin_amdgcn_fracth(a);
}
83

84
// CHECK-LABEL: @test_class_f16
85
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f16
86
// Stores __builtin_amdgcn_classh(a, b) through `out`. The FileCheck
// directives above expect a call of @llvm.amdgcn.class.f16 returning i1.
// NOTE(review): `out` points at half, so the i1 result is converted on the
// store with no explicit cast; only the call itself is matched above.
void test_class_f16(global half* out, half a, int b)
{
  *out = __builtin_amdgcn_classh(a, b);
}
90

91
// CHECK-LABEL: @test_s_memrealtime
92
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memrealtime()
93
// Stores __builtin_amdgcn_s_memrealtime() through `out`. The FileCheck
// directives above expect a call of @llvm.amdgcn.s.memrealtime() returning
// i64.
void test_s_memrealtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memrealtime();
}
97

98
// CHECK-LABEL: @test_s_dcache_wb()
99
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.dcache.wb()
100
// Invokes __builtin_amdgcn_s_dcache_wb() for its effect only; nothing is
// returned or stored. The FileCheck directives above expect a void call of
// @llvm.amdgcn.s.dcache.wb(). The parameter list is spelled `(void)` so the
// zero-argument prototype is explicit.
void test_s_dcache_wb(void)
{
  __builtin_amdgcn_s_dcache_wb();
}
104

105
// CHECK-LABEL: @test_mov_dpp
106
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
107
// Stores __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false) through `out`. The
// FileCheck directives above expect this five-argument builtin to lower to
// the six-operand @llvm.amdgcn.update.dpp.i32 intrinsic, with `poison` as the
// first operand followed by %src and the same constants.
void test_mov_dpp(global int* out, int src)
{
  *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
}
111

112
// CHECK-LABEL: @test_update_dpp
113
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
114
// Stores __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false) through
// `out`. The FileCheck directives above expect a call of
// @llvm.amdgcn.update.dpp.i32 with %arg1 and %arg2 as the first two operands
// and the same constants after them.
void test_update_dpp(global int* out, int arg1, int arg2)
{
  *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}
118

119
// CHECK-LABEL: @test_ds_fadd
120
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
121
// CHECK: atomicrmw volatile fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
122

123
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
124
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acquire, align 4{{$}}
125
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src release, align 4{{$}}
126
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
127
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
128
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
129

130
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
131
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
132
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
133
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
134
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
135
// Calls __builtin_amdgcn_ds_faddf with each __ATOMIC_* ordering and each
// named __MEMORY_SCOPE_* value, storing every result back through `out`. The
// FileCheck directives above expect one `atomicrmw fadd` on addrspace(3) per
// call. When __SPIRV__ is defined the pointer is spelled with an explicit
// address_space(3) attribute instead of the `local` keyword.
#if !defined(__SPIRV__)
void test_ds_faddf(local float *out, float src) {
#else
void test_ds_faddf(__attribute__((address_space(3))) float *out, float src) {
#endif
  // Same order and scope twice; only the final flag differs. The directives
  // above expect the second call to produce the `volatile` form.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, true);

  // Test all orders. The directives above list `acquire` twice, which is
  // consistent with the consume call lowering as acquire.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  // NOTE(review): this call carried the label "invalid" but repeats
  // __ATOMIC_SEQ_CST; confirm whether an out-of-range order was intended.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  // Raw scope value 5 instead of a named scope; the last directive above
  // expects a monotonic atomicrmw with no syncscope qualifier.
  *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}
159

160
// CHECK-LABEL: @test_ds_fmin
161
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
162
// CHECK: atomicrmw volatile fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
163

164
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
165
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
166
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src release, align 4{{$}}
167
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
168
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
169
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
170

171
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
172
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
173
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
174
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
175
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
176

177
// Calls __builtin_amdgcn_ds_fminf with each __ATOMIC_* ordering and each
// named __MEMORY_SCOPE_* value, storing every result back through `out`. The
// FileCheck directives above expect one `atomicrmw fmin` on addrspace(3) per
// call. When __SPIRV__ is defined the pointer is spelled with an explicit
// address_space(3) attribute instead of the `local` keyword.
#if !defined(__SPIRV__)
void test_ds_fminf(local float *out, float src) {
#else
void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
#endif
  // Same order and scope twice; only the final flag differs. The directives
  // above expect the second call to produce the `volatile` form. The named
  // macros replace the literal `0, 0` pair used previously (same values) so
  // these calls read like every other call in this function.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, true);

  // Test all orders. The directives above list `acquire` twice, which is
  // consistent with the consume call lowering as acquire.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  // NOTE(review): this call carried the label "invalid" but repeats
  // __ATOMIC_SEQ_CST; confirm whether an out-of-range order was intended.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  // Raw scope value 5 instead of a named scope; the last directive above
  // expects a monotonic atomicrmw with no syncscope qualifier.
  *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}
200

201
// CHECK-LABEL: @test_ds_fmax
202
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
203
// CHECK: atomicrmw volatile fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
204

205
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
206
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
207
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src release, align 4{{$}}
208
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
209
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
210
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
211

212
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
213
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
214
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
215
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
216
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
217

218
// Calls __builtin_amdgcn_ds_fmaxf with each __ATOMIC_* ordering and each
// named __MEMORY_SCOPE_* value, storing every result back through `out`. The
// FileCheck directives above expect one `atomicrmw fmax` on addrspace(3) per
// call. When __SPIRV__ is defined the pointer is spelled with an explicit
// address_space(3) attribute instead of the `local` keyword.
#if !defined(__SPIRV__)
void test_ds_fmaxf(local float *out, float src) {
#else
void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
#endif
  // Same order and scope twice; only the final flag differs. The directives
  // above expect the second call to produce the `volatile` form. The named
  // macros replace the literal `0, 0` pair used previously (same values) so
  // these calls read like every other call in this function.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM, true);

  // Test all orders. The directives above list `acquire` twice, which is
  // consistent with the consume call lowering as acquire.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
  // NOTE(review): this call carried the label "invalid" but repeats
  // __ATOMIC_SEQ_CST; confirm whether an out-of-range order was intended.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false); // invalid

  // Test all syncscopes.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
  // Raw scope value 5 instead of a named scope; the last directive above
  // expects a monotonic atomicrmw with no syncscope qualifier.
  *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}
241

242
// CHECK-LABEL: @test_s_memtime
243
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
244
// Stores __builtin_amdgcn_s_memtime() through `out`. The FileCheck directives
// above expect a call of @llvm.amdgcn.s.memtime() returning i64.
void test_s_memtime(global ulong* out)
{
  *out = __builtin_amdgcn_s_memtime();
}
248

249
// CHECK-LABEL: @test_perm
250
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
251
// Stores __builtin_amdgcn_perm(a, b, s) through `out`. The FileCheck
// directives above expect a call of @llvm.amdgcn.perm with %a, %b and %s
// passed through in that order as i32 operands.
void test_perm(global uint* out, uint a, uint b, uint s)
{
  *out = __builtin_amdgcn_perm(a, b, s);
}
255

256
// CHECK-LABEL: @test_groupstaticsize
257
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.groupstaticsize()
258
// Stores __builtin_amdgcn_groupstaticsize() through `out`. The FileCheck
// directives above expect a call of @llvm.amdgcn.groupstaticsize() returning
// i32.
void test_groupstaticsize(global uint* out)
{
  *out = __builtin_amdgcn_groupstaticsize();
}
262

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.