jdk

immediate_aarch64.cpp
448 строк · 12.1 Кб
Перенос по словам
1
/*
2
 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
3
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
4
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5
 *
6
 * This code is free software; you can redistribute it and/or modify it
7
 * under the terms of the GNU General Public License version 2 only, as
8
 * published by the Free Software Foundation.
9
 *
10
 * This code is distributed in the hope that it will be useful, but WITHOUT
11
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13
 * version 2 for more details (a copy is included in the LICENSE file that
14
 * accompanied this code).
15
 *
16
 * You should have received a copy of the GNU General Public License version
17
 * 2 along with this work; if not, write to the Free Software Foundation,
18
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19
 *
20
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21
 * or visit www.oracle.com if you need additional information or have any
22
 * questions.
23
 *
24
 */
25

26
#include <stdlib.h>
27
#include <stdint.h>
28

29
#include "precompiled.hpp"
30
#include "immediate_aarch64.hpp"
31
#include "metaprogramming/primitiveConversions.hpp"
32
#include "utilities/globalDefinitions.hpp"
33

34
// there are at most 2^13 possible logical immediate encodings
35
// however, some combinations of immr and imms are invalid
36
// Number of slots in the forward table: one per 13-bit encoding value.
static const unsigned LI_TABLE_SIZE = 1u << 13;

// Number of entries of the inverse table that have been filled in.
static int li_table_entry_count;

// for forward lookup we just use a direct array lookup
// and assume that the client has supplied a valid encoding
// table[encoding] = immediate
static uint64_t LITable[LI_TABLE_SIZE];
44

45
// for reverse lookup we need a sparse map so we store a table of
46
// immediate and encoding pairs sorted by immediate value
47

48
struct li_pair {
49
  uint64_t immediate;
50
  uint32_t encoding;
51
};
52

53
static struct li_pair InverseLITable[LI_TABLE_SIZE];
54

55
// comparator to sort entries in the inverse table
56
static int compare_immediate_pair(const void *i1, const void *i2)
57
{
58
  struct li_pair *li1 = (struct li_pair *)i1;
59
  struct li_pair *li2 = (struct li_pair *)i2;
60
  if (li1->immediate < li2->immediate) {
61
    return -1;
62
  }
63
  if (li1->immediate > li2->immediate) {
64
    return 1;
65
  }
66
  return 0;
67
}
68

69
// helper functions used by expandLogicalImmediate
70

71
// for i = 1, ... N result<i-1> = 1 other bits are zero
72
static inline uint64_t ones(int N)
{
  // Returns a value whose low N bits are set. N == 64 is handled
  // separately because shifting a 64 bit value by 64 is undefined.
  if (N == 64) {
    return ~(uint64_t)0;
  }
  return ((uint64_t)1 << N) - 1;
}
76

77
/*
78
 * bit twiddling helpers for instruction decode
79
 */
80

81
// 32 bit mask with bits [hi,...,lo] set
82
static inline uint32_t mask32(int hi = 31, int lo = 0)
{
  // Builds a 32 bit mask with bits [hi..lo] set (inclusive at both ends).
  //
  // The field is computed in unsigned arithmetic and the full-width case
  // is special-cased: with the default arguments the field is 32 bits
  // wide, and shifting a 32 bit value by 32 is undefined behaviour.
  const int nbits = (hi + 1) - lo;
  const uint32_t field = (nbits >= 32) ? ~(uint32_t)0
                                       : (((uint32_t)1 << nbits) - 1);
  return field << lo;
}
87

88
static inline uint64_t mask64(int hi = 63, int lo = 0)
{
  // Builds a 64 bit mask with bits [hi..lo] set (inclusive at both ends).
  //
  // The field is computed with an explicit uint64_t operand because
  // 'long' is only 32 bits wide on some targets, and the full-width case
  // is special-cased because shifting a 64 bit value by 64 (the default
  // arguments) is undefined behaviour.
  const int nbits = (hi + 1) - lo;
  const uint64_t field = (nbits >= 64) ? ~(uint64_t)0
                                       : (((uint64_t)1 << nbits) - 1);
  return field << lo;
}
93

94
// pick bits [hi,...,lo] from val
95
static inline uint32_t pick32(uint32_t val, int hi = 31, int lo = 0)
96
{
97
  return (val & mask32(hi, lo));
98
}
99

100
// pick bits [hi,...,lo] from val
101
static inline uint64_t pick64(uint64_t val, int hi = 31, int lo = 0)
102
{
103
  return (val & mask64(hi, lo));
104
}
105

106
// mask [hi,lo] and shift down to start at bit 0
107
static inline uint32_t pickbits32(uint32_t val, int hi = 31, int lo = 0)
108
{
109
  return (pick32(val, hi, lo) >> lo);
110
}
111

112
// mask [hi,lo] and shift down to start at bit 0
113
static inline uint64_t pickbits64(uint64_t val, int hi = 63, int lo = 0)
114
{
115
  return (pick64(val, hi, lo) >> lo);
116
}
117

118
// extract bit N of val: result<0> = val<N>, all other result bits are zero
119
static inline uint64_t pickbit(uint64_t val, int N)
120
{
121
  return pickbits64(val, N, N);
122
}
123

124
static inline uint32_t uimm(uint32_t val, int hi, int lo)
125
{
126
  return pickbits32(val, hi, lo);
127
}
128

129
// SPEC
130
//
131
// bits(M*N) Replicate(bits(M) B, integer N);
132
//
133
// given bit string B of width M (M > 0) and count N (N > 0)
134
// concatenate N copies of B to generate a bit string of width N * M
135
// (N * M <= 64)
136
//
137
// inputs
138
// bits : bit string to be replicated starting from bit 0
139
// nbits : width of the bit string passed in bits
140
// count : number of copies of bit string to be concatenated
141
//
142
// result
143
// a bit string containing count copies of input bit string
144
//
145
static uint64_t replicate(uint64_t bits, int nbits, int count)
146
{
147
  assert(count > 0, "must be");
148
  assert(nbits > 0, "must be");
149
  assert(count * nbits <= 64, "must be");
150

151
  // Special case nbits == 64 since the shift below with that nbits value
152
  // would result in undefined behavior.
153
  if (nbits == 64) {
154
    return bits;
155
  }
156

157
  uint64_t result = 0;
158
  uint64_t mask = ones(nbits);
159
  for (int i = 0; i < count ; i++) {
160
    result <<= nbits;
161
    result |= (bits & mask);
162
  }
163
  return result;
164
}
165

166
// construct a 64 bit immediate value for a logical immediate operation
167
//
168
// SPEC:
169
//
170
// {(0,_), (1, uint64)} = expandLogicalImmediate(immN, immr, imms)
171
//
172
// For valid combinations of immN, immr and imms, this function
173
// replicates a derived bit string, whose width is a power of 2, into
174
// a 64 bit result and returns 1.
175
//
176
// for invalid combinations it fails and returns 0
177
//
178
// - immN and imms together define
179
//
180
//    1) the size, 2^k, of the bit string to be replicated (0 < k <= 6)
181
//
182
//    2) the number of bits, p, to set in the string (0 < p < 2^k)
183
//
184
// - immr defines a right rotation on the bit string determined by
185
//   immN and imms
186
//
187
// bit field construction:
188
//
189
// create a bit string of width 2^k
190
//
191
// set the bottom p bits to 1
192
//
193
// rotate the bit string right by immr bits
194
//
195
// replicate the 2^k bit string into 64 bits
196
//
197
// derivation of k and p and validity checks:
198
//
199
// when immN is 1 then k == 6 and immr/imms are masked to 6 bit
200
// integers
201
//
202
// when immN is 0 then k is the index of the first 0 bit in imms and
203
// immr/imms are masked to k-bit integers (i.e. any leading 1s and the
204
// first 0 in imms determine dead bits of imms/immr)
205
//
206
// if (pre-masking) immr >= 2^k then fail and return 0 (this is a
207
// uniqueness constraint that ensures each output bit string is only
208
// generated by one valid combination of immN, imms and immr).
209
//
210
// if k == 0 then fail and return 0. Note that this means that
211
// 2^k > 1 or equivalently 2^k - 1 > 0
212
//
213
// If imms == all 1s (modulo 2^k) then fail and return 0. Note that
214
// this means that 0 <= imms < 2^k - 1
215
//
216
// set p = imms + 1. Consequently, 0 < p < 2^k which is the condition
217
// that an all 0s or all 1s bit pattern is never generated.
218
//
219
// example output:
220
//
221
//   11001111_11001111_11001111_11001111_11001111_11001111_11001111_11001111
222
//
223
// which corresponds to the inputs
224
//
225
//   immN = 0, imms = 110101, immr = 000010
226
//
227
// For these inputs k = 3,  2^k = 8, p = 6, rotation = 2
228
//
229
// implementation note:
230
//
231
// For historical reasons the implementation of this function is much
232
// more convoluted than is really necessary.
233

234
static int expandLogicalImmediate(uint32_t immN, uint32_t immr,
235
                                  uint32_t imms, uint64_t &bimm)
236
{
237
  int len;                 // ought to be <= 6
238
  uint32_t levels;         // 6 bits
239
  uint32_t tmask_and;      // 6 bits
240
  uint32_t wmask_and;      // 6 bits
241
  uint32_t tmask_or;       // 6 bits
242
  uint32_t wmask_or;       // 6 bits
243
  uint64_t imm64;          // 64 bits
244
  uint64_t tmask, wmask;   // 64 bits
245
  uint32_t S, R, diff;     // 6 bits?
246

247
  if (immN == 1) {
248
    len = 6; // looks like 7 given the spec above but this cannot be!
249
  } else {
250
    len = 0;
251
    uint32_t val = (~imms & 0x3f);
252
    for (int i = 5; i > 0; i--) {
253
      if (val & (1 << i)) {
254
        len = i;
255
        break;
256
      }
257
    }
258
    if (len < 1) {
259
      return 0;
260
    }
261
    // for valid inputs leading 1s in immr must be less than leading
262
    // zeros in imms
263
    int len2 = 0;                   // ought to be < len
264
    uint32_t val2 = (~immr & 0x3f);
265
    for (int i = 5; i > 0; i--) {
266
      if (!(val2 & (1 << i))) {
267
        len2 = i;
268
        break;
269
      }
270
    }
271
    if (len2 >= len) {
272
      return 0;
273
    }
274
  }
275

276
  levels = (1 << len) - 1;
277

278
  if ((imms & levels) == levels) {
279
    return 0;
280
  }
281

282
  S = imms & levels;
283
  R = immr & levels;
284

285
 // 6 bit arithmetic!
286
  diff = S - R;
287
  tmask_and = (diff | ~levels) & 0x3f;
288
  tmask_or = (diff & levels) & 0x3f;
289
  tmask = 0xffffffffffffffffULL;
290

291
  for (int i = 0; i < 6; i++) {
292
    int nbits = 1 << i;
293
    uint64_t and_bit = pickbit(tmask_and, i);
294
    uint64_t or_bit = pickbit(tmask_or, i);
295
    uint64_t and_bits_sub = replicate(and_bit, 1, nbits);
296
    uint64_t or_bits_sub = replicate(or_bit, 1, nbits);
297
    uint64_t and_bits_top = (and_bits_sub << nbits) | ones(nbits);
298
    uint64_t or_bits_top = (0 << nbits) | or_bits_sub;
299

300
    tmask = ((tmask
301
              & (replicate(and_bits_top, 2 * nbits, 32 / nbits)))
302
             | replicate(or_bits_top, 2 * nbits, 32 / nbits));
303
  }
304

305
  wmask_and = (immr | ~levels) & 0x3f;
306
  wmask_or = (immr & levels) & 0x3f;
307

308
  wmask = 0;
309

310
  for (int i = 0; i < 6; i++) {
311
    int nbits = 1 << i;
312
    uint64_t and_bit = pickbit(wmask_and, i);
313
    uint64_t or_bit = pickbit(wmask_or, i);
314
    uint64_t and_bits_sub = replicate(and_bit, 1, nbits);
315
    uint64_t or_bits_sub = replicate(or_bit, 1, nbits);
316
    uint64_t and_bits_top = (ones(nbits) << nbits) | and_bits_sub;
317
    uint64_t or_bits_top = (or_bits_sub << nbits) | 0;
318

319
    wmask = ((wmask
320
              & (replicate(and_bits_top, 2 * nbits, 32 / nbits)))
321
             | replicate(or_bits_top, 2 * nbits, 32 / nbits));
322
  }
323

324
  if (diff & (1U << 6)) {
325
    imm64 = tmask & wmask;
326
  } else {
327
    imm64 = tmask | wmask;
328
  }
329

330

331
  bimm = imm64;
332
  return 1;
333
}
334

335
// constructor to initialise the lookup tables
336

337
static void initLITables();
// Use an empty struct with a constructor as MSVC doesn't support `__attribute__ ((constructor))`
// See https://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc
static struct initLITables_t {
  // Constructing the single static instance below fills in the lookup
  // tables.
  initLITables_t(void) {
    initLITables();
  }
} _initLITables;
341
static void initLITables()
342
{
343
  li_table_entry_count = 0;
344
  for (unsigned index = 0; index < LI_TABLE_SIZE; index++) {
345
    uint32_t N = uimm(index, 12, 12);
346
    uint32_t immr = uimm(index, 11, 6);
347
    uint32_t imms = uimm(index, 5, 0);
348
    if (expandLogicalImmediate(N, immr, imms, LITable[index])) {
349
      InverseLITable[li_table_entry_count].immediate = LITable[index];
350
      InverseLITable[li_table_entry_count].encoding = index;
351
      li_table_entry_count++;
352
    }
353
  }
354
  // now sort the inverse table
355
  qsort(InverseLITable, li_table_entry_count,
356
        sizeof(InverseLITable[0]), compare_immediate_pair);
357
}
358

359
// public APIs provided for logical immediate lookup and reverse lookup
360

361
uint64_t logical_immediate_for_encoding(uint32_t encoding)
362
{
363
  return LITable[encoding];
364
}
365

366
uint32_t encoding_for_logical_immediate(uint64_t immediate)
367
{
368
  struct li_pair pair;
369
  struct li_pair *result;
370

371
  pair.immediate = immediate;
372

373
  result = (struct li_pair *)
374
    bsearch(&pair, InverseLITable, li_table_entry_count,
375
            sizeof(InverseLITable[0]), compare_immediate_pair);
376

377
  if (result) {
378
    return result->encoding;
379
  }
380

381
  return 0xffffffff;
382
}
383

384
// floating point immediates are encoded in 8 bits
385
// fpimm[7] = sign bit
386
// fpimm[6:4] = signed exponent
387
// fpimm[3:0] = fraction (assuming leading 1)
388
// i.e. F = s * 1.f * 2^(e - b)
389

390
uint64_t fp_immediate_for_encoding(uint32_t imm8, int is_dp)
{
  // Expands an 8 bit floating point immediate into raw IEEE-754 bits.
  //
  // imm8  : encoded immediate; only bits 7..0 are examined
  //           bit 7     sign s
  //           bits 6..4 exponent selector e
  //           bits 3..0 fraction f
  // is_dp : non-zero to produce a double precision pattern, zero for
  //         single precision
  //
  // The encoded value is (-1)^s * (16 + f)/16 * 2^x, where x is e + 1
  // for e in 0..3 and e - 7 for e in 4..7.
  //
  // Returns the binary64 pattern when is_dp is non-zero. Otherwise
  // returns the binary32 pattern in the low 32 bits with the upper
  // 32 bits zero.
  //
  // The pattern is assembled directly from its sign, exponent and
  // fraction fields. Storing a float into a union and reading a 64 bit
  // member back leaves the upper half of the single precision result
  // uninitialised.
  const uint64_t sign = (imm8 >> 7) & 0x1;
  const uint32_t e = (imm8 >> 4) & 0x7;
  const uint64_t frac = imm8 & 0xf;

  // n.b. exponent is signed; the result lies in the range -3..4
  const int exponent = (e < 4) ? (int)e + 1 : (int)e - 7;

  if (is_dp) {
    // binary64: sign at bit 63, 11 bit exponent with bias 1023, and the
    // 4 fraction bits at the top of the 52 bit significand field
    return (sign << 63)
         | ((uint64_t)(1023 + exponent) << 52)
         | (frac << 48);
  }

  // binary32: sign at bit 31, 8 bit exponent with bias 127, and the
  // 4 fraction bits at the top of the 23 bit significand field
  return (sign << 31)
       | ((uint64_t)(127 + exponent) << 23)
       | (frac << 19);
}
425

426
uint32_t encoding_for_fp_immediate(float immediate)
427
{
428
  // given a float which is of the form
429
  //
430
  //     s * n/16 * 2r
431
  //
432
  // where n is 16+f and imm1:s, imm4:f, simm3:r
433
  // return the imm8 result [s:r:f]
434
  //
435

436
  uint32_t val = PrimitiveConversions::cast<uint32_t>(immediate);
437
  uint32_t s, r, f, res;
438
  // sign bit is 31
439
  s = (val >> 31) & 0x1;
440
  // exponent is bits 30-23 but we only want the bottom 3 bits
441
  // strictly we ought to check that the bits bits 30-25 are
442
  // either all 1s or all 0s
443
  r = (val >> 23) & 0x7;
444
  // fraction is bits 22-0
445
  f = (val >> 19) & 0xf;
446
  res = (s << 7) | (r << 4) | f;
447
  return res;
448
}
449
jdk

Использование cookies