// llvm-project — lld/COFF/DebugTypes.cpp (file is 1236 lines / 46.3 KiB; this is a partial view)
1//===- DebugTypes.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "DebugTypes.h"
10#include "COFFLinkerContext.h"
11#include "Chunks.h"
12#include "Driver.h"
13#include "InputFiles.h"
14#include "PDB.h"
15#include "TypeMerger.h"
16#include "lld/Common/ErrorHandler.h"
17#include "lld/Common/Memory.h"
18#include "llvm/ADT/StringExtras.h"
19#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
20#include "llvm/DebugInfo/CodeView/TypeRecord.h"
21#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
22#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
23#include "llvm/DebugInfo/PDB/GenericError.h"
24#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
25#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
26#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
27#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
28#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
29#include "llvm/Support/FormatVariadic.h"
30#include "llvm/Support/Parallel.h"
31#include "llvm/Support/Path.h"
32#include "llvm/Support/TimeProfiler.h"
33
34using namespace llvm;
35using namespace llvm::codeview;
36using namespace lld;
37using namespace lld::coff;
38
39namespace {
40class TypeServerIpiSource;
41
// The TypeServerSource class represents a PDB type server, a file referenced by
// OBJ files compiled with MSVC /Zi. A single PDB can be shared by several OBJ
// files, therefore there must be only one instance per PDB. The file path
// is discovered from the dependent OBJ's debug type stream. The
// TypeServerSource object is then queued and loaded by the COFF Driver. The
// debug type stream for such PDB files will be merged first in the final PDB,
// before any dependent OBJ.
class TypeServerSource : public TpiSource {
public:
  explicit TypeServerSource(COFFLinkerContext &ctx, PDBInputFile *f)
      : TpiSource(ctx, PDB, nullptr), pdbInputFile(f) {
    // A PDB that failed to load has no usable GUID; leave this source
    // unregistered so dependents fall back to file-system lookup.
    if (f->loadErrorStr)
      return;
    pdb::PDBFile &file = f->session->getPDBFile();
    auto expectedInfo = file.getPDBInfoStream();
    if (!expectedInfo)
      return;
    Guid = expectedInfo->getGuid();
    auto it = ctx.typeServerSourceMappings.emplace(Guid, this);
    if (!it.second) {
      // If we hit here we have a collision on Guids in two PDB files.
      // This can happen if the PDB Guid is invalid or if we are really
      // unlucky. This should fall back on straight file-system lookup.
      it.first->second = nullptr;
    }
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

  bool isDependency() const override { return true; }

  // The loaded type server PDB this source wraps.
  PDBInputFile *pdbInputFile = nullptr;

  // TpiSource for IPI stream.
  TypeServerIpiSource *ipiSrc = nullptr;

  // The PDB signature GUID.
  codeview::GUID Guid;
};
84
// Companion to TypeServerSource. Stores the index map for the IPI stream in the
// PDB. Modeling PDBs with two sources for TPI and IPI helps establish the
// invariant of one type index space per source.
class TypeServerIpiSource : public TpiSource {
public:
  explicit TypeServerIpiSource(COFFLinkerContext &ctx)
      : TpiSource(ctx, PDBIpi, nullptr) {}

  // TypeServerSource writes directly into this source's state (index map,
  // ghashes) when processing the IPI stream.
  friend class TypeServerSource;

  // All of the TpiSource methods are no-ops. The parent TypeServerSource
  // handles both TPI and IPI.
  Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override {}
  bool isDependency() const override { return true; }
};
102
// This class represents the debug type stream of an OBJ file that depends on a
// PDB type server (see TypeServerSource).
class UseTypeServerSource : public TpiSource {
  // Resolves the TypeServerSource this object depends on, first by GUID and
  // then by the path recorded in the TypeServer2 record.
  Expected<TypeServerSource *> getTypeServerSource();

public:
  UseTypeServerSource(COFFLinkerContext &ctx, ObjFile *f, TypeServer2Record ts)
      : TpiSource(ctx, UsingPDB, f), typeServerDependency(ts) {}

  Error mergeDebugT(TypeMerger *m) override;

  // No need to load ghashes from /Zi objects.
  void loadGHashes() override {}
  void remapTpiWithGHashes(GHashState *g) override;

  // Information about the PDB type server dependency, that needs to be loaded
  // in before merging this OBJ.
  TypeServer2Record typeServerDependency;
};
122
// This class represents the debug type stream of a Microsoft precompiled
// headers OBJ (PCH OBJ). This OBJ kind needs to be merged first in the output
// PDB, before any other OBJs that depend on this. Note that only MSVC generates
// such files, clang does not.
class PrecompSource : public TpiSource {
public:
  PrecompSource(COFFLinkerContext &ctx, ObjFile *f) : TpiSource(ctx, PCH, f) {
    // If the S_OBJNAME record contains the PCH signature, we'll register this
    // source file right away.
    registerMapping();
  }

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;

  bool isDependency() const override { return true; }

private:
  // Records this source in ctx.precompSourceMappings, keyed by PCH signature.
  // Idempotent; does nothing until file->pchSignature is known and non-zero.
  void registerMapping();

  // Whether this precomp OBJ was recorded in the precompSourceMappings map.
  // Only happens if the file->pchSignature is valid.
  bool registered = false;
};
148
// This class represents the debug type stream of an OBJ file that depends on a
// Microsoft precompiled headers OBJ (see PrecompSource).
class UsePrecompSource : public TpiSource {
public:
  UsePrecompSource(COFFLinkerContext &ctx, ObjFile *f, PrecompRecord precomp)
      : TpiSource(ctx, UsingPCH, f), precompDependency(precomp) {}

  Error mergeDebugT(TypeMerger *m) override;

  void loadGHashes() override;
  void remapTpiWithGHashes(GHashState *g) override;

private:
  // Prepends the PCH object's already-remapped indices to this source's map.
  Error mergeInPrecompHeaderObj();

  // Helpers to locate the PrecompSource this object depends on, by PCH
  // signature or, failing that, by file name.
  PrecompSource *findObjByName(StringRef fileNameOnly);
  PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr);
  Expected<PrecompSource *> findPrecompMap(ObjFile *file, PrecompRecord &pr);

public:
  // Information about the Precomp OBJ dependency, that needs to be loaded in
  // before merging this OBJ.
  PrecompRecord precompDependency;
};
173} // namespace
174
// Every TpiSource registers itself with the linker context on construction.
// tpiSrcIdx snapshots the current list size — presumably this source's slot
// once addTpiSource appends it; confirm in COFFLinkerContext.
TpiSource::TpiSource(COFFLinkerContext &ctx, TpiKind k, ObjFile *f)
    : ctx(ctx), kind(k), tpiSrcIdx(ctx.tpiSourceList.size()), file(f) {
  ctx.addTpiSource(this);
}
179
// Vtable key method.
TpiSource::~TpiSource() {
  // Silence any assertions about unchecked errors. A merging error may have
  // been recorded but never consumed if the link aborted early.
  consumeError(std::move(typeMergingError));
}
185
// Factory for the debug type stream of a plain (/Z7) object file.
TpiSource *lld::coff::makeTpiSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<TpiSource>(ctx, TpiSource::Regular, file);
}
189
// Factory for a type server PDB source.
TpiSource *lld::coff::makeTypeServerSource(COFFLinkerContext &ctx,
                                           PDBInputFile *pdbInputFile) {
  // Type server sources come in pairs: the TPI stream, and the IPI stream.
  // The IPI companion is only created when the PDB actually has an IPI stream.
  auto *tpiSource = make<TypeServerSource>(ctx, pdbInputFile);
  if (pdbInputFile->session->getPDBFile().hasPDBIpiStream())
    tpiSource->ipiSrc = make<TypeServerIpiSource>(ctx);
  return tpiSource;
}
198
// Factory for an OBJ compiled with /Zi that references a type server PDB.
TpiSource *lld::coff::makeUseTypeServerSource(COFFLinkerContext &ctx,
                                              ObjFile *file,
                                              TypeServer2Record ts) {
  return make<UseTypeServerSource>(ctx, file, ts);
}
204
// Factory for an MSVC precompiled-headers OBJ (/Yc).
TpiSource *lld::coff::makePrecompSource(COFFLinkerContext &ctx, ObjFile *file) {
  return make<PrecompSource>(ctx, file);
}
208
// Factory for an OBJ compiled with /Yu that depends on a PCH OBJ.
TpiSource *lld::coff::makeUsePrecompSource(COFFLinkerContext &ctx,
                                           ObjFile *file,
                                           PrecompRecord precomp) {
  return make<UsePrecompSource>(ctx, file, precomp);
}
214
215bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
216if (ti.isSimple())
217return true;
218
219// This can be an item index or a type index. Choose the appropriate map.
220ArrayRef<TypeIndex> tpiOrIpiMap =
221(refKind == TiRefKind::IndexRef) ? ipiMap : tpiMap;
222if (ti.toArrayIndex() >= tpiOrIpiMap.size())
223return false;
224ti = tpiOrIpiMap[ti.toArrayIndex()];
225return true;
226}
227
// Remap, in place, every type/item index inside `rec` at the positions
// described by `typeRefs`. Unmappable indices are replaced with the simple
// NotTranslated index and logged in verbose mode.
void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
                            ArrayRef<TiReference> typeRefs) {
  // Offsets in TiReference are relative to the record payload, which follows
  // the RecordPrefix header.
  MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
  for (const TiReference &ref : typeRefs) {
    unsigned byteSize = ref.Count * sizeof(TypeIndex);
    if (contents.size() < ref.Offset + byteSize)
      fatal("symbol record too short");

    MutableArrayRef<TypeIndex> indices(
        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
    for (TypeIndex &ti : indices) {
      if (!remapTypeIndex(ti, ref.Kind)) {
        if (ctx.config.verbose) {
          uint16_t kind =
              reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
          StringRef fname = file ? file->getName() : "<unknown PDB>";
          log("failed to remap type index in record of kind 0x" +
              utohexstr(kind) + " in " + fname + " with bad " +
              (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
              " index 0x" + utohexstr(ti.getIndex()));
        }
        // Substitute a sentinel rather than leave a dangling reference into
        // the source stream.
        ti = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
    }
  }
}
255
256void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
257// TODO: Handle errors similar to symbols.
258SmallVector<TiReference, 32> typeRefs;
259discoverTypeIndices(CVType(rec), typeRefs);
260remapRecord(rec, typeRefs);
261}
262
263bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
264// Discover type index references in the record. Skip it if we don't
265// know where they are.
266SmallVector<TiReference, 32> typeRefs;
267if (!discoverTypeIndicesInSymbol(rec, typeRefs))
268return false;
269remapRecord(rec, typeRefs);
270return true;
271}
272
273// A COFF .debug$H section is currently a clang extension. This function checks
274// if a .debug$H section is in a format that we expect / understand, so that we
275// can ignore any sections which are coincidentally also named .debug$H but do
276// not contain a format we recognize.
277static bool canUseDebugH(ArrayRef<uint8_t> debugH) {
278if (debugH.size() < sizeof(object::debug_h_header))
279return false;
280auto *header =
281reinterpret_cast<const object::debug_h_header *>(debugH.data());
282debugH = debugH.drop_front(sizeof(object::debug_h_header));
283return header->Magic == COFF::DEBUG_HASHES_SECTION_MAGIC &&
284header->Version == 0 &&
285header->HashAlgorithm == uint16_t(GlobalTypeHashAlg::BLAKE3) &&
286(debugH.size() % 8 == 0);
287}
288
289static std::optional<ArrayRef<uint8_t>> getDebugH(ObjFile *file) {
290SectionChunk *sec =
291SectionChunk::findByName(file->getDebugChunks(), ".debug$H");
292if (!sec)
293return std::nullopt;
294ArrayRef<uint8_t> contents = sec->getContents();
295if (!canUseDebugH(contents))
296return std::nullopt;
297return contents;
298}
299
300static ArrayRef<GloballyHashedType>
301getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
302assert(canUseDebugH(debugH));
303debugH = debugH.drop_front(sizeof(object::debug_h_header));
304uint32_t count = debugH.size() / sizeof(GloballyHashedType);
305return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
306}
307
// Merge .debug$T for a generic object file.
Error TpiSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  CVTypeArray types;
  BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
  cantFail(reader.readArray(types, reader.getLength()));

  // When dealing with PCH.OBJ, some indices were already merged.
  unsigned nbHeadIndices = indexMapStorage.size();

  std::optional<PCHMergerInfo> pchInfo;
  if (auto err = mergeTypeAndIdRecords(m->idTable, m->typeTable,
                                       indexMapStorage, types, pchInfo))
    fatal("codeview::mergeTypeAndIdRecords failed: " +
          toString(std::move(err)));
  // If this turned out to be a PCH object, remember its signature and the
  // position of the LF_ENDPRECOMP record so the record can be dropped later.
  if (pchInfo) {
    file->pchSignature = pchInfo->PCHSignature;
    endPrecompIdx = pchInfo->EndPrecompIndex;
  }

  // In an object, there is only one mapping for both types and items.
  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;

  if (ctx.config.showSummary) {
    nbTypeRecords = indexMapStorage.size() - nbHeadIndices;
    nbTypeRecordsBytes = reader.getLength();
    // Count how many times we saw each type record in our input. This
    // calculation requires a second pass over the type records to classify each
    // record as a type or index. This is slow, but this code executes when
    // collecting statistics.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    uint32_t srcIdx = nbHeadIndices;
    for (const CVType &ty : types) {
      TypeIndex dstIdx = tpiMap[srcIdx++];
      // Type merging may fail, so a complex source type may become the simple
      // NotTranslated type, which cannot be used as an array index.
      if (dstIdx.isSimple())
        continue;
      SmallVectorImpl<uint32_t> &counts =
          isIdRecord(ty.kind()) ? m->ipiCounts : m->tpiCounts;
      ++counts[dstIdx.toArrayIndex()];
    }
  }

  return Error::success();
}
358
// Merge types from a type server PDB.
Error TypeServerSource::mergeDebugT(TypeMerger *m) {
  assert(!ctx.config.debugGHashes &&
         "use remapTpiWithGHashes when ghash is enabled");

  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
  if (auto e = expectedTpi.takeError())
    fatal("Type server does not have TPI stream: " + toString(std::move(e)));
  // The IPI stream is optional in a PDB.
  pdb::TpiStream *maybeIpi = nullptr;
  if (pdbFile.hasPDBIpiStream()) {
    Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
    if (auto e = expectedIpi.takeError())
      fatal("Error getting type server IPI stream: " + toString(std::move(e)));
    maybeIpi = &*expectedIpi;
  }

  // Merge TPI first, because the IPI stream will reference type indices.
  if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
                                  expectedTpi->typeArray()))
    fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
  tpiMap = indexMapStorage;

  // Merge IPI. Its index map lives in the companion ipiSrc to preserve the
  // one-index-space-per-source invariant.
  if (maybeIpi) {
    if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
                                  maybeIpi->typeArray()))
      fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
    ipiMap = ipiSrc->indexMapStorage;
  }

  if (ctx.config.showSummary) {
    nbTypeRecords = tpiMap.size() + ipiMap.size();
    nbTypeRecordsBytes =
        expectedTpi->typeArray().getUnderlyingStream().getLength() +
        (maybeIpi ? maybeIpi->typeArray().getUnderlyingStream().getLength()
                  : 0);

    // Count how many times we saw each type record in our input. If a
    // destination type index is present in the source to destination type index
    // map, that means we saw it once in the input. Add it to our histogram.
    m->tpiCounts.resize(m->getTypeTable().size());
    m->ipiCounts.resize(m->getIDTable().size());
    for (TypeIndex ti : tpiMap)
      if (!ti.isSimple())
        ++m->tpiCounts[ti.toArrayIndex()];
    for (TypeIndex ti : ipiMap)
      if (!ti.isSimple())
        ++m->ipiCounts[ti.toArrayIndex()];
  }

  return Error::success();
}
412
// Locate the TypeServerSource this /Zi object depends on. Resolution order:
// first by the GUID recorded in the TypeServer2 record, then by the recorded
// file path; the path-based fallback additionally verifies the GUID.
Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
  const codeview::GUID &tsId = typeServerDependency.getGuid();
  StringRef tsPath = typeServerDependency.getName();

  TypeServerSource *tsSrc = nullptr;
  auto it = ctx.typeServerSourceMappings.find(tsId);
  if (it != ctx.typeServerSourceMappings.end()) {
    // NOTE: the mapped value may be nullptr if two PDBs collided on this GUID
    // (see TypeServerSource's constructor); we then fall through to the
    // path-based lookup below.
    tsSrc = (TypeServerSource *)it->second;
  }
  if (tsSrc == nullptr) {
    // The file failed to load, lookup by name
    PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file);
    if (!pdb)
      return createFileError(tsPath, errorCodeToError(std::error_code(
                                         ENOENT, std::generic_category())));
    // If an error occurred during loading, throw it now
    if (pdb->loadErrorStr)
      return createFileError(
          tsPath, make_error<StringError>(*pdb->loadErrorStr,
                                          llvm::inconvertibleErrorCode()));

    tsSrc = (TypeServerSource *)pdb->debugTypesObj;

    // Just because a file with a matching name was found and it was an actual
    // PDB file doesn't mean it matches. For it to match the InfoStream's GUID
    // must match the GUID specified in the TypeServer2 record.
    if (tsSrc->Guid != tsId) {
      return createFileError(tsPath,
                             make_error<pdb::PDBError>(
                                 pdb::pdb_error_code::signature_out_of_date));
    }
  }
  return tsSrc;
}
447
// A /Zi object contributes no type records of its own; adopt the index maps
// of the type server PDB it references.
Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
  Expected<TypeServerSource *> tsSrc = getTypeServerSource();
  if (!tsSrc)
    return tsSrc.takeError();

  // Probe the server PDB's info stream to surface a load error early; the
  // stream's contents are not otherwise used here.
  pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
  auto expectedInfo = pdbSession.getPDBInfoStream();
  if (!expectedInfo)
    return expectedInfo.takeError();

  // Reuse the type index map of the type server.
  tpiMap = (*tsSrc)->tpiMap;
  ipiMap = (*tsSrc)->ipiMap;
  return Error::success();
}
463
464static bool equalsPath(StringRef path1, StringRef path2) {
465#if defined(_WIN32)
466return path1.equals_insensitive(path2);
467#else
468return path1 == path2;
469#endif
470}
471
472// Find by name an OBJ provided on the command line
473PrecompSource *UsePrecompSource::findObjByName(StringRef fileNameOnly) {
474SmallString<128> currentPath;
475for (auto kv : ctx.precompSourceMappings) {
476StringRef currentFileName = sys::path::filename(kv.second->file->getName(),
477sys::path::Style::windows);
478
479// Compare based solely on the file name (link.exe behavior)
480if (equalsPath(currentFileName, fileNameOnly))
481return (PrecompSource *)kv.second;
482}
483return nullptr;
484}
485
486PrecompSource *UsePrecompSource::findPrecompSource(ObjFile *file,
487PrecompRecord &pr) {
488// Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
489// records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
490// the paths embedded in the OBJs are in the Windows format.
491SmallString<128> prFileName =
492sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
493
494auto it = ctx.precompSourceMappings.find(pr.getSignature());
495if (it != ctx.precompSourceMappings.end()) {
496return (PrecompSource *)it->second;
497}
498// Lookup by name
499return findObjByName(prFileName);
500}
501
// Locate the PCH source for `pr` and validate that its LF_ENDPRECOMP position
// matches the type count the LF_PRECOMP record expects. Returns a
// no_matching_pch error otherwise.
Expected<PrecompSource *> UsePrecompSource::findPrecompMap(ObjFile *file,
                                                           PrecompRecord &pr) {
  PrecompSource *precomp = findPrecompSource(file, pr);

  if (!precomp)
    return createFileError(
        pr.getPrecompFilePath(),
        make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));

  // Don't rely on the PCH signature to validate the concordance between the PCH
  // and the OBJ that uses it. However we do validate here that the
  // LF_ENDPRECOMP record index lines up with the number of type records
  // LF_PRECOMP is expecting.
  if (precomp->endPrecompIdx != pr.getTypesCount())
    return createFileError(
        toString(file),
        make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));

  return precomp;
}
522
/// Merges a precompiled headers TPI map into the current TPI map. The
/// precompiled headers object will also be loaded and remapped in the
/// process.
Error UsePrecompSource::mergeInPrecompHeaderObj() {
  auto e = findPrecompMap(file, precompDependency);
  if (!e)
    return e.takeError();

  PrecompSource *precompSrc = *e;
  // An empty map means the PCH object contributed no types; nothing to copy.
  if (precompSrc->tpiMap.empty())
    return Error::success();

  // The LF_PRECOMP record must reference the very first non-simple index, and
  // may not claim more types than the PCH object actually provides.
  assert(precompDependency.getStartTypeIndex() ==
         TypeIndex::FirstNonSimpleIndex);
  assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
  // Use the previously remapped index map from the precompiled headers.
  indexMapStorage.insert(indexMapStorage.begin(), precompSrc->tpiMap.begin(),
                         precompSrc->tpiMap.begin() +
                             precompDependency.getTypesCount());

  return Error::success();
}
545
546Error UsePrecompSource::mergeDebugT(TypeMerger *m) {
547// This object was compiled with /Yu, so process the corresponding
548// precompiled headers object (/Yc) first. Some type indices in the current
549// object are referencing data in the precompiled headers object, so we need
550// both to be loaded.
551if (Error e = mergeInPrecompHeaderObj())
552return e;
553
554return TpiSource::mergeDebugT(m);
555}
556
557Error PrecompSource::mergeDebugT(TypeMerger *m) {
558// In some cases, the S_OBJNAME record doesn't contain the PCH signature.
559// The signature comes later with the LF_ENDPRECOMP record, so we first need
560// to merge in all the .PCH.OBJ file type records, before registering below.
561if (Error e = TpiSource::mergeDebugT(m))
562return e;
563
564registerMapping();
565
566return Error::success();
567}
568
// Record this PCH source in the context's signature map, once the signature
// is known and non-zero. Safe to call multiple times; only registers once.
void PrecompSource::registerMapping() {
  if (registered)
    return;
  if (file->pchSignature && *file->pchSignature) {
    auto it = ctx.precompSourceMappings.emplace(*file->pchSignature, this);
    // Duplicate signatures are a hard error: dependent objects could not tell
    // which PCH object to use.
    if (!it.second)
      fatal("a PCH object with the same signature has already been provided (" +
            toString(it.first->second->file) + " and " + toString(file) + ")");
    registered = true;
  }
}
580
//===----------------------------------------------------------------------===//
// Parallel GHash type merging implementation.
//===----------------------------------------------------------------------===//
584
// Load or compute global hashes for this object's type records. A recognized
// .debug$H section provides precomputed hashes; otherwise hash .debug$T here.
void TpiSource::loadGHashes() {
  if (std::optional<ArrayRef<uint8_t>> debugH = getDebugH(file)) {
    // Hashes are borrowed from the section; do not free them later.
    ghashes = getHashesFromDebugH(*debugH);
    ownedGHashes = false;
  } else {
    CVTypeArray types;
    BinaryStreamReader reader(file->debugTypes, llvm::endianness::little);
    cantFail(reader.readArray(types, reader.getLength()));
    assignGHashesFromVector(GloballyHashedType::hashTypes(types));
  }

  fillIsItemIndexFromDebugT();
}
598
// Copies ghashes from a vector into an array. These are long lived, so it's
// worth the time to copy these into an appropriately sized vector to reduce
// memory usage.
void TpiSource::assignGHashesFromVector(
    std::vector<GloballyHashedType> &&hashVec) {
  if (hashVec.empty())
    return;
  // Raw allocation is intentional: the buffer outlives hashVec and ownership
  // is tracked via ownedGHashes.
  GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
  memcpy(hashes, hashVec.data(), hashVec.size() * sizeof(GloballyHashedType));
  ghashes = ArrayRef(hashes, hashVec.size());
  ownedGHashes = true;
}
611
// Faster way to iterate type records. forEachTypeChecked is faster than
// iterating CVTypeArray. It avoids virtual readBytes calls in inner loops.
static void forEachTypeChecked(ArrayRef<uint8_t> types,
                               function_ref<void(const CVType &)> fn) {
  // Malformed records abort the link via checkError rather than being
  // silently skipped.
  checkError(
      forEachCodeViewRecord<CVType>(types, [fn](const CVType &ty) -> Error {
        fn(ty);
        return Error::success();
      }));
}
622
// Walk over file->debugTypes and fill in the isItemIndex bit vector.
// TODO: Store this information in .debug$H so that we don't have to recompute
// it. This is the main bottleneck slowing down parallel ghashing with one
// thread over single-threaded ghashing.
void TpiSource::fillIsItemIndexFromDebugT() {
  uint32_t index = 0;
  isItemIndex.resize(ghashes.size());
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    // A set bit means record `index` is an id (IPI) record, not a type.
    if (isIdRecord(ty.kind()))
      isItemIndex.set(index);
    ++index;
  });
}
636
// Append one unique type record (source index `curIndex`) to the appropriate
// merged stream (TPI or IPI), padding it to 4 bytes, remapping its indices,
// and computing its PDB hash.
void TpiSource::mergeTypeRecord(TypeIndex curIndex, CVType ty) {
  // Decide if the merged type goes into TPI or IPI.
  bool isItem = isIdRecord(ty.kind());
  MergedInfo &merged = isItem ? mergedIpi : mergedTpi;

  // Copy the type into our mutable buffer.
  assert(ty.length() <= codeview::MaxRecordLength);
  size_t offset = merged.recs.size();
  size_t newSize = alignTo(ty.length(), 4);
  merged.recs.resize(offset + newSize);
  auto newRec = MutableArrayRef(&merged.recs[offset], newSize);
  memcpy(newRec.data(), ty.data().data(), newSize);

  // Fix up the record prefix and padding bytes if it required resizing.
  if (newSize != ty.length()) {
    // RecordLen does not count its own 2 bytes.
    reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
    for (size_t i = ty.length(); i < newSize; ++i)
      newRec[i] = LF_PAD0 + (newSize - i);
  }

  // Remap the type indices in the new record.
  remapTypesInTypeRecord(newRec);
  uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
  merged.recSizes.push_back(static_cast<uint16_t>(newSize));
  merged.recHashes.push_back(pdbHash);

  // Retain a mapping from PDB function id to PDB function type. This mapping is
  // used during symbol processing to rewrite S_GPROC32_ID symbols to S_GPROC32
  // symbols.
  if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
    // The function type index lives at payload offset 8 in both record kinds,
    // so the record must be at least 12 bytes long.
    bool success = ty.length() >= 12;
    TypeIndex funcId = curIndex;
    if (success)
      success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
    TypeIndex funcType =
        *reinterpret_cast<const TypeIndex *>(&newRec.data()[8]);
    if (success) {
      funcIdToType.push_back({funcId, funcType});
    } else {
      StringRef fname = file ? file->getName() : "<unknown PDB>";
      warn("corrupt LF_[M]FUNC_ID record 0x" + utohexstr(curIndex.getIndex()) +
           " in " + fname);
    }
  }
}
682
// Copy every record whose ghash index appears in `uniqueTypes` into the
// merged TPI/IPI buffers. Two passes over `typeRecords`: the first sizes the
// destination buffers, the second performs the merge.
void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
                                       TypeIndex beginIndex) {
  // Re-sort the list of unique types by index.
  if (kind == PDB)
    assert(llvm::is_sorted(uniqueTypes));
  else
    llvm::sort(uniqueTypes);

  // Accumulate all the unique types into one buffer in mergedTypes.
  uint32_t ghashIndex = 0;
  auto nextUniqueIndex = uniqueTypes.begin();
  assert(mergedTpi.recs.empty());
  assert(mergedIpi.recs.empty());

  // Pre-compute the number of elements in advance to avoid std::vector resizes.
  unsigned nbTpiRecs = 0;
  unsigned nbIpiRecs = 0;
  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
    if (nextUniqueIndex != uniqueTypes.end() &&
        *nextUniqueIndex == ghashIndex) {
      assert(ty.length() <= codeview::MaxRecordLength);
      size_t newSize = alignTo(ty.length(), 4);
      (isIdRecord(ty.kind()) ? nbIpiRecs : nbTpiRecs) += newSize;
      ++nextUniqueIndex;
    }
    ++ghashIndex;
  });
  mergedTpi.recs.reserve(nbTpiRecs);
  mergedIpi.recs.reserve(nbIpiRecs);

  // Do the actual type merge.
  ghashIndex = 0;
  nextUniqueIndex = uniqueTypes.begin();
  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
    if (nextUniqueIndex != uniqueTypes.end() &&
        *nextUniqueIndex == ghashIndex) {
      mergeTypeRecord(beginIndex + ghashIndex, ty);
      ++nextUniqueIndex;
    }
    ++ghashIndex;
  });
  assert(nextUniqueIndex == uniqueTypes.end() &&
         "failed to merge all desired records");
  assert(uniqueTypes.size() ==
             mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
         "missing desired record");
}
730
// GHash-mode counterpart of mergeDebugT for a plain object: build the index
// map from the global hash table, then merge this source's unique records.
void TpiSource::remapTpiWithGHashes(GHashState *g) {
  assert(ctx.config.debugGHashes && "ghashes must be enabled");
  fillMapFromGHashes(g);
  // In an object, one map serves both type and item references.
  tpiMap = indexMapStorage;
  ipiMap = indexMapStorage;
  mergeUniqueTypeRecords(file->debugTypes);
  // TODO: Free all unneeded ghash resources now that we have a full index map.

  if (ctx.config.showSummary) {
    nbTypeRecords = ghashes.size();
    nbTypeRecordsBytes = file->debugTypes.size();
  }
}
744
// PDBs do not actually store global hashes, so when merging a type server
// PDB we have to synthesize global hashes. To do this, we first synthesize
// global hashes for the TPI stream, since it is independent, then we
// synthesize hashes for the IPI stream, using the hashes for the TPI stream
// as inputs.
void TypeServerSource::loadGHashes() {
  // Don't hash twice.
  if (!ghashes.empty())
    return;
  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();

  // Hash TPI stream.
  Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
  if (auto e = expectedTpi.takeError())
    fatal("Type server does not have TPI stream: " + toString(std::move(e)));
  assignGHashesFromVector(
      GloballyHashedType::hashTypes(expectedTpi->typeArray()));
  // TPI records are all types, so the bitvector stays all-zero.
  isItemIndex.resize(ghashes.size());

  // Hash IPI stream, which depends on TPI ghashes.
  if (!pdbFile.hasPDBIpiStream())
    return;
  Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
  if (auto e = expectedIpi.takeError())
    fatal("error retrieving IPI stream: " + toString(std::move(e)));
  ipiSrc->assignGHashesFromVector(
      GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));

  // The IPI stream isItemIndex bitvector should be all ones.
  ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
  ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
}
777
// Flatten discontiguous PDB type arrays to bytes so that we can use
// forEachTypeChecked instead of CVTypeArray iteration. Copying all types from
// type servers is faster than iterating all object files compiled with /Z7 with
// CVTypeArray, which has high overheads due to the virtual interface of
// BinaryStream::readBytes.
static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
  BinaryStreamRef stream = types.getUnderlyingStream();
  ArrayRef<uint8_t> debugTypes;
  checkError(stream.readBytes(0, stream.getLength(), debugTypes));
  return debugTypes;
}
789
// Merge types from a type server PDB.
void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
  assert(ctx.config.debugGHashes && "ghashes must be enabled");

  // IPI merging depends on TPI, so do TPI first, then do IPI. No need to
  // propagate errors, those should've been handled during ghash loading.
  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
  pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
  fillMapFromGHashes(g);
  tpiMap = indexMapStorage;
  mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
  if (pdbFile.hasPDBIpiStream()) {
    pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
    ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
    ipiSrc->fillMapFromGHashes(g);
    ipiMap = ipiSrc->indexMapStorage;
    // The companion needs both maps to remap the indices inside IPI records,
    // which may reference either stream.
    ipiSrc->tpiMap = tpiMap;
    ipiSrc->ipiMap = ipiMap;
    ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));

    if (ctx.config.showSummary) {
      nbTypeRecords = ipiSrc->ghashes.size();
      nbTypeRecordsBytes = ipi.typeArray().getUnderlyingStream().getLength();
    }
  }

  // Accumulate TPI stats on top of the IPI stats recorded above (if any).
  if (ctx.config.showSummary) {
    nbTypeRecords += ghashes.size();
    nbTypeRecordsBytes += tpi.typeArray().getUnderlyingStream().getLength();
  }
}
821
822void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
823// No remapping to do with /Zi objects. Simply use the index map from the type
824// server. Errors should have been reported earlier. Symbols from this object
825// will be ignored.
826Expected<TypeServerSource *> maybeTsSrc = getTypeServerSource();
827if (!maybeTsSrc) {
828typeMergingError =
829joinErrors(std::move(typeMergingError), maybeTsSrc.takeError());
830return;
831}
832TypeServerSource *tsSrc = *maybeTsSrc;
833tpiMap = tsSrc->tpiMap;
834ipiMap = tsSrc->ipiMap;
835}
836
// Hash the records of a /Yc PCH object. The LF_ENDPRECOMP record is located
// along the way so its signature can be registered and the record later
// excluded from the PDB.
void PrecompSource::loadGHashes() {
  if (getDebugH(file)) {
    warn("ignoring .debug$H section; pch with ghash is not implemented");
  }

  uint32_t ghashIdx = 0;
  std::vector<GloballyHashedType> hashVec;
  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
    // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
    // the PDB. There must be an entry in the list of ghashes so that the type
    // indexes of the following records in the /Yc PCH object line up.
    if (ty.kind() == LF_ENDPRECOMP) {
      EndPrecompRecord endPrecomp;
      cantFail(TypeDeserializer::deserializeAs<EndPrecompRecord>(
          const_cast<CVType &>(ty), endPrecomp));
      file->pchSignature = endPrecomp.getSignature();
      registerMapping();
      endPrecompIdx = ghashIdx;
    }

    // Records may reference earlier records in either index space, so the
    // partial hashVec doubles as both the type and item hash context.
    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
    isItemIndex.push_back(isIdRecord(ty.kind()));
    ++ghashIdx;
  });
  assignGHashesFromVector(std::move(hashVec));
}
863
864void UsePrecompSource::loadGHashes() {
865auto e = findPrecompMap(file, precompDependency);
866if (!e) {
867warn(toString(e.takeError()));
868return;
869}
870
871PrecompSource *pchSrc = *e;
872
873// To compute ghashes of a /Yu object file, we need to build on the ghashes of
874// the /Yc PCH object. After we are done hashing, discard the ghashes from the
875// PCH source so we don't unnecessarily try to deduplicate them.
876std::vector<GloballyHashedType> hashVec =
877pchSrc->ghashes.take_front(precompDependency.getTypesCount());
878forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
879hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
880isItemIndex.push_back(isIdRecord(ty.kind()));
881});
882hashVec.erase(hashVec.begin(),
883hashVec.begin() + precompDependency.getTypesCount());
884assignGHashesFromVector(std::move(hashVec));
885}
886
887void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
888fillMapFromGHashes(g);
889// This object was compiled with /Yu, so process the corresponding
890// precompiled headers object (/Yc) first. Some type indices in the current
891// object are referencing data in the precompiled headers object, so we need
892// both to be loaded.
893if (Error e = mergeInPrecompHeaderObj()) {
894typeMergingError = joinErrors(std::move(typeMergingError), std::move(e));
895return;
896}
897
898tpiMap = indexMapStorage;
899ipiMap = indexMapStorage;
900mergeUniqueTypeRecords(file->debugTypes,
901TypeIndex(precompDependency.getStartTypeIndex() +
902precompDependency.getTypesCount()));
903if (ctx.config.showSummary) {
904nbTypeRecords = ghashes.size();
905nbTypeRecordsBytes = file->debugTypes.size();
906}
907}
908
namespace {
/// A concurrent hash table for global type hashing. It is based on this paper:
/// Concurrent Hash Tables: Fast and General(?)!
/// https://dl.acm.org/doi/10.1145/3309206
///
/// This hash table is meant to be used in two phases:
/// 1. concurrent insertions
/// 2. concurrent reads
/// It does not support lookup, deletion, or rehashing. It uses linear probing.
///
/// The paper describes storing a key-value pair in two machine words.
/// Generally, the values stored in this map are type indices, and we can use
/// those values to recover the ghash key from a side table. This allows us to
/// shrink the table entries further at the cost of some loads, and sidesteps
/// the need for a 128 bit atomic compare-and-swap operation.
///
/// During insertion, a priority function is used to decide which insertion
/// should be preferred. This ensures that the output is deterministic. For
/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
///
class GHashCell;
struct GHashTable {
  /// Flat array of cells, allocated by init() and freed in the destructor.
  GHashCell *table = nullptr;
  /// Number of cells in the table; fixed after init() (no rehashing).
  uint32_t tableSize = 0;

  GHashTable() = default;
  ~GHashTable();

  /// Initialize the table with the given size. Because the table cannot be
  /// resized, the initial size of the table must be large enough to contain all
  /// inputs, or insertion may not be able to find an empty cell.
  void init(uint32_t newTableSize);

  /// Insert the cell with the given ghash into the table. Return the insertion
  /// position in the table. It is safe for the caller to store the insertion
  /// position because the table cannot be resized.
  uint32_t insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                  GHashCell newCell);
};

/// A ghash table cell for deduplicating types from TpiSources.
/// Bit layout of `data`, most to least significant:
///   [ isItem : 1 ][ tpiSrcIdx + 1 : 31 ][ ghashIdx : 32 ]
class GHashCell {
  // Force "data" to be 64-bit aligned; otherwise, some versions of clang
  // will generate calls to libatomic when using some versions of libstdc++
  // on 32-bit targets. (Also, in theory, there could be a target where
  // new[] doesn't always return an 8-byte-aligned allocation.)
  alignas(sizeof(uint64_t)) uint64_t data = 0;

public:
  GHashCell() = default;

  // Construct data most to least significant so that sorting works well:
  // - isItem
  // - tpiSrcIdx
  // - ghashIdx
  // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
  // non-zero representation.
  GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
      : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
             ghashIdx) {
    assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
    assert(ghashIdx == getGHashIdx() && "round trip failure");
  }

  /// Reconstruct a cell from its raw 64-bit representation.
  explicit GHashCell(uint64_t data) : data(data) {}

  // The empty cell is all zeros.
  bool isEmpty() const { return data == 0ULL; }

  /// Extract the tpiSrcIdx (undoing the +1 bias applied by the constructor).
  uint32_t getTpiSrcIdx() const {
    return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
  }

  /// Extract the index into the ghash array of the TpiSource.
  uint32_t getGHashIdx() const { return (uint32_t)data; }

  /// True if this cell refers to an item (IPI) record rather than a type (TPI)
  /// record; stored in the top bit so items sort after types.
  bool isItem() const { return data & (1ULL << 63U); }

  /// Get the ghash key for this cell.
  GloballyHashedType getGHash(const COFFLinkerContext &ctx) const {
    return ctx.tpiSourceList[getTpiSrcIdx()]->ghashes[getGHashIdx()];
  }

  /// The priority function for the cell. The data is stored such that lower
  /// tpiSrcIdx and ghashIdx values are preferred, which means that type record
  /// from earlier sources are more likely to prevail.
  friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
    return l.data < r.data;
  }
};
} // namespace
1001
namespace lld::coff {
/// This type is just a wrapper around GHashTable with external linkage so it
/// can be used from a header; GHashTable itself lives in an anonymous
/// namespace in this file.
struct GHashState {
  GHashTable table;
};
} // namespace lld::coff
1009
// Release the cell array allocated by init(); delete[] on null is a no-op.
GHashTable::~GHashTable() { delete[] table; }
1011
1012void GHashTable::init(uint32_t newTableSize) {
1013table = new GHashCell[newTableSize];
1014memset(table, 0, newTableSize * sizeof(GHashCell));
1015tableSize = newTableSize;
1016}
1017
// Concurrently insert `newCell` under key `ghash`, linear-probing from a
// position derived from the hash. Returns the cell index, which is stable
// because the table never rehashes.
uint32_t GHashTable::insert(COFFLinkerContext &ctx, GloballyHashedType ghash,
                            GHashCell newCell) {
  assert(!newCell.isEmpty() && "cannot insert empty cell value");

  // FIXME: The low bytes of SHA1 have low entropy for short records, which
  // type records are. Swap the byte order for better entropy. A better ghash
  // won't need this.
  // Seed the probe with the first 8 bytes of the ghash, byte-swapped.
  uint32_t startIdx =
      llvm::byteswap<uint64_t>(*reinterpret_cast<uint64_t *>(&ghash)) %
      tableSize;

  // Do a linear probe starting at startIdx.
  uint32_t idx = startIdx;
  while (true) {
    // Run a compare and swap loop. There are four cases:
    // - cell is empty: CAS into place and return
    // - cell has matching key, earlier priority: do nothing, return
    // - cell has matching key, later priority: CAS into place and return
    // - cell has non-matching key: hash collision, probe next cell
    // NOTE(review): the cast assumes std::atomic<GHashCell> has the same
    // size/layout as GHashCell (a single aligned uint64_t) -- confirm when
    // porting to a new toolchain.
    auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
    GHashCell oldCell(cellPtr->load());
    while (oldCell.isEmpty() || oldCell.getGHash(ctx) == ghash) {
      // Check if there is an existing ghash entry with a higher priority
      // (earlier ordering). If so, this is a duplicate, we are done.
      if (!oldCell.isEmpty() && oldCell < newCell)
        return idx;
      // Either the cell is empty, or our value is higher priority. Try to
      // compare and swap. If it succeeds, we are done.
      if (cellPtr->compare_exchange_weak(oldCell, newCell))
        return idx;
      // If the CAS failed, check this cell again. compare_exchange_weak
      // reloaded oldCell with the current contents, so the inner loop
      // condition re-evaluates against fresh data.
    }

    // Advance the probe. Wrap around to the beginning if we run off the end.
    ++idx;
    idx = idx == tableSize ? 0 : idx;
    if (idx == startIdx) {
      // If this becomes an issue, we could mark failure and rehash from the
      // beginning with a bigger table. There is no difference between rehashing
      // internally and starting over.
      report_fatal_error("ghash table is full");
    }
  }
  llvm_unreachable("left infloop");
}
1063
// Both merged-record tables share the caller-provided bump allocator.
TypeMerger::TypeMerger(COFFLinkerContext &c, llvm::BumpPtrAllocator &alloc)
    : typeTable(alloc), idTable(alloc), ctx(c) {}
1066
// Defaulted out of line; presumably so member types may be incomplete at the
// point of the class declaration -- confirm against TypeMerger.h.
TypeMerger::~TypeMerger() = default;
1068
// Deduplicate and merge all input type records using global hashes: hash every
// source, insert the hashes into one concurrent table, sort the survivors to
// assign destination PDB type indices, then remap each source.
void TypeMerger::mergeTypesWithGHash() {
  // Load ghashes. Do type servers and PCH objects first.
  {
    llvm::TimeTraceScope timeScope("Load GHASHes");
    ScopedTimer t1(ctx.loadGHashTimer);
    parallelForEach(dependencySources,
                    [&](TpiSource *source) { source->loadGHashes(); });
    parallelForEach(objectSources,
                    [&](TpiSource *source) { source->loadGHashes(); });
  }

  llvm::TimeTraceScope timeScope("Merge types (GHASH)");
  ScopedTimer t2(ctx.mergeGHashTimer);
  GHashState ghashState;

  // Estimate the size of hash table needed to deduplicate ghashes. This *must*
  // be larger than the number of unique types, or hash table insertion may not
  // be able to find a vacant slot. Summing the input types guarantees this, but
  // it is a gross overestimate. The table size could be reduced to save memory,
  // but it would require implementing rehashing, and this table is generally
  // small compared to total memory usage, at eight bytes per input type record,
  // and most input type records are larger than eight bytes.
  size_t tableSize = 0;
  for (TpiSource *source : ctx.tpiSourceList)
    tableSize += source->ghashes.size();

  // Cap the table size so that we can use 32-bit cell indices. Type indices are
  // also 32-bit, so this is an inherent PDB file format limit anyway.
  tableSize =
      std::min(size_t(INT32_MAX) - TypeIndex::FirstNonSimpleIndex, tableSize);
  ghashState.table.init(static_cast<uint32_t>(tableSize));

  // Insert ghashes in parallel. During concurrent insertion, we cannot observe
  // the contents of the hash table cell, but we can remember the insertion
  // position. Because the table does not rehash, the position will not change
  // under insertion. After insertion is done, the value of the cell can be read
  // to retrieve the final PDB type index.
  parallelFor(0, ctx.tpiSourceList.size(), [&](size_t tpiSrcIdx) {
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->indexMapStorage.resize(source->ghashes.size());
    for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
      if (source->shouldOmitFromPdb(i)) {
        // Omitted records (e.g. excluded from the output) map to a sentinel
        // simple index instead of a ghash cell.
        source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
        continue;
      }
      GloballyHashedType ghash = source->ghashes[i];
      bool isItem = source->isItemIndex.test(i);
      uint32_t cellIdx =
          ghashState.table.insert(ctx, ghash, GHashCell(isItem, tpiSrcIdx, i));

      // Store the ghash cell index as a type index in indexMapStorage. Later
      // we will replace it with the PDB type index.
      source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
    }
  });

  // Collect all non-empty cells and sort them. This will implicitly assign
  // destination type indices, and partition the entries into type records and
  // item records. It arranges types in this order:
  // - type records
  //   - source 0, type 0...
  //   - source 1, type 1...
  // - item records
  //   - source 0, type 1...
  //   - source 1, type 0...
  std::vector<GHashCell> entries;
  for (const GHashCell &cell : ArrayRef(ghashState.table.table, tableSize)) {
    if (!cell.isEmpty())
      entries.push_back(cell);
  }
  parallelSort(entries, std::less<GHashCell>());
  log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
              tableSize ? double(entries.size()) / tableSize : 0,
              entries.size(), tableSize));

  // Find out how many type and item indices there are.
  // GHashCell(true, 0, 0) is the smallest possible item cell, so lower_bound
  // lands on the first item entry.
  auto mid = llvm::lower_bound(entries, GHashCell(true, 0, 0));
  assert((mid == entries.end() || mid->isItem()) &&
         (mid == entries.begin() || !std::prev(mid)->isItem()) &&
         "midpoint is not midpoint");
  uint32_t numTypes = std::distance(entries.begin(), mid);
  uint32_t numItems = std::distance(mid, entries.end());
  log("Tpi record count: " + Twine(numTypes));
  log("Ipi record count: " + Twine(numItems));

  // Make a list of the "unique" type records to merge for each tpi source. Type
  // merging will skip indices not on this list. Store the destination PDB type
  // index for these unique types in the tpiMap for each source. The entries for
  // non-unique types will be filled in prior to type merging.
  for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
    auto &cell = entries[i];
    uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
    TpiSource *source = ctx.tpiSourceList[tpiSrcIdx];
    source->uniqueTypes.push_back(cell.getGHashIdx());

    // Update the ghash table to store the destination PDB type index in the
    // table. Item records restart numbering at zero in the IPI stream.
    uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
    uint32_t ghashCellIndex =
        source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
    ghashState.table.table[ghashCellIndex] =
        GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
  }

  // Remap all types: dependencies first and serially, since object sources
  // copy their index maps (see UseTypeServerSource::remapTpiWithGHashes);
  // then the independent object sources in parallel.
  for (TpiSource *source : dependencySources)
    source->remapTpiWithGHashes(&ghashState);
  parallelForEach(objectSources, [&](TpiSource *source) {
    source->remapTpiWithGHashes(&ghashState);
  });

  // Build a global map from function ID to function type.
  for (TpiSource *source : ctx.tpiSourceList) {
    for (auto idToType : source->funcIdToType)
      funcIdToType.insert(idToType);
    source->funcIdToType.clear();
  }

  clearGHashes();
}
1189
1190void TypeMerger::sortDependencies() {
1191// Order dependencies first, but preserve the existing order.
1192std::vector<TpiSource *> deps;
1193std::vector<TpiSource *> objs;
1194for (TpiSource *s : ctx.tpiSourceList)
1195(s->isDependency() ? deps : objs).push_back(s);
1196uint32_t numDeps = deps.size();
1197uint32_t numObjs = objs.size();
1198ctx.tpiSourceList = std::move(deps);
1199ctx.tpiSourceList.insert(ctx.tpiSourceList.end(), objs.begin(), objs.end());
1200for (uint32_t i = 0, e = ctx.tpiSourceList.size(); i < e; ++i)
1201ctx.tpiSourceList[i]->tpiSrcIdx = i;
1202dependencySources = ArrayRef(ctx.tpiSourceList.data(), numDeps);
1203objectSources = ArrayRef(ctx.tpiSourceList.data() + numDeps, numObjs);
1204}
1205
1206/// Given the index into the ghash table for a particular type, return the type
1207/// index for that type in the output PDB.
1208static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
1209uint32_t ghashCellIdx) {
1210GHashCell cell = g->table.table[ghashCellIdx];
1211return TypeIndex::fromArrayIndex(cell.getGHashIdx());
1212}
1213
1214/// Free heap allocated ghashes.
1215void TypeMerger::clearGHashes() {
1216for (TpiSource *src : ctx.tpiSourceList) {
1217if (src->ownedGHashes)
1218delete[] src->ghashes.data();
1219src->ghashes = {};
1220src->isItemIndex.clear();
1221src->uniqueTypes.clear();
1222}
1223}
1224
1225// Fill in a TPI or IPI index map using ghashes. For each source type, use its
1226// ghash to lookup its final type index in the PDB, and store that in the map.
1227void TpiSource::fillMapFromGHashes(GHashState *g) {
1228for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
1229TypeIndex fakeCellIndex = indexMapStorage[i];
1230if (fakeCellIndex.isSimple())
1231indexMapStorage[i] = fakeCellIndex;
1232else
1233indexMapStorage[i] =
1234loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
1235}
1236}
1237