llvm-project

Форк
0
/
ExportTrie.cpp 
345 строк · 10.4 Кб
1
//===- ExportTrie.cpp -----------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This is a partial implementation of the Mach-O export trie format. It's
10
// essentially a symbol table encoded as a compressed prefix trie, meaning that
11
// the common prefixes of each symbol name are shared for a more compact
12
// representation. The prefixes are stored on the edges of the trie, and one
13
// edge can represent multiple characters. For example, given two exported
14
// symbols _bar and _baz, we will have a trie like this (terminal nodes are
15
// marked with an asterisk):
16
//
17
//              +-+-+
18
//              |   | // root node
19
//              +-+-+
20
//                |
21
//                | _ba
22
//                |
23
//              +-+-+
24
//              |   |
25
//              +-+-+
26
//           r /     \ z
27
//            /       \
28
//        +-+-+       +-+-+
29
//        | * |       | * |
30
//        +-+-+       +-+-+
31
//
32
// More documentation of the format can be found in
33
// llvm/tools/obj2yaml/macho2yaml.cpp.
34
//
35
//===----------------------------------------------------------------------===//
36

37
#include "ExportTrie.h"
38
#include "Symbols.h"
39

40
#include "lld/Common/ErrorHandler.h"
41
#include "lld/Common/Memory.h"
42
#include "llvm/BinaryFormat/MachO.h"
43
#include "llvm/Support/LEB128.h"
44
#include <optional>
45

46
using namespace llvm;
47
using namespace lld;
48
using namespace lld::macho;
49

50
namespace {
51

52
struct Edge {
53
  Edge(StringRef s, TrieNode *node) : substring(s), child(node) {}
54

55
  StringRef substring;
56
  struct TrieNode *child;
57
};
58

59
struct ExportInfo {
60
  uint64_t address;
61
  uint64_t ordinal = 0;
62
  uint8_t flags = 0;
63
  ExportInfo(const Symbol &sym, uint64_t imageBase)
64
      : address(sym.getVA() - imageBase) {
65
    using namespace llvm::MachO;
66
    if (sym.isWeakDef())
67
      flags |= EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION;
68
    if (sym.isTlv())
69
      flags |= EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL;
70
    // TODO: Add proper support for stub-and-resolver flags.
71

72
    if (auto *defined = dyn_cast<Defined>(&sym)) {
73
      if (defined->isAbsolute())
74
        flags |= EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE;
75
    } else if (auto *dysym = dyn_cast<DylibSymbol>(&sym)) {
76
      flags |= EXPORT_SYMBOL_FLAGS_REEXPORT;
77
      if (!dysym->isDynamicLookup())
78
        ordinal = dysym->getFile()->ordinal;
79
    }
80
  }
81
};
82

83
} // namespace
84

85
struct macho::TrieNode {
86
  std::vector<Edge> edges;
87
  std::optional<ExportInfo> info;
88
  // Estimated offset from the start of the serialized trie to the current node.
89
  // This will converge to the true offset when updateOffset() is run to a
90
  // fixpoint.
91
  size_t offset = 0;
92

93
  uint32_t getTerminalSize() const;
94
  // Returns whether the new estimated offset differs from the old one.
95
  bool updateOffset(size_t &nextOffset);
96
  void writeTo(uint8_t *buf) const;
97
};
98

99
// For regular symbols, the node layout (excluding the children) is
100
//
101
//   uleb128 terminalSize;
102
//   uleb128 flags;
103
//   uleb128 address;
104
//
105
// For re-exported symbols, the layout is
106
//
107
//   uleb128 terminalSize;
108
//   uleb128 flags;
109
//   uleb128 ordinal;
110
//   char[] originalName;
111
//
112
// If libfoo.dylib is linked against libbar.dylib, and libfoo exports an alias
113
// _foo to a symbol _bar in libbar, then originalName will be "_bar". If libfoo
114
// re-exports _bar directly (i.e. not via an alias), then originalName will be
115
// the empty string.
116
//
117
// TODO: Support aliased re-exports. (Since we don't yet support these,
118
// originalName will always be the empty string.)
119
//
120
// For stub-and-resolver nodes, the layout is
121
//
122
//   uleb128 terminalSize;
123
//   uleb128 flags;
124
//   uleb128 stubAddress;
125
//   uleb128 resolverAddress;
126
//
127
// TODO: Support stub-and-resolver nodes.
128
uint32_t TrieNode::getTerminalSize() const {
129
  uint32_t size = getULEB128Size(info->flags);
130
  if (info->flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT)
131
    size += getULEB128Size(info->ordinal) + 1; // + 1 for the null-terminator
132
  else
133
    size += getULEB128Size(info->address);
134
  return size;
135
}
136

137
bool TrieNode::updateOffset(size_t &nextOffset) {
138
  // Size of the whole node (including the terminalSize and the outgoing edges.)
139
  // In contrast, terminalSize only records the size of the other data in the
140
  // node.
141
  size_t nodeSize;
142
  if (info) {
143
    uint32_t terminalSize = getTerminalSize();
144
    // Overall node size so far is the uleb128 size of the length of the symbol
145
    // info + the symbol info itself.
146
    nodeSize = terminalSize + getULEB128Size(terminalSize);
147
  } else {
148
    nodeSize = 1; // Size of terminalSize (which has a value of 0)
149
  }
150
  // Compute size of all child edges.
151
  ++nodeSize; // Byte for number of children.
152
  for (const Edge &edge : edges) {
153
    nodeSize += edge.substring.size() + 1             // String length.
154
                + getULEB128Size(edge.child->offset); // Offset len.
155
  }
156
  // On input, 'nextOffset' is the new preferred location for this node.
157
  bool result = (offset != nextOffset);
158
  // Store new location in node object for use by parents.
159
  offset = nextOffset;
160
  nextOffset += nodeSize;
161
  return result;
162
}
163

164
void TrieNode::writeTo(uint8_t *buf) const {
165
  buf += offset;
166
  if (info) {
167
    uint32_t terminalSize = getTerminalSize();
168
    buf += encodeULEB128(terminalSize, buf);
169
    buf += encodeULEB128(info->flags, buf);
170
    if (info->flags & MachO::EXPORT_SYMBOL_FLAGS_REEXPORT) {
171
      buf += encodeULEB128(info->ordinal, buf);
172
      *buf++ = 0; // empty originalName string
173
    } else {
174
      buf += encodeULEB128(info->address, buf);
175
    }
176
  } else {
177
    // TrieNode with no Symbol info.
178
    *buf++ = 0; // terminalSize
179
  }
180
  // Add number of children. TODO: Handle case where we have more than 256.
181
  assert(edges.size() < 256);
182
  *buf++ = edges.size();
183
  // Append each child edge substring and node offset.
184
  for (const Edge &edge : edges) {
185
    memcpy(buf, edge.substring.data(), edge.substring.size());
186
    buf += edge.substring.size();
187
    *buf++ = '\0';
188
    buf += encodeULEB128(edge.child->offset, buf);
189
  }
190
}
191

192
TrieBuilder::~TrieBuilder() {
193
  for (TrieNode *node : nodes)
194
    delete node;
195
}
196

197
TrieNode *TrieBuilder::makeNode() {
198
  auto *node = new TrieNode();
199
  nodes.emplace_back(node);
200
  return node;
201
}
202

203
static int charAt(const Symbol *sym, size_t pos) {
204
  StringRef str = sym->getName();
205
  if (pos >= str.size())
206
    return -1;
207
  return str[pos];
208
}
209

210
// Build the trie by performing a three-way radix quicksort: We start by sorting
211
// the strings by their first characters, then sort the strings with the same
212
// first characters by their second characters, and so on recursively. Each
213
// time the prefixes diverge, we add a node to the trie.
214
//
215
// node:    The most recently created node along this path in the trie (i.e.
216
//          the furthest from the root.)
217
// lastPos: The prefix length of the most recently created node, i.e. the number
218
//          of characters along its path from the root.
219
// pos:     The string index we are currently sorting on. Note that each symbol
220
//          S contained in vec has the same prefix S[0...pos).
221
void TrieBuilder::sortAndBuild(MutableArrayRef<const Symbol *> vec,
222
                               TrieNode *node, size_t lastPos, size_t pos) {
223
tailcall:
224
  if (vec.empty())
225
    return;
226

227
  // Partition items so that items in [0, i) are less than the pivot,
228
  // [i, j) are the same as the pivot, and [j, vec.size()) are greater than
229
  // the pivot.
230
  const Symbol *pivotSymbol = vec[vec.size() / 2];
231
  int pivot = charAt(pivotSymbol, pos);
232
  size_t i = 0;
233
  size_t j = vec.size();
234
  for (size_t k = 0; k < j;) {
235
    int c = charAt(vec[k], pos);
236
    if (c < pivot)
237
      std::swap(vec[i++], vec[k++]);
238
    else if (c > pivot)
239
      std::swap(vec[--j], vec[k]);
240
    else
241
      k++;
242
  }
243

244
  bool isTerminal = pivot == -1;
245
  bool prefixesDiverge = i != 0 || j != vec.size();
246
  if (lastPos != pos && (isTerminal || prefixesDiverge)) {
247
    TrieNode *newNode = makeNode();
248
    node->edges.emplace_back(pivotSymbol->getName().slice(lastPos, pos),
249
                             newNode);
250
    node = newNode;
251
    lastPos = pos;
252
  }
253

254
  sortAndBuild(vec.slice(0, i), node, lastPos, pos);
255
  sortAndBuild(vec.slice(j), node, lastPos, pos);
256

257
  if (isTerminal) {
258
    assert(j - i == 1); // no duplicate symbols
259
    node->info = ExportInfo(*pivotSymbol, imageBase);
260
  } else {
261
    // This is the tail-call-optimized version of the following:
262
    // sortAndBuild(vec.slice(i, j - i), node, lastPos, pos + 1);
263
    vec = vec.slice(i, j - i);
264
    ++pos;
265
    goto tailcall;
266
  }
267
}
268

269
size_t TrieBuilder::build() {
270
  if (exported.empty())
271
    return 0;
272

273
  TrieNode *root = makeNode();
274
  sortAndBuild(exported, root, 0, 0);
275

276
  // Assign each node in the vector an offset in the trie stream, iterating
277
  // until all uleb128 sizes have stabilized.
278
  size_t offset;
279
  bool more;
280
  do {
281
    offset = 0;
282
    more = false;
283
    for (TrieNode *node : nodes)
284
      more |= node->updateOffset(offset);
285
  } while (more);
286

287
  return offset;
288
}
289

290
void TrieBuilder::writeTo(uint8_t *buf) const {
291
  for (TrieNode *node : nodes)
292
    node->writeTo(buf);
293
}
294

295
namespace {
296

297
// Parse a serialized trie and invoke a callback for each entry.
298
class TrieParser {
299
public:
300
  TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback)
301
      : start(buf), end(start + size), callback(callback) {}
302

303
  void parse(const uint8_t *buf, const Twine &cumulativeString);
304

305
  void parse() { parse(start, ""); }
306

307
  const uint8_t *start;
308
  const uint8_t *end;
309
  const TrieEntryCallback &callback;
310
};
311

312
} // namespace
313

314
void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) {
315
  if (buf >= end)
316
    fatal("Node offset points outside export section");
317

318
  unsigned ulebSize;
319
  uint64_t terminalSize = decodeULEB128(buf, &ulebSize);
320
  buf += ulebSize;
321
  uint64_t flags = 0;
322
  size_t offset;
323
  if (terminalSize != 0) {
324
    flags = decodeULEB128(buf, &ulebSize);
325
    callback(cumulativeString, flags);
326
  }
327
  buf += terminalSize;
328
  uint8_t numEdges = *buf++;
329
  for (uint8_t i = 0; i < numEdges; ++i) {
330
    const char *cbuf = reinterpret_cast<const char *>(buf);
331
    StringRef substring = StringRef(cbuf, strnlen(cbuf, end - buf));
332
    buf += substring.size() + 1;
333
    offset = decodeULEB128(buf, &ulebSize);
334
    buf += ulebSize;
335
    parse(start + offset, cumulativeString + substring);
336
  }
337
}
338

339
void macho::parseTrie(const uint8_t *buf, size_t size,
340
                      const TrieEntryCallback &callback) {
341
  if (size == 0)
342
    return;
343

344
  TrieParser(buf, size, callback).parse();
345
}
346

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.