1 //===-- BitcodeReader.cpp - ClangDoc Bitcode Reader ------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "BitcodeReader.h"
11 #include "llvm/ADT/IndexedMap.h"
12 #include "llvm/ADT/Optional.h"
13 #include "llvm/Support/raw_ostream.h"
14
15 namespace clang {
16 namespace doc {
17
18 using Record = llvm::SmallVector<uint64_t, 1024>;
19
decodeRecord(Record R,llvm::SmallVectorImpl<char> & Field,llvm::StringRef Blob)20 bool decodeRecord(Record R, llvm::SmallVectorImpl<char> &Field,
21 llvm::StringRef Blob) {
22 Field.assign(Blob.begin(), Blob.end());
23 return true;
24 }
25
decodeRecord(Record R,SymbolID & Field,llvm::StringRef Blob)26 bool decodeRecord(Record R, SymbolID &Field, llvm::StringRef Blob) {
27 if (R[0] != BitCodeConstants::USRHashSize)
28 return false;
29
30 // First position in the record is the length of the following array, so we
31 // copy the following elements to the field.
32 for (int I = 0, E = R[0]; I < E; ++I)
33 Field[I] = R[I + 1];
34 return true;
35 }
36
decodeRecord(Record R,bool & Field,llvm::StringRef Blob)37 bool decodeRecord(Record R, bool &Field, llvm::StringRef Blob) {
38 Field = R[0] != 0;
39 return true;
40 }
41
decodeRecord(Record R,int & Field,llvm::StringRef Blob)42 bool decodeRecord(Record R, int &Field, llvm::StringRef Blob) {
43 if (R[0] > INT_MAX)
44 return false;
45 Field = (int)R[0];
46 return true;
47 }
48
decodeRecord(Record R,AccessSpecifier & Field,llvm::StringRef Blob)49 bool decodeRecord(Record R, AccessSpecifier &Field, llvm::StringRef Blob) {
50 switch (R[0]) {
51 case AS_public:
52 case AS_private:
53 case AS_protected:
54 case AS_none:
55 Field = (AccessSpecifier)R[0];
56 return true;
57 default:
58 return false;
59 }
60 }
61
decodeRecord(Record R,TagTypeKind & Field,llvm::StringRef Blob)62 bool decodeRecord(Record R, TagTypeKind &Field, llvm::StringRef Blob) {
63 switch (R[0]) {
64 case TTK_Struct:
65 case TTK_Interface:
66 case TTK_Union:
67 case TTK_Class:
68 case TTK_Enum:
69 Field = (TagTypeKind)R[0];
70 return true;
71 default:
72 return false;
73 }
74 }
75
decodeRecord(Record R,llvm::Optional<Location> & Field,llvm::StringRef Blob)76 bool decodeRecord(Record R, llvm::Optional<Location> &Field,
77 llvm::StringRef Blob) {
78 if (R[0] > INT_MAX)
79 return false;
80 Field.emplace((int)R[0], Blob);
81 return true;
82 }
83
decodeRecord(Record R,InfoType & Field,llvm::StringRef Blob)84 bool decodeRecord(Record R, InfoType &Field, llvm::StringRef Blob) {
85 switch (auto IT = static_cast<InfoType>(R[0])) {
86 case InfoType::IT_namespace:
87 case InfoType::IT_record:
88 case InfoType::IT_function:
89 case InfoType::IT_default:
90 case InfoType::IT_enum:
91 Field = IT;
92 return true;
93 }
94 return false;
95 }
96
decodeRecord(Record R,FieldId & Field,llvm::StringRef Blob)97 bool decodeRecord(Record R, FieldId &Field, llvm::StringRef Blob) {
98 switch (auto F = static_cast<FieldId>(R[0])) {
99 case FieldId::F_namespace:
100 case FieldId::F_parent:
101 case FieldId::F_vparent:
102 case FieldId::F_type:
103 case FieldId::F_default:
104 Field = F;
105 return true;
106 }
107 return false;
108 }
109
decodeRecord(Record R,llvm::SmallVectorImpl<llvm::SmallString<16>> & Field,llvm::StringRef Blob)110 bool decodeRecord(Record R, llvm::SmallVectorImpl<llvm::SmallString<16>> &Field,
111 llvm::StringRef Blob) {
112 Field.push_back(Blob);
113 return true;
114 }
115
decodeRecord(Record R,llvm::SmallVectorImpl<Location> & Field,llvm::StringRef Blob)116 bool decodeRecord(Record R, llvm::SmallVectorImpl<Location> &Field,
117 llvm::StringRef Blob) {
118 if (R[0] > INT_MAX)
119 return false;
120 Field.emplace_back((int)R[0], Blob);
121 return true;
122 }
123
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,const unsigned VersionNo)124 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
125 const unsigned VersionNo) {
126 if (ID == VERSION && R[0] == VersionNo)
127 return true;
128 return false;
129 }
130
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,NamespaceInfo * I)131 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
132 NamespaceInfo *I) {
133 switch (ID) {
134 case NAMESPACE_USR:
135 return decodeRecord(R, I->USR, Blob);
136 case NAMESPACE_NAME:
137 return decodeRecord(R, I->Name, Blob);
138 default:
139 return false;
140 }
141 }
142
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,RecordInfo * I)143 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, RecordInfo *I) {
144 switch (ID) {
145 case RECORD_USR:
146 return decodeRecord(R, I->USR, Blob);
147 case RECORD_NAME:
148 return decodeRecord(R, I->Name, Blob);
149 case RECORD_DEFLOCATION:
150 return decodeRecord(R, I->DefLoc, Blob);
151 case RECORD_LOCATION:
152 return decodeRecord(R, I->Loc, Blob);
153 case RECORD_TAG_TYPE:
154 return decodeRecord(R, I->TagType, Blob);
155 default:
156 return false;
157 }
158 }
159
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,EnumInfo * I)160 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, EnumInfo *I) {
161 switch (ID) {
162 case ENUM_USR:
163 return decodeRecord(R, I->USR, Blob);
164 case ENUM_NAME:
165 return decodeRecord(R, I->Name, Blob);
166 case ENUM_DEFLOCATION:
167 return decodeRecord(R, I->DefLoc, Blob);
168 case ENUM_LOCATION:
169 return decodeRecord(R, I->Loc, Blob);
170 case ENUM_MEMBER:
171 return decodeRecord(R, I->Members, Blob);
172 case ENUM_SCOPED:
173 return decodeRecord(R, I->Scoped, Blob);
174 default:
175 return false;
176 }
177 }
178
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,FunctionInfo * I)179 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, FunctionInfo *I) {
180 switch (ID) {
181 case FUNCTION_USR:
182 return decodeRecord(R, I->USR, Blob);
183 case FUNCTION_NAME:
184 return decodeRecord(R, I->Name, Blob);
185 case FUNCTION_DEFLOCATION:
186 return decodeRecord(R, I->DefLoc, Blob);
187 case FUNCTION_LOCATION:
188 return decodeRecord(R, I->Loc, Blob);
189 case FUNCTION_ACCESS:
190 return decodeRecord(R, I->Access, Blob);
191 case FUNCTION_IS_METHOD:
192 return decodeRecord(R, I->IsMethod, Blob);
193 default:
194 return false;
195 }
196 }
197
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,TypeInfo * I)198 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, TypeInfo *I) {
199 return true;
200 }
201
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,FieldTypeInfo * I)202 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
203 FieldTypeInfo *I) {
204 switch (ID) {
205 case FIELD_TYPE_NAME:
206 return decodeRecord(R, I->Name, Blob);
207 default:
208 return false;
209 }
210 }
211
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,MemberTypeInfo * I)212 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
213 MemberTypeInfo *I) {
214 switch (ID) {
215 case MEMBER_TYPE_NAME:
216 return decodeRecord(R, I->Name, Blob);
217 case MEMBER_TYPE_ACCESS:
218 return decodeRecord(R, I->Access, Blob);
219 default:
220 return false;
221 }
222 }
223
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,CommentInfo * I)224 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, CommentInfo *I) {
225 switch (ID) {
226 case COMMENT_KIND:
227 return decodeRecord(R, I->Kind, Blob);
228 case COMMENT_TEXT:
229 return decodeRecord(R, I->Text, Blob);
230 case COMMENT_NAME:
231 return decodeRecord(R, I->Name, Blob);
232 case COMMENT_DIRECTION:
233 return decodeRecord(R, I->Direction, Blob);
234 case COMMENT_PARAMNAME:
235 return decodeRecord(R, I->ParamName, Blob);
236 case COMMENT_CLOSENAME:
237 return decodeRecord(R, I->CloseName, Blob);
238 case COMMENT_ATTRKEY:
239 return decodeRecord(R, I->AttrKeys, Blob);
240 case COMMENT_ATTRVAL:
241 return decodeRecord(R, I->AttrValues, Blob);
242 case COMMENT_ARG:
243 return decodeRecord(R, I->Args, Blob);
244 case COMMENT_SELFCLOSING:
245 return decodeRecord(R, I->SelfClosing, Blob);
246 case COMMENT_EXPLICIT:
247 return decodeRecord(R, I->Explicit, Blob);
248 default:
249 return false;
250 }
251 }
252
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,Reference * I,FieldId & F)253 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, Reference *I,
254 FieldId &F) {
255 switch (ID) {
256 case REFERENCE_USR:
257 return decodeRecord(R, I->USR, Blob);
258 case REFERENCE_NAME:
259 return decodeRecord(R, I->Name, Blob);
260 case REFERENCE_TYPE:
261 return decodeRecord(R, I->RefType, Blob);
262 case REFERENCE_FIELD:
263 return decodeRecord(R, F, Blob);
264 default:
265 return false;
266 }
267 }
268
getCommentInfo(T I)269 template <typename T> CommentInfo *getCommentInfo(T I) {
270 llvm::errs() << "Cannot have comment subblock.\n";
271 exit(1);
272 }
273
getCommentInfo(FunctionInfo * I)274 template <> CommentInfo *getCommentInfo(FunctionInfo *I) {
275 I->Description.emplace_back();
276 return &I->Description.back();
277 }
278
getCommentInfo(NamespaceInfo * I)279 template <> CommentInfo *getCommentInfo(NamespaceInfo *I) {
280 I->Description.emplace_back();
281 return &I->Description.back();
282 }
283
getCommentInfo(RecordInfo * I)284 template <> CommentInfo *getCommentInfo(RecordInfo *I) {
285 I->Description.emplace_back();
286 return &I->Description.back();
287 }
288
getCommentInfo(EnumInfo * I)289 template <> CommentInfo *getCommentInfo(EnumInfo *I) {
290 I->Description.emplace_back();
291 return &I->Description.back();
292 }
293
getCommentInfo(CommentInfo * I)294 template <> CommentInfo *getCommentInfo(CommentInfo *I) {
295 I->Children.emplace_back(llvm::make_unique<CommentInfo>());
296 return I->Children.back().get();
297 }
298
getCommentInfo(std::unique_ptr<CommentInfo> & I)299 template <> CommentInfo *getCommentInfo(std::unique_ptr<CommentInfo> &I) {
300 return getCommentInfo(I.get());
301 }
302
303 template <typename T, typename TTypeInfo>
addTypeInfo(T I,TTypeInfo && TI)304 void addTypeInfo(T I, TTypeInfo &&TI) {
305 llvm::errs() << "Invalid type for info.\n";
306 exit(1);
307 }
308
addTypeInfo(RecordInfo * I,MemberTypeInfo && T)309 template <> void addTypeInfo(RecordInfo *I, MemberTypeInfo &&T) {
310 I->Members.emplace_back(std::move(T));
311 }
312
addTypeInfo(FunctionInfo * I,TypeInfo && T)313 template <> void addTypeInfo(FunctionInfo *I, TypeInfo &&T) {
314 I->ReturnType = std::move(T);
315 }
316
addTypeInfo(FunctionInfo * I,FieldTypeInfo && T)317 template <> void addTypeInfo(FunctionInfo *I, FieldTypeInfo &&T) {
318 I->Params.emplace_back(std::move(T));
319 }
320
addReference(T I,Reference && R,FieldId F)321 template <typename T> void addReference(T I, Reference &&R, FieldId F) {
322 llvm::errs() << "Invalid field type for info.\n";
323 exit(1);
324 }
325
addReference(TypeInfo * I,Reference && R,FieldId F)326 template <> void addReference(TypeInfo *I, Reference &&R, FieldId F) {
327 switch (F) {
328 case FieldId::F_type:
329 I->Type = std::move(R);
330 break;
331 default:
332 llvm::errs() << "Invalid field type for info.\n";
333 exit(1);
334 }
335 }
336
addReference(FieldTypeInfo * I,Reference && R,FieldId F)337 template <> void addReference(FieldTypeInfo *I, Reference &&R, FieldId F) {
338 switch (F) {
339 case FieldId::F_type:
340 I->Type = std::move(R);
341 break;
342 default:
343 llvm::errs() << "Invalid field type for info.\n";
344 exit(1);
345 }
346 }
347
addReference(MemberTypeInfo * I,Reference && R,FieldId F)348 template <> void addReference(MemberTypeInfo *I, Reference &&R, FieldId F) {
349 switch (F) {
350 case FieldId::F_type:
351 I->Type = std::move(R);
352 break;
353 default:
354 llvm::errs() << "Invalid field type for info.\n";
355 exit(1);
356 }
357 }
358
addReference(EnumInfo * I,Reference && R,FieldId F)359 template <> void addReference(EnumInfo *I, Reference &&R, FieldId F) {
360 switch (F) {
361 case FieldId::F_namespace:
362 I->Namespace.emplace_back(std::move(R));
363 break;
364 default:
365 llvm::errs() << "Invalid field type for info.\n";
366 exit(1);
367 }
368 }
369
addReference(NamespaceInfo * I,Reference && R,FieldId F)370 template <> void addReference(NamespaceInfo *I, Reference &&R, FieldId F) {
371 switch (F) {
372 case FieldId::F_namespace:
373 I->Namespace.emplace_back(std::move(R));
374 break;
375 default:
376 llvm::errs() << "Invalid field type for info.\n";
377 exit(1);
378 }
379 }
380
addReference(FunctionInfo * I,Reference && R,FieldId F)381 template <> void addReference(FunctionInfo *I, Reference &&R, FieldId F) {
382 switch (F) {
383 case FieldId::F_namespace:
384 I->Namespace.emplace_back(std::move(R));
385 break;
386 case FieldId::F_parent:
387 I->Parent = std::move(R);
388 break;
389 default:
390 llvm::errs() << "Invalid field type for info.\n";
391 exit(1);
392 }
393 }
394
addReference(RecordInfo * I,Reference && R,FieldId F)395 template <> void addReference(RecordInfo *I, Reference &&R, FieldId F) {
396 switch (F) {
397 case FieldId::F_namespace:
398 I->Namespace.emplace_back(std::move(R));
399 break;
400 case FieldId::F_parent:
401 I->Parents.emplace_back(std::move(R));
402 break;
403 case FieldId::F_vparent:
404 I->VirtualParents.emplace_back(std::move(R));
405 break;
406 default:
407 llvm::errs() << "Invalid field type for info.\n";
408 exit(1);
409 }
410 }
411
412 // Read records from bitcode into a given info.
readRecord(unsigned ID,T I)413 template <typename T> bool ClangDocBitcodeReader::readRecord(unsigned ID, T I) {
414 Record R;
415 llvm::StringRef Blob;
416 unsigned RecID = Stream.readRecord(ID, R, &Blob);
417 return parseRecord(R, RecID, Blob, I);
418 }
419
readRecord(unsigned ID,Reference * I)420 template <> bool ClangDocBitcodeReader::readRecord(unsigned ID, Reference *I) {
421 Record R;
422 llvm::StringRef Blob;
423 unsigned RecID = Stream.readRecord(ID, R, &Blob);
424 return parseRecord(R, RecID, Blob, I, CurrentReferenceField);
425 }
426
427 // Read a block of records into a single info.
readBlock(unsigned ID,T I)428 template <typename T> bool ClangDocBitcodeReader::readBlock(unsigned ID, T I) {
429 if (Stream.EnterSubBlock(ID))
430 return false;
431
432 while (true) {
433 unsigned BlockOrCode = 0;
434 Cursor Res = skipUntilRecordOrBlock(BlockOrCode);
435
436 switch (Res) {
437 case Cursor::BadBlock:
438 return false;
439 case Cursor::BlockEnd:
440 return true;
441 case Cursor::BlockBegin:
442 if (readSubBlock(BlockOrCode, I))
443 continue;
444 if (!Stream.SkipBlock())
445 return false;
446 continue;
447 case Cursor::Record:
448 break;
449 }
450 if (!readRecord(BlockOrCode, I))
451 return false;
452 }
453 }
454
455 template <typename T>
readSubBlock(unsigned ID,T I)456 bool ClangDocBitcodeReader::readSubBlock(unsigned ID, T I) {
457 switch (ID) {
458 // Blocks can only have Comment, Reference, or TypeInfo subblocks
459 case BI_COMMENT_BLOCK_ID:
460 if (readBlock(ID, getCommentInfo(I)))
461 return true;
462 return false;
463 case BI_TYPE_BLOCK_ID: {
464 TypeInfo TI;
465 if (readBlock(ID, &TI)) {
466 addTypeInfo(I, std::move(TI));
467 return true;
468 }
469 return false;
470 }
471 case BI_FIELD_TYPE_BLOCK_ID: {
472 FieldTypeInfo TI;
473 if (readBlock(ID, &TI)) {
474 addTypeInfo(I, std::move(TI));
475 return true;
476 }
477 return false;
478 }
479 case BI_MEMBER_TYPE_BLOCK_ID: {
480 MemberTypeInfo TI;
481 if (readBlock(ID, &TI)) {
482 addTypeInfo(I, std::move(TI));
483 return true;
484 }
485 return false;
486 }
487 case BI_REFERENCE_BLOCK_ID: {
488 Reference R;
489 if (readBlock(ID, &R)) {
490 addReference(I, std::move(R), CurrentReferenceField);
491 return true;
492 }
493 return false;
494 }
495 default:
496 llvm::errs() << "Invalid subblock type.\n";
497 return false;
498 }
499 }
500
501 ClangDocBitcodeReader::Cursor
skipUntilRecordOrBlock(unsigned & BlockOrRecordID)502 ClangDocBitcodeReader::skipUntilRecordOrBlock(unsigned &BlockOrRecordID) {
503 BlockOrRecordID = 0;
504
505 while (!Stream.AtEndOfStream()) {
506 unsigned Code = Stream.ReadCode();
507
508 switch ((llvm::bitc::FixedAbbrevIDs)Code) {
509 case llvm::bitc::ENTER_SUBBLOCK:
510 BlockOrRecordID = Stream.ReadSubBlockID();
511 return Cursor::BlockBegin;
512 case llvm::bitc::END_BLOCK:
513 if (Stream.ReadBlockEnd())
514 return Cursor::BadBlock;
515 return Cursor::BlockEnd;
516 case llvm::bitc::DEFINE_ABBREV:
517 Stream.ReadAbbrevRecord();
518 continue;
519 case llvm::bitc::UNABBREV_RECORD:
520 return Cursor::BadBlock;
521 default:
522 BlockOrRecordID = Code;
523 return Cursor::Record;
524 }
525 }
526 llvm_unreachable("Premature stream end.");
527 }
528
validateStream()529 bool ClangDocBitcodeReader::validateStream() {
530 if (Stream.AtEndOfStream())
531 return false;
532
533 // Sniff for the signature.
534 if (Stream.Read(8) != BitCodeConstants::Signature[0] ||
535 Stream.Read(8) != BitCodeConstants::Signature[1] ||
536 Stream.Read(8) != BitCodeConstants::Signature[2] ||
537 Stream.Read(8) != BitCodeConstants::Signature[3])
538 return false;
539 return true;
540 }
541
readBlockInfoBlock()542 bool ClangDocBitcodeReader::readBlockInfoBlock() {
543 BlockInfo = Stream.ReadBlockInfoBlock();
544 if (!BlockInfo)
545 return false;
546 Stream.setBlockInfo(&*BlockInfo);
547 return true;
548 }
549
550 template <typename T>
createInfo(unsigned ID)551 std::unique_ptr<Info> ClangDocBitcodeReader::createInfo(unsigned ID) {
552 std::unique_ptr<Info> I = llvm::make_unique<T>();
553 if (readBlock(ID, static_cast<T *>(I.get())))
554 return I;
555 llvm::errs() << "Error reading from block.\n";
556 return nullptr;
557 }
558
readBlockToInfo(unsigned ID)559 std::unique_ptr<Info> ClangDocBitcodeReader::readBlockToInfo(unsigned ID) {
560 switch (ID) {
561 case BI_NAMESPACE_BLOCK_ID:
562 return createInfo<NamespaceInfo>(ID);
563 case BI_RECORD_BLOCK_ID:
564 return createInfo<RecordInfo>(ID);
565 case BI_ENUM_BLOCK_ID:
566 return createInfo<EnumInfo>(ID);
567 case BI_FUNCTION_BLOCK_ID:
568 return createInfo<FunctionInfo>(ID);
569 default:
570 llvm::errs() << "Error reading from block.\n";
571 return nullptr;
572 }
573 }
574
575 // Entry point
readBitcode()576 std::vector<std::unique_ptr<Info>> ClangDocBitcodeReader::readBitcode() {
577 std::vector<std::unique_ptr<Info>> Infos;
578 if (!validateStream())
579 return Infos;
580
581 // Read the top level blocks.
582 while (!Stream.AtEndOfStream()) {
583 unsigned Code = Stream.ReadCode();
584 if (Code != llvm::bitc::ENTER_SUBBLOCK)
585 return Infos;
586
587 unsigned ID = Stream.ReadSubBlockID();
588 switch (ID) {
589 // NamedType and Comment blocks should not appear at the top level
590 case BI_TYPE_BLOCK_ID:
591 case BI_FIELD_TYPE_BLOCK_ID:
592 case BI_MEMBER_TYPE_BLOCK_ID:
593 case BI_COMMENT_BLOCK_ID:
594 case BI_REFERENCE_BLOCK_ID:
595 llvm::errs() << "Invalid top level block.\n";
596 return Infos;
597 case BI_NAMESPACE_BLOCK_ID:
598 case BI_RECORD_BLOCK_ID:
599 case BI_ENUM_BLOCK_ID:
600 case BI_FUNCTION_BLOCK_ID:
601 if (std::unique_ptr<Info> I = readBlockToInfo(ID)) {
602 Infos.emplace_back(std::move(I));
603 }
604 return Infos;
605 case BI_VERSION_BLOCK_ID:
606 if (readBlock(ID, VersionNumber))
607 continue;
608 return Infos;
609 case llvm::bitc::BLOCKINFO_BLOCK_ID:
610 if (readBlockInfoBlock())
611 continue;
612 return Infos;
613 default:
614 if (!Stream.SkipBlock())
615 continue;
616 }
617 }
618 return Infos;
619 }
620
621 } // namespace doc
622 } // namespace clang
623