1 //===--  BitcodeReader.cpp - ClangDoc Bitcode Reader ------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "BitcodeReader.h"
11 #include "llvm/ADT/IndexedMap.h"
12 #include "llvm/ADT/Optional.h"
13 #include "llvm/Support/raw_ostream.h"
14 
15 namespace clang {
16 namespace doc {
17 
18 using Record = llvm::SmallVector<uint64_t, 1024>;
19 
decodeRecord(Record R,llvm::SmallVectorImpl<char> & Field,llvm::StringRef Blob)20 bool decodeRecord(Record R, llvm::SmallVectorImpl<char> &Field,
21                   llvm::StringRef Blob) {
22   Field.assign(Blob.begin(), Blob.end());
23   return true;
24 }
25 
decodeRecord(Record R,SymbolID & Field,llvm::StringRef Blob)26 bool decodeRecord(Record R, SymbolID &Field, llvm::StringRef Blob) {
27   if (R[0] != BitCodeConstants::USRHashSize)
28     return false;
29 
30   // First position in the record is the length of the following array, so we
31   // copy the following elements to the field.
32   for (int I = 0, E = R[0]; I < E; ++I)
33     Field[I] = R[I + 1];
34   return true;
35 }
36 
decodeRecord(Record R,bool & Field,llvm::StringRef Blob)37 bool decodeRecord(Record R, bool &Field, llvm::StringRef Blob) {
38   Field = R[0] != 0;
39   return true;
40 }
41 
decodeRecord(Record R,int & Field,llvm::StringRef Blob)42 bool decodeRecord(Record R, int &Field, llvm::StringRef Blob) {
43   if (R[0] > INT_MAX)
44     return false;
45   Field = (int)R[0];
46   return true;
47 }
48 
decodeRecord(Record R,AccessSpecifier & Field,llvm::StringRef Blob)49 bool decodeRecord(Record R, AccessSpecifier &Field, llvm::StringRef Blob) {
50   switch (R[0]) {
51   case AS_public:
52   case AS_private:
53   case AS_protected:
54   case AS_none:
55     Field = (AccessSpecifier)R[0];
56     return true;
57   default:
58     return false;
59   }
60 }
61 
decodeRecord(Record R,TagTypeKind & Field,llvm::StringRef Blob)62 bool decodeRecord(Record R, TagTypeKind &Field, llvm::StringRef Blob) {
63   switch (R[0]) {
64   case TTK_Struct:
65   case TTK_Interface:
66   case TTK_Union:
67   case TTK_Class:
68   case TTK_Enum:
69     Field = (TagTypeKind)R[0];
70     return true;
71   default:
72     return false;
73   }
74 }
75 
decodeRecord(Record R,llvm::Optional<Location> & Field,llvm::StringRef Blob)76 bool decodeRecord(Record R, llvm::Optional<Location> &Field,
77                   llvm::StringRef Blob) {
78   if (R[0] > INT_MAX)
79     return false;
80   Field.emplace((int)R[0], Blob);
81   return true;
82 }
83 
decodeRecord(Record R,InfoType & Field,llvm::StringRef Blob)84 bool decodeRecord(Record R, InfoType &Field, llvm::StringRef Blob) {
85   switch (auto IT = static_cast<InfoType>(R[0])) {
86   case InfoType::IT_namespace:
87   case InfoType::IT_record:
88   case InfoType::IT_function:
89   case InfoType::IT_default:
90   case InfoType::IT_enum:
91     Field = IT;
92     return true;
93   }
94   return false;
95 }
96 
decodeRecord(Record R,FieldId & Field,llvm::StringRef Blob)97 bool decodeRecord(Record R, FieldId &Field, llvm::StringRef Blob) {
98   switch (auto F = static_cast<FieldId>(R[0])) {
99   case FieldId::F_namespace:
100   case FieldId::F_parent:
101   case FieldId::F_vparent:
102   case FieldId::F_type:
103   case FieldId::F_default:
104     Field = F;
105     return true;
106   }
107   return false;
108 }
109 
decodeRecord(Record R,llvm::SmallVectorImpl<llvm::SmallString<16>> & Field,llvm::StringRef Blob)110 bool decodeRecord(Record R, llvm::SmallVectorImpl<llvm::SmallString<16>> &Field,
111                   llvm::StringRef Blob) {
112   Field.push_back(Blob);
113   return true;
114 }
115 
decodeRecord(Record R,llvm::SmallVectorImpl<Location> & Field,llvm::StringRef Blob)116 bool decodeRecord(Record R, llvm::SmallVectorImpl<Location> &Field,
117                   llvm::StringRef Blob) {
118   if (R[0] > INT_MAX)
119     return false;
120   Field.emplace_back((int)R[0], Blob);
121   return true;
122 }
123 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,const unsigned VersionNo)124 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
125                  const unsigned VersionNo) {
126   if (ID == VERSION && R[0] == VersionNo)
127     return true;
128   return false;
129 }
130 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,NamespaceInfo * I)131 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
132                  NamespaceInfo *I) {
133   switch (ID) {
134   case NAMESPACE_USR:
135     return decodeRecord(R, I->USR, Blob);
136   case NAMESPACE_NAME:
137     return decodeRecord(R, I->Name, Blob);
138   default:
139     return false;
140   }
141 }
142 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,RecordInfo * I)143 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, RecordInfo *I) {
144   switch (ID) {
145   case RECORD_USR:
146     return decodeRecord(R, I->USR, Blob);
147   case RECORD_NAME:
148     return decodeRecord(R, I->Name, Blob);
149   case RECORD_DEFLOCATION:
150     return decodeRecord(R, I->DefLoc, Blob);
151   case RECORD_LOCATION:
152     return decodeRecord(R, I->Loc, Blob);
153   case RECORD_TAG_TYPE:
154     return decodeRecord(R, I->TagType, Blob);
155   default:
156     return false;
157   }
158 }
159 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,EnumInfo * I)160 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, EnumInfo *I) {
161   switch (ID) {
162   case ENUM_USR:
163     return decodeRecord(R, I->USR, Blob);
164   case ENUM_NAME:
165     return decodeRecord(R, I->Name, Blob);
166   case ENUM_DEFLOCATION:
167     return decodeRecord(R, I->DefLoc, Blob);
168   case ENUM_LOCATION:
169     return decodeRecord(R, I->Loc, Blob);
170   case ENUM_MEMBER:
171     return decodeRecord(R, I->Members, Blob);
172   case ENUM_SCOPED:
173     return decodeRecord(R, I->Scoped, Blob);
174   default:
175     return false;
176   }
177 }
178 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,FunctionInfo * I)179 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, FunctionInfo *I) {
180   switch (ID) {
181   case FUNCTION_USR:
182     return decodeRecord(R, I->USR, Blob);
183   case FUNCTION_NAME:
184     return decodeRecord(R, I->Name, Blob);
185   case FUNCTION_DEFLOCATION:
186     return decodeRecord(R, I->DefLoc, Blob);
187   case FUNCTION_LOCATION:
188     return decodeRecord(R, I->Loc, Blob);
189   case FUNCTION_ACCESS:
190     return decodeRecord(R, I->Access, Blob);
191   case FUNCTION_IS_METHOD:
192     return decodeRecord(R, I->IsMethod, Blob);
193   default:
194     return false;
195   }
196 }
197 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,TypeInfo * I)198 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, TypeInfo *I) {
199   return true;
200 }
201 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,FieldTypeInfo * I)202 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
203                  FieldTypeInfo *I) {
204   switch (ID) {
205   case FIELD_TYPE_NAME:
206     return decodeRecord(R, I->Name, Blob);
207   default:
208     return false;
209   }
210 }
211 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,MemberTypeInfo * I)212 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob,
213                  MemberTypeInfo *I) {
214   switch (ID) {
215   case MEMBER_TYPE_NAME:
216     return decodeRecord(R, I->Name, Blob);
217   case MEMBER_TYPE_ACCESS:
218     return decodeRecord(R, I->Access, Blob);
219   default:
220     return false;
221   }
222 }
223 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,CommentInfo * I)224 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, CommentInfo *I) {
225   switch (ID) {
226   case COMMENT_KIND:
227     return decodeRecord(R, I->Kind, Blob);
228   case COMMENT_TEXT:
229     return decodeRecord(R, I->Text, Blob);
230   case COMMENT_NAME:
231     return decodeRecord(R, I->Name, Blob);
232   case COMMENT_DIRECTION:
233     return decodeRecord(R, I->Direction, Blob);
234   case COMMENT_PARAMNAME:
235     return decodeRecord(R, I->ParamName, Blob);
236   case COMMENT_CLOSENAME:
237     return decodeRecord(R, I->CloseName, Blob);
238   case COMMENT_ATTRKEY:
239     return decodeRecord(R, I->AttrKeys, Blob);
240   case COMMENT_ATTRVAL:
241     return decodeRecord(R, I->AttrValues, Blob);
242   case COMMENT_ARG:
243     return decodeRecord(R, I->Args, Blob);
244   case COMMENT_SELFCLOSING:
245     return decodeRecord(R, I->SelfClosing, Blob);
246   case COMMENT_EXPLICIT:
247     return decodeRecord(R, I->Explicit, Blob);
248   default:
249     return false;
250   }
251 }
252 
parseRecord(Record R,unsigned ID,llvm::StringRef Blob,Reference * I,FieldId & F)253 bool parseRecord(Record R, unsigned ID, llvm::StringRef Blob, Reference *I,
254                  FieldId &F) {
255   switch (ID) {
256   case REFERENCE_USR:
257     return decodeRecord(R, I->USR, Blob);
258   case REFERENCE_NAME:
259     return decodeRecord(R, I->Name, Blob);
260   case REFERENCE_TYPE:
261     return decodeRecord(R, I->RefType, Blob);
262   case REFERENCE_FIELD:
263     return decodeRecord(R, F, Blob);
264   default:
265     return false;
266   }
267 }
268 
getCommentInfo(T I)269 template <typename T> CommentInfo *getCommentInfo(T I) {
270   llvm::errs() << "Cannot have comment subblock.\n";
271   exit(1);
272 }
273 
getCommentInfo(FunctionInfo * I)274 template <> CommentInfo *getCommentInfo(FunctionInfo *I) {
275   I->Description.emplace_back();
276   return &I->Description.back();
277 }
278 
getCommentInfo(NamespaceInfo * I)279 template <> CommentInfo *getCommentInfo(NamespaceInfo *I) {
280   I->Description.emplace_back();
281   return &I->Description.back();
282 }
283 
getCommentInfo(RecordInfo * I)284 template <> CommentInfo *getCommentInfo(RecordInfo *I) {
285   I->Description.emplace_back();
286   return &I->Description.back();
287 }
288 
getCommentInfo(EnumInfo * I)289 template <> CommentInfo *getCommentInfo(EnumInfo *I) {
290   I->Description.emplace_back();
291   return &I->Description.back();
292 }
293 
getCommentInfo(CommentInfo * I)294 template <> CommentInfo *getCommentInfo(CommentInfo *I) {
295   I->Children.emplace_back(llvm::make_unique<CommentInfo>());
296   return I->Children.back().get();
297 }
298 
getCommentInfo(std::unique_ptr<CommentInfo> & I)299 template <> CommentInfo *getCommentInfo(std::unique_ptr<CommentInfo> &I) {
300   return getCommentInfo(I.get());
301 }
302 
303 template <typename T, typename TTypeInfo>
addTypeInfo(T I,TTypeInfo && TI)304 void addTypeInfo(T I, TTypeInfo &&TI) {
305   llvm::errs() << "Invalid type for info.\n";
306   exit(1);
307 }
308 
addTypeInfo(RecordInfo * I,MemberTypeInfo && T)309 template <> void addTypeInfo(RecordInfo *I, MemberTypeInfo &&T) {
310   I->Members.emplace_back(std::move(T));
311 }
312 
addTypeInfo(FunctionInfo * I,TypeInfo && T)313 template <> void addTypeInfo(FunctionInfo *I, TypeInfo &&T) {
314   I->ReturnType = std::move(T);
315 }
316 
addTypeInfo(FunctionInfo * I,FieldTypeInfo && T)317 template <> void addTypeInfo(FunctionInfo *I, FieldTypeInfo &&T) {
318   I->Params.emplace_back(std::move(T));
319 }
320 
addReference(T I,Reference && R,FieldId F)321 template <typename T> void addReference(T I, Reference &&R, FieldId F) {
322   llvm::errs() << "Invalid field type for info.\n";
323   exit(1);
324 }
325 
addReference(TypeInfo * I,Reference && R,FieldId F)326 template <> void addReference(TypeInfo *I, Reference &&R, FieldId F) {
327   switch (F) {
328   case FieldId::F_type:
329     I->Type = std::move(R);
330     break;
331   default:
332     llvm::errs() << "Invalid field type for info.\n";
333     exit(1);
334   }
335 }
336 
addReference(FieldTypeInfo * I,Reference && R,FieldId F)337 template <> void addReference(FieldTypeInfo *I, Reference &&R, FieldId F) {
338   switch (F) {
339   case FieldId::F_type:
340     I->Type = std::move(R);
341     break;
342   default:
343     llvm::errs() << "Invalid field type for info.\n";
344     exit(1);
345   }
346 }
347 
addReference(MemberTypeInfo * I,Reference && R,FieldId F)348 template <> void addReference(MemberTypeInfo *I, Reference &&R, FieldId F) {
349   switch (F) {
350   case FieldId::F_type:
351     I->Type = std::move(R);
352     break;
353   default:
354     llvm::errs() << "Invalid field type for info.\n";
355     exit(1);
356   }
357 }
358 
addReference(EnumInfo * I,Reference && R,FieldId F)359 template <> void addReference(EnumInfo *I, Reference &&R, FieldId F) {
360   switch (F) {
361   case FieldId::F_namespace:
362     I->Namespace.emplace_back(std::move(R));
363     break;
364   default:
365     llvm::errs() << "Invalid field type for info.\n";
366     exit(1);
367   }
368 }
369 
addReference(NamespaceInfo * I,Reference && R,FieldId F)370 template <> void addReference(NamespaceInfo *I, Reference &&R, FieldId F) {
371   switch (F) {
372   case FieldId::F_namespace:
373     I->Namespace.emplace_back(std::move(R));
374     break;
375   default:
376     llvm::errs() << "Invalid field type for info.\n";
377     exit(1);
378   }
379 }
380 
addReference(FunctionInfo * I,Reference && R,FieldId F)381 template <> void addReference(FunctionInfo *I, Reference &&R, FieldId F) {
382   switch (F) {
383   case FieldId::F_namespace:
384     I->Namespace.emplace_back(std::move(R));
385     break;
386   case FieldId::F_parent:
387     I->Parent = std::move(R);
388     break;
389   default:
390     llvm::errs() << "Invalid field type for info.\n";
391     exit(1);
392   }
393 }
394 
addReference(RecordInfo * I,Reference && R,FieldId F)395 template <> void addReference(RecordInfo *I, Reference &&R, FieldId F) {
396   switch (F) {
397   case FieldId::F_namespace:
398     I->Namespace.emplace_back(std::move(R));
399     break;
400   case FieldId::F_parent:
401     I->Parents.emplace_back(std::move(R));
402     break;
403   case FieldId::F_vparent:
404     I->VirtualParents.emplace_back(std::move(R));
405     break;
406   default:
407     llvm::errs() << "Invalid field type for info.\n";
408     exit(1);
409   }
410 }
411 
412 // Read records from bitcode into a given info.
readRecord(unsigned ID,T I)413 template <typename T> bool ClangDocBitcodeReader::readRecord(unsigned ID, T I) {
414   Record R;
415   llvm::StringRef Blob;
416   unsigned RecID = Stream.readRecord(ID, R, &Blob);
417   return parseRecord(R, RecID, Blob, I);
418 }
419 
readRecord(unsigned ID,Reference * I)420 template <> bool ClangDocBitcodeReader::readRecord(unsigned ID, Reference *I) {
421   Record R;
422   llvm::StringRef Blob;
423   unsigned RecID = Stream.readRecord(ID, R, &Blob);
424   return parseRecord(R, RecID, Blob, I, CurrentReferenceField);
425 }
426 
427 // Read a block of records into a single info.
readBlock(unsigned ID,T I)428 template <typename T> bool ClangDocBitcodeReader::readBlock(unsigned ID, T I) {
429   if (Stream.EnterSubBlock(ID))
430     return false;
431 
432   while (true) {
433     unsigned BlockOrCode = 0;
434     Cursor Res = skipUntilRecordOrBlock(BlockOrCode);
435 
436     switch (Res) {
437     case Cursor::BadBlock:
438       return false;
439     case Cursor::BlockEnd:
440       return true;
441     case Cursor::BlockBegin:
442       if (readSubBlock(BlockOrCode, I))
443         continue;
444       if (!Stream.SkipBlock())
445         return false;
446       continue;
447     case Cursor::Record:
448       break;
449     }
450     if (!readRecord(BlockOrCode, I))
451       return false;
452   }
453 }
454 
455 template <typename T>
readSubBlock(unsigned ID,T I)456 bool ClangDocBitcodeReader::readSubBlock(unsigned ID, T I) {
457   switch (ID) {
458   // Blocks can only have Comment, Reference, or TypeInfo subblocks
459   case BI_COMMENT_BLOCK_ID:
460     if (readBlock(ID, getCommentInfo(I)))
461       return true;
462     return false;
463   case BI_TYPE_BLOCK_ID: {
464     TypeInfo TI;
465     if (readBlock(ID, &TI)) {
466       addTypeInfo(I, std::move(TI));
467       return true;
468     }
469     return false;
470   }
471   case BI_FIELD_TYPE_BLOCK_ID: {
472     FieldTypeInfo TI;
473     if (readBlock(ID, &TI)) {
474       addTypeInfo(I, std::move(TI));
475       return true;
476     }
477     return false;
478   }
479   case BI_MEMBER_TYPE_BLOCK_ID: {
480     MemberTypeInfo TI;
481     if (readBlock(ID, &TI)) {
482       addTypeInfo(I, std::move(TI));
483       return true;
484     }
485     return false;
486   }
487   case BI_REFERENCE_BLOCK_ID: {
488     Reference R;
489     if (readBlock(ID, &R)) {
490       addReference(I, std::move(R), CurrentReferenceField);
491       return true;
492     }
493     return false;
494   }
495   default:
496     llvm::errs() << "Invalid subblock type.\n";
497     return false;
498   }
499 }
500 
501 ClangDocBitcodeReader::Cursor
skipUntilRecordOrBlock(unsigned & BlockOrRecordID)502 ClangDocBitcodeReader::skipUntilRecordOrBlock(unsigned &BlockOrRecordID) {
503   BlockOrRecordID = 0;
504 
505   while (!Stream.AtEndOfStream()) {
506     unsigned Code = Stream.ReadCode();
507 
508     switch ((llvm::bitc::FixedAbbrevIDs)Code) {
509     case llvm::bitc::ENTER_SUBBLOCK:
510       BlockOrRecordID = Stream.ReadSubBlockID();
511       return Cursor::BlockBegin;
512     case llvm::bitc::END_BLOCK:
513       if (Stream.ReadBlockEnd())
514         return Cursor::BadBlock;
515       return Cursor::BlockEnd;
516     case llvm::bitc::DEFINE_ABBREV:
517       Stream.ReadAbbrevRecord();
518       continue;
519     case llvm::bitc::UNABBREV_RECORD:
520       return Cursor::BadBlock;
521     default:
522       BlockOrRecordID = Code;
523       return Cursor::Record;
524     }
525   }
526   llvm_unreachable("Premature stream end.");
527 }
528 
validateStream()529 bool ClangDocBitcodeReader::validateStream() {
530   if (Stream.AtEndOfStream())
531     return false;
532 
533   // Sniff for the signature.
534   if (Stream.Read(8) != BitCodeConstants::Signature[0] ||
535       Stream.Read(8) != BitCodeConstants::Signature[1] ||
536       Stream.Read(8) != BitCodeConstants::Signature[2] ||
537       Stream.Read(8) != BitCodeConstants::Signature[3])
538     return false;
539   return true;
540 }
541 
readBlockInfoBlock()542 bool ClangDocBitcodeReader::readBlockInfoBlock() {
543   BlockInfo = Stream.ReadBlockInfoBlock();
544   if (!BlockInfo)
545     return false;
546   Stream.setBlockInfo(&*BlockInfo);
547   return true;
548 }
549 
550 template <typename T>
createInfo(unsigned ID)551 std::unique_ptr<Info> ClangDocBitcodeReader::createInfo(unsigned ID) {
552   std::unique_ptr<Info> I = llvm::make_unique<T>();
553   if (readBlock(ID, static_cast<T *>(I.get())))
554     return I;
555   llvm::errs() << "Error reading from block.\n";
556   return nullptr;
557 }
558 
readBlockToInfo(unsigned ID)559 std::unique_ptr<Info> ClangDocBitcodeReader::readBlockToInfo(unsigned ID) {
560   switch (ID) {
561   case BI_NAMESPACE_BLOCK_ID:
562     return createInfo<NamespaceInfo>(ID);
563   case BI_RECORD_BLOCK_ID:
564     return createInfo<RecordInfo>(ID);
565   case BI_ENUM_BLOCK_ID:
566     return createInfo<EnumInfo>(ID);
567   case BI_FUNCTION_BLOCK_ID:
568     return createInfo<FunctionInfo>(ID);
569   default:
570     llvm::errs() << "Error reading from block.\n";
571     return nullptr;
572   }
573 }
574 
575 // Entry point
readBitcode()576 std::vector<std::unique_ptr<Info>> ClangDocBitcodeReader::readBitcode() {
577   std::vector<std::unique_ptr<Info>> Infos;
578   if (!validateStream())
579     return Infos;
580 
581   // Read the top level blocks.
582   while (!Stream.AtEndOfStream()) {
583     unsigned Code = Stream.ReadCode();
584     if (Code != llvm::bitc::ENTER_SUBBLOCK)
585       return Infos;
586 
587     unsigned ID = Stream.ReadSubBlockID();
588     switch (ID) {
589     // NamedType and Comment blocks should not appear at the top level
590     case BI_TYPE_BLOCK_ID:
591     case BI_FIELD_TYPE_BLOCK_ID:
592     case BI_MEMBER_TYPE_BLOCK_ID:
593     case BI_COMMENT_BLOCK_ID:
594     case BI_REFERENCE_BLOCK_ID:
595       llvm::errs() << "Invalid top level block.\n";
596       return Infos;
597     case BI_NAMESPACE_BLOCK_ID:
598     case BI_RECORD_BLOCK_ID:
599     case BI_ENUM_BLOCK_ID:
600     case BI_FUNCTION_BLOCK_ID:
601       if (std::unique_ptr<Info> I = readBlockToInfo(ID)) {
602         Infos.emplace_back(std::move(I));
603       }
604       return Infos;
605     case BI_VERSION_BLOCK_ID:
606       if (readBlock(ID, VersionNumber))
607         continue;
608       return Infos;
609     case llvm::bitc::BLOCKINFO_BLOCK_ID:
610       if (readBlockInfoBlock())
611         continue;
612       return Infos;
613     default:
614       if (!Stream.SkipBlock())
615         continue;
616     }
617   }
618   return Infos;
619 }
620 
621 } // namespace doc
622 } // namespace clang
623