1 //===-- BrainF.cpp - BrainF compiler example ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class compiles the BrainF language into LLVM assembly.
10 //
11 // The BrainF language has 8 commands:
12 // Command   Equivalent C    Action
13 // -------   ------------    ------
14 // ,         *h=getchar();   Read a character from stdin, 255 on EOF
15 // .         putchar(*h);    Write a character to stdout
16 // -         --*h;           Decrement tape
17 // +         ++*h;           Increment tape
18 // <         --h;            Move head left
19 // >         ++h;            Move head right
20 // [         while(*h) {     Start loop
21 // ]         }               End loop
22 //
23 //===----------------------------------------------------------------------===//
24 
25 #include "BrainF.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/GlobalVariable.h"
34 #include "llvm/IR/InstrTypes.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/Intrinsics.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/Support/Casting.h"
41 #include <cstdlib>
42 #include <iostream>
43 
44 using namespace llvm;
45 
46 //Set the constants for naming
47 const char *BrainF::tapereg = "tape";
48 const char *BrainF::headreg = "head";
49 const char *BrainF::label   = "brainf";
50 const char *BrainF::testreg = "test";
51 
parse(std::istream * in1,int mem,CompileFlags cf,LLVMContext & Context)52 Module *BrainF::parse(std::istream *in1, int mem, CompileFlags cf,
53                       LLVMContext& Context) {
54   in       = in1;
55   memtotal = mem;
56   comflag  = cf;
57 
58   header(Context);
59   readloop(nullptr, nullptr, nullptr, Context);
60   delete builder;
61   return module;
62 }
63 
header(LLVMContext & C)64 void BrainF::header(LLVMContext& C) {
65   module = new Module("BrainF", C);
66 
67   //Function prototypes
68 
69   //declare void @llvm.memset.p0i8.i32(i8 *, i8, i32, i32, i1)
70   Type *Tys[] = { Type::getInt8PtrTy(C), Type::getInt32Ty(C) };
71   Function *memset_func = Intrinsic::getDeclaration(module, Intrinsic::memset,
72                                                     Tys);
73 
74   //declare i32 @getchar()
75   getchar_func =
76       module->getOrInsertFunction("getchar", IntegerType::getInt32Ty(C));
77 
78   //declare i32 @putchar(i32)
79   putchar_func = module->getOrInsertFunction(
80       "putchar", IntegerType::getInt32Ty(C), IntegerType::getInt32Ty(C));
81 
82   //Function header
83 
84   //define void @brainf()
85   brainf_func = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
86                                  Function::ExternalLinkage, "brainf", module);
87 
88   builder = new IRBuilder<>(BasicBlock::Create(C, label, brainf_func));
89 
90   //%arr = malloc i8, i32 %d
91   ConstantInt *val_mem = ConstantInt::get(C, APInt(32, memtotal));
92   BasicBlock* BB = builder->GetInsertBlock();
93   Type* IntPtrTy = IntegerType::getInt32Ty(C);
94   Type* Int8Ty = IntegerType::getInt8Ty(C);
95   Constant* allocsize = ConstantExpr::getSizeOf(Int8Ty);
96   allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy);
97   ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, Int8Ty, allocsize, val_mem,
98                                    nullptr, "arr");
99   BB->getInstList().push_back(cast<Instruction>(ptr_arr));
100 
101   //call void @llvm.memset.p0i8.i32(i8 *%arr, i8 0, i32 %d, i32 1, i1 0)
102   {
103     Value *memset_params[] = {
104       ptr_arr,
105       ConstantInt::get(C, APInt(8, 0)),
106       val_mem,
107       ConstantInt::get(C, APInt(32, 1)),
108       ConstantInt::get(C, APInt(1, 0))
109     };
110 
111     CallInst *memset_call = builder->
112       CreateCall(memset_func, memset_params);
113     memset_call->setTailCall(false);
114   }
115 
116   //%arrmax = getelementptr i8 *%arr, i32 %d
117   if (comflag & flag_arraybounds) {
118     ptr_arrmax = builder->
119       CreateGEP(ptr_arr, ConstantInt::get(C, APInt(32, memtotal)), "arrmax");
120   }
121 
122   //%head.%d = getelementptr i8 *%arr, i32 %d
123   curhead = builder->CreateGEP(ptr_arr,
124                                ConstantInt::get(C, APInt(32, memtotal/2)),
125                                headreg);
126 
127   //Function footer
128 
129   //brainf.end:
130   endbb = BasicBlock::Create(C, label, brainf_func);
131 
132   //call free(i8 *%arr)
133   endbb->getInstList().push_back(CallInst::CreateFree(ptr_arr, endbb));
134 
135   //ret void
136   ReturnInst::Create(C, endbb);
137 
138   //Error block for array out of bounds
139   if (comflag & flag_arraybounds)
140   {
141     //@aberrormsg = internal constant [%d x i8] c"\00"
142     Constant *msg_0 =
143       ConstantDataArray::getString(C, "Error: The head has left the tape.",
144                                    true);
145 
146     GlobalVariable *aberrormsg = new GlobalVariable(
147       *module,
148       msg_0->getType(),
149       true,
150       GlobalValue::InternalLinkage,
151       msg_0,
152       "aberrormsg");
153 
154     //declare i32 @puts(i8 *)
155     FunctionCallee puts_func = module->getOrInsertFunction(
156         "puts", IntegerType::getInt32Ty(C),
157         PointerType::getUnqual(IntegerType::getInt8Ty(C)));
158 
159     //brainf.aberror:
160     aberrorbb = BasicBlock::Create(C, label, brainf_func);
161 
162     //call i32 @puts(i8 *getelementptr([%d x i8] *@aberrormsg, i32 0, i32 0))
163     {
164       Constant *zero_32 = Constant::getNullValue(IntegerType::getInt32Ty(C));
165 
166       Constant *gep_params[] = {
167         zero_32,
168         zero_32
169       };
170 
171       Constant *msgptr = ConstantExpr::
172         getGetElementPtr(aberrormsg->getValueType(), aberrormsg, gep_params);
173 
174       Value *puts_params[] = {
175         msgptr
176       };
177 
178       CallInst *puts_call =
179         CallInst::Create(puts_func,
180                          puts_params,
181                          "", aberrorbb);
182       puts_call->setTailCall(false);
183     }
184 
185     //br label %brainf.end
186     BranchInst::Create(endbb, aberrorbb);
187   }
188 }
189 
readloop(PHINode * phi,BasicBlock * oldbb,BasicBlock * testbb,LLVMContext & C)190 void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb,
191                       LLVMContext &C) {
192   Symbol cursym = SYM_NONE;
193   int curvalue = 0;
194   Symbol nextsym = SYM_NONE;
195   int nextvalue = 0;
196   char c;
197   int loop;
198   int direction;
199 
200   while(cursym != SYM_EOF && cursym != SYM_ENDLOOP) {
201     // Write out commands
202     switch(cursym) {
203       case SYM_NONE:
204         // Do nothing
205         break;
206 
207       case SYM_READ:
208         {
209           //%tape.%d = call i32 @getchar()
210           CallInst *getchar_call =
211               builder->CreateCall(getchar_func, {}, tapereg);
212           getchar_call->setTailCall(false);
213           Value *tape_0 = getchar_call;
214 
215           //%tape.%d = trunc i32 %tape.%d to i8
216           Value *tape_1 = builder->
217             CreateTrunc(tape_0, IntegerType::getInt8Ty(C), tapereg);
218 
219           //store i8 %tape.%d, i8 *%head.%d
220           builder->CreateStore(tape_1, curhead);
221         }
222         break;
223 
224       case SYM_WRITE:
225         {
226           //%tape.%d = load i8 *%head.%d
227           LoadInst *tape_0 = builder->CreateLoad(curhead, tapereg);
228 
229           //%tape.%d = sext i8 %tape.%d to i32
230           Value *tape_1 = builder->
231             CreateSExt(tape_0, IntegerType::getInt32Ty(C), tapereg);
232 
233           //call i32 @putchar(i32 %tape.%d)
234           Value *putchar_params[] = {
235             tape_1
236           };
237           CallInst *putchar_call = builder->
238             CreateCall(putchar_func,
239                        putchar_params);
240           putchar_call->setTailCall(false);
241         }
242         break;
243 
244       case SYM_MOVE:
245         {
246           //%head.%d = getelementptr i8 *%head.%d, i32 %d
247           curhead = builder->
248             CreateGEP(curhead, ConstantInt::get(C, APInt(32, curvalue)),
249                       headreg);
250 
251           //Error block for array out of bounds
252           if (comflag & flag_arraybounds)
253           {
254             //%test.%d = icmp uge i8 *%head.%d, %arrmax
255             Value *test_0 = builder->
256               CreateICmpUGE(curhead, ptr_arrmax, testreg);
257 
258             //%test.%d = icmp ult i8 *%head.%d, %arr
259             Value *test_1 = builder->
260               CreateICmpULT(curhead, ptr_arr, testreg);
261 
262             //%test.%d = or i1 %test.%d, %test.%d
263             Value *test_2 = builder->
264               CreateOr(test_0, test_1, testreg);
265 
266             //br i1 %test.%d, label %main.%d, label %main.%d
267             BasicBlock *nextbb = BasicBlock::Create(C, label, brainf_func);
268             builder->CreateCondBr(test_2, aberrorbb, nextbb);
269 
270             //main.%d:
271             builder->SetInsertPoint(nextbb);
272           }
273         }
274         break;
275 
276       case SYM_CHANGE:
277         {
278           //%tape.%d = load i8 *%head.%d
279           LoadInst *tape_0 = builder->CreateLoad(curhead, tapereg);
280 
281           //%tape.%d = add i8 %tape.%d, %d
282           Value *tape_1 = builder->
283             CreateAdd(tape_0, ConstantInt::get(C, APInt(8, curvalue)), tapereg);
284 
285           //store i8 %tape.%d, i8 *%head.%d\n"
286           builder->CreateStore(tape_1, curhead);
287         }
288         break;
289 
290       case SYM_LOOP:
291         {
292           //br label %main.%d
293           BasicBlock *testbb = BasicBlock::Create(C, label, brainf_func);
294           builder->CreateBr(testbb);
295 
296           //main.%d:
297           BasicBlock *bb_0 = builder->GetInsertBlock();
298           BasicBlock *bb_1 = BasicBlock::Create(C, label, brainf_func);
299           builder->SetInsertPoint(bb_1);
300 
301           // Make part of PHI instruction now, wait until end of loop to finish
302           PHINode *phi_0 =
303             PHINode::Create(PointerType::getUnqual(IntegerType::getInt8Ty(C)),
304                             2, headreg, testbb);
305           phi_0->addIncoming(curhead, bb_0);
306           curhead = phi_0;
307 
308           readloop(phi_0, bb_1, testbb, C);
309         }
310         break;
311 
312       default:
313         std::cerr << "Error: Unknown symbol.\n";
314         abort();
315         break;
316     }
317 
318     cursym = nextsym;
319     curvalue = nextvalue;
320     nextsym = SYM_NONE;
321 
322     // Reading stdin loop
323     loop = (cursym == SYM_NONE)
324         || (cursym == SYM_MOVE)
325         || (cursym == SYM_CHANGE);
326     while(loop) {
327       *in>>c;
328       if (in->eof()) {
329         if (cursym == SYM_NONE) {
330           cursym = SYM_EOF;
331         } else {
332           nextsym = SYM_EOF;
333         }
334         loop = 0;
335       } else {
336         direction = 1;
337         switch(c) {
338           case '-':
339             direction = -1;
340             LLVM_FALLTHROUGH;
341 
342           case '+':
343             if (cursym == SYM_CHANGE) {
344               curvalue += direction;
345               // loop = 1
346             } else {
347               if (cursym == SYM_NONE) {
348                 cursym = SYM_CHANGE;
349                 curvalue = direction;
350                 // loop = 1
351               } else {
352                 nextsym = SYM_CHANGE;
353                 nextvalue = direction;
354                 loop = 0;
355               }
356             }
357             break;
358 
359           case '<':
360             direction = -1;
361             LLVM_FALLTHROUGH;
362 
363           case '>':
364             if (cursym == SYM_MOVE) {
365               curvalue += direction;
366               // loop = 1
367             } else {
368               if (cursym == SYM_NONE) {
369                 cursym = SYM_MOVE;
370                 curvalue = direction;
371                 // loop = 1
372               } else {
373                 nextsym = SYM_MOVE;
374                 nextvalue = direction;
375                 loop = 0;
376               }
377             }
378             break;
379 
380           case ',':
381             if (cursym == SYM_NONE) {
382               cursym = SYM_READ;
383             } else {
384               nextsym = SYM_READ;
385             }
386             loop = 0;
387             break;
388 
389           case '.':
390             if (cursym == SYM_NONE) {
391               cursym = SYM_WRITE;
392             } else {
393               nextsym = SYM_WRITE;
394             }
395             loop = 0;
396             break;
397 
398           case '[':
399             if (cursym == SYM_NONE) {
400               cursym = SYM_LOOP;
401             } else {
402               nextsym = SYM_LOOP;
403             }
404             loop = 0;
405             break;
406 
407           case ']':
408             if (cursym == SYM_NONE) {
409               cursym = SYM_ENDLOOP;
410             } else {
411               nextsym = SYM_ENDLOOP;
412             }
413             loop = 0;
414             break;
415 
416           // Ignore other characters
417           default:
418             break;
419         }
420       }
421     }
422   }
423 
424   if (cursym == SYM_ENDLOOP) {
425     if (!phi) {
426       std::cerr << "Error: Extra ']'\n";
427       abort();
428     }
429 
430     // Write loop test
431     {
432       //br label %main.%d
433       builder->CreateBr(testbb);
434 
435       //main.%d:
436 
437       //%head.%d = phi i8 *[%head.%d, %main.%d], [%head.%d, %main.%d]
438       //Finish phi made at beginning of loop
439       phi->addIncoming(curhead, builder->GetInsertBlock());
440       Value *head_0 = phi;
441 
442       //%tape.%d = load i8 *%head.%d
443       LoadInst *tape_0 = new LoadInst(head_0, tapereg, testbb);
444 
445       //%test.%d = icmp eq i8 %tape.%d, 0
446       ICmpInst *test_0 = new ICmpInst(*testbb, ICmpInst::ICMP_EQ, tape_0,
447                                     ConstantInt::get(C, APInt(8, 0)), testreg);
448 
449       //br i1 %test.%d, label %main.%d, label %main.%d
450       BasicBlock *bb_0 = BasicBlock::Create(C, label, brainf_func);
451       BranchInst::Create(bb_0, oldbb, test_0, testbb);
452 
453       //main.%d:
454       builder->SetInsertPoint(bb_0);
455 
456       //%head.%d = phi i8 *[%head.%d, %main.%d]
457       PHINode *phi_1 = builder->
458         CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), 1,
459                   headreg);
460       phi_1->addIncoming(head_0, testbb);
461       curhead = phi_1;
462     }
463 
464     return;
465   }
466 
467   //End of the program, so go to return block
468   builder->CreateBr(endbb);
469 
470   if (phi) {
471     std::cerr << "Error: Missing ']'\n";
472     abort();
473   }
474 }
475