1 //
2 //  m3_parse.c
3 //
4 //  Created by Steven Massey on 4/19/19.
5 //  Copyright © 2019 Steven Massey. All rights reserved.
6 //
7 
8 #include "m3_env.h"
9 #include "m3_compile.h"
10 #include "m3_exec.h"
11 #include "m3_exception.h"
12 #include "m3_info.h"
13 
14 
ParseType_Table(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)15 M3Result  ParseType_Table  (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
16 {
17     M3Result result = m3Err_none;
18 
19     return result;
20 }
21 
22 
ParseType_Memory(M3MemoryInfo * o_memory,bytes_t * io_bytes,cbytes_t i_end)23 M3Result  ParseType_Memory  (M3MemoryInfo * o_memory, bytes_t * io_bytes, cbytes_t i_end)
24 {
25     M3Result result = m3Err_none;
26 
27     u8 flag;
28 
29 _   (ReadLEB_u7 (& flag, io_bytes, i_end));                   // really a u1
30 _   (ReadLEB_u32 (& o_memory->initPages, io_bytes, i_end));
31 
32     o_memory->maxPages = 0;
33     if (flag)
34 _       (ReadLEB_u32 (& o_memory->maxPages, io_bytes, i_end));
35 
36     _catch: return result;
37 }
38 
39 
ParseSection_Type(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)40 M3Result  ParseSection_Type  (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
41 {
42     M3Result result = m3Err_none;
43     IM3FuncType ftype = NULL;
44 
45 _try {
46     u32 numTypes;
47 _   (ReadLEB_u32 (& numTypes, & i_bytes, i_end));                                   m3log (parse, "** Type [%d]", numTypes);
48 
49     _throwif("too many types", numTypes > d_m3MaxSaneTypesCount);
50 
51     if (numTypes)
52     {
53         // table of IM3FuncType (that point to the actual M3FuncType struct in the Environment)
54         io_module->funcTypes = m3_AllocArray (IM3FuncType, numTypes);
55         _throwifnull (io_module->funcTypes);
56         io_module->numFuncTypes = numTypes;
57 
58         for (u32 i = 0; i < numTypes; ++i)
59         {
60             i8 form;
61 _           (ReadLEB_i7 (& form, & i_bytes, i_end));
62             _throwif (m3Err_wasmMalformed, form != -32); // for Wasm MVP
63 
64             u32 numArgs;
65 _           (ReadLEB_u32 (& numArgs, & i_bytes, i_end));
66 
67             _throwif (m3Err_tooManyArgsRets, numArgs > d_m3MaxSaneFunctionArgRetCount);
68 #if defined(M3_COMPILER_MSVC)
69             u8 argTypes [d_m3MaxSaneFunctionArgRetCount];
70 #else
71             u8 argTypes[numArgs+1]; // make ubsan happy
72 #endif
73             for (u32 a = 0; a < numArgs; ++a)
74             {
75                 i8 wasmType;
76                 u8 argType;
77 _               (ReadLEB_i7 (& wasmType, & i_bytes, i_end));
78 _               (NormalizeType (& argType, wasmType));
79 
80                 argTypes[a] = argType;
81             }
82 
83             u32 numRets;
84 _           (ReadLEB_u32 (& numRets, & i_bytes, i_end));
85             _throwif (m3Err_tooManyArgsRets, (u64)(numRets) + numArgs > d_m3MaxSaneFunctionArgRetCount);
86 
87 _           (AllocFuncType (& ftype, numRets + numArgs));
88             ftype->numArgs = numArgs;
89             ftype->numRets = numRets;
90 
91             for (u32 r = 0; r < numRets; ++r)
92             {
93                 i8 wasmType;
94                 u8 retType;
95 _               (ReadLEB_i7 (& wasmType, & i_bytes, i_end));
96 _               (NormalizeType (& retType, wasmType));
97 
98                 ftype->types[r] = retType;
99             }
100             memcpy (ftype->types + numRets, argTypes, numArgs);                                 m3log (parse, "    type %2d: %s", i, SPrintFuncTypeSignature (ftype));
101 
102             Environment_AddFuncType (io_module->environment, & ftype);
103             io_module->funcTypes [i] = ftype;
104             ftype = NULL; // ownership transfered to environment
105         }
106     }
107 
108 } _catch:
109 
110     if (result)
111     {
112         m3_Free (ftype);
113         // FIX: M3FuncTypes in the table are leaked
114         m3_Free (io_module->funcTypes);
115         io_module->numFuncTypes = 0;
116     }
117 
118     return result;
119 }
120 
121 
ParseSection_Function(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)122 M3Result  ParseSection_Function  (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
123 {
124     M3Result result = m3Err_none;
125 
126     u32 numFunctions;
127 _   (ReadLEB_u32 (& numFunctions, & i_bytes, i_end));                               m3log (parse, "** Function [%d]", numFunctions);
128 
129     _throwif("too many functions", numFunctions > d_m3MaxSaneFunctionsCount);
130 
131     // TODO: prealloc functions
132 
133     for (u32 i = 0; i < numFunctions; ++i)
134     {
135         u32 funcTypeIndex;
136 _       (ReadLEB_u32 (& funcTypeIndex, & i_bytes, i_end));
137 
138 _       (Module_AddFunction (io_module, funcTypeIndex, NULL /* import info */));
139     }
140 
141     _catch: return result;
142 }
143 
144 
ParseSection_Import(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)145 M3Result  ParseSection_Import  (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
146 {
147     M3Result result = m3Err_none;
148 
149     M3ImportInfo import = { NULL, NULL }, clearImport = { NULL, NULL };
150 
151     u32 numImports;
152 _   (ReadLEB_u32 (& numImports, & i_bytes, i_end));                                 m3log (parse, "** Import [%d]", numImports);
153 
154     _throwif("too many imports", numImports > d_m3MaxSaneImportsCount);
155 
156     for (u32 i = 0; i < numImports; ++i)
157     {
158         u8 importKind;
159 
160 _       (Read_utf8 (& import.moduleUtf8, & i_bytes, i_end));
161 _       (Read_utf8 (& import.fieldUtf8, & i_bytes, i_end));
162 _       (Read_u8 (& importKind, & i_bytes, i_end));                                 m3log (parse, "    kind: %d '%s.%s' ",
163                                                                                                 (u32) importKind, import.moduleUtf8, import.fieldUtf8);
164         switch (importKind)
165         {
166             case d_externalKind_function:
167             {
168                 u32 typeIndex;
169 _               (ReadLEB_u32 (& typeIndex, & i_bytes, i_end))
170 
171 _               (Module_AddFunction (io_module, typeIndex, & import))
172                 import = clearImport;
173 
174                 io_module->numFuncImports++;
175             }
176             break;
177 
178             case d_externalKind_table:
179 //                  result = ParseType_Table (& i_bytes, i_end);
180                 break;
181 
182             case d_externalKind_memory:
183             {
184 _               (ParseType_Memory (& io_module->memoryInfo, & i_bytes, i_end));
185                 io_module->memoryImported = true;
186             }
187             break;
188 
189             case d_externalKind_global:
190             {
191                 i8 waType;
192                 u8 type, isMutable;
193 
194 _               (ReadLEB_i7 (& waType, & i_bytes, i_end));
195 _               (NormalizeType (& type, waType));
196 _               (ReadLEB_u7 (& isMutable, & i_bytes, i_end));                     m3log (parse, "     global: %s mutable=%d", c_waTypes [type], (u32) isMutable);
197 
198                 IM3Global global;
199 _               (Module_AddGlobal (io_module, & global, type, isMutable, true /* isImport */));
200                 global->import = import;
201                 import = clearImport;
202             }
203             break;
204 
205             default:
206                 _throw (m3Err_wasmMalformed);
207         }
208 
209         FreeImportInfo (& import);
210     }
211 
212     _catch:
213 
214     FreeImportInfo (& import);
215 
216     return result;
217 }
218 
219 
ParseSection_Export(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)220 M3Result  ParseSection_Export  (IM3Module io_module, bytes_t i_bytes, cbytes_t  i_end)
221 {
222     M3Result result = m3Err_none;
223     const char * utf8 = NULL;
224 
225     u32 numExports;
226 _   (ReadLEB_u32 (& numExports, & i_bytes, i_end));                                 m3log (parse, "** Export [%d]", numExports);
227 
228     _throwif("too many exports", numExports > d_m3MaxSaneExportsCount);
229 
230     for (u32 i = 0; i < numExports; ++i)
231     {
232         u8 exportKind;
233         u32 index;
234 
235 _       (Read_utf8 (& utf8, & i_bytes, i_end));
236 _       (Read_u8 (& exportKind, & i_bytes, i_end));
237 _       (ReadLEB_u32 (& index, & i_bytes, i_end));                                  m3log (parse, "    index: %3d; kind: %d; export: '%s'; ", index, (u32) exportKind, utf8);
238 
239         if (exportKind == d_externalKind_function)
240         {
241             _throwif(m3Err_wasmMalformed, index >= io_module->numFunctions);
242             IM3Function func = &(io_module->functions [index]);
243             if (func->numNames < d_m3MaxDuplicateFunctionImpl)
244             {
245                 func->names[func->numNames++] = utf8;
246                 utf8 = NULL; // ownership transferred to M3Function
247             }
248         }
249         else if (exportKind == d_externalKind_global)
250         {
251             _throwif(m3Err_wasmMalformed, index >= io_module->numGlobals);
252             IM3Global global = &(io_module->globals [index]);
253             m3_Free (global->name);
254             global->name = utf8;
255             utf8 = NULL; // ownership transferred to M3Global
256         }
257 
258         m3_Free (utf8);
259     }
260 
261 _catch:
262     m3_Free (utf8);
263     return result;
264 }
265 
266 
ParseSection_Start(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)267 M3Result  ParseSection_Start  (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
268 {
269     M3Result result = m3Err_none;
270 
271     u32 startFuncIndex;
272 _   (ReadLEB_u32 (& startFuncIndex, & i_bytes, i_end));                               m3log (parse, "** Start Function: %d", startFuncIndex);
273 
274     if (startFuncIndex < io_module->numFunctions)
275     {
276         io_module->startFunction = startFuncIndex;
277     }
278     else result = "start function index out of bounds";
279 
280     _catch: return result;
281 }
282 
283 
Parse_InitExpr(M3Module * io_module,bytes_t * io_bytes,cbytes_t i_end)284 M3Result  Parse_InitExpr  (M3Module * io_module, bytes_t * io_bytes, cbytes_t i_end)
285 {
286     M3Result result = m3Err_none;
287 
288     // this doesn't generate code pages. just walks the wasm bytecode to find the end
289 
290 #if defined(d_m3PreferStaticAlloc)
291     static M3Compilation compilation;
292 #else
293     M3Compilation compilation;
294 #endif
295     compilation = (M3Compilation){ .runtime = NULL, .module = io_module, .wasm = * io_bytes, .wasmEnd = i_end };
296 
297     result = CompileBlockStatements (& compilation);
298 
299     * io_bytes = compilation.wasm;
300 
301     return result;
302 }
303 
304 
ParseSection_Element(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)305 M3Result  ParseSection_Element  (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
306 {
307     M3Result result = m3Err_none;
308 
309     u32 numSegments;
310     result = ReadLEB_u32 (& numSegments, & i_bytes, i_end);                         m3log (parse, "** Element [%d]", numSegments);
311 
312     _throwif ("error parsing Element section", result);
313 
314     _throwif ("too many element segments", numSegments > d_m3MaxSaneElementSegments);
315 
316     io_module->elementSection = i_bytes;
317     io_module->elementSectionEnd = i_end;
318     io_module->numElementSegments = numSegments;
319 
320     _catch: return result;
321 }
322 
323 
ParseSection_Code(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)324 M3Result  ParseSection_Code  (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
325 {
326     M3Result result;
327 
328     u32 numFunctions;
329 _   (ReadLEB_u32 (& numFunctions, & i_bytes, i_end));                               m3log (parse, "** Code [%d]", numFunctions);
330 
331     if (numFunctions != io_module->numFunctions - io_module->numFuncImports)
332     {
333         _throw ("mismatched function count in code section");
334     }
335 
336     for (u32 f = 0; f < numFunctions; ++f)
337     {
338         const u8 * start = i_bytes;
339 
340         u32 size;
341 _       (ReadLEB_u32 (& size, & i_bytes, i_end));
342 
343         if (size)
344         {
345             const u8 * ptr = i_bytes;
346             i_bytes += size;
347 
348             if (i_bytes <= i_end)
349             {
350                 /*
351                 u32 numLocalBlocks;
352 _               (ReadLEB_u32 (& numLocalBlocks, & ptr, i_end));                                      m3log (parse, "    code size: %-4d", size);
353 
354                 u32 numLocals = 0;
355 
356                 for (u32 l = 0; l < numLocalBlocks; ++l)
357                 {
358                     u32 varCount;
359                     i8 wasmType;
360                     u8 normalType;
361 
362 _                   (ReadLEB_u32 (& varCount, & ptr, i_end));
363 _                   (ReadLEB_i7 (& wasmType, & ptr, i_end));
364 _                   (NormalizeType (& normalType, wasmType));
365 
366                     numLocals += varCount;                                                      m3log (parse, "      %2d locals; type: '%s'", varCount, c_waTypes [normalType]);
367                 }
368                  */
369 
370                 IM3Function func = Module_GetFunction (io_module, f + io_module->numFuncImports);
371 
372                 func->module = io_module;
373                 func->wasm = start;
374                 func->wasmEnd = i_bytes;
375                 //func->ownsWasmCode = io_module->hasWasmCodeCopy;
376 //                func->numLocals = numLocals;
377             }
378             else _throw (m3Err_wasmSectionOverrun);
379         }
380     }
381 
382     _catch:
383 
384     if (not result and i_bytes != i_end)
385         result = m3Err_wasmSectionUnderrun;
386 
387     return result;
388 }
389 
390 
ParseSection_Data(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)391 M3Result  ParseSection_Data  (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
392 {
393     M3Result result = m3Err_none;
394 
395     u32 numDataSegments;
396 _   (ReadLEB_u32 (& numDataSegments, & i_bytes, i_end));                            m3log (parse, "** Data [%d]", numDataSegments);
397 
398     _throwif("too many data segments", numDataSegments > d_m3MaxSaneDataSegments);
399 
400     io_module->dataSegments = m3_AllocArray (M3DataSegment, numDataSegments);
401     _throwifnull(io_module->dataSegments);
402     io_module->numDataSegments = numDataSegments;
403 
404     for (u32 i = 0; i < numDataSegments; ++i)
405     {
406         M3DataSegment * segment = & io_module->dataSegments [i];
407 
408 _       (ReadLEB_u32 (& segment->memoryRegion, & i_bytes, i_end));
409 
410         segment->initExpr = i_bytes;
411 _       (Parse_InitExpr (io_module, & i_bytes, i_end));
412         segment->initExprSize = (u32) (i_bytes - segment->initExpr);
413 
414         _throwif (m3Err_wasmMissingInitExpr, segment->initExprSize <= 1);
415 
416 _       (ReadLEB_u32 (& segment->size, & i_bytes, i_end));
417         segment->data = i_bytes;                                                    m3log (parse, "    segment [%u]  memory: %u;  expr-size: %d;  size: %d",
418                                                                                        i, segment->memoryRegion, segment->initExprSize, segment->size);
419         i_bytes += segment->size;
420 
421         _throwif("data segment underflow", i_bytes > i_end);
422     }
423 
424     _catch:
425 
426     return result;
427 }
428 
429 
ParseSection_Memory(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)430 M3Result  ParseSection_Memory  (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
431 {
432     M3Result result = m3Err_none;
433 
434     // TODO: MVP; assert no memory imported
435 
436     u32 numMemories;
437 _   (ReadLEB_u32 (& numMemories, & i_bytes, i_end));                             m3log (parse, "** Memory [%d]", numMemories);
438 
439     _throwif (m3Err_tooManyMemorySections, numMemories != 1);
440 
441     ParseType_Memory (& io_module->memoryInfo, & i_bytes, i_end);
442 
443     _catch: return result;
444 }
445 
446 
ParseSection_Global(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)447 M3Result  ParseSection_Global  (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
448 {
449     M3Result result = m3Err_none;
450 
451     u32 numGlobals;
452 _   (ReadLEB_u32 (& numGlobals, & i_bytes, i_end));                                 m3log (parse, "** Global [%d]", numGlobals);
453 
454     _throwif("too many globals", numGlobals > d_m3MaxSaneGlobalsCount);
455 
456     for (u32 i = 0; i < numGlobals; ++i)
457     {
458         i8 waType;
459         u8 type, isMutable;
460 
461 _       (ReadLEB_i7 (& waType, & i_bytes, i_end));
462 _       (NormalizeType (& type, waType));
463 _       (ReadLEB_u7 (& isMutable, & i_bytes, i_end));                                 m3log (parse, "    global: [%d] %s mutable: %d", i, c_waTypes [type],   (u32) isMutable);
464 
465         IM3Global global;
466 _       (Module_AddGlobal (io_module, & global, type, isMutable, false /* isImport */));
467 
468         global->initExpr = i_bytes;
469 _       (Parse_InitExpr (io_module, & i_bytes, i_end));
470         global->initExprSize = (u32) (i_bytes - global->initExpr);
471 
472         _throwif (m3Err_wasmMissingInitExpr, global->initExprSize <= 1);
473     }
474 
475     _catch: return result;
476 }
477 
478 
ParseSection_Custom(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)479 M3Result  ParseSection_Custom  (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
480 {
481     M3Result result;
482 
483     cstr_t name;
484 _   (Read_utf8 (& name, & i_bytes, i_end));
485                                                                                     m3log (parse, "** Custom: '%s'", name);
486     if (strcmp (name, "name") != 0)
487         i_bytes = i_end;
488 
489     m3_Free (name);
490 
491     while (i_bytes < i_end)
492     {
493         u8 nameType;
494         u32 payloadLength;
495 
496 _       (ReadLEB_u7 (& nameType, & i_bytes, i_end));
497 _       (ReadLEB_u32 (& payloadLength, & i_bytes, i_end));
498 
499         bytes_t start = i_bytes;
500         if (nameType == 1)
501         {
502             u32 numNames;
503 _           (ReadLEB_u32 (& numNames, & i_bytes, i_end));
504 
505             _throwif("too many names", numNames > d_m3MaxSaneFunctionsCount);
506 
507             for (u32 i = 0; i < numNames; ++i)
508             {
509                 u32 index;
510 _               (ReadLEB_u32 (& index, & i_bytes, i_end));
511 _               (Read_utf8 (& name, & i_bytes, i_end));
512 
513                 if (index < io_module->numFunctions)
514                 {
515                     IM3Function func = &(io_module->functions [index]);
516                     if (func->numNames == 0)
517                     {
518                         func->names[0] = name;        m3log (parse, "    naming function%5d:  %s", index, name);
519                         func->numNames = 1;
520                         name = NULL; // transfer ownership
521                     }
522 //                          else m3log (parse, "prenamed: %s", io_module->functions [index].name);
523                 }
524 
525                 m3_Free (name);
526             }
527         }
528 
529         i_bytes = start + payloadLength;
530     }
531 
532     _catch: return result;
533 }
534 
535 
ParseModuleSection(M3Module * o_module,u8 i_sectionType,bytes_t i_bytes,u32 i_numBytes)536 M3Result  ParseModuleSection  (M3Module * o_module, u8 i_sectionType, bytes_t i_bytes, u32 i_numBytes)
537 {
538     M3Result result = m3Err_none;
539 
540     typedef M3Result (* M3Parser) (M3Module *, bytes_t, cbytes_t);
541 
542     static M3Parser s_parsers [] =
543     {
544         ParseSection_Custom,    // 0
545         ParseSection_Type,      // 1
546         ParseSection_Import,    // 2
547         ParseSection_Function,  // 3
548         NULL,                   // 4: TODO Table
549         ParseSection_Memory,    // 5
550         ParseSection_Global,    // 6
551         ParseSection_Export,    // 7
552         ParseSection_Start,     // 8
553         ParseSection_Element,   // 9
554         ParseSection_Code,      // 10
555         ParseSection_Data,      // 11
556         NULL,                   // 12: TODO DataCount
557     };
558 
559     M3Parser parser = NULL;
560 
561     if (i_sectionType <= 12)
562         parser = s_parsers [i_sectionType];
563 
564     if (parser)
565     {
566         cbytes_t end = i_bytes + i_numBytes;
567         result = parser (o_module, i_bytes, end);
568     }
569     else
570     {
571         m3log (parse, " skipped section type: %d", (u32) i_sectionType);
572     }
573 
574     return result;
575 }
576 
577 
m3_ParseModule(IM3Environment i_environment,IM3Module * o_module,cbytes_t i_bytes,u32 i_numBytes)578 M3Result  m3_ParseModule  (IM3Environment i_environment, IM3Module * o_module, cbytes_t i_bytes, u32 i_numBytes)
579 {
580     M3Result result;                                                             m3log (parse, "load module: %d bytes", i_numBytes);
581 
582     IM3Module module;
583 _try {
584     module = m3_AllocStruct (M3Module);
585     _throwifnull (module);
586     module->name = ".unnamed";                                                      m3log (parse, "load module: %d bytes", i_numBytes);
587     module->startFunction = -1;
588     //module->hasWasmCodeCopy = false;
589     module->environment = i_environment;
590 
591     const u8 * pos = i_bytes;
592     const u8 * end = pos + i_numBytes;
593 
594     module->wasmStart = pos;
595     module->wasmEnd = end;
596 
597     u32 magic, version;
598 _   (Read_u32 (& magic, & pos, end));
599 _   (Read_u32 (& version, & pos, end));
600 
601     _throwif (m3Err_wasmMalformed, magic != 0x6d736100);
602     _throwif (m3Err_incompatibleWasmVersion, version != 1);
603 
604     static const u8 sectionsOrder[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 10, 11, 0 }; // 0 is a placeholder
605     u8 expectedSection = 0;
606 
607     while (pos < end)
608     {
609         u8 section;
610 _       (ReadLEB_u7 (& section, & pos, end));
611 
612         if (section != 0) {
613             // Ensure sections appear only once and in order
614             while (sectionsOrder[expectedSection++] != section) {
615                 _throwif(m3Err_misorderedWasmSection, expectedSection >= 12);
616             }
617         }
618 
619         u32 sectionLength;
620 _       (ReadLEB_u32 (& sectionLength, & pos, end));
621         _throwif(m3Err_wasmMalformed, pos + sectionLength > end);
622 
623 _       (ParseModuleSection (module, section, pos, sectionLength));
624 
625         pos += sectionLength;
626     }
627 
628 } _catch:
629 
630     if (result)
631     {
632         m3_FreeModule (module);
633         module = NULL;
634     }
635 
636     * o_module = module;
637 
638     return result;
639 }
640