1 //
2 // m3_parse.c
3 //
4 // Created by Steven Massey on 4/19/19.
5 // Copyright © 2019 Steven Massey. All rights reserved.
6 //
7
8 #include "m3_env.h"
9 #include "m3_compile.h"
10 #include "m3_exec.h"
11 #include "m3_exception.h"
12 #include "m3_info.h"
13
14
ParseType_Table(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)15 M3Result ParseType_Table (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
16 {
17 M3Result result = m3Err_none;
18
19 return result;
20 }
21
22
ParseType_Memory(M3MemoryInfo * o_memory,bytes_t * io_bytes,cbytes_t i_end)23 M3Result ParseType_Memory (M3MemoryInfo * o_memory, bytes_t * io_bytes, cbytes_t i_end)
24 {
25 M3Result result = m3Err_none;
26
27 u8 flag;
28
29 _ (ReadLEB_u7 (& flag, io_bytes, i_end)); // really a u1
30 _ (ReadLEB_u32 (& o_memory->initPages, io_bytes, i_end));
31
32 o_memory->maxPages = 0;
33 if (flag)
34 _ (ReadLEB_u32 (& o_memory->maxPages, io_bytes, i_end));
35
36 _catch: return result;
37 }
38
39
ParseSection_Type(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)40 M3Result ParseSection_Type (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
41 {
42 M3Result result = m3Err_none;
43 IM3FuncType ftype = NULL;
44
45 _try {
46 u32 numTypes;
47 _ (ReadLEB_u32 (& numTypes, & i_bytes, i_end)); m3log (parse, "** Type [%d]", numTypes);
48
49 _throwif("too many types", numTypes > d_m3MaxSaneTypesCount);
50
51 if (numTypes)
52 {
53 // table of IM3FuncType (that point to the actual M3FuncType struct in the Environment)
54 io_module->funcTypes = m3_AllocArray (IM3FuncType, numTypes);
55 _throwifnull (io_module->funcTypes);
56 io_module->numFuncTypes = numTypes;
57
58 for (u32 i = 0; i < numTypes; ++i)
59 {
60 i8 form;
61 _ (ReadLEB_i7 (& form, & i_bytes, i_end));
62 _throwif (m3Err_wasmMalformed, form != -32); // for Wasm MVP
63
64 u32 numArgs;
65 _ (ReadLEB_u32 (& numArgs, & i_bytes, i_end));
66
67 _throwif (m3Err_tooManyArgsRets, numArgs > d_m3MaxSaneFunctionArgRetCount);
68 #if defined(M3_COMPILER_MSVC)
69 u8 argTypes [d_m3MaxSaneFunctionArgRetCount];
70 #else
71 u8 argTypes[numArgs+1]; // make ubsan happy
72 #endif
73 for (u32 a = 0; a < numArgs; ++a)
74 {
75 i8 wasmType;
76 u8 argType;
77 _ (ReadLEB_i7 (& wasmType, & i_bytes, i_end));
78 _ (NormalizeType (& argType, wasmType));
79
80 argTypes[a] = argType;
81 }
82
83 u32 numRets;
84 _ (ReadLEB_u32 (& numRets, & i_bytes, i_end));
85 _throwif (m3Err_tooManyArgsRets, (u64)(numRets) + numArgs > d_m3MaxSaneFunctionArgRetCount);
86
87 _ (AllocFuncType (& ftype, numRets + numArgs));
88 ftype->numArgs = numArgs;
89 ftype->numRets = numRets;
90
91 for (u32 r = 0; r < numRets; ++r)
92 {
93 i8 wasmType;
94 u8 retType;
95 _ (ReadLEB_i7 (& wasmType, & i_bytes, i_end));
96 _ (NormalizeType (& retType, wasmType));
97
98 ftype->types[r] = retType;
99 }
100 memcpy (ftype->types + numRets, argTypes, numArgs); m3log (parse, " type %2d: %s", i, SPrintFuncTypeSignature (ftype));
101
102 Environment_AddFuncType (io_module->environment, & ftype);
103 io_module->funcTypes [i] = ftype;
104 ftype = NULL; // ownership transfered to environment
105 }
106 }
107
108 } _catch:
109
110 if (result)
111 {
112 m3_Free (ftype);
113 // FIX: M3FuncTypes in the table are leaked
114 m3_Free (io_module->funcTypes);
115 io_module->numFuncTypes = 0;
116 }
117
118 return result;
119 }
120
121
ParseSection_Function(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)122 M3Result ParseSection_Function (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
123 {
124 M3Result result = m3Err_none;
125
126 u32 numFunctions;
127 _ (ReadLEB_u32 (& numFunctions, & i_bytes, i_end)); m3log (parse, "** Function [%d]", numFunctions);
128
129 _throwif("too many functions", numFunctions > d_m3MaxSaneFunctionsCount);
130
131 // TODO: prealloc functions
132
133 for (u32 i = 0; i < numFunctions; ++i)
134 {
135 u32 funcTypeIndex;
136 _ (ReadLEB_u32 (& funcTypeIndex, & i_bytes, i_end));
137
138 _ (Module_AddFunction (io_module, funcTypeIndex, NULL /* import info */));
139 }
140
141 _catch: return result;
142 }
143
144
ParseSection_Import(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)145 M3Result ParseSection_Import (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
146 {
147 M3Result result = m3Err_none;
148
149 M3ImportInfo import = { NULL, NULL }, clearImport = { NULL, NULL };
150
151 u32 numImports;
152 _ (ReadLEB_u32 (& numImports, & i_bytes, i_end)); m3log (parse, "** Import [%d]", numImports);
153
154 _throwif("too many imports", numImports > d_m3MaxSaneImportsCount);
155
156 for (u32 i = 0; i < numImports; ++i)
157 {
158 u8 importKind;
159
160 _ (Read_utf8 (& import.moduleUtf8, & i_bytes, i_end));
161 _ (Read_utf8 (& import.fieldUtf8, & i_bytes, i_end));
162 _ (Read_u8 (& importKind, & i_bytes, i_end)); m3log (parse, " kind: %d '%s.%s' ",
163 (u32) importKind, import.moduleUtf8, import.fieldUtf8);
164 switch (importKind)
165 {
166 case d_externalKind_function:
167 {
168 u32 typeIndex;
169 _ (ReadLEB_u32 (& typeIndex, & i_bytes, i_end))
170
171 _ (Module_AddFunction (io_module, typeIndex, & import))
172 import = clearImport;
173
174 io_module->numFuncImports++;
175 }
176 break;
177
178 case d_externalKind_table:
179 // result = ParseType_Table (& i_bytes, i_end);
180 break;
181
182 case d_externalKind_memory:
183 {
184 _ (ParseType_Memory (& io_module->memoryInfo, & i_bytes, i_end));
185 io_module->memoryImported = true;
186 }
187 break;
188
189 case d_externalKind_global:
190 {
191 i8 waType;
192 u8 type, isMutable;
193
194 _ (ReadLEB_i7 (& waType, & i_bytes, i_end));
195 _ (NormalizeType (& type, waType));
196 _ (ReadLEB_u7 (& isMutable, & i_bytes, i_end)); m3log (parse, " global: %s mutable=%d", c_waTypes [type], (u32) isMutable);
197
198 IM3Global global;
199 _ (Module_AddGlobal (io_module, & global, type, isMutable, true /* isImport */));
200 global->import = import;
201 import = clearImport;
202 }
203 break;
204
205 default:
206 _throw (m3Err_wasmMalformed);
207 }
208
209 FreeImportInfo (& import);
210 }
211
212 _catch:
213
214 FreeImportInfo (& import);
215
216 return result;
217 }
218
219
ParseSection_Export(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)220 M3Result ParseSection_Export (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
221 {
222 M3Result result = m3Err_none;
223 const char * utf8 = NULL;
224
225 u32 numExports;
226 _ (ReadLEB_u32 (& numExports, & i_bytes, i_end)); m3log (parse, "** Export [%d]", numExports);
227
228 _throwif("too many exports", numExports > d_m3MaxSaneExportsCount);
229
230 for (u32 i = 0; i < numExports; ++i)
231 {
232 u8 exportKind;
233 u32 index;
234
235 _ (Read_utf8 (& utf8, & i_bytes, i_end));
236 _ (Read_u8 (& exportKind, & i_bytes, i_end));
237 _ (ReadLEB_u32 (& index, & i_bytes, i_end)); m3log (parse, " index: %3d; kind: %d; export: '%s'; ", index, (u32) exportKind, utf8);
238
239 if (exportKind == d_externalKind_function)
240 {
241 _throwif(m3Err_wasmMalformed, index >= io_module->numFunctions);
242 IM3Function func = &(io_module->functions [index]);
243 if (func->numNames < d_m3MaxDuplicateFunctionImpl)
244 {
245 func->names[func->numNames++] = utf8;
246 utf8 = NULL; // ownership transferred to M3Function
247 }
248 }
249 else if (exportKind == d_externalKind_global)
250 {
251 _throwif(m3Err_wasmMalformed, index >= io_module->numGlobals);
252 IM3Global global = &(io_module->globals [index]);
253 m3_Free (global->name);
254 global->name = utf8;
255 utf8 = NULL; // ownership transferred to M3Global
256 }
257
258 m3_Free (utf8);
259 }
260
261 _catch:
262 m3_Free (utf8);
263 return result;
264 }
265
266
ParseSection_Start(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)267 M3Result ParseSection_Start (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
268 {
269 M3Result result = m3Err_none;
270
271 u32 startFuncIndex;
272 _ (ReadLEB_u32 (& startFuncIndex, & i_bytes, i_end)); m3log (parse, "** Start Function: %d", startFuncIndex);
273
274 if (startFuncIndex < io_module->numFunctions)
275 {
276 io_module->startFunction = startFuncIndex;
277 }
278 else result = "start function index out of bounds";
279
280 _catch: return result;
281 }
282
283
Parse_InitExpr(M3Module * io_module,bytes_t * io_bytes,cbytes_t i_end)284 M3Result Parse_InitExpr (M3Module * io_module, bytes_t * io_bytes, cbytes_t i_end)
285 {
286 M3Result result = m3Err_none;
287
288 // this doesn't generate code pages. just walks the wasm bytecode to find the end
289
290 #if defined(d_m3PreferStaticAlloc)
291 static M3Compilation compilation;
292 #else
293 M3Compilation compilation;
294 #endif
295 compilation = (M3Compilation){ .runtime = NULL, .module = io_module, .wasm = * io_bytes, .wasmEnd = i_end };
296
297 result = CompileBlockStatements (& compilation);
298
299 * io_bytes = compilation.wasm;
300
301 return result;
302 }
303
304
ParseSection_Element(IM3Module io_module,bytes_t i_bytes,cbytes_t i_end)305 M3Result ParseSection_Element (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
306 {
307 M3Result result = m3Err_none;
308
309 u32 numSegments;
310 result = ReadLEB_u32 (& numSegments, & i_bytes, i_end); m3log (parse, "** Element [%d]", numSegments);
311
312 _throwif ("error parsing Element section", result);
313
314 _throwif ("too many element segments", numSegments > d_m3MaxSaneElementSegments);
315
316 io_module->elementSection = i_bytes;
317 io_module->elementSectionEnd = i_end;
318 io_module->numElementSegments = numSegments;
319
320 _catch: return result;
321 }
322
323
ParseSection_Code(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)324 M3Result ParseSection_Code (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
325 {
326 M3Result result;
327
328 u32 numFunctions;
329 _ (ReadLEB_u32 (& numFunctions, & i_bytes, i_end)); m3log (parse, "** Code [%d]", numFunctions);
330
331 if (numFunctions != io_module->numFunctions - io_module->numFuncImports)
332 {
333 _throw ("mismatched function count in code section");
334 }
335
336 for (u32 f = 0; f < numFunctions; ++f)
337 {
338 const u8 * start = i_bytes;
339
340 u32 size;
341 _ (ReadLEB_u32 (& size, & i_bytes, i_end));
342
343 if (size)
344 {
345 const u8 * ptr = i_bytes;
346 i_bytes += size;
347
348 if (i_bytes <= i_end)
349 {
350 /*
351 u32 numLocalBlocks;
352 _ (ReadLEB_u32 (& numLocalBlocks, & ptr, i_end)); m3log (parse, " code size: %-4d", size);
353
354 u32 numLocals = 0;
355
356 for (u32 l = 0; l < numLocalBlocks; ++l)
357 {
358 u32 varCount;
359 i8 wasmType;
360 u8 normalType;
361
362 _ (ReadLEB_u32 (& varCount, & ptr, i_end));
363 _ (ReadLEB_i7 (& wasmType, & ptr, i_end));
364 _ (NormalizeType (& normalType, wasmType));
365
366 numLocals += varCount; m3log (parse, " %2d locals; type: '%s'", varCount, c_waTypes [normalType]);
367 }
368 */
369
370 IM3Function func = Module_GetFunction (io_module, f + io_module->numFuncImports);
371
372 func->module = io_module;
373 func->wasm = start;
374 func->wasmEnd = i_bytes;
375 //func->ownsWasmCode = io_module->hasWasmCodeCopy;
376 // func->numLocals = numLocals;
377 }
378 else _throw (m3Err_wasmSectionOverrun);
379 }
380 }
381
382 _catch:
383
384 if (not result and i_bytes != i_end)
385 result = m3Err_wasmSectionUnderrun;
386
387 return result;
388 }
389
390
ParseSection_Data(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)391 M3Result ParseSection_Data (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
392 {
393 M3Result result = m3Err_none;
394
395 u32 numDataSegments;
396 _ (ReadLEB_u32 (& numDataSegments, & i_bytes, i_end)); m3log (parse, "** Data [%d]", numDataSegments);
397
398 _throwif("too many data segments", numDataSegments > d_m3MaxSaneDataSegments);
399
400 io_module->dataSegments = m3_AllocArray (M3DataSegment, numDataSegments);
401 _throwifnull(io_module->dataSegments);
402 io_module->numDataSegments = numDataSegments;
403
404 for (u32 i = 0; i < numDataSegments; ++i)
405 {
406 M3DataSegment * segment = & io_module->dataSegments [i];
407
408 _ (ReadLEB_u32 (& segment->memoryRegion, & i_bytes, i_end));
409
410 segment->initExpr = i_bytes;
411 _ (Parse_InitExpr (io_module, & i_bytes, i_end));
412 segment->initExprSize = (u32) (i_bytes - segment->initExpr);
413
414 _throwif (m3Err_wasmMissingInitExpr, segment->initExprSize <= 1);
415
416 _ (ReadLEB_u32 (& segment->size, & i_bytes, i_end));
417 segment->data = i_bytes; m3log (parse, " segment [%u] memory: %u; expr-size: %d; size: %d",
418 i, segment->memoryRegion, segment->initExprSize, segment->size);
419 i_bytes += segment->size;
420
421 _throwif("data segment underflow", i_bytes > i_end);
422 }
423
424 _catch:
425
426 return result;
427 }
428
429
ParseSection_Memory(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)430 M3Result ParseSection_Memory (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
431 {
432 M3Result result = m3Err_none;
433
434 // TODO: MVP; assert no memory imported
435
436 u32 numMemories;
437 _ (ReadLEB_u32 (& numMemories, & i_bytes, i_end)); m3log (parse, "** Memory [%d]", numMemories);
438
439 _throwif (m3Err_tooManyMemorySections, numMemories != 1);
440
441 ParseType_Memory (& io_module->memoryInfo, & i_bytes, i_end);
442
443 _catch: return result;
444 }
445
446
ParseSection_Global(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)447 M3Result ParseSection_Global (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
448 {
449 M3Result result = m3Err_none;
450
451 u32 numGlobals;
452 _ (ReadLEB_u32 (& numGlobals, & i_bytes, i_end)); m3log (parse, "** Global [%d]", numGlobals);
453
454 _throwif("too many globals", numGlobals > d_m3MaxSaneGlobalsCount);
455
456 for (u32 i = 0; i < numGlobals; ++i)
457 {
458 i8 waType;
459 u8 type, isMutable;
460
461 _ (ReadLEB_i7 (& waType, & i_bytes, i_end));
462 _ (NormalizeType (& type, waType));
463 _ (ReadLEB_u7 (& isMutable, & i_bytes, i_end)); m3log (parse, " global: [%d] %s mutable: %d", i, c_waTypes [type], (u32) isMutable);
464
465 IM3Global global;
466 _ (Module_AddGlobal (io_module, & global, type, isMutable, false /* isImport */));
467
468 global->initExpr = i_bytes;
469 _ (Parse_InitExpr (io_module, & i_bytes, i_end));
470 global->initExprSize = (u32) (i_bytes - global->initExpr);
471
472 _throwif (m3Err_wasmMissingInitExpr, global->initExprSize <= 1);
473 }
474
475 _catch: return result;
476 }
477
478
ParseSection_Custom(M3Module * io_module,bytes_t i_bytes,cbytes_t i_end)479 M3Result ParseSection_Custom (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
480 {
481 M3Result result;
482
483 cstr_t name;
484 _ (Read_utf8 (& name, & i_bytes, i_end));
485 m3log (parse, "** Custom: '%s'", name);
486 if (strcmp (name, "name") != 0)
487 i_bytes = i_end;
488
489 m3_Free (name);
490
491 while (i_bytes < i_end)
492 {
493 u8 nameType;
494 u32 payloadLength;
495
496 _ (ReadLEB_u7 (& nameType, & i_bytes, i_end));
497 _ (ReadLEB_u32 (& payloadLength, & i_bytes, i_end));
498
499 bytes_t start = i_bytes;
500 if (nameType == 1)
501 {
502 u32 numNames;
503 _ (ReadLEB_u32 (& numNames, & i_bytes, i_end));
504
505 _throwif("too many names", numNames > d_m3MaxSaneFunctionsCount);
506
507 for (u32 i = 0; i < numNames; ++i)
508 {
509 u32 index;
510 _ (ReadLEB_u32 (& index, & i_bytes, i_end));
511 _ (Read_utf8 (& name, & i_bytes, i_end));
512
513 if (index < io_module->numFunctions)
514 {
515 IM3Function func = &(io_module->functions [index]);
516 if (func->numNames == 0)
517 {
518 func->names[0] = name; m3log (parse, " naming function%5d: %s", index, name);
519 func->numNames = 1;
520 name = NULL; // transfer ownership
521 }
522 // else m3log (parse, "prenamed: %s", io_module->functions [index].name);
523 }
524
525 m3_Free (name);
526 }
527 }
528
529 i_bytes = start + payloadLength;
530 }
531
532 _catch: return result;
533 }
534
535
ParseModuleSection(M3Module * o_module,u8 i_sectionType,bytes_t i_bytes,u32 i_numBytes)536 M3Result ParseModuleSection (M3Module * o_module, u8 i_sectionType, bytes_t i_bytes, u32 i_numBytes)
537 {
538 M3Result result = m3Err_none;
539
540 typedef M3Result (* M3Parser) (M3Module *, bytes_t, cbytes_t);
541
542 static M3Parser s_parsers [] =
543 {
544 ParseSection_Custom, // 0
545 ParseSection_Type, // 1
546 ParseSection_Import, // 2
547 ParseSection_Function, // 3
548 NULL, // 4: TODO Table
549 ParseSection_Memory, // 5
550 ParseSection_Global, // 6
551 ParseSection_Export, // 7
552 ParseSection_Start, // 8
553 ParseSection_Element, // 9
554 ParseSection_Code, // 10
555 ParseSection_Data, // 11
556 NULL, // 12: TODO DataCount
557 };
558
559 M3Parser parser = NULL;
560
561 if (i_sectionType <= 12)
562 parser = s_parsers [i_sectionType];
563
564 if (parser)
565 {
566 cbytes_t end = i_bytes + i_numBytes;
567 result = parser (o_module, i_bytes, end);
568 }
569 else
570 {
571 m3log (parse, " skipped section type: %d", (u32) i_sectionType);
572 }
573
574 return result;
575 }
576
577
m3_ParseModule(IM3Environment i_environment,IM3Module * o_module,cbytes_t i_bytes,u32 i_numBytes)578 M3Result m3_ParseModule (IM3Environment i_environment, IM3Module * o_module, cbytes_t i_bytes, u32 i_numBytes)
579 {
580 M3Result result; m3log (parse, "load module: %d bytes", i_numBytes);
581
582 IM3Module module;
583 _try {
584 module = m3_AllocStruct (M3Module);
585 _throwifnull (module);
586 module->name = ".unnamed"; m3log (parse, "load module: %d bytes", i_numBytes);
587 module->startFunction = -1;
588 //module->hasWasmCodeCopy = false;
589 module->environment = i_environment;
590
591 const u8 * pos = i_bytes;
592 const u8 * end = pos + i_numBytes;
593
594 module->wasmStart = pos;
595 module->wasmEnd = end;
596
597 u32 magic, version;
598 _ (Read_u32 (& magic, & pos, end));
599 _ (Read_u32 (& version, & pos, end));
600
601 _throwif (m3Err_wasmMalformed, magic != 0x6d736100);
602 _throwif (m3Err_incompatibleWasmVersion, version != 1);
603
604 static const u8 sectionsOrder[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 10, 11, 0 }; // 0 is a placeholder
605 u8 expectedSection = 0;
606
607 while (pos < end)
608 {
609 u8 section;
610 _ (ReadLEB_u7 (& section, & pos, end));
611
612 if (section != 0) {
613 // Ensure sections appear only once and in order
614 while (sectionsOrder[expectedSection++] != section) {
615 _throwif(m3Err_misorderedWasmSection, expectedSection >= 12);
616 }
617 }
618
619 u32 sectionLength;
620 _ (ReadLEB_u32 (& sectionLength, & pos, end));
621 _throwif(m3Err_wasmMalformed, pos + sectionLength > end);
622
623 _ (ParseModuleSection (module, section, pos, sectionLength));
624
625 pos += sectionLength;
626 }
627
628 } _catch:
629
630 if (result)
631 {
632 m3_FreeModule (module);
633 module = NULL;
634 }
635
636 * o_module = module;
637
638 return result;
639 }
640