1 /*
2 * Copyright 2017 WebAssembly Community Group participants
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //
18 // Tries to reduce the input wasm into the smallest possible wasm
19 // that still generates the same result on a given command. This is
20 // useful to reduce bug testcases, for example, if a file crashes
21 // the optimizer, you can reduce it to find the smallest file that
22 // also crashes it (which generally will show the same bug, in a
23 // much more debuggable manner).
24 //
25
26 #include <cstdio>
27 #include <cstdlib>
28 #include <memory>
29
30 #include "ir/branch-utils.h"
31 #include "ir/iteration.h"
32 #include "ir/literal-utils.h"
33 #include "ir/properties.h"
34 #include "pass.h"
35 #include "support/colors.h"
36 #include "support/command-line.h"
37 #include "support/file.h"
38 #include "support/path.h"
39 #include "support/timing.h"
40 #include "wasm-builder.h"
41 #include "wasm-io.h"
42 #include "wasm-validator.h"
43 #ifdef _WIN32
44 #ifndef NOMINMAX
45 #define NOMINMAX
46 #endif
47 #include <windows.h>
48 // Create a string with last error message
GetLastErrorStdStr()49 std::string GetLastErrorStdStr() {
50 DWORD error = GetLastError();
51 if (error) {
52 LPVOID lpMsgBuf;
53 DWORD bufLen = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
54 FORMAT_MESSAGE_FROM_SYSTEM |
55 FORMAT_MESSAGE_IGNORE_INSERTS,
56 NULL,
57 error,
58 MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
59 (LPTSTR)&lpMsgBuf,
60 0,
61 NULL);
62 if (bufLen) {
63 LPCSTR lpMsgStr = (LPCSTR)lpMsgBuf;
64 std::string result(lpMsgStr, lpMsgStr + bufLen);
65 LocalFree(lpMsgBuf);
66 return result;
67 }
68 }
69 return std::string();
70 }
71 #endif
72 using namespace wasm;
73
74 // a timeout on every execution of the command
75 size_t timeout = 2;
76
77 struct ProgramResult {
78 int code;
79 std::string output;
80 double time;
81
82 ProgramResult() = default;
ProgramResultProgramResult83 ProgramResult(std::string command) { getFromExecution(command); }
84
85 #ifdef _WIN32
getFromExecutionProgramResult86 void getFromExecution(std::string command) {
87 Timer timer;
88 timer.start();
89 SECURITY_ATTRIBUTES saAttr;
90 saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
91 saAttr.bInheritHandle = TRUE;
92 saAttr.lpSecurityDescriptor = NULL;
93
94 HANDLE hChildStd_OUT_Rd;
95 HANDLE hChildStd_OUT_Wr;
96
97 if (
98 // Create a pipe for the child process's STDOUT.
99 !CreatePipe(&hChildStd_OUT_Rd, &hChildStd_OUT_Wr, &saAttr, 0) ||
100 // Ensure the read handle to the pipe for STDOUT is not inherited.
101 !SetHandleInformation(hChildStd_OUT_Rd, HANDLE_FLAG_INHERIT, 0)) {
102 Fatal() << "CreatePipe \"" << command
103 << "\" failed: " << GetLastErrorStdStr() << ".\n";
104 }
105
106 STARTUPINFO si;
107 PROCESS_INFORMATION pi;
108
109 ZeroMemory(&si, sizeof(si));
110 si.cb = sizeof(si);
111 si.hStdError = hChildStd_OUT_Wr;
112 si.hStdOutput = hChildStd_OUT_Wr;
113 si.dwFlags |= STARTF_USESTDHANDLES;
114 ZeroMemory(&pi, sizeof(pi));
115
116 // Start the child process.
117 if (!CreateProcess(NULL, // No module name (use command line)
118 (LPSTR)command.c_str(), // Command line
119 NULL, // Process handle not inheritable
120 NULL, // Thread handle not inheritable
121 TRUE, // Set handle inheritance to TRUE
122 0, // No creation flags
123 NULL, // Use parent's environment block
124 NULL, // Use parent's starting directory
125 &si, // Pointer to STARTUPINFO structure
126 &pi) // Pointer to PROCESS_INFORMATION structure
127 ) {
128 Fatal() << "CreateProcess \"" << command
129 << "\" failed: " << GetLastErrorStdStr() << ".\n";
130 }
131
132 // Wait until child process exits.
133 DWORD retVal = WaitForSingleObject(pi.hProcess, timeout * 1000);
134 if (retVal == WAIT_TIMEOUT) {
135 printf("Command timeout: %s", command.c_str());
136 TerminateProcess(pi.hProcess, -1);
137 }
138 DWORD dwordExitCode;
139 if (!GetExitCodeProcess(pi.hProcess, &dwordExitCode)) {
140 Fatal() << "GetExitCodeProcess failed: " << GetLastErrorStdStr() << ".\n";
141 }
142 code = (int)dwordExitCode;
143
144 // Close process and thread handles.
145 CloseHandle(pi.hProcess);
146 CloseHandle(pi.hThread);
147
148 // Read output from the child process's pipe for STDOUT
149 // Stop when there is no more data.
150 {
151 const int BUFSIZE = 4096;
152 DWORD dwRead, dwTotal, dwTotalRead = 0;
153 CHAR chBuf[BUFSIZE];
154 BOOL bSuccess = FALSE;
155
156 PeekNamedPipe(hChildStd_OUT_Rd, NULL, 0, NULL, &dwTotal, NULL);
157 while (dwTotalRead < dwTotal) {
158 bSuccess =
159 ReadFile(hChildStd_OUT_Rd, chBuf, BUFSIZE - 1, &dwRead, NULL);
160 if (!bSuccess || dwRead == 0)
161 break;
162 chBuf[dwRead] = 0;
163 dwTotalRead += dwRead;
164 output.append(chBuf);
165 }
166 }
167 timer.stop();
168 time = timer.getTotal();
169 }
170 #else // POSIX
171 // runs the command and notes the output
172 // TODO: also stderr, not just stdout?
getFromExecutionProgramResult173 void getFromExecution(std::string command) {
174 Timer timer;
175 timer.start();
176 // do this using just core stdio.h and stdlib.h, for portability
177 // sadly this requires two invokes
178 code = system(("timeout " + std::to_string(timeout) + "s " + command +
179 " > /dev/null 2> /dev/null")
180 .c_str());
181 const int MAX_BUFFER = 1024;
182 char buffer[MAX_BUFFER];
183 FILE* stream = popen(
184 ("timeout " + std::to_string(timeout) + "s " + command + " 2> /dev/null")
185 .c_str(),
186 "r");
187 while (fgets(buffer, MAX_BUFFER, stream) != NULL) {
188 output.append(buffer);
189 }
190 pclose(stream);
191 timer.stop();
192 time = timer.getTotal() / 2;
193 }
194 #endif // _WIN32
195
operator ==ProgramResult196 bool operator==(ProgramResult& other) {
197 return code == other.code && output == other.output;
198 }
operator !=ProgramResult199 bool operator!=(ProgramResult& other) { return !(*this == other); }
200
failedProgramResult201 bool failed() { return code != 0; }
202
dumpProgramResult203 void dump(std::ostream& o) {
204 o << "[ProgramResult] code: " << code << " stdout: \n"
205 << output << "[====]\nin " << time << " seconds\n[/ProgramResult]\n";
206 }
207 };
208
209 namespace std {
210
operator <<(std::ostream & o,ProgramResult & result)211 inline std::ostream& operator<<(std::ostream& o, ProgramResult& result) {
212 result.dump(o);
213 return o;
214 }
215
216 } // namespace std
217
218 ProgramResult expected;
219
220 // Removing functions is extremely beneficial and efficient. We aggressively
221 // try to remove functions, unless we've seen they can't be removed, in which
222 // case we may try again but much later.
223 static std::unordered_set<Name> functionsWeTriedToRemove;
224
225 struct Reducer
226 : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor<Reducer>>> {
227 std::string command, test, working;
228 bool binary, deNan, verbose, debugInfo;
229
230 // test is the file we write to that the command will operate on
231 // working is the current temporary state, the reduction so far
ReducerReducer232 Reducer(std::string command,
233 std::string test,
234 std::string working,
235 bool binary,
236 bool deNan,
237 bool verbose,
238 bool debugInfo)
239 : command(command), test(test), working(working), binary(binary),
240 deNan(deNan), verbose(verbose), debugInfo(debugInfo) {}
241
242 // runs passes in order to reduce, until we can't reduce any more
243 // the criterion here is wasm binary size
reduceUsingPassesReducer244 void reduceUsingPasses() {
245 // run optimization passes until we can't shrink it any more
246 std::vector<std::string> passes = {
247 "-Oz",
248 "-Os",
249 "-O1",
250 "-O2",
251 "-O3",
252 "-O4",
253 "--flatten -Os",
254 "--flatten -O3",
255 "--flatten --local-cse -Os",
256 "--coalesce-locals --vacuum",
257 "--dce",
258 "--duplicate-function-elimination",
259 "--inlining",
260 "--inlining-optimizing",
261 "--optimize-level=3 --inlining-optimizing",
262 "--memory-packing",
263 "--remove-unused-names --merge-blocks --vacuum",
264 "--optimize-instructions",
265 "--precompute",
266 "--remove-imports",
267 "--remove-memory",
268 "--remove-unused-names --remove-unused-brs",
269 "--remove-unused-module-elements",
270 "--remove-unused-nonfunction-module-elements",
271 "--reorder-functions",
272 "--reorder-locals",
273 "--simplify-locals --vacuum",
274 "--strip",
275 "--vacuum"};
276 auto oldSize = file_size(working);
277 bool more = true;
278 while (more) {
279 // std::cerr << "| starting passes loop iteration\n";
280 more = false;
281 // try both combining with a generic shrink (so minor pass overhead is
282 // compensated for), and without
283 for (auto pass : passes) {
284 std::string currCommand = Path::getBinaryenBinaryTool("wasm-opt") + " ";
285 currCommand += working + " --detect-features -o " + test + " " + pass;
286 if (debugInfo) {
287 currCommand += " -g ";
288 }
289 if (!binary) {
290 currCommand += " -S ";
291 }
292 if (verbose) {
293 std::cerr << "| trying pass command: " << currCommand << "\n";
294 }
295 if (!ProgramResult(currCommand).failed()) {
296 auto newSize = file_size(test);
297 if (newSize < oldSize) {
298 // the pass didn't fail, and the size looks smaller, so promising
299 // see if it is still has the property we are preserving
300 if (ProgramResult(command) == expected) {
301 std::cerr << "| command \"" << currCommand
302 << "\" succeeded, reduced size to " << newSize
303 << ", and preserved the property\n";
304 copy_file(test, working);
305 more = true;
306 oldSize = newSize;
307 }
308 }
309 }
310 }
311 }
312 if (verbose) {
313 std::cerr << "| done with passes for now\n";
314 }
315 }
316
317 // does one pass of slow and destructive reduction. returns whether it
318 // succeeded or not
319 // the criterion here is a logical change in the program. this may actually
320 // increase wasm size in some cases, but it should allow more reduction later.
321 // @param factor how much to ignore. starting with a high factor skips through
322 // most of the file, which is often faster than going one by one
323 // from the start
reduceDestructivelyReducer324 size_t reduceDestructively(int factor_) {
325 factor = factor_;
326 // prepare
327 loadWorking();
328 reduced = 0;
329 funcsSeen = 0;
330 // before we do any changes, it should be valid to write out the module:
331 // size should be as expected, and output should be as expected
332 ProgramResult result;
333 if (!writeAndTestReduction(result)) {
334 std::cerr << "\n|! WARNING: writing before destructive reduction fails, "
335 "very unlikely reduction can work\n"
336 << result << '\n';
337 }
338 // destroy!
339 walkModule(getModule());
340 return reduced;
341 }
342
loadWorkingReducer343 void loadWorking() {
344 module = make_unique<Module>();
345 Module wasm;
346 ModuleReader reader;
347 reader.read(working, *module);
348 wasm.features = FeatureSet::All;
349 builder = make_unique<Builder>(*module);
350 setModule(module.get());
351 }
352
353 // destructive reduction state
354
355 size_t reduced;
356 Expression* beforeReduction;
357 std::unique_ptr<Module> module;
358 std::unique_ptr<Builder> builder;
359 Index funcsSeen;
360 int factor;
361
362 // write the module and see if the command still fails on it as expected
writeAndTestReductionReducer363 bool writeAndTestReduction() {
364 ProgramResult result;
365 return writeAndTestReduction(result);
366 }
367
writeAndTestReductionReducer368 bool writeAndTestReduction(ProgramResult& out) {
369 // write the module out
370 ModuleWriter writer;
371 writer.setBinary(binary);
372 writer.setDebugInfo(debugInfo);
373 writer.write(*getModule(), test);
374 // note that it is ok for the destructively-reduced module to be bigger
375 // than the previous - each destructive reduction removes logical code,
376 // and so is strictly better, even if the wasm binary format happens to
377 // encode things slightly less efficiently.
378 // test it
379 out.getFromExecution(command);
380 return out == expected;
381 }
382
shouldTryToReduceReducer383 bool shouldTryToReduce(size_t bonus = 1) {
384 static size_t counter = 0;
385 counter += bonus;
386 return (counter % factor) <= bonus;
387 }
388
isOkReplacementReducer389 bool isOkReplacement(Expression* with) {
390 if (deNan) {
391 if (auto* c = with->dynCast<Const>()) {
392 if (c->value.isNaN()) {
393 return false;
394 }
395 }
396 }
397 return true;
398 }
399
400 // tests a reduction on the current traversal node, and undos if it failed
tryToReplaceCurrentReducer401 bool tryToReplaceCurrent(Expression* with) {
402 if (!isOkReplacement(with)) {
403 return false;
404 }
405 auto* curr = getCurrent();
406 // std::cerr << "try " << curr << " => " << with << '\n';
407 if (curr->type != with->type) {
408 return false;
409 }
410 if (!shouldTryToReduce()) {
411 return false;
412 }
413 replaceCurrent(with);
414 if (!writeAndTestReduction()) {
415 replaceCurrent(curr);
416 return false;
417 }
418 std::cerr << "| tryToReplaceCurrent succeeded (in " << getLocation()
419 << ")\n";
420 noteReduction();
421 return true;
422 }
423
noteReductionReducer424 void noteReduction(size_t amount = 1) {
425 reduced += amount;
426 copy_file(test, working);
427 }
428
429 // tests a reduction on an arbitrary child
tryToReplaceChildReducer430 bool tryToReplaceChild(Expression*& child, Expression* with) {
431 if (!isOkReplacement(with)) {
432 return false;
433 }
434 if (child->type != with->type) {
435 return false;
436 }
437 if (!shouldTryToReduce()) {
438 return false;
439 }
440 auto* before = child;
441 child = with;
442 if (!writeAndTestReduction()) {
443 child = before;
444 return false;
445 }
446 std::cerr << "| tryToReplaceChild succeeded (in " << getLocation()
447 << ")\n";
448 // std::cerr << "| " << before << " => " << with << '\n';
449 noteReduction();
450 return true;
451 }
452
getLocationReducer453 std::string getLocation() {
454 if (getFunction()) {
455 return getFunction()->name.str;
456 }
457 return "(non-function context)";
458 }
459
460 // visitors. in each we try to remove code in a destructive and nontrivial
461 // way. "nontrivial" means something that optimization passes can't achieve,
462 // since we don't need to duplicate work that they do
463
visitExpressionReducer464 void visitExpression(Expression* curr) {
465 // type-based reductions
466 if (curr->type == Type::none) {
467 if (tryToReduceCurrentToNop()) {
468 return;
469 }
470 } else if (curr->type.isConcrete()) {
471 if (tryToReduceCurrentToConst()) {
472 return;
473 }
474 } else {
475 assert(curr->type == Type::unreachable);
476 if (tryToReduceCurrentToUnreachable()) {
477 return;
478 }
479 }
480 // specific reductions
481 if (auto* iff = curr->dynCast<If>()) {
482 if (iff->type == Type::none) {
483 // perhaps we need just the condition?
484 if (tryToReplaceCurrent(builder->makeDrop(iff->condition))) {
485 return;
486 }
487 }
488 handleCondition(iff->condition);
489 } else if (auto* br = curr->dynCast<Break>()) {
490 handleCondition(br->condition);
491 } else if (auto* select = curr->dynCast<Select>()) {
492 handleCondition(select->condition);
493 } else if (auto* sw = curr->dynCast<Switch>()) {
494 handleCondition(sw->condition);
495 // Try to replace switch targets with the default
496 for (auto& target : sw->targets) {
497 if (target != sw->default_) {
498 auto old = target;
499 target = sw->default_;
500 if (!tryToReplaceCurrent(curr)) {
501 target = old;
502 }
503 }
504 }
505 // Try to shorten the list of targets.
506 while (sw->targets.size() > 1) {
507 auto last = sw->targets.back();
508 sw->targets.pop_back();
509 if (!tryToReplaceCurrent(curr)) {
510 sw->targets.push_back(last);
511 break;
512 }
513 }
514 } else if (auto* block = curr->dynCast<Block>()) {
515 if (!shouldTryToReduce()) {
516 return;
517 }
518 // replace a singleton
519 auto& list = block->list;
520 if (list.size() == 1 &&
521 !BranchUtils::BranchSeeker::has(block, block->name)) {
522 if (tryToReplaceCurrent(block->list[0])) {
523 return;
524 }
525 }
526 // try to get rid of nops
527 Index i = 0;
528 while (list.size() > 1 && i < list.size()) {
529 auto* curr = list[i];
530 if (curr->is<Nop>() && shouldTryToReduce()) {
531 // try to remove it
532 for (Index j = i; j < list.size() - 1; j++) {
533 list[j] = list[j + 1];
534 }
535 list.pop_back();
536 if (writeAndTestReduction()) {
537 std::cerr << "| block-nop removed\n";
538 noteReduction();
539 return;
540 }
541 list.push_back(nullptr);
542 // we failed; undo
543 for (Index j = list.size() - 1; j > i; j--) {
544 list[j] = list[j - 1];
545 }
546 list[i] = curr;
547 }
548 i++;
549 }
550 return; // nothing more to do
551 } else if (auto* loop = curr->dynCast<Loop>()) {
552 if (shouldTryToReduce() &&
553 !BranchUtils::BranchSeeker::has(loop, loop->name)) {
554 tryToReplaceCurrent(loop->body);
555 }
556 return; // nothing more to do
557 }
558 // Finally, try to replace with a child.
559 for (auto* child : ChildIterator(curr)) {
560 if (child->type.isConcrete() && curr->type == Type::none) {
561 if (tryToReplaceCurrent(builder->makeDrop(child))) {
562 return;
563 }
564 } else {
565 if (tryToReplaceCurrent(child)) {
566 return;
567 }
568 }
569 }
570 // If that didn't work, try to replace with a child + a unary conversion,
571 // but not if it's already unary
572 if (curr->type.isSingle() && !curr->is<Unary>()) {
573 for (auto* child : ChildIterator(curr)) {
574 if (child->type == curr->type) {
575 continue; // already tried
576 }
577 if (!child->type.isSingle()) {
578 continue; // no conversion
579 }
580 Expression* fixed = nullptr;
581 TODO_SINGLE_COMPOUND(curr->type);
582 switch (curr->type.getBasic()) {
583 case Type::i32: {
584 TODO_SINGLE_COMPOUND(child->type);
585 switch (child->type.getBasic()) {
586 case Type::i32:
587 WASM_UNREACHABLE("invalid type");
588 case Type::i64:
589 fixed = builder->makeUnary(WrapInt64, child);
590 break;
591 case Type::f32:
592 fixed = builder->makeUnary(TruncSFloat32ToInt32, child);
593 break;
594 case Type::f64:
595 fixed = builder->makeUnary(TruncSFloat64ToInt32, child);
596 break;
597 case Type::v128:
598 case Type::funcref:
599 case Type::externref:
600 case Type::exnref:
601 case Type::anyref:
602 case Type::eqref:
603 case Type::i31ref:
604 continue; // not implemented yet
605 case Type::none:
606 case Type::unreachable:
607 WASM_UNREACHABLE("unexpected type");
608 }
609 break;
610 }
611 case Type::i64: {
612 TODO_SINGLE_COMPOUND(child->type);
613 switch (child->type.getBasic()) {
614 case Type::i32:
615 fixed = builder->makeUnary(ExtendSInt32, child);
616 break;
617 case Type::i64:
618 WASM_UNREACHABLE("invalid type");
619 case Type::f32:
620 fixed = builder->makeUnary(TruncSFloat32ToInt64, child);
621 break;
622 case Type::f64:
623 fixed = builder->makeUnary(TruncSFloat64ToInt64, child);
624 break;
625 case Type::v128:
626 case Type::funcref:
627 case Type::externref:
628 case Type::exnref:
629 case Type::anyref:
630 case Type::eqref:
631 case Type::i31ref:
632 continue; // not implemented yet
633 case Type::none:
634 case Type::unreachable:
635 WASM_UNREACHABLE("unexpected type");
636 }
637 break;
638 }
639 case Type::f32: {
640 TODO_SINGLE_COMPOUND(child->type);
641 switch (child->type.getBasic()) {
642 case Type::i32:
643 fixed = builder->makeUnary(ConvertSInt32ToFloat32, child);
644 break;
645 case Type::i64:
646 fixed = builder->makeUnary(ConvertSInt64ToFloat32, child);
647 break;
648 case Type::f32:
649 WASM_UNREACHABLE("unexpected type");
650 case Type::f64:
651 fixed = builder->makeUnary(DemoteFloat64, child);
652 break;
653 case Type::v128:
654 case Type::funcref:
655 case Type::externref:
656 case Type::exnref:
657 case Type::anyref:
658 case Type::eqref:
659 case Type::i31ref:
660 continue; // not implemented yet
661 case Type::none:
662 case Type::unreachable:
663 WASM_UNREACHABLE("unexpected type");
664 }
665 break;
666 }
667 case Type::f64: {
668 TODO_SINGLE_COMPOUND(child->type);
669 switch (child->type.getBasic()) {
670 case Type::i32:
671 fixed = builder->makeUnary(ConvertSInt32ToFloat64, child);
672 break;
673 case Type::i64:
674 fixed = builder->makeUnary(ConvertSInt64ToFloat64, child);
675 break;
676 case Type::f32:
677 fixed = builder->makeUnary(PromoteFloat32, child);
678 break;
679 case Type::f64:
680 WASM_UNREACHABLE("unexpected type");
681 case Type::v128:
682 case Type::funcref:
683 case Type::externref:
684 case Type::exnref:
685 case Type::anyref:
686 case Type::eqref:
687 case Type::i31ref:
688 continue; // not implemented yet
689 case Type::none:
690 case Type::unreachable:
691 WASM_UNREACHABLE("unexpected type");
692 }
693 break;
694 }
695 case Type::v128:
696 case Type::funcref:
697 case Type::externref:
698 case Type::exnref:
699 case Type::anyref:
700 case Type::eqref:
701 case Type::i31ref:
702 continue; // not implemented yet
703 case Type::none:
704 case Type::unreachable:
705 WASM_UNREACHABLE("unexpected type");
706 }
707 assert(fixed->type == curr->type);
708 if (tryToReplaceCurrent(fixed)) {
709 return;
710 }
711 }
712 }
713 }
714
visitFunctionReducer715 void visitFunction(Function* curr) {
716 if (!curr->imported()) {
717 // extra chance to work on the function toplevel element, as if it can
718 // be reduced it's great
719 visitExpression(curr->body);
720 }
721 // finish function
722 funcsSeen++;
723 static int last = 0;
724 int percentage = (100 * funcsSeen) / getModule()->functions.size();
725 if (std::abs(percentage - last) >= 5) {
726 std::cerr << "| " << percentage << "% of funcs complete\n";
727 last = percentage;
728 }
729 }
730
731 // TODO: bisection on segment shrinking?
732
visitTableReducer733 void visitTable(Table* curr) {
734 std::cerr << "| try to simplify table\n";
735 Name first;
736 for (auto& segment : curr->segments) {
737 for (auto item : segment.data) {
738 first = item;
739 break;
740 }
741 if (!first.isNull()) {
742 break;
743 }
744 }
745 visitSegmented(curr, first, 100);
746 }
747
visitMemoryReducer748 void visitMemory(Memory* curr) {
749 std::cerr << "| try to simplify memory\n";
750 visitSegmented(curr, 0, 2);
751 }
752
753 template<typename T, typename U>
visitSegmentedReducer754 void visitSegmented(T* curr, U zero, size_t bonus) {
755 // try to reduce to first function. first, shrink segment elements.
756 // while we are shrinking successfully, keep going exponentially.
757 bool justShrank = false;
758 bool shrank = false;
759 for (auto& segment : curr->segments) {
760 auto& data = segment.data;
761 // when we succeed, try to shrink by more and more, similar to bisection
762 size_t skip = 1;
763 for (size_t i = 0; i < data.size() && !data.empty(); i++) {
764 if (!justShrank && !shouldTryToReduce(bonus)) {
765 continue;
766 }
767 auto save = data;
768 for (size_t j = 0; j < skip; j++) {
769 if (!data.empty()) {
770 data.pop_back();
771 }
772 }
773 auto justShrank = writeAndTestReduction();
774 if (justShrank) {
775 std::cerr << "| shrank segment (skip: " << skip << ")\n";
776 shrank = true;
777 noteReduction();
778 skip = std::min(size_t(factor), 2 * skip);
779 } else {
780 data = save;
781 break;
782 }
783 }
784 }
785 // the "opposite" of shrinking: copy a 'zero' element
786 for (auto& segment : curr->segments) {
787 if (segment.data.empty()) {
788 continue;
789 }
790 for (auto& item : segment.data) {
791 if (!shouldTryToReduce(bonus)) {
792 continue;
793 }
794 if (item == zero) {
795 continue;
796 }
797 auto save = item;
798 item = zero;
799 if (writeAndTestReduction()) {
800 std::cerr << "| zeroed segment\n";
801 noteReduction();
802 } else {
803 item = save;
804 }
805 if (shrank) {
806 // zeroing is fairly inefficient. if we are managing to shrink
807 // (which we do exponentially), just zero one per segment at most
808 break;
809 }
810 }
811 }
812 }
813
visitModuleReducer814 void visitModule(Module* curr) {
815 assert(curr == module.get());
816 // try to remove functions
817 std::cerr << "| try to remove functions\n";
818 std::vector<Name> functionNames;
819 for (auto& func : module->functions) {
820 functionNames.push_back(func->name);
821 }
822 size_t skip = 1;
823 // If we just removed some functions in the previous iteration, keep trying
824 // to remove more as this is one of the most efficient ways to reduce.
825 bool justRemoved = false;
826 for (size_t i = 0; i < functionNames.size(); i++) {
827 if (!justRemoved &&
828 functionsWeTriedToRemove.count(functionNames[i]) == 1 &&
829 !shouldTryToReduce(std::max((factor / 100) + 1, 1000))) {
830 continue;
831 }
832 std::vector<Name> names;
833 for (size_t j = 0; names.size() < skip && i + j < functionNames.size();
834 j++) {
835 auto name = functionNames[i + j];
836 if (module->getFunctionOrNull(name)) {
837 names.push_back(name);
838 functionsWeTriedToRemove.insert(name);
839 }
840 }
841 if (names.size() == 0) {
842 continue;
843 }
844 std::cout << "| try to remove " << names.size()
845 << " functions (skip: " << skip << ")\n";
846 justRemoved = tryToRemoveFunctions(names);
847 if (justRemoved) {
848 noteReduction(names.size());
849 i += skip;
850 skip = std::min(size_t(factor), 2 * skip);
851 } else {
852 skip = std::max(skip / 2, size_t(1)); // or 1?
853 }
854 }
855 // try to remove exports
856 std::cerr << "| try to remove exports (with factor " << factor << ")\n";
857 std::vector<Export> exports;
858 for (auto& exp : module->exports) {
859 exports.push_back(*exp);
860 }
861 skip = 1;
862 for (size_t i = 0; i < exports.size(); i++) {
863 if (!shouldTryToReduce(std::max((factor / 100) + 1, 1000))) {
864 continue;
865 }
866 std::vector<Export> currExports;
867 for (size_t j = 0; currExports.size() < skip && i + j < exports.size();
868 j++) {
869 auto exp = exports[i + j];
870 if (module->getExportOrNull(exp.name)) {
871 currExports.push_back(exp);
872 module->removeExport(exp.name);
873 }
874 }
875 ProgramResult result;
876 if (!writeAndTestReduction(result)) {
877 for (auto exp : currExports) {
878 module->addExport(new Export(exp));
879 }
880 skip = std::max(skip / 2, size_t(1)); // or 1?
881 } else {
882 std::cerr << "| removed " << currExports.size() << " exports\n";
883 noteReduction(currExports.size());
884 i += skip;
885 skip = std::min(size_t(factor), 2 * skip);
886 }
887 }
888 // If we are left with a single function that is not exported or used in
889 // a table, that is useful as then we can change the return type.
890 if (module->functions.size() == 1 && module->exports.empty() &&
891 module->table.segments.empty()) {
892 auto* func = module->functions[0].get();
893 // We can't remove something that might have breaks to it.
894 if (!func->imported() && !Properties::isNamedControlFlow(func->body)) {
895 auto funcSig = func->sig;
896 auto* funcBody = func->body;
897 for (auto* child : ChildIterator(func->body)) {
898 if (!(child->type.isConcrete() || child->type == Type::none)) {
899 continue; // not something a function can return
900 }
901 // Try to replace the body with the child, fixing up the function
902 // to accept it.
903 func->sig.results = child->type;
904 func->body = child;
905 if (writeAndTestReduction()) {
906 // great, we succeeded!
907 std::cerr << "| altered function result type\n";
908 noteReduction(1);
909 break;
910 }
911 // Undo.
912 func->sig = funcSig;
913 func->body = funcBody;
914 }
915 }
916 }
917 }
918
tryToRemoveFunctionsReducer919 bool tryToRemoveFunctions(std::vector<Name> names) {
920 for (auto name : names) {
921 module->removeFunction(name);
922 }
923
924 // remove all references to them
925 struct FunctionReferenceRemover
926 : public PostWalker<FunctionReferenceRemover> {
927 std::unordered_set<Name> names;
928 std::vector<Name> exportsToRemove;
929
930 FunctionReferenceRemover(std::vector<Name>& vec) {
931 for (auto name : vec) {
932 names.insert(name);
933 }
934 }
935 void visitCall(Call* curr) {
936 if (names.count(curr->target)) {
937 replaceCurrent(Builder(*getModule()).replaceWithIdenticalType(curr));
938 }
939 }
940 void visitExport(Export* curr) {
941 if (names.count(curr->value)) {
942 exportsToRemove.push_back(curr->name);
943 }
944 }
945 void visitTable(Table* curr) {
946 Name other;
947 for (auto& segment : curr->segments) {
948 for (auto name : segment.data) {
949 if (!names.count(name)) {
950 other = name;
951 break;
952 }
953 }
954 if (!other.isNull()) {
955 break;
956 }
957 }
958 if (other.isNull()) {
959 return; // we failed to find a replacement
960 }
961 for (auto& segment : curr->segments) {
962 for (auto& name : segment.data) {
963 if (names.count(name)) {
964 name = other;
965 }
966 }
967 }
968 }
969 void doWalkModule(Module* module) {
970 PostWalker<FunctionReferenceRemover>::doWalkModule(module);
971 for (auto name : exportsToRemove) {
972 module->removeExport(name);
973 }
974 }
975 };
976 FunctionReferenceRemover referenceRemover(names);
977 referenceRemover.walkModule(module.get());
978
979 if (WasmValidator().validate(
980 *module, WasmValidator::Globally | WasmValidator::Quiet) &&
981 writeAndTestReduction()) {
982 std::cerr << "| removed " << names.size() << " functions\n";
983 return true;
984 } else {
985 loadWorking(); // restore it from orbit
986 return false;
987 }
988 }
989
990 // helpers
991
992 // try to replace condition with always true and always false
handleConditionReducer993 void handleCondition(Expression*& condition) {
994 if (!condition) {
995 return;
996 }
997 if (condition->is<Const>()) {
998 return;
999 }
1000 auto* c = builder->makeConst(int32_t(0));
1001 if (!tryToReplaceChild(condition, c)) {
1002 c->value = Literal(int32_t(1));
1003 tryToReplaceChild(condition, c);
1004 }
1005 }
1006
tryToReduceCurrentToNopReducer1007 bool tryToReduceCurrentToNop() {
1008 auto* curr = getCurrent();
1009 if (curr->is<Nop>()) {
1010 return false;
1011 }
1012 // try to replace with a trivial value
1013 Nop nop;
1014 if (tryToReplaceCurrent(&nop)) {
1015 replaceCurrent(builder->makeNop());
1016 return true;
1017 }
1018 return false;
1019 }
1020
1021 // try to replace a concrete value with a trivial constant
tryToReduceCurrentToConstReducer1022 bool tryToReduceCurrentToConst() {
1023 auto* curr = getCurrent();
1024 if (curr->is<Const>()) {
1025 return false;
1026 }
1027 // try to replace with a trivial value
1028 if (curr->type.isNullable()) {
1029 RefNull* n = builder->makeRefNull(curr->type);
1030 return tryToReplaceCurrent(n);
1031 }
1032 if (curr->type.isTuple()) {
1033 Expression* n =
1034 builder->makeConstantExpression(Literal::makeZeros(curr->type));
1035 return tryToReplaceCurrent(n);
1036 }
1037 Const* c = builder->makeConst(int32_t(0));
1038 if (tryToReplaceCurrent(c)) {
1039 return true;
1040 }
1041 c->value = Literal::makeOne(curr->type);
1042 c->type = curr->type;
1043 return tryToReplaceCurrent(c);
1044 }
1045
tryToReduceCurrentToUnreachableReducer1046 bool tryToReduceCurrentToUnreachable() {
1047 auto* curr = getCurrent();
1048 if (curr->is<Unreachable>()) {
1049 return false;
1050 }
1051 // try to replace with a trivial value
1052 Unreachable un;
1053 if (tryToReplaceCurrent(&un)) {
1054 replaceCurrent(builder->makeUnreachable());
1055 return true;
1056 }
1057 // maybe a return? TODO
1058 return false;
1059 }
1060 };
1061
1062 //
1063 // main
1064 //
1065
main(int argc,const char * argv[])1066 int main(int argc, const char* argv[]) {
1067 std::string input, test, working, command;
1068 // By default, look for binaries alongside our own binary.
1069 std::string binDir = Path::getDirName(argv[0]);
1070 bool binary = true, deNan = false, verbose = false, debugInfo = false,
1071 force = false;
1072 Options options("wasm-reduce",
1073 "Reduce a wasm file to a smaller one that has the same "
1074 "behavior on a given command");
1075 options
1076 .add("--command",
1077 "-cmd",
1078 "The command to run on the test, that we want to reduce while keeping "
1079 "the command's output identical. "
1080 "We look at the command's return code and stdout here (TODO: stderr), "
1081 "and we reduce while keeping those unchanged.",
1082 Options::Arguments::One,
1083 [&](Options* o, const std::string& argument) { command = argument; })
1084 .add("--test",
1085 "-t",
1086 "Test file (this will be written to to test, the given command should "
1087 "read it when we call it)",
1088 Options::Arguments::One,
1089 [&](Options* o, const std::string& argument) { test = argument; })
1090 .add("--working",
1091 "-w",
1092 "Working file (this will contain the current good state while doing "
1093 "temporary computations, "
1094 "and will contain the final best result at the end)",
1095 Options::Arguments::One,
1096 [&](Options* o, const std::string& argument) { working = argument; })
1097 .add("--binaries",
1098 "-b",
1099 "binaryen binaries location (bin/ directory)",
1100 Options::Arguments::One,
1101 [&](Options* o, const std::string& argument) {
1102 // Add separator just in case
1103 binDir = argument + Path::getPathSeparator();
1104 })
1105 .add("--text",
1106 "-S",
1107 "Emit intermediate files as text, instead of binary (also make sure "
1108 "the test and working files have a .wat or .wast suffix)",
1109 Options::Arguments::Zero,
1110 [&](Options* o, const std::string& argument) { binary = false; })
1111 .add("--denan",
1112 "",
1113 "Avoid nans when reducing",
1114 Options::Arguments::Zero,
1115 [&](Options* o, const std::string& argument) { deNan = true; })
1116 .add("--verbose",
1117 "-v",
1118 "Verbose output mode",
1119 Options::Arguments::Zero,
1120 [&](Options* o, const std::string& argument) { verbose = true; })
1121 .add("--debugInfo",
1122 "-g",
1123 "Keep debug info in binaries",
1124 Options::Arguments::Zero,
1125 [&](Options* o, const std::string& argument) { debugInfo = true; })
1126 .add("--force",
1127 "-f",
1128 "Force the reduction attempt, ignoring problems that imply it is "
1129 "unlikely to succeed",
1130 Options::Arguments::Zero,
1131 [&](Options* o, const std::string& argument) { force = true; })
1132 .add("--timeout",
1133 "-to",
1134 "A timeout to apply to each execution of the command, in seconds "
1135 "(default: 2)",
1136 Options::Arguments::One,
1137 [&](Options* o, const std::string& argument) {
1138 timeout = atoi(argument.c_str());
1139 std::cout << "|applying timeout: " << timeout << "\n";
1140 })
1141 .add_positional(
1142 "INFILE",
1143 Options::Arguments::One,
1144 [&](Options* o, const std::string& argument) { input = argument; });
1145 options.parse(argc, argv);
1146
1147 if (test.size() == 0) {
1148 Fatal() << "test file not provided\n";
1149 }
1150 if (working.size() == 0) {
1151 Fatal() << "working file not provided\n";
1152 }
1153
1154 if (!binary) {
1155 Colors::setEnabled(false);
1156 }
1157
1158 Path::setBinaryenBinDir(binDir);
1159
1160 std::cerr << "|wasm-reduce\n";
1161 std::cerr << "|input: " << input << '\n';
1162 std::cerr << "|test: " << test << '\n';
1163 std::cerr << "|working: " << working << '\n';
1164 std::cerr << "|bin dir: " << binDir << '\n';
1165
1166 // get the expected output
1167 copy_file(input, test);
1168 expected.getFromExecution(command);
1169
1170 std::cerr << "|expected result:\n" << expected << '\n';
1171 std::cerr << "|!! Make sure the above is what you expect! !!\n\n";
1172
1173 auto stopIfNotForced = [&](std::string message, ProgramResult& result) {
1174 std::cerr << "|! " << message << '\n' << result << '\n';
1175 if (!force) {
1176 Fatal() << "|! stopping, as it is very unlikely reduction can succeed "
1177 "(use -f to ignore this check)";
1178 }
1179 };
1180
1181 if (expected.time + 1 >= timeout) {
1182 stopIfNotForced("execution time is dangerously close to the timeout - you "
1183 "should probably increase the timeout",
1184 expected);
1185 }
1186
1187 std::cerr
1188 << "|checking that command has different behavior on invalid binary (this "
1189 "verifies that the test file is used by the command)\n";
1190 {
1191 {
1192 std::ofstream dst(test, std::ios::binary);
1193 dst << "waka waka\n";
1194 }
1195 ProgramResult result(command);
1196 if (result == expected) {
1197 stopIfNotForced(
1198 "running command on an invalid module should give different results",
1199 result);
1200 }
1201 }
1202
1203 std::cerr << "|checking that command has expected behavior on canonicalized "
1204 "(read-written) binary\n";
1205 {
1206 // read and write it
1207 auto cmd = Path::getBinaryenBinaryTool("wasm-opt") + " " + input +
1208 " --detect-features -o " + test;
1209 if (!binary) {
1210 cmd += " -S";
1211 }
1212 ProgramResult readWrite(cmd);
1213 if (readWrite.failed()) {
1214 stopIfNotForced("failed to read and write the binary", readWrite);
1215 } else {
1216 ProgramResult result(command);
1217 if (result != expected) {
1218 stopIfNotForced("running command on the canonicalized module should "
1219 "give the same results",
1220 result);
1221 }
1222 }
1223 }
1224
1225 copy_file(input, working);
1226 auto workingSize = file_size(working);
1227 std::cerr << "|input size: " << workingSize << "\n";
1228
1229 std::cerr << "|starting reduction!\n";
1230
1231 int factor = workingSize * 2;
1232 size_t lastDestructiveReductions = 0;
1233 size_t lastPostPassesSize = 0;
1234
1235 bool stopping = false;
1236
1237 while (1) {
1238 Reducer reducer(command, test, working, binary, deNan, verbose, debugInfo);
1239
1240 // run binaryen optimization passes to reduce. passes are fast to run
1241 // and can often reduce large amounts of code efficiently, as opposed
1242 // to detructive reduction (i.e., that doesn't preserve correctness as
1243 // passes do) since destrucive must operate one change at a time
1244 std::cerr << "| reduce using passes...\n";
1245 auto oldSize = file_size(working);
1246 reducer.reduceUsingPasses();
1247 auto newSize = file_size(working);
1248 auto passProgress = oldSize - newSize;
1249 std::cerr << "| after pass reduction: " << newSize << "\n";
1250
1251 // always stop after a pass reduction attempt, for final cleanup
1252 if (stopping) {
1253 break;
1254 }
1255
1256 // check if the full cycle (destructive/passes) has helped or not
1257 if (lastPostPassesSize && newSize >= lastPostPassesSize) {
1258 std::cerr << "| progress has stopped, skipping to the end\n";
1259 if (factor == 1) {
1260 // this is after doing work with factor 1, so after the remaining work,
1261 // stop
1262 stopping = true;
1263 } else {
1264 // just try to remove all we can and finish up
1265 factor = 1;
1266 }
1267 }
1268 lastPostPassesSize = newSize;
1269
1270 // if destructive reductions lead to useful proportionate pass reductions,
1271 // keep going at the same factor, as pass reductions are far faster
1272 std::cerr << "| pass progress: " << passProgress
1273 << ", last destructive: " << lastDestructiveReductions << '\n';
1274 if (passProgress >= 4 * lastDestructiveReductions) {
1275 // don't change
1276 std::cerr << "| progress is good, do not quickly decrease factor\n";
1277 } else {
1278 if (factor > 10) {
1279 factor = (factor / 3) + 1;
1280 } else {
1281 factor = (factor + 1) / 2; // stable on 1
1282 }
1283 }
1284
1285 // no point in a factor lorger than the size
1286 assert(newSize > 4); // wasm modules are >4 bytes anyhow
1287 factor = std::min(factor, int(newSize) / 4);
1288
1289 // try to reduce destructively. if a high factor fails to find anything,
1290 // quickly try a lower one (no point in doing passes until we reduce
1291 // destructively at least a little)
1292 while (1) {
1293 std::cerr << "| reduce destructively... (factor: " << factor << ")\n";
1294 lastDestructiveReductions = reducer.reduceDestructively(factor);
1295 if (lastDestructiveReductions > 0) {
1296 break;
1297 }
1298 // we failed to reduce destructively
1299 if (factor == 1) {
1300 stopping = true;
1301 break;
1302 }
1303 factor = std::max(
1304 1, factor / 4); // quickly now, try to find *something* we can reduce
1305 }
1306
1307 std::cerr << "| destructive reduction led to size: " << file_size(working)
1308 << '\n';
1309 }
1310 std::cerr << "|finished, final size: " << file_size(working) << "\n";
1311 copy_file(working, test); // just to avoid confusion
1312 }
1313