1 /*
2  * Copyright 2017 WebAssembly Community Group participants
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //
18 // Tries to reduce the input wasm into the smallest possible wasm
19 // that still generates the same result on a given command. This is
20 // useful to reduce bug testcases, for example, if a file crashes
21 // the optimizer, you can reduce it to find the smallest file that
22 // also crashes it (which generally will show the same bug, in a
23 // much more debuggable manner).
24 //
25 
26 #include <cstdio>
27 #include <cstdlib>
28 #include <memory>
29 
30 #include "ir/branch-utils.h"
31 #include "ir/iteration.h"
32 #include "ir/literal-utils.h"
33 #include "ir/properties.h"
34 #include "pass.h"
35 #include "support/colors.h"
36 #include "support/command-line.h"
37 #include "support/file.h"
38 #include "support/path.h"
39 #include "support/timing.h"
40 #include "wasm-builder.h"
41 #include "wasm-io.h"
42 #include "wasm-validator.h"
43 #ifdef _WIN32
44 #ifndef NOMINMAX
45 #define NOMINMAX
46 #endif
47 #include <windows.h>
48 // Create a string with last error message
GetLastErrorStdStr()49 std::string GetLastErrorStdStr() {
50   DWORD error = GetLastError();
51   if (error) {
52     LPVOID lpMsgBuf;
53     DWORD bufLen = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
54                                    FORMAT_MESSAGE_FROM_SYSTEM |
55                                    FORMAT_MESSAGE_IGNORE_INSERTS,
56                                  NULL,
57                                  error,
58                                  MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
59                                  (LPTSTR)&lpMsgBuf,
60                                  0,
61                                  NULL);
62     if (bufLen) {
63       LPCSTR lpMsgStr = (LPCSTR)lpMsgBuf;
64       std::string result(lpMsgStr, lpMsgStr + bufLen);
65       LocalFree(lpMsgBuf);
66       return result;
67     }
68   }
69   return std::string();
70 }
71 #endif
72 using namespace wasm;
73 
74 // a timeout on every execution of the command
75 size_t timeout = 2;
76 
77 struct ProgramResult {
78   int code;
79   std::string output;
80   double time;
81 
82   ProgramResult() = default;
ProgramResultProgramResult83   ProgramResult(std::string command) { getFromExecution(command); }
84 
85 #ifdef _WIN32
getFromExecutionProgramResult86   void getFromExecution(std::string command) {
87     Timer timer;
88     timer.start();
89     SECURITY_ATTRIBUTES saAttr;
90     saAttr.nLength = sizeof(SECURITY_ATTRIBUTES);
91     saAttr.bInheritHandle = TRUE;
92     saAttr.lpSecurityDescriptor = NULL;
93 
94     HANDLE hChildStd_OUT_Rd;
95     HANDLE hChildStd_OUT_Wr;
96 
97     if (
98       // Create a pipe for the child process's STDOUT.
99       !CreatePipe(&hChildStd_OUT_Rd, &hChildStd_OUT_Wr, &saAttr, 0) ||
100       // Ensure the read handle to the pipe for STDOUT is not inherited.
101       !SetHandleInformation(hChildStd_OUT_Rd, HANDLE_FLAG_INHERIT, 0)) {
102       Fatal() << "CreatePipe \"" << command
103               << "\" failed: " << GetLastErrorStdStr() << ".\n";
104     }
105 
106     STARTUPINFO si;
107     PROCESS_INFORMATION pi;
108 
109     ZeroMemory(&si, sizeof(si));
110     si.cb = sizeof(si);
111     si.hStdError = hChildStd_OUT_Wr;
112     si.hStdOutput = hChildStd_OUT_Wr;
113     si.dwFlags |= STARTF_USESTDHANDLES;
114     ZeroMemory(&pi, sizeof(pi));
115 
116     // Start the child process.
117     if (!CreateProcess(NULL, // No module name (use command line)
118                        (LPSTR)command.c_str(), // Command line
119                        NULL,                   // Process handle not inheritable
120                        NULL,                   // Thread handle not inheritable
121                        TRUE,                   // Set handle inheritance to TRUE
122                        0,                      // No creation flags
123                        NULL,                   // Use parent's environment block
124                        NULL, // Use parent's starting directory
125                        &si,  // Pointer to STARTUPINFO structure
126                        &pi)  // Pointer to PROCESS_INFORMATION structure
127     ) {
128       Fatal() << "CreateProcess \"" << command
129               << "\" failed: " << GetLastErrorStdStr() << ".\n";
130     }
131 
132     // Wait until child process exits.
133     DWORD retVal = WaitForSingleObject(pi.hProcess, timeout * 1000);
134     if (retVal == WAIT_TIMEOUT) {
135       printf("Command timeout: %s", command.c_str());
136       TerminateProcess(pi.hProcess, -1);
137     }
138     DWORD dwordExitCode;
139     if (!GetExitCodeProcess(pi.hProcess, &dwordExitCode)) {
140       Fatal() << "GetExitCodeProcess failed: " << GetLastErrorStdStr() << ".\n";
141     }
142     code = (int)dwordExitCode;
143 
144     // Close process and thread handles.
145     CloseHandle(pi.hProcess);
146     CloseHandle(pi.hThread);
147 
148     // Read output from the child process's pipe for STDOUT
149     // Stop when there is no more data.
150     {
151       const int BUFSIZE = 4096;
152       DWORD dwRead, dwTotal, dwTotalRead = 0;
153       CHAR chBuf[BUFSIZE];
154       BOOL bSuccess = FALSE;
155 
156       PeekNamedPipe(hChildStd_OUT_Rd, NULL, 0, NULL, &dwTotal, NULL);
157       while (dwTotalRead < dwTotal) {
158         bSuccess =
159           ReadFile(hChildStd_OUT_Rd, chBuf, BUFSIZE - 1, &dwRead, NULL);
160         if (!bSuccess || dwRead == 0)
161           break;
162         chBuf[dwRead] = 0;
163         dwTotalRead += dwRead;
164         output.append(chBuf);
165       }
166     }
167     timer.stop();
168     time = timer.getTotal();
169   }
170 #else  // POSIX
171   // runs the command and notes the output
172   // TODO: also stderr, not just stdout?
getFromExecutionProgramResult173   void getFromExecution(std::string command) {
174     Timer timer;
175     timer.start();
176     // do this using just core stdio.h and stdlib.h, for portability
177     // sadly this requires two invokes
178     code = system(("timeout " + std::to_string(timeout) + "s " + command +
179                    " > /dev/null 2> /dev/null")
180                     .c_str());
181     const int MAX_BUFFER = 1024;
182     char buffer[MAX_BUFFER];
183     FILE* stream = popen(
184       ("timeout " + std::to_string(timeout) + "s " + command + " 2> /dev/null")
185         .c_str(),
186       "r");
187     while (fgets(buffer, MAX_BUFFER, stream) != NULL) {
188       output.append(buffer);
189     }
190     pclose(stream);
191     timer.stop();
192     time = timer.getTotal() / 2;
193   }
194 #endif // _WIN32
195 
operator ==ProgramResult196   bool operator==(ProgramResult& other) {
197     return code == other.code && output == other.output;
198   }
operator !=ProgramResult199   bool operator!=(ProgramResult& other) { return !(*this == other); }
200 
failedProgramResult201   bool failed() { return code != 0; }
202 
dumpProgramResult203   void dump(std::ostream& o) {
204     o << "[ProgramResult] code: " << code << " stdout: \n"
205       << output << "[====]\nin " << time << " seconds\n[/ProgramResult]\n";
206   }
207 };
208 
209 namespace std {
210 
operator <<(std::ostream & o,ProgramResult & result)211 inline std::ostream& operator<<(std::ostream& o, ProgramResult& result) {
212   result.dump(o);
213   return o;
214 }
215 
216 } // namespace std
217 
218 ProgramResult expected;
219 
220 // Removing functions is extremely beneficial and efficient. We aggressively
221 // try to remove functions, unless we've seen they can't be removed, in which
222 // case we may try again but much later.
223 static std::unordered_set<Name> functionsWeTriedToRemove;
224 
225 struct Reducer
226   : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor<Reducer>>> {
227   std::string command, test, working;
228   bool binary, deNan, verbose, debugInfo;
229 
230   // test is the file we write to that the command will operate on
231   // working is the current temporary state, the reduction so far
ReducerReducer232   Reducer(std::string command,
233           std::string test,
234           std::string working,
235           bool binary,
236           bool deNan,
237           bool verbose,
238           bool debugInfo)
239     : command(command), test(test), working(working), binary(binary),
240       deNan(deNan), verbose(verbose), debugInfo(debugInfo) {}
241 
242   // runs passes in order to reduce, until we can't reduce any more
243   // the criterion here is wasm binary size
reduceUsingPassesReducer244   void reduceUsingPasses() {
245     // run optimization passes until we can't shrink it any more
246     std::vector<std::string> passes = {
247       "-Oz",
248       "-Os",
249       "-O1",
250       "-O2",
251       "-O3",
252       "-O4",
253       "--flatten -Os",
254       "--flatten -O3",
255       "--flatten --local-cse -Os",
256       "--coalesce-locals --vacuum",
257       "--dce",
258       "--duplicate-function-elimination",
259       "--inlining",
260       "--inlining-optimizing",
261       "--optimize-level=3 --inlining-optimizing",
262       "--memory-packing",
263       "--remove-unused-names --merge-blocks --vacuum",
264       "--optimize-instructions",
265       "--precompute",
266       "--remove-imports",
267       "--remove-memory",
268       "--remove-unused-names --remove-unused-brs",
269       "--remove-unused-module-elements",
270       "--remove-unused-nonfunction-module-elements",
271       "--reorder-functions",
272       "--reorder-locals",
273       "--simplify-locals --vacuum",
274       "--strip",
275       "--vacuum"};
276     auto oldSize = file_size(working);
277     bool more = true;
278     while (more) {
279       // std::cerr << "|    starting passes loop iteration\n";
280       more = false;
281       // try both combining with a generic shrink (so minor pass overhead is
282       // compensated for), and without
283       for (auto pass : passes) {
284         std::string currCommand = Path::getBinaryenBinaryTool("wasm-opt") + " ";
285         currCommand += working + " --detect-features -o " + test + " " + pass;
286         if (debugInfo) {
287           currCommand += " -g ";
288         }
289         if (!binary) {
290           currCommand += " -S ";
291         }
292         if (verbose) {
293           std::cerr << "|    trying pass command: " << currCommand << "\n";
294         }
295         if (!ProgramResult(currCommand).failed()) {
296           auto newSize = file_size(test);
297           if (newSize < oldSize) {
298             // the pass didn't fail, and the size looks smaller, so promising
299             // see if it is still has the property we are preserving
300             if (ProgramResult(command) == expected) {
301               std::cerr << "|    command \"" << currCommand
302                         << "\" succeeded, reduced size to " << newSize
303                         << ", and preserved the property\n";
304               copy_file(test, working);
305               more = true;
306               oldSize = newSize;
307             }
308           }
309         }
310       }
311     }
312     if (verbose) {
313       std::cerr << "|    done with passes for now\n";
314     }
315   }
316 
317   // does one pass of slow and destructive reduction. returns whether it
318   // succeeded or not
319   // the criterion here is a logical change in the program. this may actually
320   // increase wasm size in some cases, but it should allow more reduction later.
321   // @param factor how much to ignore. starting with a high factor skips through
322   //               most of the file, which is often faster than going one by one
323   //               from the start
reduceDestructivelyReducer324   size_t reduceDestructively(int factor_) {
325     factor = factor_;
326     // prepare
327     loadWorking();
328     reduced = 0;
329     funcsSeen = 0;
330     // before we do any changes, it should be valid to write out the module:
331     // size should be as expected, and output should be as expected
332     ProgramResult result;
333     if (!writeAndTestReduction(result)) {
334       std::cerr << "\n|! WARNING: writing before destructive reduction fails, "
335                    "very unlikely reduction can work\n"
336                 << result << '\n';
337     }
338     // destroy!
339     walkModule(getModule());
340     return reduced;
341   }
342 
loadWorkingReducer343   void loadWorking() {
344     module = make_unique<Module>();
345     Module wasm;
346     ModuleReader reader;
347     reader.read(working, *module);
348     wasm.features = FeatureSet::All;
349     builder = make_unique<Builder>(*module);
350     setModule(module.get());
351   }
352 
353   // destructive reduction state
354 
355   size_t reduced;
356   Expression* beforeReduction;
357   std::unique_ptr<Module> module;
358   std::unique_ptr<Builder> builder;
359   Index funcsSeen;
360   int factor;
361 
362   // write the module and see if the command still fails on it as expected
writeAndTestReductionReducer363   bool writeAndTestReduction() {
364     ProgramResult result;
365     return writeAndTestReduction(result);
366   }
367 
writeAndTestReductionReducer368   bool writeAndTestReduction(ProgramResult& out) {
369     // write the module out
370     ModuleWriter writer;
371     writer.setBinary(binary);
372     writer.setDebugInfo(debugInfo);
373     writer.write(*getModule(), test);
374     // note that it is ok for the destructively-reduced module to be bigger
375     // than the previous - each destructive reduction removes logical code,
376     // and so is strictly better, even if the wasm binary format happens to
377     // encode things slightly less efficiently.
378     // test it
379     out.getFromExecution(command);
380     return out == expected;
381   }
382 
shouldTryToReduceReducer383   bool shouldTryToReduce(size_t bonus = 1) {
384     static size_t counter = 0;
385     counter += bonus;
386     return (counter % factor) <= bonus;
387   }
388 
isOkReplacementReducer389   bool isOkReplacement(Expression* with) {
390     if (deNan) {
391       if (auto* c = with->dynCast<Const>()) {
392         if (c->value.isNaN()) {
393           return false;
394         }
395       }
396     }
397     return true;
398   }
399 
400   // tests a reduction on the current traversal node, and undos if it failed
tryToReplaceCurrentReducer401   bool tryToReplaceCurrent(Expression* with) {
402     if (!isOkReplacement(with)) {
403       return false;
404     }
405     auto* curr = getCurrent();
406     // std::cerr << "try " << curr << " => " << with << '\n';
407     if (curr->type != with->type) {
408       return false;
409     }
410     if (!shouldTryToReduce()) {
411       return false;
412     }
413     replaceCurrent(with);
414     if (!writeAndTestReduction()) {
415       replaceCurrent(curr);
416       return false;
417     }
418     std::cerr << "|      tryToReplaceCurrent succeeded (in " << getLocation()
419               << ")\n";
420     noteReduction();
421     return true;
422   }
423 
noteReductionReducer424   void noteReduction(size_t amount = 1) {
425     reduced += amount;
426     copy_file(test, working);
427   }
428 
429   // tests a reduction on an arbitrary child
tryToReplaceChildReducer430   bool tryToReplaceChild(Expression*& child, Expression* with) {
431     if (!isOkReplacement(with)) {
432       return false;
433     }
434     if (child->type != with->type) {
435       return false;
436     }
437     if (!shouldTryToReduce()) {
438       return false;
439     }
440     auto* before = child;
441     child = with;
442     if (!writeAndTestReduction()) {
443       child = before;
444       return false;
445     }
446     std::cerr << "|      tryToReplaceChild succeeded (in " << getLocation()
447               << ")\n";
448     // std::cerr << "|      " << before << " => " << with << '\n';
449     noteReduction();
450     return true;
451   }
452 
getLocationReducer453   std::string getLocation() {
454     if (getFunction()) {
455       return getFunction()->name.str;
456     }
457     return "(non-function context)";
458   }
459 
460   // visitors. in each we try to remove code in a destructive and nontrivial
461   // way. "nontrivial" means something that optimization passes can't achieve,
462   // since we don't need to duplicate work that they do
463 
visitExpressionReducer464   void visitExpression(Expression* curr) {
465     // type-based reductions
466     if (curr->type == Type::none) {
467       if (tryToReduceCurrentToNop()) {
468         return;
469       }
470     } else if (curr->type.isConcrete()) {
471       if (tryToReduceCurrentToConst()) {
472         return;
473       }
474     } else {
475       assert(curr->type == Type::unreachable);
476       if (tryToReduceCurrentToUnreachable()) {
477         return;
478       }
479     }
480     // specific reductions
481     if (auto* iff = curr->dynCast<If>()) {
482       if (iff->type == Type::none) {
483         // perhaps we need just the condition?
484         if (tryToReplaceCurrent(builder->makeDrop(iff->condition))) {
485           return;
486         }
487       }
488       handleCondition(iff->condition);
489     } else if (auto* br = curr->dynCast<Break>()) {
490       handleCondition(br->condition);
491     } else if (auto* select = curr->dynCast<Select>()) {
492       handleCondition(select->condition);
493     } else if (auto* sw = curr->dynCast<Switch>()) {
494       handleCondition(sw->condition);
495       // Try to replace switch targets with the default
496       for (auto& target : sw->targets) {
497         if (target != sw->default_) {
498           auto old = target;
499           target = sw->default_;
500           if (!tryToReplaceCurrent(curr)) {
501             target = old;
502           }
503         }
504       }
505       // Try to shorten the list of targets.
506       while (sw->targets.size() > 1) {
507         auto last = sw->targets.back();
508         sw->targets.pop_back();
509         if (!tryToReplaceCurrent(curr)) {
510           sw->targets.push_back(last);
511           break;
512         }
513       }
514     } else if (auto* block = curr->dynCast<Block>()) {
515       if (!shouldTryToReduce()) {
516         return;
517       }
518       // replace a singleton
519       auto& list = block->list;
520       if (list.size() == 1 &&
521           !BranchUtils::BranchSeeker::has(block, block->name)) {
522         if (tryToReplaceCurrent(block->list[0])) {
523           return;
524         }
525       }
526       // try to get rid of nops
527       Index i = 0;
528       while (list.size() > 1 && i < list.size()) {
529         auto* curr = list[i];
530         if (curr->is<Nop>() && shouldTryToReduce()) {
531           // try to remove it
532           for (Index j = i; j < list.size() - 1; j++) {
533             list[j] = list[j + 1];
534           }
535           list.pop_back();
536           if (writeAndTestReduction()) {
537             std::cerr << "|      block-nop removed\n";
538             noteReduction();
539             return;
540           }
541           list.push_back(nullptr);
542           // we failed; undo
543           for (Index j = list.size() - 1; j > i; j--) {
544             list[j] = list[j - 1];
545           }
546           list[i] = curr;
547         }
548         i++;
549       }
550       return; // nothing more to do
551     } else if (auto* loop = curr->dynCast<Loop>()) {
552       if (shouldTryToReduce() &&
553           !BranchUtils::BranchSeeker::has(loop, loop->name)) {
554         tryToReplaceCurrent(loop->body);
555       }
556       return; // nothing more to do
557     }
558     // Finally, try to replace with a child.
559     for (auto* child : ChildIterator(curr)) {
560       if (child->type.isConcrete() && curr->type == Type::none) {
561         if (tryToReplaceCurrent(builder->makeDrop(child))) {
562           return;
563         }
564       } else {
565         if (tryToReplaceCurrent(child)) {
566           return;
567         }
568       }
569     }
570     // If that didn't work, try to replace with a child + a unary conversion,
571     // but not if it's already unary
572     if (curr->type.isSingle() && !curr->is<Unary>()) {
573       for (auto* child : ChildIterator(curr)) {
574         if (child->type == curr->type) {
575           continue; // already tried
576         }
577         if (!child->type.isSingle()) {
578           continue; // no conversion
579         }
580         Expression* fixed = nullptr;
581         TODO_SINGLE_COMPOUND(curr->type);
582         switch (curr->type.getBasic()) {
583           case Type::i32: {
584             TODO_SINGLE_COMPOUND(child->type);
585             switch (child->type.getBasic()) {
586               case Type::i32:
587                 WASM_UNREACHABLE("invalid type");
588               case Type::i64:
589                 fixed = builder->makeUnary(WrapInt64, child);
590                 break;
591               case Type::f32:
592                 fixed = builder->makeUnary(TruncSFloat32ToInt32, child);
593                 break;
594               case Type::f64:
595                 fixed = builder->makeUnary(TruncSFloat64ToInt32, child);
596                 break;
597               case Type::v128:
598               case Type::funcref:
599               case Type::externref:
600               case Type::exnref:
601               case Type::anyref:
602               case Type::eqref:
603               case Type::i31ref:
604                 continue; // not implemented yet
605               case Type::none:
606               case Type::unreachable:
607                 WASM_UNREACHABLE("unexpected type");
608             }
609             break;
610           }
611           case Type::i64: {
612             TODO_SINGLE_COMPOUND(child->type);
613             switch (child->type.getBasic()) {
614               case Type::i32:
615                 fixed = builder->makeUnary(ExtendSInt32, child);
616                 break;
617               case Type::i64:
618                 WASM_UNREACHABLE("invalid type");
619               case Type::f32:
620                 fixed = builder->makeUnary(TruncSFloat32ToInt64, child);
621                 break;
622               case Type::f64:
623                 fixed = builder->makeUnary(TruncSFloat64ToInt64, child);
624                 break;
625               case Type::v128:
626               case Type::funcref:
627               case Type::externref:
628               case Type::exnref:
629               case Type::anyref:
630               case Type::eqref:
631               case Type::i31ref:
632                 continue; // not implemented yet
633               case Type::none:
634               case Type::unreachable:
635                 WASM_UNREACHABLE("unexpected type");
636             }
637             break;
638           }
639           case Type::f32: {
640             TODO_SINGLE_COMPOUND(child->type);
641             switch (child->type.getBasic()) {
642               case Type::i32:
643                 fixed = builder->makeUnary(ConvertSInt32ToFloat32, child);
644                 break;
645               case Type::i64:
646                 fixed = builder->makeUnary(ConvertSInt64ToFloat32, child);
647                 break;
648               case Type::f32:
649                 WASM_UNREACHABLE("unexpected type");
650               case Type::f64:
651                 fixed = builder->makeUnary(DemoteFloat64, child);
652                 break;
653               case Type::v128:
654               case Type::funcref:
655               case Type::externref:
656               case Type::exnref:
657               case Type::anyref:
658               case Type::eqref:
659               case Type::i31ref:
660                 continue; // not implemented yet
661               case Type::none:
662               case Type::unreachable:
663                 WASM_UNREACHABLE("unexpected type");
664             }
665             break;
666           }
667           case Type::f64: {
668             TODO_SINGLE_COMPOUND(child->type);
669             switch (child->type.getBasic()) {
670               case Type::i32:
671                 fixed = builder->makeUnary(ConvertSInt32ToFloat64, child);
672                 break;
673               case Type::i64:
674                 fixed = builder->makeUnary(ConvertSInt64ToFloat64, child);
675                 break;
676               case Type::f32:
677                 fixed = builder->makeUnary(PromoteFloat32, child);
678                 break;
679               case Type::f64:
680                 WASM_UNREACHABLE("unexpected type");
681               case Type::v128:
682               case Type::funcref:
683               case Type::externref:
684               case Type::exnref:
685               case Type::anyref:
686               case Type::eqref:
687               case Type::i31ref:
688                 continue; // not implemented yet
689               case Type::none:
690               case Type::unreachable:
691                 WASM_UNREACHABLE("unexpected type");
692             }
693             break;
694           }
695           case Type::v128:
696           case Type::funcref:
697           case Type::externref:
698           case Type::exnref:
699           case Type::anyref:
700           case Type::eqref:
701           case Type::i31ref:
702             continue; // not implemented yet
703           case Type::none:
704           case Type::unreachable:
705             WASM_UNREACHABLE("unexpected type");
706         }
707         assert(fixed->type == curr->type);
708         if (tryToReplaceCurrent(fixed)) {
709           return;
710         }
711       }
712     }
713   }
714 
visitFunctionReducer715   void visitFunction(Function* curr) {
716     if (!curr->imported()) {
717       // extra chance to work on the function toplevel element, as if it can
718       // be reduced it's great
719       visitExpression(curr->body);
720     }
721     // finish function
722     funcsSeen++;
723     static int last = 0;
724     int percentage = (100 * funcsSeen) / getModule()->functions.size();
725     if (std::abs(percentage - last) >= 5) {
726       std::cerr << "|    " << percentage << "% of funcs complete\n";
727       last = percentage;
728     }
729   }
730 
731   // TODO: bisection on segment shrinking?
732 
visitTableReducer733   void visitTable(Table* curr) {
734     std::cerr << "|    try to simplify table\n";
735     Name first;
736     for (auto& segment : curr->segments) {
737       for (auto item : segment.data) {
738         first = item;
739         break;
740       }
741       if (!first.isNull()) {
742         break;
743       }
744     }
745     visitSegmented(curr, first, 100);
746   }
747 
visitMemoryReducer748   void visitMemory(Memory* curr) {
749     std::cerr << "|    try to simplify memory\n";
750     visitSegmented(curr, 0, 2);
751   }
752 
753   template<typename T, typename U>
visitSegmentedReducer754   void visitSegmented(T* curr, U zero, size_t bonus) {
755     // try to reduce to first function. first, shrink segment elements.
756     // while we are shrinking successfully, keep going exponentially.
757     bool justShrank = false;
758     bool shrank = false;
759     for (auto& segment : curr->segments) {
760       auto& data = segment.data;
761       // when we succeed, try to shrink by more and more, similar to bisection
762       size_t skip = 1;
763       for (size_t i = 0; i < data.size() && !data.empty(); i++) {
764         if (!justShrank && !shouldTryToReduce(bonus)) {
765           continue;
766         }
767         auto save = data;
768         for (size_t j = 0; j < skip; j++) {
769           if (!data.empty()) {
770             data.pop_back();
771           }
772         }
773         auto justShrank = writeAndTestReduction();
774         if (justShrank) {
775           std::cerr << "|      shrank segment (skip: " << skip << ")\n";
776           shrank = true;
777           noteReduction();
778           skip = std::min(size_t(factor), 2 * skip);
779         } else {
780           data = save;
781           break;
782         }
783       }
784     }
785     // the "opposite" of shrinking: copy a 'zero' element
786     for (auto& segment : curr->segments) {
787       if (segment.data.empty()) {
788         continue;
789       }
790       for (auto& item : segment.data) {
791         if (!shouldTryToReduce(bonus)) {
792           continue;
793         }
794         if (item == zero) {
795           continue;
796         }
797         auto save = item;
798         item = zero;
799         if (writeAndTestReduction()) {
800           std::cerr << "|      zeroed segment\n";
801           noteReduction();
802         } else {
803           item = save;
804         }
805         if (shrank) {
806           // zeroing is fairly inefficient. if we are managing to shrink
807           // (which we do exponentially), just zero one per segment at most
808           break;
809         }
810       }
811     }
812   }
813 
visitModuleReducer814   void visitModule(Module* curr) {
815     assert(curr == module.get());
816     // try to remove functions
817     std::cerr << "|    try to remove functions\n";
818     std::vector<Name> functionNames;
819     for (auto& func : module->functions) {
820       functionNames.push_back(func->name);
821     }
822     size_t skip = 1;
823     // If we just removed some functions in the previous iteration, keep trying
824     // to remove more as this is one of the most efficient ways to reduce.
825     bool justRemoved = false;
826     for (size_t i = 0; i < functionNames.size(); i++) {
827       if (!justRemoved &&
828           functionsWeTriedToRemove.count(functionNames[i]) == 1 &&
829           !shouldTryToReduce(std::max((factor / 100) + 1, 1000))) {
830         continue;
831       }
832       std::vector<Name> names;
833       for (size_t j = 0; names.size() < skip && i + j < functionNames.size();
834            j++) {
835         auto name = functionNames[i + j];
836         if (module->getFunctionOrNull(name)) {
837           names.push_back(name);
838           functionsWeTriedToRemove.insert(name);
839         }
840       }
841       if (names.size() == 0) {
842         continue;
843       }
844       std::cout << "|    try to remove " << names.size()
845                 << " functions (skip: " << skip << ")\n";
846       justRemoved = tryToRemoveFunctions(names);
847       if (justRemoved) {
848         noteReduction(names.size());
849         i += skip;
850         skip = std::min(size_t(factor), 2 * skip);
851       } else {
852         skip = std::max(skip / 2, size_t(1)); // or 1?
853       }
854     }
855     // try to remove exports
856     std::cerr << "|    try to remove exports (with factor " << factor << ")\n";
857     std::vector<Export> exports;
858     for (auto& exp : module->exports) {
859       exports.push_back(*exp);
860     }
861     skip = 1;
862     for (size_t i = 0; i < exports.size(); i++) {
863       if (!shouldTryToReduce(std::max((factor / 100) + 1, 1000))) {
864         continue;
865       }
866       std::vector<Export> currExports;
867       for (size_t j = 0; currExports.size() < skip && i + j < exports.size();
868            j++) {
869         auto exp = exports[i + j];
870         if (module->getExportOrNull(exp.name)) {
871           currExports.push_back(exp);
872           module->removeExport(exp.name);
873         }
874       }
875       ProgramResult result;
876       if (!writeAndTestReduction(result)) {
877         for (auto exp : currExports) {
878           module->addExport(new Export(exp));
879         }
880         skip = std::max(skip / 2, size_t(1)); // or 1?
881       } else {
882         std::cerr << "|      removed " << currExports.size() << " exports\n";
883         noteReduction(currExports.size());
884         i += skip;
885         skip = std::min(size_t(factor), 2 * skip);
886       }
887     }
888     // If we are left with a single function that is not exported or used in
889     // a table, that is useful as then we can change the return type.
890     if (module->functions.size() == 1 && module->exports.empty() &&
891         module->table.segments.empty()) {
892       auto* func = module->functions[0].get();
893       // We can't remove something that might have breaks to it.
894       if (!func->imported() && !Properties::isNamedControlFlow(func->body)) {
895         auto funcSig = func->sig;
896         auto* funcBody = func->body;
897         for (auto* child : ChildIterator(func->body)) {
898           if (!(child->type.isConcrete() || child->type == Type::none)) {
899             continue; // not something a function can return
900           }
901           // Try to replace the body with the child, fixing up the function
902           // to accept it.
903           func->sig.results = child->type;
904           func->body = child;
905           if (writeAndTestReduction()) {
906             // great, we succeeded!
907             std::cerr << "|    altered function result type\n";
908             noteReduction(1);
909             break;
910           }
911           // Undo.
912           func->sig = funcSig;
913           func->body = funcBody;
914         }
915       }
916     }
917   }
918 
tryToRemoveFunctionsReducer919   bool tryToRemoveFunctions(std::vector<Name> names) {
920     for (auto name : names) {
921       module->removeFunction(name);
922     }
923 
924     // remove all references to them
925     struct FunctionReferenceRemover
926       : public PostWalker<FunctionReferenceRemover> {
927       std::unordered_set<Name> names;
928       std::vector<Name> exportsToRemove;
929 
930       FunctionReferenceRemover(std::vector<Name>& vec) {
931         for (auto name : vec) {
932           names.insert(name);
933         }
934       }
935       void visitCall(Call* curr) {
936         if (names.count(curr->target)) {
937           replaceCurrent(Builder(*getModule()).replaceWithIdenticalType(curr));
938         }
939       }
940       void visitExport(Export* curr) {
941         if (names.count(curr->value)) {
942           exportsToRemove.push_back(curr->name);
943         }
944       }
945       void visitTable(Table* curr) {
946         Name other;
947         for (auto& segment : curr->segments) {
948           for (auto name : segment.data) {
949             if (!names.count(name)) {
950               other = name;
951               break;
952             }
953           }
954           if (!other.isNull()) {
955             break;
956           }
957         }
958         if (other.isNull()) {
959           return; // we failed to find a replacement
960         }
961         for (auto& segment : curr->segments) {
962           for (auto& name : segment.data) {
963             if (names.count(name)) {
964               name = other;
965             }
966           }
967         }
968       }
969       void doWalkModule(Module* module) {
970         PostWalker<FunctionReferenceRemover>::doWalkModule(module);
971         for (auto name : exportsToRemove) {
972           module->removeExport(name);
973         }
974       }
975     };
976     FunctionReferenceRemover referenceRemover(names);
977     referenceRemover.walkModule(module.get());
978 
979     if (WasmValidator().validate(
980           *module, WasmValidator::Globally | WasmValidator::Quiet) &&
981         writeAndTestReduction()) {
982       std::cerr << "|      removed " << names.size() << " functions\n";
983       return true;
984     } else {
985       loadWorking(); // restore it from orbit
986       return false;
987     }
988   }
989 
990   // helpers
991 
992   // try to replace condition with always true and always false
handleConditionReducer993   void handleCondition(Expression*& condition) {
994     if (!condition) {
995       return;
996     }
997     if (condition->is<Const>()) {
998       return;
999     }
1000     auto* c = builder->makeConst(int32_t(0));
1001     if (!tryToReplaceChild(condition, c)) {
1002       c->value = Literal(int32_t(1));
1003       tryToReplaceChild(condition, c);
1004     }
1005   }
1006 
tryToReduceCurrentToNopReducer1007   bool tryToReduceCurrentToNop() {
1008     auto* curr = getCurrent();
1009     if (curr->is<Nop>()) {
1010       return false;
1011     }
1012     // try to replace with a trivial value
1013     Nop nop;
1014     if (tryToReplaceCurrent(&nop)) {
1015       replaceCurrent(builder->makeNop());
1016       return true;
1017     }
1018     return false;
1019   }
1020 
1021   // try to replace a concrete value with a trivial constant
tryToReduceCurrentToConstReducer1022   bool tryToReduceCurrentToConst() {
1023     auto* curr = getCurrent();
1024     if (curr->is<Const>()) {
1025       return false;
1026     }
1027     // try to replace with a trivial value
1028     if (curr->type.isNullable()) {
1029       RefNull* n = builder->makeRefNull(curr->type);
1030       return tryToReplaceCurrent(n);
1031     }
1032     if (curr->type.isTuple()) {
1033       Expression* n =
1034         builder->makeConstantExpression(Literal::makeZeros(curr->type));
1035       return tryToReplaceCurrent(n);
1036     }
1037     Const* c = builder->makeConst(int32_t(0));
1038     if (tryToReplaceCurrent(c)) {
1039       return true;
1040     }
1041     c->value = Literal::makeOne(curr->type);
1042     c->type = curr->type;
1043     return tryToReplaceCurrent(c);
1044   }
1045 
tryToReduceCurrentToUnreachableReducer1046   bool tryToReduceCurrentToUnreachable() {
1047     auto* curr = getCurrent();
1048     if (curr->is<Unreachable>()) {
1049       return false;
1050     }
1051     // try to replace with a trivial value
1052     Unreachable un;
1053     if (tryToReplaceCurrent(&un)) {
1054       replaceCurrent(builder->makeUnreachable());
1055       return true;
1056     }
1057     // maybe a return? TODO
1058     return false;
1059   }
1060 };
1061 
1062 //
1063 // main
1064 //
1065 
main(int argc,const char * argv[])1066 int main(int argc, const char* argv[]) {
1067   std::string input, test, working, command;
1068   // By default, look for binaries alongside our own binary.
1069   std::string binDir = Path::getDirName(argv[0]);
1070   bool binary = true, deNan = false, verbose = false, debugInfo = false,
1071        force = false;
1072   Options options("wasm-reduce",
1073                   "Reduce a wasm file to a smaller one that has the same "
1074                   "behavior on a given command");
1075   options
1076     .add("--command",
1077          "-cmd",
1078          "The command to run on the test, that we want to reduce while keeping "
1079          "the command's output identical. "
1080          "We look at the command's return code and stdout here (TODO: stderr), "
1081          "and we reduce while keeping those unchanged.",
1082          Options::Arguments::One,
1083          [&](Options* o, const std::string& argument) { command = argument; })
1084     .add("--test",
1085          "-t",
1086          "Test file (this will be written to to test, the given command should "
1087          "read it when we call it)",
1088          Options::Arguments::One,
1089          [&](Options* o, const std::string& argument) { test = argument; })
1090     .add("--working",
1091          "-w",
1092          "Working file (this will contain the current good state while doing "
1093          "temporary computations, "
1094          "and will contain the final best result at the end)",
1095          Options::Arguments::One,
1096          [&](Options* o, const std::string& argument) { working = argument; })
1097     .add("--binaries",
1098          "-b",
1099          "binaryen binaries location (bin/ directory)",
1100          Options::Arguments::One,
1101          [&](Options* o, const std::string& argument) {
1102            // Add separator just in case
1103            binDir = argument + Path::getPathSeparator();
1104          })
1105     .add("--text",
1106          "-S",
1107          "Emit intermediate files as text, instead of binary (also make sure "
1108          "the test and working files have a .wat or .wast suffix)",
1109          Options::Arguments::Zero,
1110          [&](Options* o, const std::string& argument) { binary = false; })
1111     .add("--denan",
1112          "",
1113          "Avoid nans when reducing",
1114          Options::Arguments::Zero,
1115          [&](Options* o, const std::string& argument) { deNan = true; })
1116     .add("--verbose",
1117          "-v",
1118          "Verbose output mode",
1119          Options::Arguments::Zero,
1120          [&](Options* o, const std::string& argument) { verbose = true; })
1121     .add("--debugInfo",
1122          "-g",
1123          "Keep debug info in binaries",
1124          Options::Arguments::Zero,
1125          [&](Options* o, const std::string& argument) { debugInfo = true; })
1126     .add("--force",
1127          "-f",
1128          "Force the reduction attempt, ignoring problems that imply it is "
1129          "unlikely to succeed",
1130          Options::Arguments::Zero,
1131          [&](Options* o, const std::string& argument) { force = true; })
1132     .add("--timeout",
1133          "-to",
1134          "A timeout to apply to each execution of the command, in seconds "
1135          "(default: 2)",
1136          Options::Arguments::One,
1137          [&](Options* o, const std::string& argument) {
1138            timeout = atoi(argument.c_str());
1139            std::cout << "|applying timeout: " << timeout << "\n";
1140          })
1141     .add_positional(
1142       "INFILE",
1143       Options::Arguments::One,
1144       [&](Options* o, const std::string& argument) { input = argument; });
1145   options.parse(argc, argv);
1146 
1147   if (test.size() == 0) {
1148     Fatal() << "test file not provided\n";
1149   }
1150   if (working.size() == 0) {
1151     Fatal() << "working file not provided\n";
1152   }
1153 
1154   if (!binary) {
1155     Colors::setEnabled(false);
1156   }
1157 
1158   Path::setBinaryenBinDir(binDir);
1159 
1160   std::cerr << "|wasm-reduce\n";
1161   std::cerr << "|input: " << input << '\n';
1162   std::cerr << "|test: " << test << '\n';
1163   std::cerr << "|working: " << working << '\n';
1164   std::cerr << "|bin dir: " << binDir << '\n';
1165 
1166   // get the expected output
1167   copy_file(input, test);
1168   expected.getFromExecution(command);
1169 
1170   std::cerr << "|expected result:\n" << expected << '\n';
1171   std::cerr << "|!! Make sure the above is what you expect! !!\n\n";
1172 
1173   auto stopIfNotForced = [&](std::string message, ProgramResult& result) {
1174     std::cerr << "|! " << message << '\n' << result << '\n';
1175     if (!force) {
1176       Fatal() << "|! stopping, as it is very unlikely reduction can succeed "
1177                  "(use -f to ignore this check)";
1178     }
1179   };
1180 
1181   if (expected.time + 1 >= timeout) {
1182     stopIfNotForced("execution time is dangerously close to the timeout - you "
1183                     "should probably increase the timeout",
1184                     expected);
1185   }
1186 
1187   std::cerr
1188     << "|checking that command has different behavior on invalid binary (this "
1189        "verifies that the test file is used by the command)\n";
1190   {
1191     {
1192       std::ofstream dst(test, std::ios::binary);
1193       dst << "waka waka\n";
1194     }
1195     ProgramResult result(command);
1196     if (result == expected) {
1197       stopIfNotForced(
1198         "running command on an invalid module should give different results",
1199         result);
1200     }
1201   }
1202 
1203   std::cerr << "|checking that command has expected behavior on canonicalized "
1204                "(read-written) binary\n";
1205   {
1206     // read and write it
1207     auto cmd = Path::getBinaryenBinaryTool("wasm-opt") + " " + input +
1208                " --detect-features -o " + test;
1209     if (!binary) {
1210       cmd += " -S";
1211     }
1212     ProgramResult readWrite(cmd);
1213     if (readWrite.failed()) {
1214       stopIfNotForced("failed to read and write the binary", readWrite);
1215     } else {
1216       ProgramResult result(command);
1217       if (result != expected) {
1218         stopIfNotForced("running command on the canonicalized module should "
1219                         "give the same results",
1220                         result);
1221       }
1222     }
1223   }
1224 
1225   copy_file(input, working);
1226   auto workingSize = file_size(working);
1227   std::cerr << "|input size: " << workingSize << "\n";
1228 
1229   std::cerr << "|starting reduction!\n";
1230 
1231   int factor = workingSize * 2;
1232   size_t lastDestructiveReductions = 0;
1233   size_t lastPostPassesSize = 0;
1234 
1235   bool stopping = false;
1236 
1237   while (1) {
1238     Reducer reducer(command, test, working, binary, deNan, verbose, debugInfo);
1239 
1240     // run binaryen optimization passes to reduce. passes are fast to run
1241     // and can often reduce large amounts of code efficiently, as opposed
1242     // to detructive reduction (i.e., that doesn't preserve correctness as
1243     // passes do) since destrucive must operate one change at a time
1244     std::cerr << "|  reduce using passes...\n";
1245     auto oldSize = file_size(working);
1246     reducer.reduceUsingPasses();
1247     auto newSize = file_size(working);
1248     auto passProgress = oldSize - newSize;
1249     std::cerr << "|  after pass reduction: " << newSize << "\n";
1250 
1251     // always stop after a pass reduction attempt, for final cleanup
1252     if (stopping) {
1253       break;
1254     }
1255 
1256     // check if the full cycle (destructive/passes) has helped or not
1257     if (lastPostPassesSize && newSize >= lastPostPassesSize) {
1258       std::cerr << "|  progress has stopped, skipping to the end\n";
1259       if (factor == 1) {
1260         // this is after doing work with factor 1, so after the remaining work,
1261         // stop
1262         stopping = true;
1263       } else {
1264         // just try to remove all we can and finish up
1265         factor = 1;
1266       }
1267     }
1268     lastPostPassesSize = newSize;
1269 
1270     // if destructive reductions lead to useful proportionate pass reductions,
1271     // keep going at the same factor, as pass reductions are far faster
1272     std::cerr << "|  pass progress: " << passProgress
1273               << ", last destructive: " << lastDestructiveReductions << '\n';
1274     if (passProgress >= 4 * lastDestructiveReductions) {
1275       // don't change
1276       std::cerr << "|  progress is good, do not quickly decrease factor\n";
1277     } else {
1278       if (factor > 10) {
1279         factor = (factor / 3) + 1;
1280       } else {
1281         factor = (factor + 1) / 2; // stable on 1
1282       }
1283     }
1284 
1285     // no point in a factor lorger than the size
1286     assert(newSize > 4); // wasm modules are >4 bytes anyhow
1287     factor = std::min(factor, int(newSize) / 4);
1288 
1289     // try to reduce destructively. if a high factor fails to find anything,
1290     // quickly try a lower one (no point in doing passes until we reduce
1291     // destructively at least a little)
1292     while (1) {
1293       std::cerr << "|  reduce destructively... (factor: " << factor << ")\n";
1294       lastDestructiveReductions = reducer.reduceDestructively(factor);
1295       if (lastDestructiveReductions > 0) {
1296         break;
1297       }
1298       // we failed to reduce destructively
1299       if (factor == 1) {
1300         stopping = true;
1301         break;
1302       }
1303       factor = std::max(
1304         1, factor / 4); // quickly now, try to find *something* we can reduce
1305     }
1306 
1307     std::cerr << "|  destructive reduction led to size: " << file_size(working)
1308               << '\n';
1309   }
1310   std::cerr << "|finished, final size: " << file_size(working) << "\n";
1311   copy_file(working, test); // just to avoid confusion
1312 }
1313