1 /*
2  * Copyright 2020 WebAssembly Community Group participants
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //
18 // stack-utils.h: Utilities for manipulating and analyzing stack machine code in
19 // the form of Poppy IR.
20 //
21 // Poppy IR represents stack machine code using normal Binaryen IR types by
22 // imposing the following constraints:
23 //
24 //  1. Function bodies and children of control flow (except If conditions) must
25 //     be blocks.
26 //
27 //  2. Blocks may have any Expressions except for Pops as children. The sequence
28 //     of instructions in a block follows the same validation rules as in
29 //     WebAssembly. That means that any expression may have a concrete type, not
30 //     just the final expression in the block.
31 //
32 //  3. All other children must be Pops, which are used to determine the input
33 //     stack type of each instruction. Pops may not have `unreachable` type.
34 //
35 //  4. Only control flow structures and instructions that have polymorphic
36 //     unreachable behavior in WebAssembly may have unreachable type. Blocks may
37 //     be unreachable when they are not branch targets and when they have an
38 //     unreachable child. Note that this means a block may be unreachable even
39 //     if it would otherwise have a concrete type, unlike in Binaryen IR.
40 //
41 // For example, the following Binaryen IR Function:
42 //
43 //   (func $foo (result i32)
44 //    (i32.add
45 //     (i32.const 42)
46 //     (i32.const 5)
47 //    )
48 //   )
49 //
50 // would look like this in Poppy IR:
51 //
52 //   (func $foo (result i32)
53 //    (block
54 //     (i32.const 42)
55 //     (i32.const 5)
56 //     (i32.add
57 //      (i32.pop)
58 //      (i32.pop)
59 //     )
60 //    )
61 //   )
62 //
63 // Notice that the sequence of instructions in the block is now identical to the
64 // sequence of instructions in raw WebAssembly. Also note that Poppy IR's
65 // validation rules are largely additional on top of the normal Binaryen IR
66 // validation rules, with the only exceptions being block body validation and
67 // block unreahchability rules.
68 //
69 
70 #ifndef wasm_ir_stack_h
71 #define wasm_ir_stack_h
72 
73 #include "ir/properties.h"
74 #include "wasm-type.h"
75 #include "wasm.h"
76 
77 namespace wasm {
78 
79 namespace StackUtils {
80 
81 // Iterate through `block` and remove nops.
82 void removeNops(Block* block);
83 
84 // Whether `expr` may be unreachable in Poppy IR. True for control flow
85 // structures and polymorphic unreachable instructions.
86 bool mayBeUnreachable(Expression* expr);
87 
88 } // namespace StackUtils
89 
90 // Stack signatures are like regular function signatures, but they are used to
91 // represent the stack parameters and results of arbitrary sequences of stacky
92 // instructions. They have to record whether they cover an unreachable
93 // instruction because their composition takes into account the polymorphic
94 // results of unreachable instructions. For example, the following instruction
95 // sequences both have params {i32, i32} and results {f32}, but only sequence B
96 // is unreachable:
97 //
98 //  A:
99 //    i32.add
100 //    drop
101 //    f32.const 42
102 //
103 //  B:
104 //    i32.add
105 //    unreachable
106 //    f32.const 42
107 //
108 // Notice that this distinction is important because sequence B can be the body
109 // of the blocks below but sequence A cannot. In other words, the stack
110 // signature of sequence B satisfies the signatures of these blocks, but the
111 // stack signature of sequence A does not.
112 //
113 //  (block (param f64 i32 i32) (result f32) ... )
114 //  (block (param i32 i32) (result f64 f32) ... )
115 //  (block (param f64 i32 i32) (result f64 f32) ... )
116 //
117 struct StackSignature {
118   Type params;
119   Type results;
120   bool unreachable;
121 
StackSignatureStackSignature122   StackSignature()
123     : params(Type::none), results(Type::none), unreachable(false) {}
124   StackSignature(Type params, Type results, bool unreachable = false)
paramsStackSignature125     : params(params), results(results), unreachable(unreachable) {}
126 
127   StackSignature(const StackSignature&) = default;
128   StackSignature& operator=(const StackSignature&) = default;
129 
130   // Get the stack signature of `expr`
131   explicit StackSignature(Expression* expr);
132 
133   // Get the stack signature of the range of instructions [first, last). The
134   // sequence of instructions is assumed to be valid, i.e. their signatures
135   // compose.
136   template<class InputIt>
StackSignatureStackSignature137   explicit StackSignature(InputIt first, InputIt last)
138     : params(Type::none), results(Type::none), unreachable(false) {
139     // TODO: It would be more efficient to build the signature directly and
140     // construct the params in reverse to avoid quadratic behavior.
141     while (first != last) {
142       *this += StackSignature(*first++);
143     }
144   }
145 
146   // Return `true` iff `next` composes after this stack signature.
147   bool composes(const StackSignature& next) const;
148 
149   // Whether a block whose contents have this stack signature could be typed
150   // with `sig`.
151   bool satisfies(Signature sig) const;
152 
153   // Compose stack signatures. Assumes they actually compose.
154   StackSignature& operator+=(const StackSignature& next);
155   StackSignature operator+(const StackSignature& next) const;
156 
157   bool operator==(const StackSignature& other) const {
158     return params == other.params && results == other.results &&
159            unreachable == other.unreachable;
160   }
161 };
162 
163 // Calculates stack machine data flow, associating the sources and destinations
164 // of all values in a block.
165 struct StackFlow {
166   // The destination (source) location at which a value of type `type` is
167   // consumed (produced), corresponding to the `index`th value consumed by
168   // (produced by) instruction `expr`. For destination locations, `unreachable`
169   // is true iff the corresponding value is consumed by the polymorphic behavior
170   // of an unreachable instruction rather than being used directly. For source
171   // locations, `unreachable` is true iff the corresponding value is produced by
172   // an unreachable instruction. For produced values that are not consumed
173   // within the block (TODO: also for consumed values that are not produced
174   // within the block), `expr` will be the enclosing block.
175   struct Location {
176     Expression* expr;
177     Index index;
178     Type type;
179     bool unreachable;
180 
181     bool operator==(const Location& other) const {
182       return expr == other.expr && index == other.index && type == other.type &&
183              unreachable == other.unreachable;
184     }
185   };
186 
187   using LocationMap = std::unordered_map<Expression*, std::vector<Location>>;
188 
189   // Maps each instruction to the set of source locations producing its inputs.
190   LocationMap srcs;
191 
192   // Maps each instruction to the set of output locations consuming its results.
193   LocationMap dests;
194 
195   // Gets the effective stack signature of `expr`, which must be a child of the
196   // block. If `expr` is unreachable, the returned signature will reflect the
197   // values consumed and produced by its polymorphic unreachable behavior.
198   StackSignature getSignature(Expression* expr);
199 
200   // Calculates `srcs` and `dests`.
201   StackFlow(Block* curr);
202 };
203 
204 } // namespace wasm
205 
206 #endif // wasm_ir_stack_h
207