1 #ifndef HALIDE_IR_H
2 #define HALIDE_IR_H
3 
4 /** \file
5  * Subtypes for Halide expressions (\ref Halide::Expr) and statements (\ref Halide::Internal::Stmt)
6  */
7 
8 #include <string>
9 #include <vector>
10 
11 #include "Buffer.h"
12 #include "Expr.h"
13 #include "FunctionPtr.h"
14 #include "ModulusRemainder.h"
15 #include "Parameter.h"
16 #include "PrefetchDirective.h"
17 #include "Reduction.h"
18 #include "Type.h"
19 
20 namespace Halide {
21 namespace Internal {
22 
23 class Function;
24 
25 /** The actual IR nodes begin here. Remember that all the Expr
26  * nodes also have a public "type" property */
27 
/** Cast a node from one type to another. Can't change vector widths
 * (i.e. the number of lanes stays the same). */
struct Cast : public ExprNode<Cast> {
    /** The expression being cast. */
    Expr value;

    /** Declared here, defined out of line. Takes the type 't' and
     * the expression 'v'. */
    static Expr make(Type t, Expr v);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Cast;
};
36 
37 /** The sum of two expressions */
38 struct Add : public ExprNode<Add> {
39     Expr a, b;
40 
41     static Expr make(Expr a, Expr b);
42 
43     static const IRNodeType _node_type = IRNodeType::Add;
44 };
45 
46 /** The difference of two expressions */
47 struct Sub : public ExprNode<Sub> {
48     Expr a, b;
49 
50     static Expr make(Expr a, Expr b);
51 
52     static const IRNodeType _node_type = IRNodeType::Sub;
53 };
54 
55 /** The product of two expressions */
56 struct Mul : public ExprNode<Mul> {
57     Expr a, b;
58 
59     static Expr make(Expr a, Expr b);
60 
61     static const IRNodeType _node_type = IRNodeType::Mul;
62 };
63 
64 /** The ratio of two expressions */
65 struct Div : public ExprNode<Div> {
66     Expr a, b;
67 
68     static Expr make(Expr a, Expr b);
69 
70     static const IRNodeType _node_type = IRNodeType::Div;
71 };
72 
73 /** The remainder of a / b. Mostly equivalent to '%' in C, except that
74  * the result here is always positive. For floats, this is equivalent
75  * to calling fmod. */
76 struct Mod : public ExprNode<Mod> {
77     Expr a, b;
78 
79     static Expr make(Expr a, Expr b);
80 
81     static const IRNodeType _node_type = IRNodeType::Mod;
82 };
83 
84 /** The lesser of two values. */
85 struct Min : public ExprNode<Min> {
86     Expr a, b;
87 
88     static Expr make(Expr a, Expr b);
89 
90     static const IRNodeType _node_type = IRNodeType::Min;
91 };
92 
93 /** The greater of two values */
94 struct Max : public ExprNode<Max> {
95     Expr a, b;
96 
97     static Expr make(Expr a, Expr b);
98 
99     static const IRNodeType _node_type = IRNodeType::Max;
100 };
101 
102 /** Is the first expression equal to the second */
103 struct EQ : public ExprNode<EQ> {
104     Expr a, b;
105 
106     static Expr make(Expr a, Expr b);
107 
108     static const IRNodeType _node_type = IRNodeType::EQ;
109 };
110 
111 /** Is the first expression not equal to the second */
112 struct NE : public ExprNode<NE> {
113     Expr a, b;
114 
115     static Expr make(Expr a, Expr b);
116 
117     static const IRNodeType _node_type = IRNodeType::NE;
118 };
119 
120 /** Is the first expression less than the second. */
121 struct LT : public ExprNode<LT> {
122     Expr a, b;
123 
124     static Expr make(Expr a, Expr b);
125 
126     static const IRNodeType _node_type = IRNodeType::LT;
127 };
128 
129 /** Is the first expression less than or equal to the second. */
130 struct LE : public ExprNode<LE> {
131     Expr a, b;
132 
133     static Expr make(Expr a, Expr b);
134 
135     static const IRNodeType _node_type = IRNodeType::LE;
136 };
137 
138 /** Is the first expression greater than the second. */
139 struct GT : public ExprNode<GT> {
140     Expr a, b;
141 
142     static Expr make(Expr a, Expr b);
143 
144     static const IRNodeType _node_type = IRNodeType::GT;
145 };
146 
147 /** Is the first expression greater than or equal to the second. */
148 struct GE : public ExprNode<GE> {
149     Expr a, b;
150 
151     static Expr make(Expr a, Expr b);
152 
153     static const IRNodeType _node_type = IRNodeType::GE;
154 };
155 
156 /** Logical and - are both expressions true */
157 struct And : public ExprNode<And> {
158     Expr a, b;
159 
160     static Expr make(Expr a, Expr b);
161 
162     static const IRNodeType _node_type = IRNodeType::And;
163 };
164 
165 /** Logical or - is at least one of the expression true */
166 struct Or : public ExprNode<Or> {
167     Expr a, b;
168 
169     static Expr make(Expr a, Expr b);
170 
171     static const IRNodeType _node_type = IRNodeType::Or;
172 };
173 
/** Logical not - true if the expression is false. */
struct Not : public ExprNode<Not> {
    /** The expression being negated. */
    Expr a;

    static Expr make(Expr a);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Not;
};
182 
183 /** A ternary operator. Evalutes 'true_value' and 'false_value',
184  * then selects between them based on 'condition'. Equivalent to
185  * the ternary operator in C. */
186 struct Select : public ExprNode<Select> {
187     Expr condition, true_value, false_value;
188 
189     static Expr make(Expr condition, Expr true_value, Expr false_value);
190 
191     static const IRNodeType _node_type = IRNodeType::Select;
192 };
193 
194 /** Load a value from a named symbol if predicate is true. The buffer
195  * is treated as an array of the 'type' of this Load node. That is,
196  * the buffer has no inherent type. The name may be the name of an
197  * enclosing allocation, an input or output buffer, or any other
198  * symbol of type Handle(). */
199 struct Load : public ExprNode<Load> {
200     std::string name;
201 
202     Expr predicate, index;
203 
204     // If it's a load from an image argument or compiled-in constant
205     // image, this will point to that
206     Buffer<> image;
207 
208     // If it's a load from an image parameter, this points to that
209     Parameter param;
210 
211     // The alignment of the index. If the index is a vector, this is
212     // the alignment of the first lane.
213     ModulusRemainder alignment;
214 
215     static Expr make(Type type, const std::string &name,
216                      Expr index, Buffer<> image,
217                      Parameter param,
218                      Expr predicate,
219                      ModulusRemainder alignment);
220 
221     static const IRNodeType _node_type = IRNodeType::Load;
222 };
223 
224 /** A linear ramp vector node. This is vector with 'lanes' elements,
225  * where element i is 'base' + i*'stride'. This is a convenient way to
226  * pass around vectors without busting them up into individual
227  * elements. E.g. a dense vector load from a buffer can use a ramp
228  * node with stride 1 as the index. */
229 struct Ramp : public ExprNode<Ramp> {
230     Expr base, stride;
231     int lanes;
232 
233     static Expr make(Expr base, Expr stride, int lanes);
234 
235     static const IRNodeType _node_type = IRNodeType::Ramp;
236 };
237 
/** A vector with 'lanes' elements, in which every element is
 * 'value'. This is a special case of the Ramp node, in which
 * the stride is zero. */
struct Broadcast : public ExprNode<Broadcast> {
    /** The value repeated in every lane. */
    Expr value;
    /** Number of elements in the vector. */
    int lanes;

    static Expr make(Expr value, int lanes);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Broadcast;
};
249 
250 /** A let expression, like you might find in a functional
251  * language. Within the expression \ref Let::body, instances of the Var
252  * node \ref Let::name refer to \ref Let::value. */
253 struct Let : public ExprNode<Let> {
254     std::string name;
255     Expr value, body;
256 
257     static Expr make(const std::string &name, Expr value, Expr body);
258 
259     static const IRNodeType _node_type = IRNodeType::Let;
260 };
261 
/** The statement form of a let node. Within the statement 'body',
 * instances of the Var named 'name' refer to 'value'. */
struct LetStmt : public StmtNode<LetStmt> {
    /** Name bound within 'body'. */
    std::string name;
    /** Expression that 'name' refers to. */
    Expr value;
    /** Statement in which the binding is visible. */
    Stmt body;

    static Stmt make(const std::string &name, Expr value, Stmt body);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::LetStmt;
};
273 
/** If the 'condition' is false, then evaluate and return the message,
 * which should be a call to an error function. */
struct AssertStmt : public StmtNode<AssertStmt> {
    // The condition being asserted; when it is false, 'message' is
    // evaluated and returned (see the struct comment).
    Expr condition;
    // Should be a call to an error function.
    Expr message;

    static Stmt make(Expr condition, Expr message);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::AssertStmt;
};
285 
/** This node is a helpful annotation to do with permissions. If 'is_producer' is
 * set to true, this represents a producer node which may also contain updates;
 * otherwise, this represents a consumer node. If the producer node contains
 * updates, the body of the node will be a block of 'produce' and 'update'
 * in that order. In a producer node, the access is read-write (or write-only
 * if it doesn't have updates). In a consumer node, the access is read-only.
 * None of this is actually enforced; the node is purely informative, to help
 * out our analysis during lowering. For every unique ProducerConsumer,
 * there is an associated Realize node with the same name that creates the buffer
 * being read from or written to in the body of the ProducerConsumer.
 */
struct ProducerConsumer : public StmtNode<ProducerConsumer> {
    /** Matches the name of the associated Realize node. */
    std::string name;
    /** True for a producer node, false for a consumer node. */
    bool is_producer;
    Stmt body;

    static Stmt make(const std::string &name, bool is_producer, Stmt body);

    // NOTE(review): presumably shorthands for make() with is_producer
    // set to true / false respectively; definitions are not in this
    // header - confirm.
    static Stmt make_produce(const std::string &name, Stmt body);
    static Stmt make_consume(const std::string &name, Stmt body);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::ProducerConsumer;
};
309 
310 /** Store a 'value' to the buffer called 'name' at a given 'index' if
311  * 'predicate' is true. The buffer is interpreted as an array of the
312  * same type as 'value'. The name may be the name of an enclosing
313  * Allocate node, an output buffer, or any other symbol of type
314  * Handle(). */
315 struct Store : public StmtNode<Store> {
316     std::string name;
317     Expr predicate, value, index;
318     // If it's a store to an output buffer, then this parameter points to it.
319     Parameter param;
320 
321     // The alignment of the index. If the index is a vector, this is
322     // the alignment of the first lane.
323     ModulusRemainder alignment;
324 
325     static Stmt make(const std::string &name, Expr value, Expr index,
326                      Parameter param, Expr predicate, ModulusRemainder alignment);
327 
328     static const IRNodeType _node_type = IRNodeType::Store;
329 };
330 
/** This defines the value of a function at a multi-dimensional
 * location. You should think of it as a store to a multi-dimensional
 * array. It gets lowered to a conventional Store node. The name must
 * correspond to an output buffer or the name of an enclosing Realize
 * node. */
struct Provide : public StmtNode<Provide> {
    std::string name;
    /** The values being defined. */
    std::vector<Expr> values;
    /** The multi-dimensional location being defined. */
    std::vector<Expr> args;

    static Stmt make(const std::string &name, const std::vector<Expr> &values, const std::vector<Expr> &args);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Provide;
};
345 
/** Allocate a scratch area with the given name, type, and
 * size. The buffer lives for at most the duration of the body
 * statement, within which it may or may not be freed explicitly with
 * a Free node with a matching name. Allocation only occurs if the
 * condition evaluates to true. Within the body of the allocation,
 * defines a symbol with the given name and the type Handle(). */
struct Allocate : public StmtNode<Allocate> {
    std::string name;
    Type type;
    MemoryType memory_type;
    std::vector<Expr> extents;
    Expr condition;

    // These override the code generator dependent malloc and free
    // equivalents if provided. If the new_expr succeeds, that is it
    // returns non-nullptr, the function named by free_function is
    // guaranteed to be called. The free function signature must match
    // that of the code generator dependent free (typically
    // halide_free). If free_function is left empty, the code generator
    // default will be called.
    Expr new_expr;
    std::string free_function;

    Stmt body;

    // new_expr and free_function default to an undefined Expr and an
    // empty string, i.e. the code generator defaults described above.
    static Stmt make(const std::string &name, Type type, MemoryType memory_type,
                     const std::vector<Expr> &extents,
                     Expr condition, Stmt body,
                     Expr new_expr = Expr(), const std::string &free_function = std::string());

    /** A routine to check if the extents are all constants, and if so verify
     * the total size is less than 2^31 - 1. If the result is constant, but
     * overflows, this routine asserts. This returns 0 if the extents are
     * not all constants; otherwise, it returns the total constant allocation
     * size. */
    static int32_t constant_allocation_size(const std::vector<Expr> &extents, const std::string &name);
    // NOTE(review): presumably the same check applied to this node's
    // own extents and name; the definition is not in this header - confirm.
    int32_t constant_allocation_size() const;

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Allocate;
};
386 
/** Free the resources associated with the given buffer. */
struct Free : public StmtNode<Free> {
    /** Name of the buffer to free. */
    std::string name;

    static Stmt make(const std::string &name);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Free;
};
395 
/** Allocate a multi-dimensional buffer of the given type and
 * size. Create some scratch memory that will back the function 'name'
 * over the range specified in 'bounds'. The bounds are a vector of
 * (min, extent) pairs for each dimension. Allocation only occurs if
 * the condition evaluates to true.
 */
struct Realize : public StmtNode<Realize> {
    /** Name of the function this memory backs. */
    std::string name;
    std::vector<Type> types;
    MemoryType memory_type;
    /** One (min, extent) pair per dimension. */
    Region bounds;
    /** Allocation only occurs if this evaluates to true. */
    Expr condition;
    Stmt body;

    static Stmt make(const std::string &name, const std::vector<Type> &types, MemoryType memory_type, const Region &bounds, Expr condition, Stmt body);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Realize;
};
414 
415 /** A sequence of statements to be executed in-order. 'rest' may be
416  * undefined. Used rest.defined() to find out. */
417 struct Block : public StmtNode<Block> {
418     Stmt first, rest;
419 
420     static Stmt make(Stmt first, Stmt rest);
421     /** Construct zero or more Blocks to invoke a list of statements in order.
422      * This method may not return a Block statement if stmts.size() <= 1. */
423     static Stmt make(const std::vector<Stmt> &stmts);
424 
425     static const IRNodeType _node_type = IRNodeType::Block;
426 };
427 
428 /** A pair of statements executed concurrently. Both statements are
429  * joined before the Stmt ends. This is the parallel equivalent to
430  * Block. */
431 struct Fork : public StmtNode<Fork> {
432     Stmt first, rest;
433 
434     static Stmt make(Stmt first, Stmt rest);
435 
436     static const IRNodeType _node_type = IRNodeType::Fork;
437 };
438 
439 /** An if-then-else block. 'else' may be undefined. */
440 struct IfThenElse : public StmtNode<IfThenElse> {
441     Expr condition;
442     Stmt then_case, else_case;
443 
444     static Stmt make(Expr condition, Stmt then_case, Stmt else_case = Stmt());
445 
446     static const IRNodeType _node_type = IRNodeType::IfThenElse;
447 };
448 
/** Evaluate and discard an expression, presumably because it has some side-effect. */
struct Evaluate : public StmtNode<Evaluate> {
    /** The expression to evaluate; its result is discarded. */
    Expr value;

    static Stmt make(Expr v);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Evaluate;
};
457 
/** A function call. This can represent a call to some extern function
 * (like sin), but it's also our multi-dimensional version of a Load,
 * so it can be a load from an input image, or a call to another
 * halide function. The latter two kinds of call nodes (image loads
 * and calls to halide functions) don't survive all the way down to
 * code generation - the lowering process converts them to Load
 * nodes. */
struct Call : public ExprNode<Call> {
    std::string name;
    std::vector<Expr> args;
    typedef enum { Image,            ///< A load from an input image
                   Extern,           ///< A call to an external C-ABI function, possibly with side-effects
                   ExternCPlusPlus,  ///< A call to an external C++ (name-mangled) function, possibly with side-effects
                   PureExtern,       ///< A call to a guaranteed-side-effect-free external function
                   Halide,           ///< A call to a Func
                   Intrinsic,        ///< A possibly-side-effecty compiler intrinsic, which has special handling during codegen
                   PureIntrinsic     ///< A side-effect-free version of the above.
    } CallType;
    CallType call_type;

    // Halide uses calls internally to represent certain operations
    // (instead of IR nodes). These are matched by name. Note that
    // these are deliberately char* (rather than std::string) so that
    // they can be referenced at static-initialization time without
    // risking ambiguous initialization order; we use a typedef to simplify
    // declaration.
    typedef const char *const ConstString;

    // enums for various well-known intrinsics. (It is not *required* that all
    // intrinsics have an enum entry here, but as a matter of style, it is recommended.)
    // Note that these are only used in the API; inside the node, they are translated
    // into a name. (To recover the name, call get_intrinsic_name().)
    //
    // Please keep this list sorted alphabetically; the specific enum values
    // are *not* guaranteed to be stable across time.
    enum IntrinsicOp {
        abs,
        absd,
        add_image_checks_marker,
        alloca,
        bitwise_and,
        bitwise_not,
        bitwise_or,
        bitwise_xor,
        bool_to_mask,
        bundle,  // Bundle multiple exprs together temporarily for analysis (e.g. CSE)
        call_cached_indirect_function,
        cast_mask,
        count_leading_zeros,
        count_trailing_zeros,
        declare_box_touched,
        debug_to_file,
        div_round_to_zero,
        dynamic_shuffle,
        extract_mask_element,
        gather,
        glsl_texture_load,
        glsl_texture_store,
        glsl_varying,
        gpu_thread_barrier,
        if_then_else,
        if_then_else_mask,
        image_load,
        image_store,
        lerp,
        likely,
        likely_if_innermost,
        make_struct,
        memoize_expr,
        mod_round_to_zero,
        mulhi_shr,  // Compute high_half(arg[0] * arg[1]) >> arg[2]. Note that this is a shift in addition to taking the upper half of multiply result. arg[2] must be an unsigned integer immediate.
        popcount,
        prefetch,
        promise_clamped,
        random,
        register_destructor,
        reinterpret,
        require,
        require_mask,
        return_second,
        rewrite_buffer,
        scatter,
        scatter_acc,
        scatter_release,
        select_mask,
        shift_left,
        shift_right,
        signed_integer_overflow,
        size_of_halide_buffer_t,
        sorted_avg,  // Compute (arg[0] + arg[1]) / 2, assuming arg[0] < arg[1].
        strict_float,
        stringify,
        undef,
        unsafe_promise_clamped,
        IntrinsicOpCount  // Sentinel: keep last.
    };

    // Maps an IntrinsicOp to the name stored inside the node (see the
    // note above the enum).
    static const char *get_intrinsic_name(IntrinsicOp op);

    // We also declare some symbolic names for some of the runtime
    // functions that we want to construct Call nodes to here to avoid
    // magic string constants and the potential risk of typos.
    HALIDE_EXPORT static ConstString
        buffer_get_dimensions,
        buffer_get_min,
        buffer_get_extent,
        buffer_get_stride,
        buffer_get_max,
        buffer_get_host,
        buffer_get_device,
        buffer_get_device_interface,
        buffer_get_shape,
        buffer_get_host_dirty,
        buffer_get_device_dirty,
        buffer_get_type,
        buffer_set_host_dirty,
        buffer_set_device_dirty,
        buffer_is_bounds_query,
        buffer_init,
        buffer_init_from_buffer,
        buffer_crop,
        buffer_set_bounds,
        trace;

    // If it's a call to another halide function, this call node holds
    // a possibly-weak reference to that function.
    FunctionPtr func;

    // If that function has multiple values, which value does this
    // call node refer to?
    int value_index;

    // If it's a call to an image, this call node holds a
    // pointer to that image's buffer
    Buffer<> image;

    // If it's a call to an image parameter, this call node holds a
    // pointer to that
    Parameter param;

    // Overload taking a well-known intrinsic as an IntrinsicOp rather
    // than by name.
    static Expr make(Type type, IntrinsicOp op, const std::vector<Expr> &args, CallType call_type,
                     FunctionPtr func = FunctionPtr(), int value_index = 0,
                     const Buffer<> &image = Buffer<>(), Parameter param = Parameter());

    // Overload taking the callee name directly.
    static Expr make(Type type, const std::string &name, const std::vector<Expr> &args, CallType call_type,
                     FunctionPtr func = FunctionPtr(), int value_index = 0,
                     Buffer<> image = Buffer<>(), Parameter param = Parameter());

    /** Convenience constructor for calls to other halide functions */
    static Expr make(const Function &func, const std::vector<Expr> &args, int idx = 0);

    /** Convenience constructor for loads from concrete images. Uses the
     * image's own type and name, with call type Image. */
    static Expr make(const Buffer<> &image, const std::vector<Expr> &args) {
        return make(image.type(), image.name(), args, Image, FunctionPtr(), 0, image, Parameter());
    }

    /** Convenience constructor for loads from image parameters. Uses the
     * parameter's own type and name, with call type Image. */
    static Expr make(const Parameter &param, const std::vector<Expr> &args) {
        return make(param.type(), param.name(), args, Image, FunctionPtr(), 0, Buffer<>(), param);
    }

    /** Check if a call node is pure within a pipeline, meaning that
     * the same args always give the same result, and the calls can be
     * reordered, duplicated, unified, etc without changing the
     * meaning of anything. Not transitive - doesn't guarantee the
     * args themselves are pure. An example of a pure Call node is
     * sqrt. If in doubt, don't mark a Call node as pure. */
    bool is_pure() const {
        return (call_type == PureExtern ||
                call_type == Image ||
                call_type == PureIntrinsic);
    }

    /** True if the call type is Intrinsic or PureIntrinsic. */
    bool is_intrinsic() const {
        return (call_type == Intrinsic ||
                call_type == PureIntrinsic);
    }

    /** True if this is an intrinsic call (see above) whose name equals
     * get_intrinsic_name(op). */
    bool is_intrinsic(IntrinsicOp op) const {
        return is_intrinsic() && this->name == get_intrinsic_name(op);
    }

    /** True if the call type is Extern, ExternCPlusPlus, or PureExtern. */
    bool is_extern() const {
        return (call_type == Extern ||
                call_type == ExternCPlusPlus ||
                call_type == PureExtern);
    }

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Call;
};
647 
/** A named variable. Might be a loop variable, function argument,
 * parameter, reduction variable, or something defined by a Let or
 * LetStmt node. */
struct Variable : public ExprNode<Variable> {
    std::string name;

    /** References to scalar parameters, or to the dimensions of buffer
     * parameters hang onto those expressions. */
    Parameter param;

    /** References to properties of literal image parameters. */
    Buffer<> image;

    /** Reduction variables hang onto their domains */
    ReductionDomain reduction_domain;

    /** Convenience overload: default-constructed image, param and
     * reduction domain. */
    static Expr make(Type type, const std::string &name) {
        return make(type, name, Buffer<>(), Parameter(), ReductionDomain());
    }

    /** Convenience overload: only 'param' is supplied; the image and
     * reduction domain are default-constructed. */
    static Expr make(Type type, const std::string &name, Parameter param) {
        return make(type, name, Buffer<>(), std::move(param), ReductionDomain());
    }

    /** Convenience overload: only 'image' is supplied; the param and
     * reduction domain are default-constructed. */
    static Expr make(Type type, const std::string &name, const Buffer<> &image) {
        return make(type, name, image, Parameter(), ReductionDomain());
    }

    /** Convenience overload: only 'reduction_domain' is supplied; the
     * image and param are default-constructed. */
    static Expr make(Type type, const std::string &name, ReductionDomain reduction_domain) {
        return make(type, name, Buffer<>(), Parameter(), std::move(reduction_domain));
    }

    /** The general form that all the convenience overloads above
     * forward to. Declared here, defined out of line. */
    static Expr make(Type type, const std::string &name, Buffer<> image,
                     Parameter param, ReductionDomain reduction_domain);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Variable;
};
685 
686 /** A for loop. Execute the 'body' statement for all values of the
687  * variable 'name' from 'min' to 'min + extent'. There are four
688  * types of For nodes. A 'Serial' for loop is a conventional
689  * one. In a 'Parallel' for loop, each iteration of the loop
690  * happens in parallel or in some unspecified order. In a
691  * 'Vectorized' for loop, each iteration maps to one SIMD lane,
692  * and the whole loop is executed in one shot. For this case,
693  * 'extent' must be some small integer constant (probably 4, 8, or
694  * 16). An 'Unrolled' for loop compiles to a completely unrolled
695  * version of the loop. Each iteration becomes its own
696  * statement. Again in this case, 'extent' should be a small
697  * integer constant. */
698 struct For : public StmtNode<For> {
699     std::string name;
700     Expr min, extent;
701     ForType for_type;
702     DeviceAPI device_api;
703     Stmt body;
704 
705     static Stmt make(const std::string &name, Expr min, Expr extent, ForType for_type, DeviceAPI device_api, Stmt body);
706 
is_unordered_parallelFor707     bool is_unordered_parallel() const {
708         return Halide::Internal::is_unordered_parallel(for_type);
709     }
is_parallelFor710     bool is_parallel() const {
711         return Halide::Internal::is_parallel(for_type);
712     }
713 
714     static const IRNodeType _node_type = IRNodeType::For;
715 };
716 
// NOTE(review): this node had no documentation. Presumably 'body' runs
// once 'count' has been acquired from 'semaphore' - confirm against
// the code that lowers this node.
struct Acquire : public StmtNode<Acquire> {
    Expr semaphore;
    Expr count;
    Stmt body;

    static Stmt make(Expr semaphore, Expr count, Stmt body);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Acquire;
};
726 
727 /** Construct a new vector by taking elements from another sequence of
728  * vectors. */
729 struct Shuffle : public ExprNode<Shuffle> {
730     std::vector<Expr> vectors;
731 
732     /** Indices indicating which vector element to place into the
733      * result. The elements are numbered by their position in the
734      * concatenation of the vector argumentss. */
735     std::vector<int> indices;
736 
737     static Expr make(const std::vector<Expr> &vectors,
738                      const std::vector<int> &indices);
739 
740     /** Convenience constructor for making a shuffle representing an
741      * interleaving of vectors of the same length. */
742     static Expr make_interleave(const std::vector<Expr> &vectors);
743 
744     /** Convenience constructor for making a shuffle representing a
745      * concatenation of the vectors. */
746     static Expr make_concat(const std::vector<Expr> &vectors);
747 
748     /** Convenience constructor for making a shuffle representing a
749      * contiguous subset of a vector. */
750     static Expr make_slice(Expr vector, int begin, int stride, int size);
751 
752     /** Convenience constructor for making a shuffle representing
753      * extracting a single element. */
754     static Expr make_extract_element(Expr vector, int i);
755 
756     /** Check if this shuffle is an interleaving of the vector
757      * arguments. */
758     bool is_interleave() const;
759 
760     /** Check if this shuffle is a concatenation of the vector
761      * arguments. */
762     bool is_concat() const;
763 
764     /** Check if this shuffle is a contiguous strict subset of the
765      * vector arguments, and if so, the offset and stride of the
766      * slice. */
767     ///@{
768     bool is_slice() const;
slice_beginShuffle769     int slice_begin() const {
770         return indices[0];
771     }
slice_strideShuffle772     int slice_stride() const {
773         return indices.size() >= 2 ? indices[1] - indices[0] : 1;
774     }
775     ///@}
776 
777     /** Check if this shuffle is extracting a scalar from the vector
778      * arguments. */
779     bool is_extract_element() const;
780 
781     static const IRNodeType _node_type = IRNodeType::Shuffle;
782 };
783 
/** Represent a multi-dimensional region of a Func or an ImageParam that
 * needs to be prefetched. */
struct Prefetch : public StmtNode<Prefetch> {
    /** Name of the Func or ImageParam being prefetched. */
    std::string name;
    std::vector<Type> types;
    /** The multi-dimensional region to prefetch. */
    Region bounds;
    PrefetchDirective prefetch;
    // NOTE(review): presumably the prefetch only happens when this
    // evaluates to true, as for Allocate/Realize - confirm.
    Expr condition;

    Stmt body;

    static Stmt make(const std::string &name, const std::vector<Type> &types,
                     const Region &bounds,
                     const PrefetchDirective &prefetch,
                     Expr condition, Stmt body);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Prefetch;
};
802 
/** Lock all the Store nodes in the body statement.
 *  Typically the lock is implemented by an atomic operation
 *  (e.g. atomic add or atomic compare-and-swap).
 *  However, if necessary, the node can access a mutex buffer through
 *  mutex_name, by lowering this node into calls to acquire and
 *  release the lock. */
struct Atomic : public StmtNode<Atomic> {
    std::string producer_name;
    std::string mutex_name;  // empty string if not using mutex
    /** The statement whose Store nodes are locked. */
    Stmt body;

    static Stmt make(const std::string &producer_name,
                     const std::string &mutex_name,
                     Stmt body);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::Atomic;
};
820 
/** Horizontally reduce a vector to a scalar or narrower vector using
 * the given commutative and associative binary operator. The reduction
 * factor is dictated by the number of lanes in the input and output
 * types. Groups of adjacent lanes are combined. The number of lanes
 * in the output type must be a divisor of the number of lanes of the
 * input type. */
struct VectorReduce : public ExprNode<VectorReduce> {
    // 99.9% of the time people will use this for horizontal addition,
    // but these are all of our commutative and associative primitive
    // operators.
    typedef enum {
        Add,
        Mul,
        Min,
        Max,
        And,
        Or,
    } Operator;

    /** The input vector being reduced. */
    Expr value;
    /** The binary operator used to combine lanes. */
    Operator op;

    // NOTE(review): 'lanes' is presumably the lane count of the
    // result; the definition is not in this header - confirm.
    static Expr make(Operator op, Expr vec, int lanes);

    /** IRNodeType tag for this node class. */
    static const IRNodeType _node_type = IRNodeType::VectorReduce;
};
847 
848 }  // namespace Internal
849 }  // namespace Halide
850 
851 #endif
852