1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
17
18 /*
19 * Kernel generator related common definitions
20 */
21
22 #ifndef KERNGEN_H_
23 #define KERNGEN_H_
24
25 #include <sys/types.h>
26 #include <errno.h>
27
28 #if defined (_MSC_VER)
29 #include <msvc.h>
30 #endif
31
32 #include <defbool.h>
33 #include <list.h>
34 #include <cltypes.h>
35 #include <mutex.h>
36 #include <granulation.h>
37 #include <trace_malloc.h>
38
39 /**
40 * @internal
41 * @defgroup KGEN_INFRA Kernel generator infrastructure
42 */
43 /*@{*/
44
/*
 * Platform dependent prefix string: "I" when compiled with MSVC, "z" with
 * any other compiler.
 * NOTE(review): presumably the printf length modifier used for size_t
 * arguments ("%Iu" vs "%zu") - confirm at the use sites.
 */
45 #ifdef _MSC_VER
46 #define SPREFIX "I"
47 #else
48 #define SPREFIX "z"
49 #endif
50
/*
 * Sentinel equal to the largest value representable in size_t.
 * NOTE(review): judging by the name it marks an unused subproblem
 * dimension - confirm at the use sites.
 *
 * The expansion is fully parenthesized so that it behaves as a single
 * operand in any expression (e.g. 'sizeof SUBDIM_UNUSED').
 */
#define SUBDIM_UNUSED ((size_t)-1)
52
/* Formatting and statement related limits */
53 enum {
/* upper limit for the number of tabs, see kgenSyncFormatting() */
54 MAX_TABS = 16,
/*
 * NOTE(review): presumably the highest 'priority' value accepted by the
 * statement batch functions - confirm against the implementation
 */
55 MAX_STATEMENT_PRIORITY = 63,
/* NOTE(review): presumably the maximum length of one statement - confirm */
56 MAX_STATEMENT_LENGTH = 4096
57 };
58
/* General limits of the kernel generator */
59 enum {
60 // maximum number of subproblem dimensions
61 MAX_SUBDIMS = 3,
62 // maximum depth of code nesting
63 MAX_NESTING = 10,
// size in bytes of the buffer embedded in Kstring
64 KSTRING_MAXLEN = 256,
65 // maximum length of a generated function name; same as KSTRING_MAXLEN
66 FUNC_NAME_MAXLEN = KSTRING_MAXLEN
67 };
68
/*
 * Bundle of subproblem dimensions (MAX_SUBDIMS slots) and a parallelism
 * granularity.
 * NOTE(review): presumably describes one complete problem decomposition -
 * confirm against the users of this type.
 */
69 typedef struct{
70 SubproblemDim subdims[MAX_SUBDIMS]; /* one slot per subproblem dimension */
71 PGranularity pgran; /* data parallelism granularity */
72 }DecompositionStruct;
73
/*
 * Forward declarations only: these structures are not defined in this
 * header, which refers to them exclusively through pointers.
 */
74 struct KgenContext;
75 struct KgenGuard;
76 struct StatementBatch;
77
78 /**
79 * @internal
80 * @defgroup KGEN_TYPES Types
81 * @ingroup KGEN_INFRA
82 */
83 /*@{*/
84
85 /**
86 * @internal
87 * @brief Memory fence type
 *
 * Passed to kgenAddBarrier() and kgenAddMemFence() to select the fence kind.
 * The enumerators take the implicit values 0 and 1.
 * NOTE(review): do not assume these equal the OpenCL C flags of the same
 * names - confirm how the generator maps them to the emitted code.
88 */
89 typedef enum CLMemFence {
90 /** Fence for operations against the local memory */
91 CLK_LOCAL_MEM_FENCE,
92 /** Fence for operations against the global memory */
93 CLK_GLOBAL_MEM_FENCE
94 } CLMemFence;
95
/*
 * Kind of a unified pointer; accepted by uptrTypeName().
 * NOTE(review): the enumerator names suggest the global, local and private
 * address spaces - confirm against the implementation.
 */
96 // TODO: deprecate
97 typedef enum UptrType {
98 UPTR_GLOBAL,
99 UPTR_LOCAL,
100 UPTR_PRIVATE
101 } UptrType;
102
103 /**
104 * @internal
105 * @brief Null-terminated string being a part of a kernel
 *
 * The storage is a fixed array of KSTRING_MAXLEN bytes embedded in the
 * structure, so the text can be at most KSTRING_MAXLEN - 1 characters long
 * plus the terminating null byte.
106 */
107 typedef struct Kstring {
108 /** Buffer storing the string */
109 char buf[KSTRING_MAXLEN];
110 } Kstring;
111
112 /**
113 * @internal
114 * @brief Type of custom generator for loop unrolling
 *
 * @param[out] ctx Generator context
 * @param[out] priv Private data of the generator, as passed to
 * kgenLoopUnroll()
 *
 * NOTE(review): the meaning of the returned value is not stated in this
 * header; presumably 0 on success and a negated error code otherwise -
 * confirm against kgenLoopUnroll().
115 */
116 typedef int
117 (*LoopUnrollGen)(struct KgenContext *ctx, void *priv);
118
119 /*@}*/
120
121 /**
122 * @internal
123 * @brief Unrolled loop control information
 *
 * Consumed by kgenLoopUnroll(). For a single (not nested) loop 'ocName'
 * should be NULL.
 * NOTE(review): presumably 'obConst' selects the active member of
 * 'outBound' ('val' when true, 'name' when false) - confirm against the
 * implementation.
124 */
125 typedef struct LoopCtl {
126 const char *ocName; /**< outer loop counter name */
127 union {
128 const char *name; /* bound given as a name */
129 unsigned long val; /* bound given as a number */
130 } outBound; /**< outer loop bound */
131 bool obConst; /**< outer loop bound is constant flag */
132 unsigned long inBound; /**< inner loop bound */
133 } LoopCtl;
134
135 /**
136 * @internal
137 * @brief Set of loop unrolling subgenerators
 *
 * All the members share the LoopUnrollGen callback type.
138 */
139 typedef struct LoopUnrollers {
140 /** generate preparative code before unrolling */
141 LoopUnrollGen preUnroll;
142 /** generate single step for unrolled body in the vectorized way */
143 LoopUnrollGen genSingleVec;
144 /** generate single step for unrolled body in the non vectorized way */
145 LoopUnrollGen genSingle;
146 /** generate code that should be inserted just after unrolled loop body */
147 LoopUnrollGen postUnroll;
148 /**
 * return the vector length; NOTE(review): presumably delivered through
 * the int result of the callback - confirm against kgenLoopUnroll()
 */
149 LoopUnrollGen getVecLen;
150 } LoopUnrollers;
151
152 /*@}*/
153
154 static __inline void
emptyKstring(Kstring * kstr)155 emptyKstring(Kstring *kstr)
156 {
157 kstr->buf[0] = '\0';
158 }
159
160 static __inline bool
isKstringEmpty(const Kstring * kstr)161 isKstringEmpty(const Kstring *kstr)
162 {
163 return (kstr->buf[0] == '\0');
164 }
165
166 /**
167 * @internal
168 * @defgroup KGEN_CORE Core API
169 * @ingroup KGEN_INFRA
170 */
171 /*@{*/
172
173 /**
174 * @internal
175 * @brief Create new generator context
176 *
177 * @param[out] srcBuf Source buffer; if NULL, statements are not
178 * actually stored anywhere, and only their
179 * overall size is calculated
180 * @param[in] srcBufLen Maximal length of the source which is being
181 * generated; ignored if an actual buffer was not
182 * specified
183 * @param[in] fmt Format the source if true. Code formatting
184 * involves tabulation and watching the line width
185 *
186 * @return New generator context on success. Returns NULL
187 * if there is not enough memory to allocate internal structures
188 */
189 struct KgenContext
190 *createKgenContext(char *srcBuf, size_t srcBufLen, bool fmt);
191
192 /**
193 * @internal
194 * @brief Destroy a kernel generator context
195 *
196 * @param[out] ctx An existing generator context to be destroyed
197 */
198 void
199 destroyKgenContext(struct KgenContext *ctx);
200
201 /**
202 * @internal
203 * @brief Reset a kernel generator context used before
204 *
205 * @param[out] ctx A generator context to be reset
206 *
207 * Clear the source buffer and any other information associated
208 * with this context
209 */
210 void
211 resetKgenContext(struct KgenContext *ctx);
212
213 /**
214 * @internal
215 * @brief Synchronize formatting of 2 contexts
216 *
217 * @param[out] srcCtx Source generator context (the one being modified)
218 * @param[in] dstCtx Destination (target) generator context; read only
219 * @param[in] nrTabs Number of tabs to be inserted in the source context.
220 * It is relative to the current nesting level of the
221 * target context. It must be not less than zero, and
222 * the resulting number of tabs, which is evaluated as
223 * the target context's nesting level plus 'nrTabs',
224 * must not exceed 'MAX_TABS'
225 *
226 * The function is useful when code from one context needs to be
227 * inserted into another one without disturbing formatting.
228 *
229 * @return 0 on success, -EINVAL if the 'nrTabs' parameter is out
230 * of range
231 */
232 int
233 kgenSyncFormatting(
234 struct KgenContext *srcCtx,
235 const struct KgenContext *dstCtx,
236 int nrTabs);
237
238 /**
239 * @internal
240 * @brief Add a function declaration
241 *
242 * @param[out] ctx Generator context
243 * @param[in] decl The declaration to be added
244 *
245 * @return 0 on success; -1 if the source code exceeds the buffer,
246 * or the level of code nesting is not zero, or the returned
247 * type is not defined, or there is no parenthesis opening
248 * the argument list
249 */
250 int
251 kgenDeclareFunction(struct KgenContext *ctx, const char *decl);
252
253 /**
254 * @internal
255 * @brief Begin function body
256 *
257 * @param[out] ctx Generator context
258 *
259 * Adds the opening bracket and increments a nesting counter.
260 *
261 * @return 0 on success; -1 if the source code exceeds the buffer
262 */
263 int
264 kgenBeginFuncBody(struct KgenContext *ctx);
265
266 /**
267 * @internal
268 * @brief End function body
269 *
270 * @param[out] ctx Generator context
271 *
272 * Adds the closing bracket and decrements a nesting counter
273 *
274 * @return 0 on success; -1 if the source code exceeds the buffer,
275 * or code nesting is not 1
276 */
277 int
278 kgenEndFuncBody(struct KgenContext *ctx);
279
280 /**
281 * @internal
282 * @brief Get the last declared function name for the context
283 *
284 * @param[out] buf A buffer to store the function name
285 * @param[in] buflen Size of the buffer
286 * @param[in] ctx Generator context
287 *
288 * @return an integer status (the name itself is delivered through 'buf');
289 * -1 if no functions were declared or the passed buffer is
290 * insufficient. NOTE(review): presumably 0 on success - confirm
291 */
292 int
293 kgenGetLastFuncName(
294 char *buf,
295 size_t buflen,
296 const struct KgenContext *ctx);
297
298 /**
299 * @internal
300 * @brief Begin new execution branch: conditional branch or loop
301 *
302 * @param[out] ctx Generator context
303 * @param[in] stmt A statement containing a branch control code.
304 * Ignored if NULL.
305 *
306 * The opening bracket and trailing new line symbol are added
307 * automatically and should not be passed
308 *
309 * @return 0 on success; -1 if the overall source exceeds the set
310 * limit or nesting exceeds the maximum allowed one
311 */
312 int
313 kgenBeginBranch(struct KgenContext *ctx, const char *stmt);
314
315 /**
316 * @internal
317 * @brief End the current code branch
318 *
319 * @param[out] ctx Generator context
320 * @param[in] stmt A statement containing a branch control code
321 *
322 * The closing bracket as well as the trailing ';' and '\n' are added
323 * automatically and should not be passed.
324 * The statement passed in 'stmt' is appended after the closing bracket.
325 *
326 * @return 0 on success; -1 if the overall source exceeds the set limit,
327 * or there is no opened branch
328 */
329 int
330 kgenEndBranch(struct KgenContext *ctx, const char *stmt);
331
332 /**
333 * @internal
334 * @brief Add a statement to generated source
335 *
336 * @param[out] ctx Generator context
337 * @param[in] stmt A statement to be added
338 *
339 * If formatting is enabled and the statement is multiline, all the lines are
340 * formatted automatically. It is strongly discouraged to use this function
341 * for statements containing variable or function declarations, or branch
342 * bounds. The appropriate functions should be used for those to avoid
343 * unexpected side effects.
344 *
345 * @return 0 on success; -1 if the overall source exceeds the set limit
346 */
347 int
348 kgenAddStmt(struct KgenContext *ctx, const char *stmt);
349
/*
 * printf-style variadic companion of the statement adding API.
 *
 * ctx: generator context
 * fmt: format string, followed by the arguments it refers to
 *
 * NOTE(review): presumably the formatted text is added in the same way as
 * kgenAddStmt() does it, with the same return convention - confirm against
 * the implementation.
 */
350 int
351 kgenPrintf(struct KgenContext *ctx, const char *fmt,...);
352
/*
 * Statement batch API. The batch is an opaque object handled only through
 * the functions below.
 *
 * NOTE(review): nothing in this header states the semantics, so the
 * following is presumed from the names and signatures and must be confirmed
 * against the implementation:
 *  - createStmtBatch() allocates a new batch (returning NULL on failure);
 *  - kgenAddStmtToBatch() / kgenBatchPrintf() queue a plain or printf-style
 *    formatted statement with the given 'priority' (likely bounded by
 *    MAX_STATEMENT_PRIORITY);
 *  - flushStmtBatch() moves the queued statements to the context 'ctx';
 *  - destroyStmtBatch() releases the batch.
 */
353 struct StatementBatch
354 *createStmtBatch(void);
355
356 int
357 kgenAddStmtToBatch(
358 struct StatementBatch *batch,
359 int priority,
360 const char *stmt);
361
362 int
363 kgenBatchPrintf(
364 struct StatementBatch *batch,
365 int priority,
366 const char *fmt,...);
367
368 int
369 flushStmtBatch(struct KgenContext *ctx, struct StatementBatch *batch);
370
371 void
372 destroyStmtBatch(struct StatementBatch *batch);
373
374 /**
375 * @internal
376 * @brief Add a blank line to generated source
377 *
378 * @param[out] ctx Generator context
379 *
380 * @return 0 on success; -1 if the overall source exceeds
381 * the set limit
382 */
383 int
384 kgenAddBlankLine(struct KgenContext *ctx);
385
386 /**
387 * @internal
388 * @brief Get resulting source size
389 *
390 * @param[out] ctx Generator context
391 *
392 * @return size of the overall source that was added to the
393 * generator context, including the trailing null
394 * byte
395 */
396 size_t
397 kgenSourceSize(struct KgenContext *ctx);
398
399 /*@}*/
400
401 /**
402 * @internal
403 * @defgroup KGEN_BASIC Basic generating functions
404 * @ingroup KGEN_INFRA
405 */
406 /*@{*/
407
408 /**
409 * @internal
410 * @brief Add barrier
411 *
412 * @param[out] ctx Generator context
413 * @param[in] fence Fence type
414 *
415 * @return 0 on success, and -EOVERFLOW on buffer overflowing
416 */
417 int
418 kgenAddBarrier(struct KgenContext *ctx, CLMemFence fence);
419
420 /**
421 * @internal
422 * @brief Add memory fence
423 *
424 * @param[out] ctx Generator context
425 * @param[in] fence Fence type
426 *
427 * @return 0 on success, and -EOVERFLOW on buffer overflowing
428 */
429 int
430 kgenAddMemFence(struct KgenContext *ctx, CLMemFence fence);
431
432 /**
433 * @internal
434 * @brief Add local ID declaration and evaluating expression
435 *
436 * @param[out] ctx Generator context
437 * @param[in] lidName Local id variable name
438 * @param[in] pgran Data parallelism granularity
439 *
440 * The resulting expression depends on the work group dimension and size
441 * of the first one.
442 *
443 * @return 0 on success, and -EOVERFLOW on buffer overflowing
444 */
445 int
446 kgenDeclareLocalID(
447 struct KgenContext *ctx,
448 const char *lidName,
449 const PGranularity *pgran);
450
451 /**
452 * @internal
453 * @brief Add work group ID declaration and evaluating expression
454 *
455 * @param[out] ctx Generator context
456 * @param[in] gidName Group id variable name
457 * @param[in] pgran Data parallelism granularity
458 *
459 * The resulting expression depends on the work group dimension and size
460 * of the first one.
461 *
462 * @return 0 on success, and -EOVERFLOW on buffer overflowing
463 */
464 int
465 kgenDeclareGroupID(
466 struct KgenContext *ctx,
467 const char *gidName,
468 const PGranularity *pgran);
469
470 /*
471 * TODO: deprecate when casting is eliminated
472 *
473 * declare unified pointers
474 *
475 * @withDouble: pointers to double based types are needed
476 *
477 * On success returns 0, on buffer overflowing returns -EOVERFLOW
478 */
479 int
480 kgenDeclareUptrs(struct KgenContext *ctx, bool withDouble);
481
482 /*@}*/
483
484 /**
485 * @internal
486 * @defgroup KGEN_HELPERS Generating helpers
487 * @ingroup KGEN_INFRA
488 */
489 /*@{*/
490
491 /**
492 * @internal
493 * @brief Assistant for loop body unrolling
494 *
495 * @param[out] ctx Generator context
496 * @param[in] loopCtl Unrolled loop control information
497 * @param[in] dtype Data type to unroll the loop body for
498 * @param[in] unrollers Set of subgenerators;
499 * If 'preUnroll', 'postUnroll' or 'genSingleVec'
500 * is set to NULL, it is ignored. Vectorized unrolling
501 * is not used for 'COMPLEX_DOUBLE' type
502 * @param[out] priv Private data for generators
503 *
504 * The unrolled loop can be either single or double. In the case
505 * of the double loop only the inner loop is unrolled, and the outer
506 * loop is generated in the standard way using the passed loop
507 * counter name and its bound. For the single loop the 'ocName' field of the
508 * 'loopCtl' structure should be NULL.
509 *
510 * @return 0 on success. On error returns negated error code:\n
511 *\n
512 * -EOVERFLOW: code buffer overflowed\n
513 * -EINVAL: invalid parameter is passed
514 * (unsupported data type, or 'genSingle' generator
515 * is not specified)
516 */
517 int
518 kgenLoopUnroll(
519 struct KgenContext *ctx,
520 LoopCtl *loopCtl,
521 DataType dtype,
522 const LoopUnrollers *unrollers,
523 void *priv);
524
525 /**
526 * @internal
527 * @brief Create code generation guard
528 *
529 * @param[out] ctx Generator context
530 * @param[in] genCallback Generator callback which is invoked if a function
531 * matching the pattern is not found
532 * @param[in] patSize Pattern size
533 *
534 * The guard prevents generating several functions that match the same
535 * pattern and as a result have the same name.
536 *
537 * @return a guard object on success; -ENOMEM if there is
538 * not enough memory to allocate internal structures.
 * NOTE(review): the return type is a pointer, so the way -ENOMEM is
 * reported (encoded pointer vs. NULL) must be confirmed against the
 * implementation before the result is checked.
539 */
540 struct KgenGuard
541 *createKgenGuard(
542 struct KgenContext *ctx,
543 int (*genCallback)(struct KgenContext *ctx, const void *pattern),
544 size_t patSize);
545
546 /**
547 * @internal
548 * @brief Reinitialize generator guard
549 *
550 * @param[out] guard An existing generation guard
551 * @param[out] ctx Generator context
552 * @param[in] genCallback Generator callback which is invoked if a function
553 * matching the pattern is not found
554 * @param[in] patSize Pattern size
555 */
556 void
557 reinitKgenGuard(
558 struct KgenGuard *guard,
559 struct KgenContext *ctx,
560 int (*genCallback)(struct KgenContext *ctx, const void *pattern),
561 size_t patSize);
562
563 /**
564 * @internal
565 * @brief Find an already generated function or generate it
566 *
567 * @param[out] guard An existing generation guard
568 * @param[in] pattern Pattern the function being looked for should match
569 * @param[out] name Buffer to store a name of the function
570 * @param[in] nameLen Name buffer length
571 *
572 * At first it tries to find an already generated function matching the passed
573 * pattern. If the guard doesn't find the function, it invokes the generator
574 * callback
575 *
576 * NOTE: names of generated functions should not exceed 'FUNC_NAME_MAXLEN'
577 * constant.
578 *
579 * @return 0 on success, otherwise returns a negated error code:\n
580 * -ENOMEM: not enough memory to allocate internal structures\n
581 * -EOVERFLOW: source buffer overflowing
582 */
583 int
584 findGenerateFunction(
585 struct KgenGuard *guard,
586 const void *pattern,
587 char *name,
588 size_t nameLen);
589
590 /**
591 * @internal
592 * @brief Destroy code generation guard
593 *
594 * @param[out] guard A guard instance to be destroyed
595 */
596 void
597 destroyKgenGuard(struct KgenGuard *guard);
598
599 /*@}*/
600
601 /**
602 * @internal
603 * @defgroup KGEN_AUX_FUNCS Auxiliary functions
604 * @ingroup KGEN_INFRA
605 */
606 /*@{*/
607
/*
 * Kstring helpers. None of them returns a status.
 * NOTE(review): presumably they mirror strcpy(), sprintf() and an appending
 * sprintf() respectively, with the Kstring buffer (KSTRING_MAXLEN bytes) as
 * the destination; the behavior on too long input is not stated in this
 * header - confirm against the implementation.
 */
608 void
609 kstrcpy(Kstring *kstr, const char *str);
610
611 void
612 ksprintf(Kstring *kstr, const char *fmt,...);
613
614 void
615 kstrcatf(Kstring *kstr, const char *fmt,...);
616
617 // get the type name of a unified pointer of the given kind
618 const char
619 *uptrTypeName(UptrType type);
620
621 /**
622 * @internal
623 * @brief Get a function prefix dependent on the BLAS data type
624 *
625 * @param[in] type Data type
626 *
627 * A literal returned by the function is assumed to be used as the prefix
628 * of some generated function to put the accent on the BLAS data type it
629 * operates with.
630 *
631 * @return 0 if an unknown type is passed
632 */
633 char
634 dtypeToPrefix(DataType type);
635
636 /**
637 * @internal
638 * @brief convert a BLAS data type to the respective built-in OpenCL type
639 *
640 * @param[in] dtype Data type
641 *
642 * @return NULL if an unknown type is passed
643 */
644 const char
645 *dtypeBuiltinType(DataType dtype);
646
647 /**
648 * @internal
649 * @brief Return unified pointer field corresponding to the data type
650 *
651 * @param[in] dtype Data type
652 *
653 * @return NULL if an unknown type is passed
654 */
655 const char
656 *dtypeUPtrField(DataType dtype);
657
658 /**
659 * @internal
660 * @brief Return "one" value string depending on the data type
661 *
662 * @param[in] dtype Data type
663 *
664 * @return NULL if an unknown type is passed
665 */
666 const char
667 *strOne(DataType dtype);
668
669 /**
670 * @internal
671 * @brief Get vector type name
672 *
673 * @param[in] dtype Data type
674 * @param[in] vecLen Vector length for the type. Must be set to 1 if
675 * the type is scalar.
676 * @param[out] typeName Location to store pointer to a constant string
677 * with the type name
678 * @param[out] typePtrName Location to store unified pointer field
679 * corresponding to the vector consisting of elements
680 * of \b dtype \b type
681 */
682 void
683 getVectorTypeName(
684 DataType dtype,
685 unsigned int vecLen,
686 const char **typeName,
687 const char **typePtrName);
688
689 /*@}*/
690
691 #endif /* KERNGEN_H_ */
692