1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
17 
18 /*
19  * Kernel generator related common definitions
20  */
21 
22 #ifndef KERNGEN_H_
23 #define KERNGEN_H_
24 
25 #include <sys/types.h>
26 #include <errno.h>
27 
28 #if defined (_MSC_VER)
29 #include <msvc.h>
30 #endif
31 
32 #include <defbool.h>
33 #include <list.h>
34 #include <cltypes.h>
35 #include <mutex.h>
36 #include <granulation.h>
37 #include <trace_malloc.h>
38 
39 /**
40  * @internal
41  * @defgroup KGEN_INFRA Kernel generator infrastructure
42  */
43 /*@{*/
44 
/*
 * Compiler dependent string literal: "I" when the file is built with MSVC
 * (_MSC_VER is defined) and "z" with any other compiler.
 * NOTE(review): presumably the printf length modifier used for size_t
 * arguments (e.g. "%" SPREFIX "u") -- confirm against the users.
 */
#if !defined(_MSC_VER)
#define SPREFIX "z"
#else
#define SPREFIX "I"
#endif
50 
/*
 * All-ones size_t value.
 * NOTE(review): judging by the name, it marks a subproblem dimension that
 * is not used -- confirm against the users.
 *
 * The expansion is fully parenthesized so that the macro behaves as a single
 * operand in any expression (e.g. 'sizeof SUBDIM_UNUSED').
 */
#define SUBDIM_UNUSED ((size_t)-1)
52 
/*
 * Generator limits. 'MAX_TABS' is the upper bound of the tabulation level
 * (see kgenSyncFormatting()).
 * NOTE(review): the other two presumably bound the length of a single
 * statement and the priority of a batched statement -- confirm.
 */
enum {
    MAX_STATEMENT_LENGTH = 4096,
    MAX_STATEMENT_PRIORITY = 63,
    MAX_TABS = 16
};
58 
enum {
    /* maximum number of subproblem dimensions */
    MAX_SUBDIMS = 3,
    /* maximum code nesting */
    MAX_NESTING = 10,
    /* size of the buffer held by a Kstring */
    KSTRING_MAXLEN = 256,
    /* maximum length of a generated function name */
    FUNC_NAME_MAXLEN = KSTRING_MAXLEN
};
68 
69 typedef struct{
70 	SubproblemDim	subdims[MAX_SUBDIMS];
71 	PGranularity	pgran;
72 }DecompositionStruct;
73 
/*
 * Types that are only forward declared in this header; the API below
 * handles them exclusively through pointers.
 */
struct StatementBatch;
struct KgenGuard;
struct KgenContext;
77 
78 /**
79  * @internal
80  * @defgroup KGEN_TYPES Types
81  * @ingroup KGEN_INFRA
82  */
83 /*@{*/
84 
/**
 * @internal
 * @brief Kind of memory a fence applies to
 */
typedef enum CLMemFence {
    CLK_LOCAL_MEM_FENCE = 0,    /**< fence for local memory operations */
    CLK_GLOBAL_MEM_FENCE = 1    /**< fence for global memory operations */
} CLMemFence;
95 
/*
 * Kind of a unified pointer (see uptrTypeName()).
 * NOTE(review): the enumerators presumably name the global, local and
 * private address spaces -- confirm.
 *
 * TODO: deprecate
 */
typedef enum UptrType {
    UPTR_GLOBAL = 0,
    UPTR_LOCAL = 1,
    UPTR_PRIVATE = 2
} UptrType;
102 
103 /**
104  * @internal
105  * @brief Null-terminated string being a part of a kernel
106  */
107 typedef struct Kstring {
108     /** Buffer storing the string */
109     char buf[KSTRING_MAXLEN];
110 } Kstring;
111 
/**
 * @internal
 * @brief Signature of a custom generator used for loop unrolling
 *
 * It takes the generator context and a private data pointer, and
 * returns an int.
 */
typedef int (*LoopUnrollGen)(struct KgenContext *ctx, void *priv);
118 
119 /*@}*/
120 
/**
 * @internal
 * @brief Control information of a loop to be unrolled
 */
typedef struct LoopCtl {
    /** name of the outer loop counter */
    const char *ocName;
    /**
     * bound of the outer loop.
     * NOTE(review): presumably 'val' is the valid member when 'obConst'
     * is set and 'name' otherwise -- confirm.
     */
    union {
        const char *name;
        unsigned long val;
    } outBound;
    /** flag showing that the outer loop bound is constant */
    bool obConst;
    /** bound of the inner loop */
    unsigned long inBound;
} LoopCtl;
134 
135 /**
136  * @internal
137  * @brief Set of loop unrolling subgenerators
138  */
139 typedef struct LoopUnrollers {
140     /** generate preparative code before unrolling */
141     LoopUnrollGen preUnroll;
142     /** generate single step for unrolled body in the vectorized way */
143     LoopUnrollGen genSingleVec;
144     /** generated single step for unrolled body in non vectorized way */
145     LoopUnrollGen genSingle;
146     /** generate code that should be inserted just after unrolled loop body */
147     LoopUnrollGen postUnroll;
148     /** return veclen*/
149     LoopUnrollGen getVecLen;
150 } LoopUnrollers;
151 
152 /*@}*/
153 
154 static __inline void
emptyKstring(Kstring * kstr)155 emptyKstring(Kstring *kstr)
156 {
157     kstr->buf[0] = '\0';
158 }
159 
160 static __inline bool
isKstringEmpty(const Kstring * kstr)161 isKstringEmpty(const Kstring *kstr)
162 {
163     return (kstr->buf[0] == '\0');
164 }
165 
166 /**
167  * @internal
168  * @defgroup KGEN_CORE Core API
169  * @ingroup KGEN_INFRA
170  */
171 /*@{*/
172 
/**
 * @internal
 * @brief Allocate a new generator context
 *
 * @param[out] srcBuf        Buffer receiving the generated source. If NULL,
 *                           statements are not actually stored anywhere and
 *                           only their overall size is calculated
 * @param[in]  srcBufLen     Maximal length of the generated source; ignored
 *                           if no actual buffer is passed
 * @param[in]  fmt           Format the source. Code formatting takes care of
 *                           tabulation and watches the line width
 *
 * @return the new generator context on success, NULL if there is not enough
 *         memory to allocate internal structures
 */
struct KgenContext *
createKgenContext(char *srcBuf, size_t srcBufLen, bool fmt);
191 
/**
 * @internal
 * @brief Release a kernel generator context
 *
 * @param[out] ctx           The existing generator context to destroy
 */
void destroyKgenContext(struct KgenContext *ctx);
200 
/**
 * @internal
 * @brief Bring an already used kernel generator context back to
 *        a clean state
 *
 * @param[out] ctx           The generator context to reset
 *
 * The source buffer and the other information associated with the context
 * are cleared.
 */
void resetKgenContext(struct KgenContext *ctx);
212 
/**
 * @internal
 * @brief Synchronize formatting of 2 contexts
 *
 * @param[out] srcCtx        Source generator context; this is the context
 *                           the tabs are inserted in
 * @param[in]  dstCtx        Destination (target) generator context; it is
 *                           only read
 * @param[in]  nrTabs        Number of tabs to be inserted in the source
 *                           context, relative to the current nesting level
 *                           of the target context. It must not be negative,
 *                           and the resulting number of tabs, evaluated as
 *                           the target context's nesting level plus 'nrTabs',
 *                           must not exceed 'MAX_TABS'
 *
 * The function is useful when code from one context has to be inserted
 * into another one without disturbing the formatting.
 *
 * @return 0 on success, -EINVAL if the 'nrTabs' parameter is out of range
 */
int kgenSyncFormatting(
    struct KgenContext *srcCtx,
    const struct KgenContext *dstCtx,
    int nrTabs);
237 
/**
 * @internal
 * @brief Add a function declaration to the generated source
 *
 * @param[out] ctx           Generator context
 * @param[in]  decl          Declaration to add
 *
 * @return 0 on success; -1 if the source code exceeds the buffer, or
 *         the code nesting level is not zero, or the return type is not
 *         defined, or the parenthesis opening the argument list is missing
 */
int kgenDeclareFunction(struct KgenContext *ctx, const char *decl);
252 
/**
 * @internal
 * @brief Open a function body
 *
 * @param[out] ctx           Generator context
 *
 * The opening bracket is added and the nesting counter is incremented.
 *
 * @return 0 on success; -1 if the source code exceeds the buffer
 */
int kgenBeginFuncBody(struct KgenContext *ctx);
265 
/**
 * @internal
 * @brief Close a function body
 *
 * @param[out] ctx           Generator context
 *
 * The closing bracket is added and the nesting counter is decremented.
 *
 * @return 0 on success; -1 if the source code exceeds the buffer,
 *         or the code nesting is not 1
 */
int kgenEndFuncBody(struct KgenContext *ctx);
279 
/**
 * @internal
 * @brief Get the name of the function declared last in the context
 *
 * @param[out] buf           Buffer to store the function name in
 * @param[in]  buflen        Size of the buffer
 * @param[in]  ctx           Generator context
 *
 * @return -1 if no functions were declared or the passed buffer is
 *         insufficient.
 *         NOTE(review): the return type is int, so the name is handed back
 *         through 'buf' and not as a returned pointer; the success value is
 *         presumably 0 -- confirm against the implementation.
 */
int kgenGetLastFuncName(
    char *buf,
    size_t buflen,
    const struct KgenContext *ctx);
297 
/**
 * @internal
 * @brief Open a new execution branch: a conditional branch or a loop
 *
 * @param[out] ctx           Generator context
 * @param[in]  stmt          Statement containing the branch control code;
 *                           ignored if NULL
 *
 * The opening bracket and the trailing new line symbol are added
 * automatically and should not be passed.
 *
 * @return 0 on success; -1 if the overall source exceeds the set limit
 *         or the nesting exceeds the maximum allowed one
 */
int kgenBeginBranch(struct KgenContext *ctx, const char *stmt);
314 
/**
 * @internal
 * @brief Close the current code branch
 *
 * @param[out] ctx           Generator context
 * @param[in]  stmt          Statement containing a branch control code
 *
 * The closing bracket as well as the trailing ';' and '\n' are added
 * automatically and should not be passed. The statement passed in 'stmt'
 * is appended after the closing bracket.
 *
 * @return 0 on success; -1 if the overall source exceeds the set limit,
 *         or there is no opened branch
 */
int kgenEndBranch(struct KgenContext *ctx, const char *stmt);
331 
/**
 * @internal
 * @brief Append a statement to the generated source
 *
 * @param[out] ctx           Generator context
 * @param[in]  stmt          Statement to add
 *
 * If formatting is enabled and the statement is multiline, all its lines
 * are formatted automatically. Using this function for statements that
 * contain variable or function declarations, or branch bounds, is strongly
 * discouraged; the dedicated functions should be used for those to avoid
 * unexpected side effects.
 *
 * @return 0 on success; -1 if the overall source exceeds the set limit
 */
int kgenAddStmt(struct KgenContext *ctx, const char *stmt);
349 
/*
 * Variadic function taking a generator context and a format string.
 * NOTE(review): presumably the printf-like counterpart of kgenAddStmt(),
 * i.e. the formatted text is added to the generated source -- confirm
 * the behavior and the return values against the implementation.
 */
int kgenPrintf(struct KgenContext *ctx, const char *fmt, ...);
352 
/*
 * Takes no arguments and returns a pointer to a statement batch.
 * NOTE(review): presumably allocates a new, empty batch that is released
 * with destroyStmtBatch(); the value returned on failure is not visible
 * from here -- confirm against the implementation.
 */
struct StatementBatch *createStmtBatch(void);
355 
/*
 * Takes a statement batch, a priority and a statement string.
 * NOTE(review): presumably stores the statement in the batch under the
 * given priority, which is likely bounded by MAX_STATEMENT_PRIORITY --
 * confirm the semantics and the return values against the implementation.
 */
int kgenAddStmtToBatch(
    struct StatementBatch *batch,
    int priority,
    const char *stmt);
361 
/*
 * Variadic function taking a statement batch, a priority and a format
 * string.
 * NOTE(review): presumably the printf-like counterpart of
 * kgenAddStmtToBatch() -- confirm the behavior and the return values
 * against the implementation.
 */
int kgenBatchPrintf(
    struct StatementBatch *batch,
    int priority,
    const char *fmt, ...);
367 
/*
 * Takes a generator context and a statement batch.
 * NOTE(review): presumably moves the statements collected in the batch
 * into the source of the context -- confirm the ordering rules and the
 * return values against the implementation.
 */
int flushStmtBatch(struct KgenContext *ctx, struct StatementBatch *batch);
370 
/*
 * Takes a statement batch and returns nothing.
 * NOTE(review): presumably releases a batch obtained from
 * createStmtBatch() -- confirm against the implementation.
 */
void destroyStmtBatch(struct StatementBatch *batch);
373 
/**
 * @internal
 * @brief Append a blank line to the generated source
 *
 * @param[out] ctx           Generator context
 *
 * @return 0 on success; -1 if the overall source exceeds the set limit
 */
int kgenAddBlankLine(struct KgenContext *ctx);
385 
/**
 * @internal
 * @brief Get the size of the resulting source
 *
 * @param[out] ctx           Generator context
 *
 * @return size of the overall source added to the generator context,
 *         including the trailing null byte
 */
size_t kgenSourceSize(struct KgenContext *ctx);
398 
399 /*@}*/
400 
401 /**
402  * @internal
403  * @defgroup KGEN_BASIC Basic generating functions
404  * @ingroup KGEN_INFRA
405  */
406 /*@{*/
407 
408 /**
409  * @internal
410  * @brief Add barrier
411  *
412  * @param[out] ctx           Generator context
413  * @param[in]  fence         Fence type
414  *
415  * @return 0 on success, and -EOVERFLOW on buffer overflowing
416  */
417 int
418 kgenAddBarrier(struct KgenContext *ctx, CLMemFence fence);
419 
420 /**
421  * @internal
422  * @brief Add memory fence
423  *
424  * @param[out] ctx           Generator context
425  * @param[in]  fence         Fence type
426  *
427  * @return 0 on success, and -EOVERFLOW on buffer overflowing
428  */
429 int
430 kgenAddMemFence(struct KgenContext *ctx, CLMemFence fence);
431 
432 /**
433  * @internal
434  * @brief Add local ID declaration and evaluating expression
435  *
436  * @param[out] ctx           Generator context
437  * @param[in]  lidName       Local id variable name
438  * @param[in]  pgran         Data parallelism granularity
439  *
440  * The resulting expression depends on the work group dimension and size
441  * of the first one.
442  *
443  * @return 0 on success, and -EOVERFLOW on buffer overflowing
444  */
445 int
446 kgenDeclareLocalID(
447     struct KgenContext *ctx,
448     const char *lidName,
449     const PGranularity *pgran);
450 
451 /**
452  * @internal
453  * @brief Add work group ID declaration and evaluating expression
454  *
455  * @param[out] ctx           Generator context
456  * @param[in]  gidName       Group id variable name
457  * @param[in]  pgran         Data parallelism granularity
458  *
459  * The resulting expression depends on the work group dimension and size
460  * of the first one.
461  *
462  * @return 0 on success, and -EOVERFLOW on buffer overflowing
463  */
464 int
465 kgenDeclareGroupID(
466     struct KgenContext *ctx,
467     const char *gidName,
468     const PGranularity *pgran);
469 
/*
 * TODO: deprecate when casting is eliminated
 *
 * Declare unified pointers
 *
 * @ctx: generator context
 * @withDouble: pointers to double based types are needed
 *
 * Returns 0 on success, -EOVERFLOW if the buffer overflows
 */
int kgenDeclareUptrs(struct KgenContext *ctx, bool withDouble);
481 
482 /*@}*/
483 
484 /**
485  * @internal
486  * @defgroup KGEN_HELPERS Generating helpers
487  * @ingroup KGEN_INFRA
488  */
489 /*@{*/
490 
491 /**
492  * @internal
493  * @brief Assistant for loop body unrolling
494  *
495  * @param[out] ctx           Generator context
496  * @param[in]  loopCtl       Unrolled loop control information
497  * @param[in]  dtype         Data type to unroll the loop body for
498  * @param[in]  unrollers     Set of subgenerators;
499  *                           If 'preUnroll', 'postUnroll' or 'vecUnroll'
500  *                           is set to NULL, it is ignored. Vectorized unrolling
501  *                           is not used for 'COMPLEX_DOUBLE' type
502  * @param[out] priv          Private data for generators
503  *
504  * The unrolled loop can be as well single as double. In the case
505  * of the double loop only the inner loop is unrolled, and the outer
506  * loop is generated in the standard way with using the passed loop
507  * counter name and its bound. For the single loop 'ocName' field of the
508  * 'loop' structure should be NULL.
509  *
510  * @return 0 on success. On error returns negated error code:\n
511  *\n
512  *      -EOVERFLOW: code buffer overflowed\n
513  *      -EINVAL: invalid parameter is passed
514  *               (unsupported data type, or 'genSingle' generator
515  *               is not specified)
516  */
517 int
518 kgenLoopUnroll(
519     struct KgenContext *ctx,
520     LoopCtl *loopCtl,
521     DataType dtype,
522     const LoopUnrollers *unrollers,
523     void *priv);
524 
/**
 * @internal
 * @brief Create a code generation guard
 *
 * @param[out] ctx           Generator context
 * @param[in]  genCallback   Generator callback which is invoked if a function
 *                           matching a pattern is not found
 * @param[in]  patSize       Size of the pattern
 *
 * The guard prevents generating several functions that match the same
 * pattern and, as a result, have the same name.
 *
 * @return a guard object on success; -ENOMEM if there is not enough memory
 *         to allocate internal structures.
 *         NOTE(review): the function returns a pointer, so the way -ENOMEM
 *         is encoded cannot be told from this declaration -- confirm
 *         against the implementation.
 */
struct KgenGuard *
createKgenGuard(
    struct KgenContext *ctx,
    int (*genCallback)(struct KgenContext *ctx, const void *pattern),
    size_t patSize);
545 
/**
 * @internal
 * @brief Initialize an existing generator guard anew
 *
 * @param[out] guard         The existing generation guard
 * @param[out] ctx           Generator context
 * @param[in]  genCallback   Generator callback which is invoked if a function
 *                           matching a pattern is not found
 * @param[in]  patSize       Size of the pattern
 */
void reinitKgenGuard(
    struct KgenGuard *guard,
    struct KgenContext *ctx,
    int (*genCallback)(struct KgenContext *ctx, const void *pattern),
    size_t patSize);
562 
/**
 * @internal
 * @brief Look up an already generated function, or generate it
 *
 * @param[out] guard         The existing generation guard
 * @param[in]  pattern       Pattern the looked up function should match
 * @param[out] name          Buffer to store the name of the function in
 * @param[in]  nameLen       Length of the name buffer
 *
 * An already generated function matching the passed pattern is searched
 * for first. If the guard does not find one, it invokes the generator
 * callback.
 *
 * NOTE: names of generated functions should not exceed the
 *       'FUNC_NAME_MAXLEN' constant.
 *
 * @return 0 on success, otherwise returns a negated error code:\n
 *      -ENOMEM: not enough memory to allocate internal structures\n
 *      -EOVERFLOW: source buffer overflowing
 */
int findGenerateFunction(
    struct KgenGuard *guard,
    const void *pattern,
    char *name,
    size_t nameLen);
589 
/**
 * @internal
 * @brief Release a code generation guard
 *
 * @param[out] guard         The guard instance to destroy
 */
void destroyKgenGuard(struct KgenGuard *guard);
598 
599 /*@}*/
600 
601 /**
602  * @internal
603  * @defgroup KGEN_AUX_FUNCS Auxiliary functions
604  * @ingroup KGEN_INFRA
605  */
606 /*@{*/
607 
608 void
609 kstrcpy(Kstring *kstr, const char *str);
610 
611 void
612 ksprintf(Kstring *kstr, const char *fmt,...);
613 
614 void
615 kstrcatf(Kstring *kstr, const char *fmt,...);
616 
617 // unified pointer type name
618 const char
619 *uptrTypeName(UptrType type);
620 
621 /**
622  * @internal
623  * @brief get a BLAS data type dependendtto function prefix
624  *
625  * @param[in]  type          Data type
626  *
627  * A literal returned by the function is assumed to be used as the prefix
628  * of some generated function to put the accent on the BLAS data type it
629  * operates with.
630  *
631  * @return 0 if an unknown type is passed
632  */
633 char
634 dtypeToPrefix(DataType type);
635 
636 /**
637  * @internal
638  * @brief convert a BLAS data type to the respective built-in OpenCL type
639  *
640  * @param[in]  dtype         Data type
641  *
642  * @return NULL if an unknown type is passed
643  */
644 const char
645 *dtypeBuiltinType(DataType dtype);
646 
647 /**
648  * internal
649  * @brief Return unified pointer field corresponding to the data type
650  *
651  * @param[in]  dtype         Data type
652  *
653  * @Returns NULL if an unknown type is passed
654  */
655 const char
656 *dtypeUPtrField(DataType dtype);
657 
658 /**
659  * @internal
660  * @brief Return "one" value string depending on the data type
661  *
662  * @param[in]  dtype         Data type
663  *
664  * @return NULL if an unknown type is passed
665  */
666 const char
667 *strOne(DataType dtype);
668 
669 /**
670  * @internal
671  * @brief Get vector type name
672  *
673  * @param[in]  dtype         Data type
674  * @param[in]  vecLen        Vector length for the type. Must be set to 1 if
675  *                           the type is scalar.
676  * @param[out] typeName      Location to store pointer to a constant string
677  *                           with the type name
678  * @param[out] typePtrName   Location to store unified pointer field
679  *                           corresponding to the vector consisting of elements
680  *                           of \b dtype \b type
681  */
682 void
683 getVectorTypeName(
684     DataType dtype,
685     unsigned int vecLen,
686     const char **typeName,
687     const char **typePtrName);
688 
689 /*@}*/
690 
691 #endif /* KERNGEN_H_ */
692