1 //===- SCFToGPU.h - Convert loop nests to GPU kernels -----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #ifndef MLIR_CONVERSION_SCFTOGPU_SCFTOGPU_H_ 9 #define MLIR_CONVERSION_SCFTOGPU_SCFTOGPU_H_ 10 11 #include "mlir/Support/LLVM.h" 12 13 namespace mlir { 14 class AffineForOp; 15 class ConversionTarget; 16 struct LogicalResult; 17 class MLIRContext; 18 class Value; 19 class RewritePatternSet; 20 using OwningRewritePatternList = RewritePatternSet; 21 22 namespace scf { 23 class ForOp; 24 } // end namespace scf 25 26 /// Convert a perfect affine loop nest with the outermost loop identified by 27 /// `forOp` into a gpu::Launch operation. Map `numBlockDims` outer loops to 28 /// GPU blocks and `numThreadDims` to GPU threads. The bounds of the loops that 29 /// are mapped should be independent of the induction variables of the other 30 /// mapped loops. 31 /// 32 /// No check on the size of the block or grid, or on the validity of 33 /// parallelization is performed, it is under the responsibility of the caller 34 /// to strip-mine the loops and to perform the dependence analysis before 35 /// calling the conversion. 36 37 // TODO: Consider removing this in favor of affine.for -> affine.parallel 38 // detection followed by an affine.parallel -> scf.parallel -> gpu.launch 39 // conversion 40 LogicalResult convertAffineLoopNestToGPULaunch(AffineForOp forOp, 41 unsigned numBlockDims, 42 unsigned numThreadDims); 43 44 /// Adds the conversion pattern from `scf.parallel` to `gpu.launch` to the 45 /// provided pattern list. 46 void populateParallelLoopToGPUPatterns(RewritePatternSet &patterns); 47 48 /// Configures the rewrite target such that only `scf.parallel` operations that 49 /// are not rewritten by the provided patterns are legal. 50 void configureParallelLoopToGPULegality(ConversionTarget &target); 51 52 } // namespace mlir 53 54 #endif // MLIR_CONVERSION_SCFTOGPU_SCFTOGPU_H_ 55