1 /** 2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef GLOW_OPTIMIZER_GRAPHOPTIMIZER_COMPILATIONCONTEXT_H 17 #define GLOW_OPTIMIZER_GRAPHOPTIMIZER_COMPILATIONCONTEXT_H 18 19 #include "glow/Backends/BackendOptions.h" 20 #include "glow/Graph/PlaceholderBindings.h" 21 #include "glow/Quantization/Base/Base.h" 22 #include "glow/Support/Error.h" 23 24 namespace glow { 25 namespace runtime { 26 struct PartitionConfig; 27 struct PrePartitionedConfig; 28 class DeferredWeightLoader; 29 } // namespace runtime 30 31 /// Configuration for different precision modes. 32 struct PrecisionConfiguration { 33 /// Enum for what kind of transformation should be done for Quantization. 34 enum class QuantizationMode { 35 None, /// Perform no transformations for quantization. 36 Quantize, /// Quantize the graph using previously gathered statistics. 37 Profile, /// Add profiling nodes for quantization statistics gathering. 38 } quantMode{QuantizationMode::None}; 39 40 /// Configuration for Profiling. 41 quantization::ProfilingConfiguration profConfig; 42 43 /// Configuration for Quantization. 44 quantization::QuantizationConfiguration quantConfig; 45 46 /// Enum for what kind of float16 format should be used. 47 enum class Float16Format { 48 None, /// No float16 format should be used. 49 FP16, /// FP16 format for float16 should be used. 50 BFloat16, /// FP16 format for float16 should be used. 51 } float16Format{ 52 Float16Format::FP16}; /// If convertToFp16, float16 format to be used. 53 54 /// Whether to convert the FloatTy to Float16Ty in the Function. 55 bool convertToFP16{false}; 56 57 /// Whether to convert UInt8FusedQTy to UInt8FusedFP16QTy in the Function. 58 bool convertFusedToFP16{false}; 59 60 /// If convertToFP16, whether to convert input Placeholders. 61 bool convertPlaceholdersToFP16{false}; 62 63 /// If convertToFP16, whether to convert Constants. 64 bool convertConstantsToFP16{false}; 65 66 /// If convertToFP16, whether to clip out-of-range FP values to the min/max of 67 /// fp16. 68 bool clipFP16{false}; 69 70 /// If clipFP16, whether to skip clipping inputs of Nodes. 71 bool clipFP16SkipInputs{false}; 72 73 /// Whether to force FP16 accumulation for the SLS family of ops. 74 bool forceFP16AccumSLS{true}; 75 76 /// Used during Quantization and convertToFP16 to keep the original precision 77 /// of specific node kinds (i.e. quantization/FP16 conversion would be skipped 78 /// for any node kinds found here). Used during profiling to prevent nodes 79 /// from being lowered before instrumenting the graph (e.g. do not lower group 80 /// convolutions for profiling; see `-do-not-lower-nodes-for-profiling` in 81 /// docs/Quantization.md). 82 KindSet precisionModeKindSet; 83 84 /// Whether to use the precisionModeKindSet as a whitelist instead of the 85 /// default blacklist. Currently only supported for convertToFP16. 86 bool useSetAsWhitelist{false}; 87 88 /// Converts a float16 \p format into an ElemKind. getElementTypePrecisionConfiguration89 static ElemKind getElementType(Float16Format format) { 90 switch (format) { 91 case Float16Format::FP16: 92 return ElemKind::Float16Ty; 93 case Float16Format::BFloat16: 94 return ElemKind::BFloat16Ty; 95 default: 96 llvm_unreachable("Unknown float16 format"); 97 } 98 } 99 }; 100 101 using QuantizationMode = PrecisionConfiguration::QuantizationMode; 102 103 /// Options relevant to optimizations during compilation. 104 struct OptimizationOptions { 105 /// Only lower, i.e. skip optimizations and precision transformations. Used 106 /// for testing. 107 llvm::SmallSet<Function *, 1> onlyLowerFuns; 108 109 /// If true, perform compile-time computation of constant operations. 110 bool enableConstantFolding{true}; 111 112 /// If true, before any Function optimization, all the Constants will be 113 /// temporarily replaced by Placeholders, preventing the Constants from being 114 /// modified during the normal optimization pipeline. The original Constants 115 /// will be put back in place automatically afterward, and then Constant 116 /// Folding will be run. 117 bool delayAndRecordConstantModification{false}; 118 119 /// If true, this will merge ConvertTo and Quantize nodes into inputs and 120 /// outputs of the Function. This means modifying the types of Placeholders 121 /// and SaveNodes if they have a corresponding ElemKind conversion (ConvertTo, 122 /// Quantize, Dequantize nodes). Note that this must be accompanied by 123 /// modifying the Tensors backing Placeholders at runtime. 124 bool foldElemKindConversionIntoIO{false}; 125 126 /// If true this will fold convertTo and Quantize nodes into only static 127 /// placeholders. The conversion of the Tensors will be handled by the 128 /// provisioner. 129 bool foldStaticPlaceholderConversions{false}; 130 131 /// If true, this will direct the partitioner to use SparseNN partitioning 132 /// scheme 133 bool useSparseNNPartitioningScheme{false}; 134 135 /// If true, SparseNN partiitoning scheme will add extra concats to the 136 /// SLS partition for more efficient inter-partition transfers 137 bool sparseNNPartitioningAddSLSConcats{false}; 138 139 /// If true, SparseNN partiitoning scheme will balance SLS tables across 140 /// cards using a performance model 141 bool sparseNNPartitioningBalancePerfModel{false}; 142 143 /// If true, SparseNN partiitoning scheme will move Layer Normalization 144 /// nodes immediately following SLS into SLS partitions 145 bool sparseNNPartitioningPairLNWithSLS{false}; 146 147 /// The number of cards over which to split SLS tables when using SparseNN 148 /// partitioning scheme 149 unsigned int sparseNNPartitioningSchemeNumCards{1}; 150 151 /// The number of bytes to allocate per card for SLS tables when using 152 /// the SparseNN partitioning scheme 153 unsigned int sparseNNPartitioningSchemeSLSTableKBytesPerCard{0}; 154 155 /// The number of cores to assign to SLS partition when using SparseNN 156 /// partitioning scheme 157 unsigned int sparseNNPartitioningSchemeNumCoresSLS{1}; 158 159 /// The number of cores to assign to non-SLS partition when using SparseNN 160 /// partitioning scheme 161 unsigned int sparseNNPartitioningSchemeNumCoresOther{1}; 162 163 /// The algorithm used for Placement tagging in DAG Optimizer 164 std::string DAGOptimizerPlacementTaggingAlgorithm; 165 166 /// The algorithm used for Parallelization tagging in DAG Optimizer 167 std::string DAGOptimizerParallelizationTaggingAlgorithm; 168 169 /// The number of parallel chunks used in DAG Optimizer parallelization 170 int32_t DAGOptimizerNumParallelChunks; 171 172 /// If true does int64 to int32 type demotion if backend supports for specific 173 /// nodes. 174 bool enableTypeDemotion{true}; 175 176 /// If true, optimizations are allowed to change quantization scale/offset. 177 bool enableQuantParamChanges{false}; 178 }; 179 180 /// Meta information produced during the compilation. Whereas the compile 181 /// options should be interpreted as input variables for the compilation, the 182 /// below structure is output information produced by the compilation process. 183 struct CompilationInfo { 184 /// The hash of the graph before the lowering stage. 185 llvm::hash_code graphPreLowerHash{0}; 186 }; 187 188 /// Context for compilation. 189 struct CompilationContext { 190 /// Used during Profiling. 191 PlaceholderBindings *bindings{nullptr}; 192 193 /// Allows the user to specify user defined partitioning. 194 runtime::PartitionConfig *partitionConfig{nullptr}; 195 196 /// Allows a loader to store a pre-partitioned config. 197 runtime::PrePartitionedConfig *prepartitionedConfig{nullptr}; 198 199 /// If true the HostManager will try to use all available devices on the host. 200 bool saturateHost{false}; 201 202 /// Number of max active requests per instance of this network. 203 unsigned maxActiveRequestsPerInstance{48}; 204 205 /// Used during Quantization and Profiling. 206 LoweredInfoMap *loweredInfoMap{nullptr}; 207 208 /// Select whether in Training or Inference mode. 209 enum class CompilationMode { 210 Train, /// Compile the graph in preparation for training. 211 Infer, /// Compile the graph for inference. Notice that this operation 212 /// changes the graph in a way that is not reversible. 213 NumCompilationModes, /// Used to count the number of CompilationModes. 214 } compMode{CompilationMode::Infer}; 215 216 /// Options for the Backend to use. 217 BackendOptions backendOpts; 218 219 /// Options for the optimizations to use. 220 OptimizationOptions optimizationOpts; 221 222 /// Configuration for different precision modes. 223 PrecisionConfiguration precisionConfig; 224 225 /// Information produced during compilation. 226 CompilationInfo info; 227 228 /// How to annotate the compilation log filename. 229 std::string compilationLogPrefix{"glow"}; 230 231 /// Pointer to deferredWeightLoader object, this is used for large model 232 /// support. 233 runtime::DeferredWeightLoader *deferredWeightLoader{nullptr}; 234 235 /// Whether to print out issues/logging during compilation. Used for example 236 /// to disable printing issues encountered during ConstantFolding. 237 bool verboseCompile{true}; 238 239 /// Call dumpDag on each Function passed to the backend for compilation. 240 bool dumpFinalGraph = false; 241 242 /// Whether to skip stripping the module. 243 bool skipModuleStrip{false}; 244 245 /// Enables Peer to Peer Tensor optimization. 246 bool enableP2P{false}; 247 248 /// Enables Device Resident Tensor optimization. 249 bool enableDRT{false}; 250 251 /// Number of times a function should be replicated on a device. This is 252 /// enabled for single partition networks. For advanced replication setups use 253 /// user-defined partitioning. 254 unsigned replicationCount{1}; 255 256 /// Whether to serialize the DAG that has been optimized and partitioned. 257 bool serializeCompiledDAG{false}; 258 259 /// Whether to call the DAG optimizer after the DAG is created in HostManager. 260 bool callDAGOptimizer{false}; 261 262 CompilationContext(PlaceholderBindings *bindings_ = nullptr, 263 LoweredInfoMap *loweredInfoMap_ = nullptr) bindingsCompilationContext264 : bindings(bindings_), loweredInfoMap(loweredInfoMap_) {} 265 266 /// \returns an error if the CompilationContext is malformed for whatever 267 /// configuration it is set up for, otherwise returns success. verifyCompilationContext268 Error verify() const { 269 RETURN_ERR_IF_NOT(!precisionConfig.useSetAsWhitelist || 270 precisionConfig.convertToFP16, 271 "Can only use the precisionModeKindSet as a whitelist in " 272 "convertToFP16 mode."); 273 274 switch (precisionConfig.quantMode) { 275 case QuantizationMode::Profile: 276 RETURN_ERR_IF_NOT(bindings, 277 ErrorValue::ErrorCode::COMPILE_CONTEXT_MALFORMED, 278 "In Profiling mode, but bindings was not set.\n"); 279 280 RETURN_ERR_IF_NOT(loweredInfoMap, 281 ErrorValue::ErrorCode::COMPILE_CONTEXT_MALFORMED, 282 "In Profiling mode, but loweredInfoMap was not set.\n"); 283 284 RETURN_ERR_IF_NOT(!precisionConfig.convertToFP16, 285 ErrorValue::ErrorCode::COMPILE_CONTEXT_MALFORMED, 286 "Converting to FP16 while profiling is unsupported.\n"); 287 break; 288 289 case QuantizationMode::Quantize: 290 RETURN_ERR_IF_NOT( 291 loweredInfoMap, ErrorValue::ErrorCode::COMPILE_CONTEXT_MALFORMED, 292 "In Quantization mode, but loweredInfoMap was not set.\n"); 293 break; 294 295 case QuantizationMode::None: 296 break; 297 } 298 299 RETURN_ERR_IF_NOT(!(optimizationOpts.foldElemKindConversionIntoIO && 300 optimizationOpts.delayAndRecordConstantModification), 301 "Cannot currently perform elem kind merging into PHs " 302 "when also preventing constant modification."); 303 304 RETURN_ERR_IF_NOT(!(serializeCompiledDAG && 305 !optimizationOpts.delayAndRecordConstantModification), 306 "When serializing the compiled DAG, must also enable " 307 "delayAndRecordConstantModification."); 308 309 return Error::success(); 310 } 311 }; 312 313 using CompilationMode = CompilationContext::CompilationMode; 314 315 }; // namespace glow 316 317 #endif // GLOW_OPTIMIZER_GRAPHOPTIMIZER_COMPILATIONCONTEXT_H 318