1 /**
2  * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef GLOW_OPTIMIZER_GRAPHOPTIMIZER_COMPILATIONCONTEXT_H
17 #define GLOW_OPTIMIZER_GRAPHOPTIMIZER_COMPILATIONCONTEXT_H
18 
19 #include "glow/Backends/BackendOptions.h"
20 #include "glow/Graph/PlaceholderBindings.h"
21 #include "glow/Quantization/Base/Base.h"
22 #include "glow/Support/Error.h"
23 
24 namespace glow {
25 namespace runtime {
26 struct PartitionConfig;
27 struct PrePartitionedConfig;
28 class DeferredWeightLoader;
29 } // namespace runtime
30 
31 /// Configuration for different precision modes.
32 struct PrecisionConfiguration {
33   /// Enum for what kind of transformation should be done for Quantization.
34   enum class QuantizationMode {
35     None,     /// Perform no transformations for quantization.
36     Quantize, /// Quantize the graph using previously gathered statistics.
37     Profile,  /// Add profiling nodes for quantization statistics gathering.
38   } quantMode{QuantizationMode::None};
39 
40   /// Configuration for Profiling.
41   quantization::ProfilingConfiguration profConfig;
42 
43   /// Configuration for Quantization.
44   quantization::QuantizationConfiguration quantConfig;
45 
46   /// Enum for what kind of float16 format should be used.
47   enum class Float16Format {
48     None,     /// No float16 format should be used.
49     FP16,     /// FP16 format for float16 should be used.
50     BFloat16, /// FP16 format for float16 should be used.
51   } float16Format{
52       Float16Format::FP16}; /// If convertToFp16, float16 format to be used.
53 
54   /// Whether to convert the FloatTy to Float16Ty in the Function.
55   bool convertToFP16{false};
56 
57   /// Whether to convert UInt8FusedQTy to UInt8FusedFP16QTy in the Function.
58   bool convertFusedToFP16{false};
59 
60   /// If convertToFP16, whether to convert input Placeholders.
61   bool convertPlaceholdersToFP16{false};
62 
63   /// If convertToFP16, whether to convert Constants.
64   bool convertConstantsToFP16{false};
65 
66   /// If convertToFP16, whether to clip out-of-range FP values to the min/max of
67   /// fp16.
68   bool clipFP16{false};
69 
70   /// If clipFP16, whether to skip clipping inputs of Nodes.
71   bool clipFP16SkipInputs{false};
72 
73   /// Whether to force FP16 accumulation for the SLS family of ops.
74   bool forceFP16AccumSLS{true};
75 
76   /// Used during Quantization and convertToFP16 to keep the original precision
77   /// of specific node kinds (i.e. quantization/FP16 conversion would be skipped
78   /// for any node kinds found here). Used during profiling to prevent nodes
79   /// from being lowered before instrumenting the graph (e.g. do not lower group
80   /// convolutions for profiling; see `-do-not-lower-nodes-for-profiling` in
81   /// docs/Quantization.md).
82   KindSet precisionModeKindSet;
83 
84   /// Whether to use the precisionModeKindSet as a whitelist instead of the
85   /// default blacklist. Currently only supported for convertToFP16.
86   bool useSetAsWhitelist{false};
87 
88   /// Converts a float16 \p format into an ElemKind.
getElementTypePrecisionConfiguration89   static ElemKind getElementType(Float16Format format) {
90     switch (format) {
91     case Float16Format::FP16:
92       return ElemKind::Float16Ty;
93     case Float16Format::BFloat16:
94       return ElemKind::BFloat16Ty;
95     default:
96       llvm_unreachable("Unknown float16 format");
97     }
98   }
99 };
100 
/// Shorthand alias so callers can name the quantization mode without spelling
/// out the enclosing PrecisionConfiguration struct.
using QuantizationMode = PrecisionConfiguration::QuantizationMode;
102 
103 /// Options relevant to optimizations during compilation.
104 struct OptimizationOptions {
105   /// Only lower, i.e. skip optimizations and precision transformations. Used
106   /// for testing.
107   llvm::SmallSet<Function *, 1> onlyLowerFuns;
108 
109   /// If true, perform compile-time computation of constant operations.
110   bool enableConstantFolding{true};
111 
112   /// If true, before any Function optimization, all the Constants will be
113   /// temporarily replaced by Placeholders, preventing the Constants from being
114   /// modified during the normal optimization pipeline. The original Constants
115   /// will be put back in place automatically afterward, and then Constant
116   /// Folding will be run.
117   bool delayAndRecordConstantModification{false};
118 
119   /// If true, this will merge ConvertTo and Quantize nodes into inputs and
120   /// outputs of the Function. This means modifying the types of Placeholders
121   /// and SaveNodes if they have a corresponding ElemKind conversion (ConvertTo,
122   /// Quantize, Dequantize nodes). Note that this must be accompanied by
123   /// modifying the Tensors backing Placeholders at runtime.
124   bool foldElemKindConversionIntoIO{false};
125 
126   /// If true this will fold convertTo and Quantize nodes into only static
127   /// placeholders. The conversion of the Tensors will be handled by the
128   /// provisioner.
129   bool foldStaticPlaceholderConversions{false};
130 
131   /// If true, this will direct the partitioner to use SparseNN partitioning
132   /// scheme
133   bool useSparseNNPartitioningScheme{false};
134 
135   /// If true, SparseNN partiitoning scheme will add extra concats to the
136   /// SLS partition for more efficient inter-partition transfers
137   bool sparseNNPartitioningAddSLSConcats{false};
138 
139   /// If true, SparseNN partiitoning scheme will balance SLS tables across
140   /// cards using a performance model
141   bool sparseNNPartitioningBalancePerfModel{false};
142 
143   /// If true, SparseNN partiitoning scheme will move Layer Normalization
144   /// nodes immediately following SLS into SLS partitions
145   bool sparseNNPartitioningPairLNWithSLS{false};
146 
147   /// The number of cards over which to split SLS tables when using SparseNN
148   /// partitioning scheme
149   unsigned int sparseNNPartitioningSchemeNumCards{1};
150 
151   /// The number of bytes to allocate per card for SLS tables when using
152   /// the SparseNN partitioning scheme
153   unsigned int sparseNNPartitioningSchemeSLSTableKBytesPerCard{0};
154 
155   /// The number of cores to assign to SLS partition when using SparseNN
156   /// partitioning scheme
157   unsigned int sparseNNPartitioningSchemeNumCoresSLS{1};
158 
159   /// The number of cores to assign to non-SLS partition when using SparseNN
160   /// partitioning scheme
161   unsigned int sparseNNPartitioningSchemeNumCoresOther{1};
162 
163   /// The algorithm used for Placement tagging in DAG Optimizer
164   std::string DAGOptimizerPlacementTaggingAlgorithm;
165 
166   /// The algorithm used for Parallelization tagging in DAG Optimizer
167   std::string DAGOptimizerParallelizationTaggingAlgorithm;
168 
169   /// The number of parallel chunks used in DAG Optimizer parallelization
170   int32_t DAGOptimizerNumParallelChunks;
171 
172   /// If true does int64 to int32 type demotion if backend supports for specific
173   /// nodes.
174   bool enableTypeDemotion{true};
175 
176   /// If true, optimizations are allowed to change quantization scale/offset.
177   bool enableQuantParamChanges{false};
178 };
179 
180 /// Meta information produced during the compilation. Whereas the compile
181 /// options should be interpreted as input variables for the compilation, the
182 /// below structure is output information produced by the compilation process.
/// Meta information produced during the compilation. Whereas the compile
/// options should be interpreted as input variables for the compilation, the
/// below structure is output information produced by the compilation process.
struct CompilationInfo {
  /// The hash of the graph before the lowering stage; recorded so later stages
  /// can detect whether the pre-lowering graph changed.
  llvm::hash_code graphPreLowerHash{0};
};
187 
188 /// Context for compilation.
189 struct CompilationContext {
190   /// Used during Profiling.
191   PlaceholderBindings *bindings{nullptr};
192 
193   /// Allows the user to specify user defined partitioning.
194   runtime::PartitionConfig *partitionConfig{nullptr};
195 
196   /// Allows a loader to store a pre-partitioned config.
197   runtime::PrePartitionedConfig *prepartitionedConfig{nullptr};
198 
199   /// If true the HostManager will try to use all available devices on the host.
200   bool saturateHost{false};
201 
202   /// Number of max active requests per instance of this network.
203   unsigned maxActiveRequestsPerInstance{48};
204 
205   /// Used during Quantization and Profiling.
206   LoweredInfoMap *loweredInfoMap{nullptr};
207 
208   /// Select whether in Training or Inference mode.
209   enum class CompilationMode {
210     Train, /// Compile the graph in preparation for training.
211     Infer, /// Compile the graph for inference. Notice that this operation
212            /// changes the graph in a way that is not reversible.
213     NumCompilationModes, /// Used to count the number of CompilationModes.
214   } compMode{CompilationMode::Infer};
215 
216   /// Options for the Backend to use.
217   BackendOptions backendOpts;
218 
219   /// Options for the optimizations to use.
220   OptimizationOptions optimizationOpts;
221 
222   /// Configuration for different precision modes.
223   PrecisionConfiguration precisionConfig;
224 
225   /// Information produced during compilation.
226   CompilationInfo info;
227 
228   /// How to annotate the compilation log filename.
229   std::string compilationLogPrefix{"glow"};
230 
231   /// Pointer to deferredWeightLoader object, this is used for large model
232   /// support.
233   runtime::DeferredWeightLoader *deferredWeightLoader{nullptr};
234 
235   /// Whether to print out issues/logging during compilation. Used for example
236   /// to disable printing issues encountered during ConstantFolding.
237   bool verboseCompile{true};
238 
239   /// Call dumpDag on each Function passed to the backend for compilation.
240   bool dumpFinalGraph = false;
241 
242   /// Whether to skip stripping the module.
243   bool skipModuleStrip{false};
244 
245   /// Enables Peer to Peer Tensor optimization.
246   bool enableP2P{false};
247 
248   /// Enables Device Resident Tensor optimization.
249   bool enableDRT{false};
250 
251   /// Number of times a function should be replicated on a device. This is
252   /// enabled for single partition networks. For advanced replication setups use
253   /// user-defined partitioning.
254   unsigned replicationCount{1};
255 
256   /// Whether to serialize the DAG that has been optimized and partitioned.
257   bool serializeCompiledDAG{false};
258 
259   /// Whether to call the DAG optimizer after the DAG is created in HostManager.
260   bool callDAGOptimizer{false};
261 
262   CompilationContext(PlaceholderBindings *bindings_ = nullptr,
263                      LoweredInfoMap *loweredInfoMap_ = nullptr)
bindingsCompilationContext264       : bindings(bindings_), loweredInfoMap(loweredInfoMap_) {}
265 
266   /// \returns an error if the CompilationContext is malformed for whatever
267   /// configuration it is set up for, otherwise returns success.
verifyCompilationContext268   Error verify() const {
269     RETURN_ERR_IF_NOT(!precisionConfig.useSetAsWhitelist ||
270                           precisionConfig.convertToFP16,
271                       "Can only use the precisionModeKindSet as a whitelist in "
272                       "convertToFP16 mode.");
273 
274     switch (precisionConfig.quantMode) {
275     case QuantizationMode::Profile:
276       RETURN_ERR_IF_NOT(bindings,
277                         ErrorValue::ErrorCode::COMPILE_CONTEXT_MALFORMED,
278                         "In Profiling mode, but bindings was not set.\n");
279 
280       RETURN_ERR_IF_NOT(loweredInfoMap,
281                         ErrorValue::ErrorCode::COMPILE_CONTEXT_MALFORMED,
282                         "In Profiling mode, but loweredInfoMap was not set.\n");
283 
284       RETURN_ERR_IF_NOT(!precisionConfig.convertToFP16,
285                         ErrorValue::ErrorCode::COMPILE_CONTEXT_MALFORMED,
286                         "Converting to FP16 while profiling is unsupported.\n");
287       break;
288 
289     case QuantizationMode::Quantize:
290       RETURN_ERR_IF_NOT(
291           loweredInfoMap, ErrorValue::ErrorCode::COMPILE_CONTEXT_MALFORMED,
292           "In Quantization mode, but loweredInfoMap was not set.\n");
293       break;
294 
295     case QuantizationMode::None:
296       break;
297     }
298 
299     RETURN_ERR_IF_NOT(!(optimizationOpts.foldElemKindConversionIntoIO &&
300                         optimizationOpts.delayAndRecordConstantModification),
301                       "Cannot currently perform elem kind merging into PHs "
302                       "when also preventing constant modification.");
303 
304     RETURN_ERR_IF_NOT(!(serializeCompiledDAG &&
305                         !optimizationOpts.delayAndRecordConstantModification),
306                       "When serializing the compiled DAG, must also enable "
307                       "delayAndRecordConstantModification.");
308 
309     return Error::success();
310   }
311 };
312 
/// Shorthand alias so callers can name the compilation mode without spelling
/// out the enclosing CompilationContext struct.
using CompilationMode = CompilationContext::CompilationMode;
314 
315 }; // namespace glow
316 
317 #endif // GLOW_OPTIMIZER_GRAPHOPTIMIZER_COMPILATIONCONTEXT_H
318