/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * This example demonstrates an image classification workflow with pre-trained models using the MXNet C++ API.
 * The example performs the following tasks:
 * 1. Load the pre-trained model.
 * 2. Load the parameters of the pre-trained model.
 * 3. Load the inference dataset and create a new ImageRecordIter.
 * 4. Run the forward pass and obtain throughput & accuracy.
 */
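
/*
 * Example invocation (illustrative only; the symbol/params/record file names below are
 * placeholders and must be replaced with real files):
 *
 *   ./imagenet_inference --symbol_file ./model/resnet50_v1-symbol.json \
 *                        --params_file ./model/resnet50_v1-0000.params \
 *                        --dataset ./data/val_256_q90.rec \
 *                        --batch_size 64 --num_inference_batches 500
 *
 * Appending --benchmark runs the forward pass on randomly generated dummy data instead of
 * the .rec dataset, in which case --params_file and --dataset may be omitted.
 */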
#ifndef _WIN32
#include <sys/time.h>
#endif
#include <fstream>
#include <iostream>
#include <map>
#include <chrono>
#include <string>
#include <vector>
#include <random>
#include <type_traits>
#include <opencv2/opencv.hpp>
#include "mxnet/c_api.h"
#include "mxnet/tuple.h"
#include "mxnet-cpp/MxNetCpp.h"
#include "mxnet-cpp/initializer.h"

using namespace mxnet::cpp;

double ms_now() {
  double ret;
#ifdef _WIN32
  auto timePoint = std::chrono::high_resolution_clock::now().time_since_epoch();
  ret = std::chrono::duration<double, std::milli>(timePoint).count();
#else
  struct timeval time;
  gettimeofday(&time, nullptr);
  ret = 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
#endif
  return ret;
}


// Data type flags for NDArray, aligned with the definitions in mshadow/base.h
enum TypeFlag {
  kFloat32 = 0,
  kFloat64 = 1,
  kFloat16 = 2,
  kUint8 = 3,
  kInt32 = 4,
  kInt8  = 5,
  kInt64 = 6,
};

/*
 * class Predictor
 *
 * This class encapsulates the functionality to load the model, prepare the dataset and run the forward pass.
 */

class Predictor {
 public:
    Predictor() {}
    Predictor(const std::string& model_json_file,
              const std::string& model_params_file,
              const Shape& input_shape,
              bool use_gpu,
              bool enable_tensorrt,
              const std::string& dataset,
              const int data_nthreads,
              const std::string& data_layer_type,
              const std::vector<float>& rgb_mean,
              const std::vector<float>& rgb_std,
              int shuffle_chunk_seed,
              int seed, bool benchmark);
    void BenchmarkScore(int num_inference_batches);
    void Score(int num_skipped_batches, int num_inference_batches);
    ~Predictor();

 private:
    bool CreateImageRecordIter();
    bool AdvanceDataIter(int skipped_batches);
    void LoadModel(const std::string& model_json_file);
    void LoadParameters(const std::string& model_parameters_file);
    void SplitParamMap(const std::map<std::string, NDArray> &paramMap,
        std::map<std::string, NDArray> *argParamInTargetContext,
        std::map<std::string, NDArray> *auxParamInTargetContext,
        Context targetContext);
    void ConvertParamMapToTargetContext(const std::map<std::string, NDArray> &paramMap,
        std::map<std::string, NDArray> *paramMapInTargetContext,
        Context targetContext);
    void InitParameters();

    inline bool FileExists(const std::string &name) {
      std::ifstream fhandle(name.c_str());
      return fhandle.good();
    }
    int GetDataLayerType();

    std::map<std::string, NDArray> args_map_;
    std::map<std::string, NDArray> aux_map_;
    Symbol net_;
    // Initialize the pointers so the destructor is safe even for a default-constructed object.
    Executor *executor_ = nullptr;
    Shape input_shape_;
    Context global_ctx_ = Context::cpu();

    MXDataIter *val_iter_ = nullptr;
    bool use_gpu_;
    bool enable_tensorrt_;
    std::string dataset_;
    int data_nthreads_;
    std::string data_layer_type_;
    std::vector<float> rgb_mean_;
    std::vector<float> rgb_std_;
    int shuffle_chunk_seed_;
    int seed_;
    bool benchmark_;
};
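
/*
 * A minimal usage sketch for the class above (argument values are illustrative only;
 * the file names are placeholders, not files shipped with this example):
 *
 *   Predictor predictor("resnet50-symbol.json", "resnet50-0000.params",
 *                       Shape(64, 3, 224, 224),   // batch_size, channels, height, width
 *                       false,                    // use_gpu
 *                       false,                    // enable_tensorrt
 *                       "val.rec",                // dataset (.rec file)
 *                       60,                       // data_nthreads
 *                       "float32",                // data_layer_type
 *                       {0.0f, 0.0f, 0.0f},       // rgb_mean
 *                       {1.0f, 1.0f, 1.0f},       // rgb_std
 *                       3982304, 48564309,        // shuffle_chunk_seed, seed
 *                       false);                   // benchmark
 *   predictor.Score(0, 100);   // skip 0 batches, score 100 batches
 */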


/*
 * The constructor takes the following parameters as input:
 * 1. model_json_file:  The model in a json-formatted file.
 * 2. model_params_file: File containing the model parameters.
 * 3. input_shape: Shape of the input data to the model, in the format
 *                 Shape(batch_size, number_of_channels, height, width).
 *                 The input images will be resized to (height x width) before running the inference.
 * 4. use_gpu: whether to run the inference on the GPU.
 * 5. enable_tensorrt: whether to enable TensorRT.
 * 6. dataset: data file (.rec) to be used for inference.
 * 7. data_nthreads: number of threads for data loading.
 * 8. data_layer_type: data type for the data layer.
 * 9. rgb_mean: mean values to be subtracted from the R/G/B channels.
 * 10. rgb_std: standard deviations of the R/G/B channels.
 * 11. shuffle_chunk_seed: shuffling chunk seed.
 * 12. seed: shuffling seed.
 * 13. benchmark: use dummy data for inference.
 *
 * The constructor will:
 *  1. Create an ImageRecordIter based on the given dataset file.
 *  2. Load the model and parameter files.
 *  3. Infer and construct NDArrays according to the input arguments and create an executor.
 */
Predictor::Predictor(const std::string& model_json_file,
                     const std::string& model_params_file,
                     const Shape& input_shape,
                     bool use_gpu,
                     bool enable_tensorrt,
                     const std::string& dataset,
                     const int data_nthreads,
                     const std::string& data_layer_type,
                     const std::vector<float>& rgb_mean,
                     const std::vector<float>& rgb_std,
                     int shuffle_chunk_seed,
                     int seed, bool benchmark)
    : input_shape_(input_shape),
      use_gpu_(use_gpu),
      enable_tensorrt_(enable_tensorrt),
      dataset_(dataset),
      data_nthreads_(data_nthreads),
      data_layer_type_(data_layer_type),
      rgb_mean_(rgb_mean),
      rgb_std_(rgb_std),
      shuffle_chunk_seed_(shuffle_chunk_seed),
      seed_(seed),
      benchmark_(benchmark) {
  if (use_gpu) {
    global_ctx_ = Context::gpu();
  }

  // initialize the data iterator
  if (!benchmark_ && !CreateImageRecordIter()) {
    LG << "Error: failed to create ImageRecordIter";
    throw std::runtime_error("ImageRecordIter cannot be created");
  }

  // Load the model
  LoadModel(model_json_file);
  // Initialize the parameters.
  // If benchmark is enabled and model_params_file is empty, randomly initialize the parameters;
  // otherwise, load them from the parameters file.
  if (benchmark_ && model_params_file.empty()) {
    InitParameters();
  } else {
    LoadParameters(model_params_file);
  }

  int dtype = GetDataLayerType();
  if (dtype == -1) {
    throw std::runtime_error("Unsupported data layer type...");
  }
  args_map_["data"] = NDArray(input_shape_, global_ctx_, false, dtype);
  Shape label_shape(input_shape_[0]);
  args_map_["softmax_label"] = NDArray(label_shape, global_ctx_, false);
  std::vector<NDArray> arg_arrays;
  std::vector<NDArray> grad_arrays;
  std::vector<OpReqType> grad_reqs;
  std::vector<NDArray> aux_arrays;

  // infer and create ndarrays according to the given input ndarrays.
  net_.InferExecutorArrays(global_ctx_, &arg_arrays, &grad_arrays, &grad_reqs,
                           &aux_arrays, args_map_, std::map<std::string, NDArray>(),
                           std::map<std::string, OpReqType>(), aux_map_);
  for (auto& i : grad_reqs) i = OpReqType::kNullOp;

  // Create an executor after binding the model to input parameters.
  executor_ = new Executor(net_, global_ctx_, arg_arrays, grad_arrays, grad_reqs, aux_arrays);
}

/*
 * The following function is used to get the data layer type for input data.
 */
int Predictor::GetDataLayerType() {
  int ret_type = -1;
  if (data_layer_type_ == "float32") {
    ret_type = kFloat32;
  } else if (data_layer_type_ == "int8") {
    ret_type = kInt8;
  } else if (data_layer_type_ == "uint8") {
    ret_type = kUint8;
  } else {
    LG << "Unsupported data layer type " << data_layer_type_ << "..."
       << "Please use one of {float32, int8, uint8}";
  }
  return ret_type;
}
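
// For example, GetDataLayerType() returns kInt8 (5) when data_layer_type_ is "int8";
// the constructor then passes that value as the dtype of the "data" NDArray, and
// CreateImageRecordIter() forwards the same string via the iterator's "dtype" parameter.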

/*
 * Create a new ImageRecordIter according to the given parameters.
 */
bool Predictor::CreateImageRecordIter() {
  val_iter_ = new MXDataIter("ImageRecordIter");
  if (!FileExists(dataset_)) {
    LG << "Error: " << dataset_ << " must be provided";
    return false;
  }

  std::vector<index_t> shape_vec;
  for (index_t i = 1; i < input_shape_.ndim(); i++)
    shape_vec.push_back(input_shape_[i]);
  mxnet::TShape data_shape(shape_vec.begin(), shape_vec.end());

  // set image record parser parameters
  val_iter_->SetParam("path_imgrec", dataset_);
  val_iter_->SetParam("label_width", 1);
  val_iter_->SetParam("data_shape", data_shape);
  val_iter_->SetParam("preprocess_threads", data_nthreads_);
  val_iter_->SetParam("shuffle_chunk_seed", shuffle_chunk_seed_);

  // set batch parameters
  val_iter_->SetParam("batch_size", input_shape_[0]);

  // image record parameters
  val_iter_->SetParam("shuffle", true);
  val_iter_->SetParam("seed", seed_);

  // set normalization parameters
  val_iter_->SetParam("mean_r", rgb_mean_[0]);
  val_iter_->SetParam("mean_g", rgb_mean_[1]);
  val_iter_->SetParam("mean_b", rgb_mean_[2]);
  val_iter_->SetParam("std_r", rgb_std_[0]);
  val_iter_->SetParam("std_g", rgb_std_[1]);
  val_iter_->SetParam("std_b", rgb_std_[2]);

  // set prefetcher parameters
  if (use_gpu_) {
    val_iter_->SetParam("ctx", "gpu");
  } else {
    val_iter_->SetParam("ctx", "cpu");
  }
  val_iter_->SetParam("dtype", data_layer_type_);

  val_iter_->CreateDataIter();
  return true;
}

/*
 * The following function loads the model from the json file.
 */
void Predictor::LoadModel(const std::string& model_json_file) {
  if (!FileExists(model_json_file)) {
    LG << "Model file " << model_json_file << " does not exist";
    throw std::runtime_error("Model file does not exist");
  }
  LG << "Loading the model from " << model_json_file << std::endl;
  net_ = Symbol::Load(model_json_file);
  if (enable_tensorrt_) {
    net_ = net_.GetBackendSymbol("TensorRT");
  }
}

/*
 * The following function loads the model parameters.
 */
void Predictor::LoadParameters(const std::string& model_parameters_file) {
  if (!FileExists(model_parameters_file)) {
    LG << "Parameter file " << model_parameters_file << " does not exist";
    throw std::runtime_error("Model parameters file does not exist");
  }
  LG << "Loading the model parameters from " << model_parameters_file << std::endl;
  std::map<std::string, NDArray> parameters;
  NDArray::Load(model_parameters_file, 0, &parameters);
  if (enable_tensorrt_) {
    std::map<std::string, NDArray> intermediate_args_map;
    std::map<std::string, NDArray> intermediate_aux_map;
    SplitParamMap(parameters, &intermediate_args_map, &intermediate_aux_map, Context::cpu());
    contrib::InitTensorRTParams(net_, &intermediate_args_map, &intermediate_aux_map);
    ConvertParamMapToTargetContext(intermediate_args_map, &args_map_, global_ctx_);
    ConvertParamMapToTargetContext(intermediate_aux_map, &aux_map_, global_ctx_);
  } else {
    SplitParamMap(parameters, &args_map_, &aux_map_, global_ctx_);
  }
  /* WaitAll is needed when we copy data between the GPU and main memory. */
  NDArray::WaitAll();
}

/*
 * The following function splits the loaded parameter map into arg and aux
 * parameter maps in the target context.
 */
void Predictor::SplitParamMap(const std::map<std::string, NDArray> &paramMap,
    std::map<std::string, NDArray> *argParamInTargetContext,
    std::map<std::string, NDArray> *auxParamInTargetContext,
    Context targetContext) {
  for (const auto& pair : paramMap) {
    std::string type = pair.first.substr(0, 4);
    std::string name = pair.first.substr(4);
    if (type == "arg:") {
      (*argParamInTargetContext)[name] = pair.second.Copy(targetContext);
    } else if (type == "aux:") {
      (*auxParamInTargetContext)[name] = pair.second.Copy(targetContext);
    }
  }
}

/*
 * The following function copies the parameter map to the target context.
 */
void Predictor::ConvertParamMapToTargetContext(const std::map<std::string, NDArray> &paramMap,
    std::map<std::string, NDArray> *paramMapInTargetContext,
    Context targetContext) {
  for (const auto& pair : paramMap) {
    (*paramMapInTargetContext)[pair.first] = pair.second.Copy(targetContext);
  }
}

/*
 * The following function randomly initializes the parameters when benchmark_ is true.
 */
void Predictor::InitParameters() {
  std::vector<mx_uint> data_shape;
  for (index_t i = 0; i < input_shape_.ndim(); i++) {
    data_shape.push_back(input_shape_[i]);
  }

  std::map<std::string, std::vector<mx_uint> > arg_shapes;
  std::vector<std::vector<mx_uint> > aux_shapes, in_shapes, out_shapes;
  arg_shapes["data"] = data_shape;
  net_.InferShape(arg_shapes, &in_shapes, &aux_shapes, &out_shapes);

  // initializer to call
  Xavier xavier(Xavier::uniform, Xavier::avg, 2.0f);

  auto arg_name_list = net_.ListArguments();
  for (index_t i = 0; i < in_shapes.size(); i++) {
    const auto &shape = in_shapes[i];
    const auto &arg_name = arg_name_list[i];
    int paramType = kFloat32;
    if (Initializer::StringEndWith(arg_name, "weight_quantize") ||
        Initializer::StringEndWith(arg_name, "bias_quantize")) {
      paramType = kInt8;
    }
    NDArray tmp_arr(shape, global_ctx_, false, paramType);
    xavier(arg_name, &tmp_arr);
    args_map_[arg_name] = tmp_arr.Copy(global_ctx_);
  }

  auto aux_name_list = net_.ListAuxiliaryStates();
  for (index_t i = 0; i < aux_shapes.size(); i++) {
    const auto &shape = aux_shapes[i];
    const auto &aux_name = aux_name_list[i];
    NDArray tmp_arr(shape, global_ctx_, false);
    xavier(aux_name, &tmp_arr);
    aux_map_[aux_name] = tmp_arr.Copy(global_ctx_);
  }
  /* WaitAll is needed when we copy data between the GPU and main memory. */
  NDArray::WaitAll();
}

/*
 * The following function runs the forward pass on the model
 * and uses dummy data for benchmarking.
 */
void Predictor::BenchmarkScore(int num_inference_batches) {
  // Create dummy data
  std::vector<float> dummy_data(input_shape_.Size());
  std::default_random_engine generator;
  std::uniform_real_distribution<float> val(0.0f, 1.0f);
  for (size_t i = 0; i < static_cast<size_t>(input_shape_.Size()); ++i) {
    dummy_data[i] = static_cast<float>(val(generator));
  }
  executor_->arg_dict()["data"].SyncCopyFromCPU(
        dummy_data.data(),
        input_shape_.Size());
  NDArray::WaitAll();

  LG << "Running the forward pass on the model to evaluate the performance..";

  // warm up.
  for (int i = 0; i < 5; i++) {
    executor_->Forward(false);
    NDArray::WaitAll();
  }

  // Run the forward pass.
  double ms = ms_now();
  for (int i = 0; i < num_inference_batches; i++) {
    executor_->Forward(false);
    NDArray::WaitAll();
  }
  ms = ms_now() - ms;
  LG << " benchmark completed!";
  LG << " batch size: " << input_shape_[0] << " num batch: " << num_inference_batches
     << " throughput: " << 1000.0 * input_shape_[0] * num_inference_batches / ms
     << " imgs/s latency: " << ms / input_shape_[0] / num_inference_batches << " ms";
}

/*
 * Skip the first batches of the data iterator.
 *
 * \param skipped_batches number of batches to skip
 */
bool Predictor::AdvanceDataIter(int skipped_batches) {
  assert(skipped_batches >= 0);
  if (skipped_batches == 0) return true;
  int skipped_count = 0;
  while (val_iter_->Next()) {
    if (++skipped_count >= skipped_batches) break;
  }
  if (skipped_count != skipped_batches) return false;
  return true;
}

/*
 * The following function runs the forward pass on the model
 * and uses real data to measure accuracy and performance.
 */
void Predictor::Score(int num_skipped_batches, int num_inference_batches) {
  // Create metrics
  Accuracy val_acc;

  val_iter_->Reset();
  val_acc.Reset();
  int nBatch = 0;

  if (!AdvanceDataIter(num_skipped_batches)) {
    LG << "The number of skipped batches should be less than the total number of batches!";
    return;
  }

  double ms = ms_now();
  while (val_iter_->Next()) {
    auto data_batch = val_iter_->GetDataBatch();
    data_batch.data.CopyTo(&args_map_["data"]);
    data_batch.label.CopyTo(&args_map_["softmax_label"]);
    NDArray::WaitAll();

    // run the forward pass
    executor_->Forward(false);
    NDArray::WaitAll();
    val_acc.Update(data_batch.label, executor_->outputs[0]);

    if (++nBatch >= num_inference_batches) {
      break;
    }
  }
  ms = ms_now() - ms;
  auto args_name = net_.ListArguments();
  LG << "INFO:" << "Dataset for inference: " << dataset_;
  LG << "INFO:" << "label_name = " << args_name[args_name.size()-1];
  LG << "INFO:" << "rgb_mean: " << "(" << rgb_mean_[0] << ", " << rgb_mean_[1]
     << ", " << rgb_mean_[2] << ")";
  LG << "INFO:" << "rgb_std: " << "(" << rgb_std_[0] << ", " << rgb_std_[1]
     << ", " << rgb_std_[2] << ")";
  LG << "INFO:" << "Image shape: " << "(" << input_shape_[1] << ", "
     << input_shape_[2] << ", " << input_shape_[3] << ")";
  LG << "INFO:" << "Finished inference with: " << nBatch * input_shape_[0]
     << " images ";
  LG << "INFO:" << "Batch size = " << input_shape_[0] << " for inference";
  LG << "INFO:" << "Accuracy: " << val_acc.Get();
  LG << "INFO:" << "Throughput: " << (1000.0 * nBatch * input_shape_[0] / ms)
     << " images per second";
}

Predictor::~Predictor() {
  if (executor_) {
    delete executor_;
  }
  if (!benchmark_ && val_iter_) {
    delete val_iter_;
  }
  MXNotifyShutdown();
}

/*
 * Convert a string of whitespace-separated numbers into a vector.
 */
template<typename T>
std::vector<T> createVectorFromString(const std::string& input_string) {
  std::vector<T> dst_vec;
  char *p_next;
  T elem;
  bool bFloat = std::is_same<T, float>::value;
  if (!bFloat) {
    elem = strtol(input_string.c_str(), &p_next, 10);
  } else {
    elem = strtof(input_string.c_str(), &p_next);
  }

  dst_vec.push_back(elem);
  while (*p_next) {
    if (!bFloat) {
      elem = strtol(p_next, &p_next, 10);
    } else {
      elem = strtof(p_next, &p_next);
    }
    dst_vec.push_back(elem);
  }
  return dst_vec;
}
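
// For example, createVectorFromString<float>("0 0 0") yields {0.0f, 0.0f, 0.0f}, and
// createVectorFromString<index_t>("3 224 224") yields {3, 224, 224}; this is how the
// --rgb_mean, --rgb_std and --input_shape command-line strings below are parsed.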

void printUsage() {
    std::cout << "Usage:" << std::endl;
    std::cout << "imagenet_inference --symbol_file <model symbol file in json format>" << std::endl
              << "--params_file <model params file> " << std::endl
              << "--dataset <dataset used to run inference> " << std::endl
              << "--data_nthreads <default: 60> " << std::endl
              << "--input_shape <shape of the input image, e.g. \"3 224 224\"> " << std::endl
              << "--rgb_mean <mean value to be subtracted on each R/G/B channel, e.g. \"0 0 0\">"
              << std::endl
              << "--rgb_std <standard deviation of each R/G/B channel, e.g. \"1 1 1\"> " << std::endl
              << "--batch_size <number of images per batch> " << std::endl
              << "--num_skipped_batches <number of batches to skip before inference> " << std::endl
              << "--num_inference_batches <number of batches used for inference> " << std::endl
              << "--data_layer_type <default: \"float32\", "
              << "choices: [\"float32\", \"int8\", \"uint8\"]>" << std::endl
              << "--gpu <whether to run inference on GPU, default: false>" << std::endl
              << "--enableTRT <whether to run inference with TensorRT, "
              << "default: false>" << std::endl
              << "--benchmark <whether to use dummy data to run inference, default: false>"
              << std::endl;
}

int main(int argc, char** argv) {
  std::string model_file_json;
  std::string model_file_params;
  std::string dataset("");
  std::string input_rgb_mean("0 0 0");
  std::string input_rgb_std("1 1 1");
  bool use_gpu = false;
  bool enable_tensorrt = false;
  bool benchmark = false;
  int batch_size = 64;
  int num_skipped_batches = 0;
  int num_inference_batches = 100;
  std::string data_layer_type("float32");
  std::string input_shape("3 224 224");
  int seed = 48564309;
  int shuffle_chunk_seed = 3982304;
  int data_nthreads = 60;

  int index = 1;
  while (index < argc) {
    if (strcmp("--symbol_file", argv[index]) == 0) {
      index++;
      model_file_json = (index < argc ? argv[index] : "");
    } else if (strcmp("--params_file", argv[index]) == 0) {
      index++;
      model_file_params = (index < argc ? argv[index] : "");
    } else if (strcmp("--dataset", argv[index]) == 0) {
      index++;
      dataset = (index < argc ? argv[index] : dataset);
    } else if (strcmp("--data_nthreads", argv[index]) == 0) {
      index++;
      // guard against a missing value so we never read past argv
      data_nthreads = (index < argc ? strtol(argv[index], nullptr, 10) : data_nthreads);
    } else if (strcmp("--input_shape", argv[index]) == 0) {
      index++;
      input_shape = (index < argc ? argv[index] : input_shape);
    } else if (strcmp("--rgb_mean", argv[index]) == 0) {
      index++;
      input_rgb_mean = (index < argc ? argv[index] : input_rgb_mean);
    } else if (strcmp("--rgb_std", argv[index]) == 0) {
      index++;
      input_rgb_std = (index < argc ? argv[index] : input_rgb_std);
    } else if (strcmp("--batch_size", argv[index]) == 0) {
      index++;
      batch_size = (index < argc ? strtol(argv[index], nullptr, 10) : batch_size);
    } else if (strcmp("--num_skipped_batches", argv[index]) == 0) {
      index++;
      num_skipped_batches = (index < argc ? strtol(argv[index], nullptr, 10) : num_skipped_batches);
    } else if (strcmp("--num_inference_batches", argv[index]) == 0) {
      index++;
      num_inference_batches = (index < argc ? strtol(argv[index], nullptr, 10) : num_inference_batches);
    } else if (strcmp("--data_layer_type", argv[index]) == 0) {
      index++;
      data_layer_type = (index < argc ? argv[index] : data_layer_type);
    } else if (strcmp("--gpu", argv[index]) == 0) {
      use_gpu = true;
    } else if (strcmp("--enableTRT", argv[index]) == 0) {
      use_gpu = true;
      enable_tensorrt = true;
    } else if (strcmp("--benchmark", argv[index]) == 0) {
      benchmark = true;
    } else if (strcmp("--help", argv[index]) == 0) {
      printUsage();
      return 0;
    }
    index++;
  }

  if (model_file_json.empty()
      || (!benchmark && model_file_params.empty())
      || (enable_tensorrt && model_file_params.empty())) {
    LG << "ERROR: Model details such as the symbol and param files are not specified";
    printUsage();
    return 1;
  }
  std::vector<index_t> input_dimensions = createVectorFromString<index_t>(input_shape);
  input_dimensions.insert(input_dimensions.begin(), batch_size);
  Shape input_data_shape(input_dimensions);

  std::vector<float> rgb_mean = createVectorFromString<float>(input_rgb_mean);
  std::vector<float> rgb_std = createVectorFromString<float>(input_rgb_std);

  // Initialize the predictor object
  Predictor predict(model_file_json, model_file_params, input_data_shape, use_gpu, enable_tensorrt,
                    dataset, data_nthreads, data_layer_type, rgb_mean, rgb_std, shuffle_chunk_seed,
                    seed, benchmark);

  if (benchmark) {
    predict.BenchmarkScore(num_inference_batches);
  } else {
    predict.Score(num_skipped_batches, num_inference_batches);
  }
  return 0;
}