• Home
  • History
  • Annotate
Name Date Size #Lines LOC

..03-May-2022-

aarch64/H03-May-2022-50,63941,116

gemm/H21-Dec-2021-1,9531,372

jit_utils/H21-Dec-2021-594417

matmul/H21-Dec-2021-2,4681,759

reorder/H21-Dec-2021-1,9721,278

rnn/H21-Dec-2021-7,8016,247

x64/H03-May-2022-291,970246,425

README.mdH A D21-Dec-20213.1 KiB8167

bfloat16.cppH A D21-Dec-20213 KiB10474

binary_injector_utils.cppH A D21-Dec-20212.5 KiB6945

binary_injector_utils.hppH A D21-Dec-20212.8 KiB7335

cpu_batch_normalization_list.cppH A D21-Dec-20214.5 KiB11077

cpu_batch_normalization_pd.hppH A D21-Dec-20211.4 KiB4217

cpu_batch_normalization_utils.cppH A D21-Dec-20216.8 KiB194137

cpu_batch_normalization_utils.hppH A D21-Dec-20211.6 KiB4521

cpu_binary_list.cppH A D21-Dec-20211.4 KiB5124

cpu_binary_pd.hppH A D21-Dec-20211.2 KiB4017

cpu_concat.cppH A D21-Dec-20211.6 KiB5430

cpu_concat_pd.hppH A D21-Dec-20211.2 KiB4318

cpu_convolution_list.cppH A D21-Dec-202123.1 KiB468428

cpu_convolution_pd.hppH A D21-Dec-20212.6 KiB8453

cpu_deconvolution_list.cppH A D21-Dec-20212.8 KiB8152

cpu_deconvolution_pd.hppH A D21-Dec-20211.5 KiB4922

cpu_eltwise_list.cppH A D21-Dec-20214.1 KiB10675

cpu_eltwise_pd.hppH A D21-Dec-20211.3 KiB4721

cpu_engine.cppH A D21-Dec-20211.9 KiB6034

cpu_engine.hppH A D21-Dec-20215.6 KiB176127

cpu_inner_product_list.cppH A D21-Dec-20218.4 KiB209179

cpu_inner_product_pd.hppH A D21-Dec-202112.2 KiB324245

cpu_layer_normalization_list.cppH A D21-Dec-20212.3 KiB6840

cpu_layer_normalization_pd.hppH A D21-Dec-20211.4 KiB4217

cpu_lrn_list.cppH A D21-Dec-20212.7 KiB7950

cpu_lrn_pd.hppH A D21-Dec-20211.2 KiB4418

cpu_memory_storage.hppH A D21-Dec-20213.7 KiB10968

cpu_pooling_list.cppH A D21-Dec-20214 KiB10775

cpu_pooling_pd.hppH A D21-Dec-20211.2 KiB4217

cpu_prelu_list.cppH A D21-Dec-20212.1 KiB6839

cpu_prelu_pd.hppH A D21-Dec-20211.2 KiB4418

cpu_primitive.hppH A D21-Dec-20213.9 KiB9469

cpu_reduction_list.cppH A D21-Dec-20211.8 KiB6032

cpu_reduction_pd.hppH A D21-Dec-20211 KiB3513

cpu_resampling_list.cppH A D21-Dec-20212.2 KiB7243

cpu_resampling_pd.hppH A D21-Dec-20211.3 KiB4217

cpu_rnn_list.cppH A D21-Dec-20212 KiB6436

cpu_shuffle_list.cppH A D21-Dec-20211.5 KiB5427

cpu_shuffle_pd.hppH A D21-Dec-20211.2 KiB4318

cpu_softmax_list.cppH A D21-Dec-20212.8 KiB8755

cpu_softmax_pd.hppH A D21-Dec-20211.3 KiB4721

cpu_stream.hppH A D21-Dec-20212 KiB6536

cpu_sum.cppH A D21-Dec-20211.8 KiB6135

cpu_sum_pd.hppH A D21-Dec-20211.2 KiB4117

dw_convolution_utils.hppH A D21-Dec-20213.8 KiB11068

gemm_convolution.cppH A D21-Dec-202139.5 KiB926756

gemm_convolution.hppH A D21-Dec-20218.2 KiB211153

gemm_convolution_utils.cppH A D21-Dec-2021101.3 KiB2,1901,829

gemm_convolution_utils.hppH A D21-Dec-20214.5 KiB13999

gemm_inner_product.cppH A D21-Dec-20216.8 KiB184133

gemm_inner_product.hppH A D21-Dec-20216.5 KiB177123

gemm_inner_product_utils.cppH A D21-Dec-20219.8 KiB244194

gemm_inner_product_utils.hppH A D21-Dec-20214.1 KiB11076

gemm_x8s8s32x_conv_zp_src_pad_comp.cppH A D21-Dec-202114.6 KiB350279

gemm_x8s8s32x_conv_zp_src_pad_comp.hppH A D21-Dec-20211.5 KiB4220

gemm_x8s8s32x_convolution.cppH A D21-Dec-202117.9 KiB436346

gemm_x8s8s32x_convolution.hppH A D21-Dec-20217.3 KiB185136

gemm_x8s8s32x_convolution_utils.cppH A D21-Dec-20216.2 KiB179131

gemm_x8s8s32x_convolution_utils.hppH A D21-Dec-20212.2 KiB6234

gemm_x8s8s32x_inner_product.cppH A D21-Dec-20213.9 KiB10976

gemm_x8s8s32x_inner_product.hppH A D21-Dec-20214.3 KiB12885

nchw_pooling.cppH A D21-Dec-202127.9 KiB699581

nchw_pooling.hppH A D21-Dec-20217.7 KiB202146

ncsp_batch_normalization.cppH A D21-Dec-202124.7 KiB552441

ncsp_batch_normalization.hppH A D21-Dec-20217.2 KiB204148

nhwc_pooling.cppH A D21-Dec-202132.5 KiB799609

nhwc_pooling.hppH A D21-Dec-20217.2 KiB195143

nspc_batch_normalization.cppH A D21-Dec-202119.1 KiB479398

nspc_batch_normalization.hppH A D21-Dec-20216.8 KiB191138

platform.cppH A D21-Dec-20216.5 KiB229164

platform.hppH A D21-Dec-20215.1 KiB182119

primitive_attr_postops.cppH A D21-Dec-202112.4 KiB311255

primitive_attr_postops.hppH A D21-Dec-20212.6 KiB8951

ref_batch_normalization.cppH A D21-Dec-202111.1 KiB327258

ref_batch_normalization.hppH A D21-Dec-20214.2 KiB12379

ref_binary.cppH A D21-Dec-20214.5 KiB13190

ref_binary.hppH A D21-Dec-20213 KiB9460

ref_concat.hppH A D21-Dec-20216.2 KiB166123

ref_convolution.cppH A D21-Dec-202123.4 KiB577479

ref_convolution.hppH A D21-Dec-20218.8 KiB217165

ref_convolution_int8.cppH A D21-Dec-202118.8 KiB459374

ref_convolution_int8.hppH A D21-Dec-20216.7 KiB176128

ref_convolution_utils.hppH A D21-Dec-20212.2 KiB6841

ref_deconvolution.cppH A D21-Dec-202126.8 KiB707582

ref_deconvolution.hppH A D21-Dec-202121.6 KiB525399

ref_eltwise.cppH A D21-Dec-202110.5 KiB285219

ref_eltwise.hppH A D21-Dec-20215.9 KiB173118

ref_fused_convolution.hppH A D21-Dec-202114.1 KiB376279

ref_inner_product.cppH A D21-Dec-20217.9 KiB215170

ref_inner_product.hppH A D21-Dec-20217.4 KiB184133

ref_inner_product_int8.cppH A D21-Dec-20213.8 KiB11678

ref_inner_product_int8.hppH A D21-Dec-20213.5 KiB9964

ref_inner_product_utils.hppH A D21-Dec-20212 KiB6236

ref_io_helper.hppH A D21-Dec-20212.6 KiB10672

ref_layer_normalization.cppH A D21-Dec-20219.7 KiB275213

ref_layer_normalization.hppH A D21-Dec-20214 KiB11775

ref_lrn.cppH A D21-Dec-202116 KiB406326

ref_lrn.hppH A D21-Dec-20214.4 KiB13690

ref_pooling.cppH A D21-Dec-202113.7 KiB361293

ref_pooling.hppH A D21-Dec-20214.3 KiB12883

ref_prelu.cppH A D21-Dec-202115.5 KiB404323

ref_prelu.hppH A D21-Dec-20216.7 KiB174115

ref_reduction.cppH A D21-Dec-20216 KiB175135

ref_reduction.hppH A D21-Dec-20213.1 KiB9158

ref_resampling.cppH A D21-Dec-202111.1 KiB293222

ref_resampling.hppH A D21-Dec-20213.5 KiB11171

ref_shuffle.cppH A D21-Dec-20215.5 KiB146114

ref_shuffle.hppH A D21-Dec-20213.8 KiB11375

ref_softmax.cppH A D21-Dec-202112 KiB322260

ref_softmax.hppH A D21-Dec-20215.4 KiB172117

ref_sum.hppH A D21-Dec-20215.1 KiB150103

resampling_utils.hppH A D21-Dec-20213.3 KiB10168

simple_concat.cppH A D21-Dec-20216.7 KiB170121

simple_concat.hppH A D21-Dec-20216.1 KiB171112

simple_layer_normalization.cppH A D21-Dec-20219.7 KiB266202

simple_layer_normalization.hppH A D21-Dec-202110 KiB258191

simple_layer_normalization_kernels.cppH A D21-Dec-20219.4 KiB262218

simple_layer_normalization_kernels.hppH A D21-Dec-20213.8 KiB11781

simple_q10n.hppH A D21-Dec-20215.1 KiB177130

simple_resampling.cppH A D21-Dec-202124.4 KiB585503

simple_resampling.hppH A D21-Dec-20214.7 KiB14093

simple_sum.cppH A D21-Dec-20214.5 KiB12692

simple_sum.hppH A D21-Dec-20214.7 KiB13492

zero_point_utils.cppH A D21-Dec-20215.4 KiB141105

zero_point_utils.hppH A D21-Dec-20213.5 KiB9955

README.md

1oneDNN CPU Implementation
2=========================
3
4The source code is organized in a modular way to separate generic code that
5does not depend or weakly depends on architecture from architecture-specific
6code.
7- The generic code is located under `cpu/`;
8- The architecture-specific code is put into `cpu/<arch>/` sub-directories.
9
10## Directory structure
11
12```
13cpu
14├── gemm/               # Generic GEMM implementation (may call <arch>/gemm)
15├── rnn/                # Generic RNN implementation (may call <arch>/rnn)
16├── x64                 # x64-specific sub-directory
17│   ├── gemm/           # x64-specific GEMM implementation
18│   ├── jit_utils/      # JIT-related utilities, such as support of profilers
19│   ├── rnn/            # JIT-related kernels for rnn primitive
20│   ├── xbyak/          # Xbyak sources
21│   └── jit_*.*         # x64-specific implementations
22├── cpu_engine.hpp      # Basic oneDNN abstractions
23├── cpu_lrn_pd.hpp      # Base cpu primitive descriptor classes
24├── cpu_lrn_list.cpp    # Implementation lists
25├── nchw_pooling.cpp    # Semi-optimized (aka simple) implementations
26├── platform.hpp        # Platform-related utility functions
27└── ref_eltwise.cpp     # Reference implementations
28```
29
30## Target architectures
31
32Currently, the only architecture-specific directory is `cpu/x64`, which contains
33Intel 64 / AMD64 implementations that mostly use the JIT assembler
34[Xbyak](https://github.com/herumi/xbyak) to produce highly optimized code.
35
36The architecture-specific code can easily access the generic code, but the
37opposite should be limited as much as possible. However, sometimes it is
38absolutely necessary for generic code to access architecture-specific code. For
39instance, the list of implementations that live in `cpu/*_list.cpp` should
40conditionally include the specific implementations on the corresponding
41architecture. Hence, for portability reasons [`cpu/platform.hpp`](platform.hpp)
42header file provides a set of helper macros that help conditionally
43enable or disable parts of code. The following macros are defined:
44- `DNNL_X64` is 1 on x64 architecture;
45- `DNNL_AARCH64` is 1 on Arm AArch64 architecture;
46- `DNNL_PPC64` is 1 on OpenPOWER / IBM Power architecture;
47- `DNNL_S390X` is 1 on IBMz / s390x architecture;
48- `DNNL_RV64` is 1 on RISC-V architecture;
49- `DNNL_ARCH_GENERIC` is 1 on other platforms.
50Only one of the macros above is defined to 1. All others are defined to 0.
51
52Usage example:
53
54``` cpp
55#include "cpu/platform.hpp" // IMPORTANT: INCLUDE THIS FILE!
56
57int generic_foo() {
58#if DNNL_X64
59    return x64_impl_foo();
60#else
61    return generic_impl_foo();
62#endif
63}
64```
65
66Additionally, there is a `DNNL_<ARCH>_ONLY(...)` macro that expands to its
67parameters only on the corresponding architectures. Hence, the following
68code has the same behavior as the example above:
69
70``` cpp
71#include "cpu/platform.hpp" // IMPORTANT: INCLUDE THIS FILE!
72
73int generic_foo() {
74    DNNL_X64_ONLY(return x64_impl_foo());
75    return generic_impl_foo();
76}
77```
78
79See more details in [`platform.hpp`](platform.hpp).
80Also check the `DNNL_TARGET_ARCH` CMake variable.
81