/dports/misc/mnn/MNN-1.2.0/source/backend/opencl/execution/cl/ |
H A D | winogradTransform_buf.cl | 189 int batch_offset = srcChannelC4*dstHeight*16; 190 vstore4(+m00 - m20, 0, uOutput+out_offset+0*batch_offset); 191 vstore4(+(FLOAT)0.5f * m10 + (FLOAT)0.5f * m20, 0, uOutput+out_offset+1*batch_offset); 192 vstore4(-(FLOAT)0.5f * m10 + (FLOAT)0.5f * m20, 0, uOutput+out_offset+2*batch_offset); 193 vstore4(-m10 + m30, 0, uOutput+out_offset+3*batch_offset); 194 vstore4(+m01 - m21, 0, uOutput+out_offset+4*batch_offset); 195 vstore4(+(FLOAT)0.5f * m11 + (FLOAT)0.5f * m21, 0, uOutput+out_offset+5*batch_offset); 196 vstore4(-(FLOAT)0.5f * m11 + (FLOAT)0.5f * m21, 0, uOutput+out_offset+6*batch_offset); 197 vstore4(-m11 + m31, 0, uOutput+out_offset+7*batch_offset); 198 vstore4(+m02 - m22, 0, uOutput+out_offset+8*batch_offset); [all …]
|
/dports/misc/py-xgboost/xgboost-1.5.1/src/predictor/ |
H A D | gpu_predictor.cu | 652 predictions->DeviceSpan().subspan(batch_offset), in PredictInternal() 671 size_t batch_offset) const { in PredictInternal() 703 size_t batch_offset = 0; in DevicePredictInternal() local 710 size_t batch_offset = 0; in DevicePredictInternal() local 718 batch_offset); in DevicePredictInternal() 719 batch_offset += page.Impl()->n_rows; in DevicePredictInternal() 982 bst_row_t batch_offset = 0; in PredictLeaf() local 991 predictions->DeviceSpan().subspan(batch_offset), in PredictLeaf() 1001 batch_offset += batch.Size(); in PredictLeaf() 1005 bst_row_t batch_offset = 0; in PredictLeaf() local [all …]
|
H A D | cpu_predictor.cc | 106 void FVecFill(const size_t block_size, const size_t batch_offset, const int num_feature, in FVecFill() argument 113 const SparsePage::Inst inst = (*batch)[batch_offset + i]; in FVecFill() 119 void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batch, in FVecDrop() argument 123 const SparsePage::Inst inst = (*batch)[batch_offset + i]; in FVecDrop() 201 const size_t batch_offset = block_id * block_of_rows_size; in PredictBatchByBlockOfRowsKernel() local 203 std::min(nsize - batch_offset, block_of_rows_size); in PredictBatchByBlockOfRowsKernel() 206 FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, in PredictBatchByBlockOfRowsKernel() 210 batch_offset + batch.base_rowid, num_group, thread_temp, in PredictBatchByBlockOfRowsKernel() 212 FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp); in PredictBatchByBlockOfRowsKernel()
|
/dports/misc/xgboost/xgboost-1.5.1/src/predictor/ |
H A D | gpu_predictor.cu | 652 predictions->DeviceSpan().subspan(batch_offset), in PredictInternal() 671 size_t batch_offset) const { in PredictInternal() 703 size_t batch_offset = 0; in DevicePredictInternal() local 710 size_t batch_offset = 0; in DevicePredictInternal() local 718 batch_offset); in DevicePredictInternal() 719 batch_offset += page.Impl()->n_rows; in DevicePredictInternal() 982 bst_row_t batch_offset = 0; in PredictLeaf() local 991 predictions->DeviceSpan().subspan(batch_offset), in PredictLeaf() 1001 batch_offset += batch.Size(); in PredictLeaf() 1005 bst_row_t batch_offset = 0; in PredictLeaf() local [all …]
|
H A D | cpu_predictor.cc | 106 void FVecFill(const size_t block_size, const size_t batch_offset, const int num_feature, in FVecFill() argument 113 const SparsePage::Inst inst = (*batch)[batch_offset + i]; in FVecFill() 119 void FVecDrop(const size_t block_size, const size_t batch_offset, DataView* batch, in FVecDrop() argument 123 const SparsePage::Inst inst = (*batch)[batch_offset + i]; in FVecDrop() 201 const size_t batch_offset = block_id * block_of_rows_size; in PredictBatchByBlockOfRowsKernel() local 203 std::min(nsize - batch_offset, block_of_rows_size); in PredictBatchByBlockOfRowsKernel() 206 FVecFill(block_size, batch_offset, num_feature, &batch, fvec_offset, in PredictBatchByBlockOfRowsKernel() 210 batch_offset + batch.base_rowid, num_group, thread_temp, in PredictBatchByBlockOfRowsKernel() 212 FVecDrop(block_size, batch_offset, &batch, fvec_offset, p_thread_temp); in PredictBatchByBlockOfRowsKernel()
|
/dports/multimedia/libv4l/linux-5.13-rc2/drivers/gpu/drm/i915/gt/ |
H A D | gen7_renderclear.c | 101 static u32 batch_offset(const struct batch_chunk *bc, u32 *cs) in batch_offset() function 148 u32 offset = batch_offset(state, cs); in gen7_fill_surface_state() 180 u32 offset = batch_offset(state, cs); in gen7_fill_binding_table() 200 return batch_offset(state, in gen7_fill_kernel_data() 215 u32 offset = batch_offset(state, cs); in gen7_fill_interface_descriptor()
|
H A D | intel_renderstate.c | 87 so->batch_offset = i915_ggtt_offset(so->vma); in render_state_setup() 124 so->aux_offset += so->batch_offset; in render_state_setup() 223 so->batch_offset, so->batch_size, in intel_renderstate_emit()
|
/dports/multimedia/v4l_compat/linux-5.13-rc2/drivers/gpu/drm/i915/gt/ |
H A D | gen7_renderclear.c | 101 static u32 batch_offset(const struct batch_chunk *bc, u32 *cs) in batch_offset() function 148 u32 offset = batch_offset(state, cs); in gen7_fill_surface_state() 180 u32 offset = batch_offset(state, cs); in gen7_fill_binding_table() 200 return batch_offset(state, in gen7_fill_kernel_data() 215 u32 offset = batch_offset(state, cs); in gen7_fill_interface_descriptor()
|
H A D | intel_renderstate.c | 87 so->batch_offset = i915_ggtt_offset(so->vma); in render_state_setup() 124 so->aux_offset += so->batch_offset; in render_state_setup() 223 so->batch_offset, so->batch_size, in intel_renderstate_emit()
|
/dports/multimedia/v4l-utils/linux-5.13-rc2/drivers/gpu/drm/i915/gt/ |
H A D | gen7_renderclear.c | 101 static u32 batch_offset(const struct batch_chunk *bc, u32 *cs) in batch_offset() function 148 u32 offset = batch_offset(state, cs); in gen7_fill_surface_state() 180 u32 offset = batch_offset(state, cs); in gen7_fill_binding_table() 200 return batch_offset(state, in gen7_fill_kernel_data() 215 u32 offset = batch_offset(state, cs); in gen7_fill_interface_descriptor()
|
H A D | intel_renderstate.c | 87 so->batch_offset = i915_ggtt_offset(so->vma); in render_state_setup() 124 so->aux_offset += so->batch_offset; in render_state_setup() 223 so->batch_offset, so->batch_size, in intel_renderstate_emit()
|
/dports/math/stanmath/math-4.2.0/stan/math/opencl/kernels/ |
H A D | inv_lower_tri_multiply.hpp | 110 const int batch_offset = result_matrix_id * rows * rows; 119 temp[batch_offset + temp_global_col * rows + temp_global_row] = acc[w];
|
/dports/math/py-pystan/pystan-2.19.0.0/pystan/stan/lib/stan_math/stan/math/opencl/kernels/ |
H A D | inv_lower_tri_multiply.hpp | 108 const int batch_offset = result_matrix_id * rows * rows; 117 temp[batch_offset + temp_global_col * rows + temp_global_row] = acc[w];
|
/dports/graphics/mesa-dri-classic/mesa-20.2.3/src/intel/common/tests/ |
H A D | gen_mi_builder_test.cpp | 131 uint32_t batch_offset; member in gen_mi_builder_test 222 batch_offset = 0; in SetUp() 258 void *ptr = (void *)((char *)batch_map + batch_offset); in emit_dwords() 259 batch_offset += num_dwords * 4; in emit_dwords() 260 assert(batch_offset < BATCH_BO_SIZE); in emit_dwords() 270 if (batch_offset & 4) in submit_batch() 296 execbuf.batch_len = batch_offset; in submit_batch()
|
/dports/lang/clover/mesa-21.3.6/src/mesa/drivers/dri/i965/ |
H A D | brw_batch.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/graphics/libosmesa-gallium/mesa-21.3.6/src/mesa/drivers/dri/i965/ |
H A D | brw_batch.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/graphics/libosmesa/mesa-21.3.6/src/mesa/drivers/dri/i965/ |
H A D | brw_batch.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/graphics/mesa-libs/mesa-21.3.6/src/mesa/drivers/dri/i965/ |
H A D | brw_batch.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/graphics/mesa-dri-gallium/mesa-21.3.6/src/mesa/drivers/dri/i965/ |
H A D | brw_batch.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/graphics/mesa-gallium-va/mesa-21.3.6/src/mesa/drivers/dri/i965/ |
H A D | brw_batch.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/graphics/mesa-dri-classic/mesa-20.2.3/src/mesa/drivers/dri/i965/ |
H A D | intel_batchbuffer.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/graphics/mesa-gallium-vdpau/mesa-21.3.6/src/mesa/drivers/dri/i965/ |
H A D | brw_batch.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/graphics/mesa-gallium-xa/mesa-21.3.6/src/mesa/drivers/dri/i965/ |
H A D | brw_batch.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/graphics/mesa-dri/mesa-21.3.6/src/mesa/drivers/dri/i965/ |
H A D | brw_batch.h | 67 uint32_t batch_offset, 72 uint32_t batch_offset,
|
/dports/lang/clover/mesa-21.3.6/src/gallium/drivers/crocus/ |
H A D | crocus_batch.h | 205 uint64_t crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset, 208 uint64_t crocus_state_reloc(struct crocus_batch *batch, uint32_t batch_offset,
|