// Compute shaders implementing image<->image and buffer<->image copies.
//
// All entry points read their parameters from the `CopyConstants` constant
// buffer bound at b0. Every entry point uses exactly one source (t0) and one
// destination (u0) resource, which is why several declarations below share
// the same register.

struct BufferCopy {
    uint4 SrcDst;
};

struct ImageCopy {
    // xy = texel offset, z = array slice (see GetImageCopySrc/GetImageCopyDst).
    uint4 Src;
    uint4 Dst;
};

struct BufferImageCopy {
    // x=offset, yz=size
    // In this file: x is added as a byte offset to every buffer address and
    // y is used as the buffer row length in texels; z is not read here.
    uint4 BufferVars;
    // xy = texel offset of the copy region, z = array slice.
    uint4 ImageOffset;
    uint4 ImageExtent;
    uint4 ImageSize;
};

cbuffer CopyConstants : register(b0) {
    BufferCopy BufferCopies;
    ImageCopy ImageCopies;
    BufferImageCopy BufferImageCopies;
};


// Exclusive upper corner of the buffer<->image copy region, clamped to the
// image size. Threads whose texel coordinate is >= this value must not touch
// the image.
uint3 GetDestBounds()
{
    uint4 region_end = BufferImageCopies.ImageOffset + BufferImageCopies.ImageExtent;
    return min(region_end, BufferImageCopies.ImageSize).xyz;
}

// Destination texel (xy) and array slice (z) for an image<->image copy.
uint3 GetImageCopyDst(uint3 dispatch_thread_id)
{
    uint2 texel = ImageCopies.Dst.xy + dispatch_thread_id.xy;
    return uint3(texel, ImageCopies.Dst.z);
}

// Source texel (xy) and array slice (z) for an image<->image copy.
uint3 GetImageCopySrc(uint3 dispatch_thread_id)
{
    uint2 texel = ImageCopies.Src.xy + dispatch_thread_id.xy;
    return uint3(texel, ImageCopies.Src.z);
}

// Image coordinate written by a buffer->image copy thread.
uint3 GetImageDst(uint3 dispatch_thread_id)
{
    uint2 texel = BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy;
    return uint3(texel, BufferImageCopies.ImageOffset.z);
}

// Image coordinate read by an image->buffer copy thread.
uint3 GetImageSrc(uint3 dispatch_thread_id)
{
    uint2 texel = BufferImageCopies.ImageOffset.xy + dispatch_thread_id.xy;
    return uint3(texel, BufferImageCopies.ImageOffset.z);
}

// Shared byte-address computation for all buffer<->image kernels.
//
// bytes_per_thread: bytes covered by one thread along x.
// bytes_per_texel:  size of one texel; multiplied by the row length in
//                   texels to obtain the row pitch.
// The row length is the larger of BufferVars.y and the copy extent width.
uint GetBufferOffset(uint3 dispatch_thread_id, uint bytes_per_thread, uint bytes_per_texel)
{
    uint row_texels = max(BufferImageCopies.BufferVars.y, BufferImageCopies.ImageExtent.x);
    return BufferImageCopies.BufferVars.x
         + dispatch_thread_id.x * bytes_per_thread
         + dispatch_thread_id.y * bytes_per_texel * row_texels;
}

// 128-bit texels: one texel (16 bytes) per thread.
uint GetBufferDst128(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 16, 16);
}
uint GetBufferSrc128(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 16, 16);
}

// 64-bit texels: one texel (8 bytes) per thread.
uint GetBufferDst64(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 8, 8);
}
uint GetBufferSrc64(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 8, 8);
}

// 32-bit texels: one texel (4 bytes) per thread.
uint GetBufferDst32(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 4, 4);
}
uint GetBufferSrc32(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 4, 4);
}

// 16-bit texels: each thread still covers one 32-bit word (two texels).
uint GetBufferDst16(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 4, 2);
}
uint GetBufferSrc16(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 4, 2);
}

// 8-bit texels: each thread covers one 32-bit word (four texels).
uint GetBufferDst8(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 4, 1);
}
uint GetBufferSrc8(uint3 dispatch_thread_id)
{
    return GetBufferOffset(dispatch_thread_id, 4, 1);
}


// Splits a 32-bit word into four bytes, least significant byte in x.
uint4 Uint32ToUint8x4(uint data)
{
    return (data >> uint4(0, 8, 16, 24)) & 0xFF;
}

// Splits a 32-bit word into two 16-bit halves, low half in x.
uint2 Uint32ToUint16x2(uint data)
{
    return (data >> uint2(0, 16)) & 0xFFFF;
}

// Packs four values (each clamped to 0xFF) into one word, x in the low byte.
// The clamped lanes never overlap after shifting, so OR-ing them is
// equivalent to summing them.
uint Uint8x4ToUint32(uint4 data)
{
    uint4 lanes = min(data, 0xFF) << uint4(0, 8, 16, 24);
    return lanes.x | lanes.y | lanes.z | lanes.w;
}

// Packs two values (each clamped to 0xFFFF) into one word, x in the low half.
uint Uint16x2ToUint32(uint2 data)
{
    uint2 lanes = min(data, 0xFFFF) << uint2(0, 16);
    return lanes.x | lanes.y;
}

// Splits a 16-bit value into two bytes, low byte in x.
uint2 Uint16ToUint8x2(uint data)
{
    return (data >> uint2(0, 8)) & 0xFF;
}

// Packs two values (each clamped to 0xFF) into 16 bits, x in the low byte.
uint Uint8x2ToUint16(uint2 data)
{
    uint2 lanes = min(data, 0xFF) << uint2(0, 8);
    return lanes.x | lanes.y;
}

// Scales each float channel by 255 and rounds to nearest by adding 0.5
// before the truncating conversion to uint.
uint4 Float4ToUint8x4(float4 data)
{
    return uint4(data * 255 + .5f);
}

// Buffers are always R32-aligned
ByteAddressBuffer   BufferCopySrc : register(t0);
RWByteAddressBuffer BufferCopyDst : register(u0);

Texture2DArray<uint4>   ImageCopySrc     : register(t0);
RWTexture2DArray<uint>  ImageCopyDstR    : register(u0);
RWTexture2DArray<uint2> ImageCopyDstRg   : register(u0);
RWTexture2DArray<uint4> ImageCopyDstRgba : register(u0);

Texture2DArray<float4> ImageCopySrcBgra : register(t0);

// Image<->Image copies
//
// One thread per texel; no bounds check is performed in these kernels.

// R8G8 -> R16: the two source bytes become one 16-bit value.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8g8_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
    uint3 src_idx = GetImageCopySrc(dispatch_thread_id);

    uint2 texel = ImageCopySrc[src_idx].xy;
    ImageCopyDstR[dst_idx] = Uint8x2ToUint16(texel);
}

// R16 -> R8G8: the 16-bit source value is split into two bytes.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
    uint3 src_idx = GetImageCopySrc(dispatch_thread_id);

    uint texel = ImageCopySrc[src_idx].x;
    ImageCopyDstRg[dst_idx] = Uint16ToUint8x2(texel);
}

// R8G8B8A8 -> R32
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8g8b8a8_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
    uint3 src_idx = GetImageCopySrc(dispatch_thread_id);

    uint4 texel = ImageCopySrc[src_idx];
    ImageCopyDstR[dst_idx] = Uint8x4ToUint32(texel);
}

// R8G8B8A8 -> R16G16 (via an intermediate 32-bit word)
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8g8b8a8_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
    uint3 src_idx = GetImageCopySrc(dispatch_thread_id);

    uint packed = Uint8x4ToUint32(ImageCopySrc[src_idx]);
    ImageCopyDstRg[dst_idx] = Uint32ToUint16x2(packed);
}

// R16G16 -> R32
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16g16_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
    uint3 src_idx = GetImageCopySrc(dispatch_thread_id);

    uint2 texel = ImageCopySrc[src_idx].xy;
    ImageCopyDstR[dst_idx] = Uint16x2ToUint32(texel);
}

// R16G16 -> R8G8B8A8 (via an intermediate 32-bit word)
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16g16_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
    uint3 src_idx = GetImageCopySrc(dispatch_thread_id);

    uint packed = Uint16x2ToUint32(ImageCopySrc[src_idx].xy);
    ImageCopyDstRgba[dst_idx] = Uint32ToUint8x4(packed);
}

// R32 -> R16G16
[numthreads(1, 1, 1)]
void cs_copy_image2d_r32_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
    uint3 src_idx = GetImageCopySrc(dispatch_thread_id);

    uint texel = ImageCopySrc[src_idx].x;
    ImageCopyDstRg[dst_idx] = Uint32ToUint16x2(texel);
}

// R32 -> R8G8B8A8
[numthreads(1, 1, 1)]
void cs_copy_image2d_r32_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    uint3 dst_idx = GetImageCopyDst(dispatch_thread_id);
    uint3 src_idx = GetImageCopySrc(dispatch_thread_id);

    uint texel = ImageCopySrc[src_idx].x;
    ImageCopyDstRgba[dst_idx] = Uint32ToUint8x4(texel);
}

#define COPY_NUM_THREAD_X 8
#define COPY_NUM_THREAD_Y 8

// Buffer<->Image copies
//
// Every kernel below first discards threads whose first texel lies at or
// beyond GetDestBounds() in x or y.

// R32G32B32A32
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_buffer_image2d_r32g32b32a32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 dst_idx = GetImageDst(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
        return;
    }

    uint src_idx = GetBufferSrc128(dispatch_thread_id);

    ImageCopyDstRgba[dst_idx] = uint4(
        BufferCopySrc.Load(src_idx),
        BufferCopySrc.Load(src_idx + 1 * 4),
        BufferCopySrc.Load(src_idx + 2 * 4),
        BufferCopySrc.Load(src_idx + 3 * 4)
    );
}

[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_r32g32b32a32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint4 data = ImageCopySrc[src_idx];
    uint dst_idx = GetBufferDst128(dispatch_thread_id);

    BufferCopyDst.Store(dst_idx,         data.x);
    BufferCopyDst.Store(dst_idx + 1 * 4, data.y);
    BufferCopyDst.Store(dst_idx + 2 * 4, data.z);
    BufferCopyDst.Store(dst_idx + 3 * 4, data.w);
}

// R32G32
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_buffer_image2d_r32g32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 dst_idx = GetImageDst(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
        return;
    }

    uint src_idx = GetBufferSrc64(dispatch_thread_id);

    ImageCopyDstRg[dst_idx] = uint2(
        BufferCopySrc.Load(src_idx),
        BufferCopySrc.Load(src_idx + 1 * 4)
    );
}

[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_r32g32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint2 data = ImageCopySrc[src_idx].rg;
    uint dst_idx = GetBufferDst64(dispatch_thread_id);

    BufferCopyDst.Store(dst_idx,         data.x);
    BufferCopyDst.Store(dst_idx + 1 * 4, data.y);
}

// R16G16B16A16
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_buffer_image2d_r16g16b16a16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 dst_idx = GetImageDst(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
        return;
    }

    uint src_idx = GetBufferSrc64(dispatch_thread_id);

    // First word holds channels xy, second word holds channels zw.
    ImageCopyDstRgba[dst_idx] = uint4(
        Uint32ToUint16x2(BufferCopySrc.Load(src_idx)),
        Uint32ToUint16x2(BufferCopySrc.Load(src_idx + 1 * 4))
    );
}

[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_r16g16b16a16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint4 data = ImageCopySrc[src_idx];
    uint dst_idx = GetBufferDst64(dispatch_thread_id);

    BufferCopyDst.Store(dst_idx,         Uint16x2ToUint32(data.xy));
    BufferCopyDst.Store(dst_idx + 1 * 4, Uint16x2ToUint32(data.zw));
}

// R32
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_buffer_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 dst_idx = GetImageDst(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
        return;
    }

    uint src_idx = GetBufferSrc32(dispatch_thread_id);

    ImageCopyDstR[dst_idx] = BufferCopySrc.Load(src_idx);
}

[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_r32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint dst_idx = GetBufferDst32(dispatch_thread_id);

    BufferCopyDst.Store(dst_idx, ImageCopySrc[src_idx].r);
}

// R16G16
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_buffer_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 dst_idx = GetImageDst(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
        return;
    }

    uint src_idx = GetBufferSrc32(dispatch_thread_id);

    ImageCopyDstRg[dst_idx] = Uint32ToUint16x2(BufferCopySrc.Load(src_idx));
}

[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_r16g16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint dst_idx = GetBufferDst32(dispatch_thread_id);

    BufferCopyDst.Store(dst_idx, Uint16x2ToUint32(ImageCopySrc[src_idx].xy));
}

// R8G8B8A8
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_buffer_image2d_r8g8b8a8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 dst_idx = GetImageDst(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
        return;
    }

    uint src_idx = GetBufferSrc32(dispatch_thread_id);

    ImageCopyDstRgba[dst_idx] = Uint32ToUint8x4(BufferCopySrc.Load(src_idx));
}

[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_r8g8b8a8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint dst_idx = GetBufferDst32(dispatch_thread_id);

    BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(ImageCopySrc[src_idx]));
}

// B8G8R8A8
// The source is read as float4, swizzled to bgra and converted to bytes.
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_b8g8r8a8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint dst_idx = GetBufferDst32(dispatch_thread_id);

    uint4 bytes = Float4ToUint8x4(ImageCopySrcBgra[src_idx].bgra);
    BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(bytes));
}

// R16
// Each thread handles one 32-bit buffer word, i.e. two horizontally adjacent
// texels, so the thread's x coordinate is doubled to get the first texel.
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_buffer_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 dst_idx = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
        return;
    }

    uint src_idx = GetBufferSrc16(dispatch_thread_id);
    uint2 data = Uint32ToUint16x2(BufferCopySrc.Load(src_idx));

    ImageCopyDstR[dst_idx] = data.x;
    // The second texel may fall past the right edge of the copy region when
    // the region ends on an odd column; it must not be written in that case.
    if (dst_idx.x + 1 < bounds.x) {
        ImageCopyDstR[dst_idx + uint3(1, 0, 0)] = data.y;
    }
}

// NOTE(review): when the region ends on an odd column, the second texel read
// here lies outside the copy region but is still packed into the stored word.
// Avoiding that would need a read-modify-write of the destination; confirm
// whether callers rely on row padding to absorb it.
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_r16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint dst_idx = GetBufferDst16(dispatch_thread_id);

    // The first texel lands in the low 16 bits, its right neighbour in the
    // high 16 bits.
    uint lower = ImageCopySrc[src_idx].r;
    uint upper = ImageCopySrc[src_idx + uint3(1, 0, 0)].r;

    BufferCopyDst.Store(dst_idx, Uint16x2ToUint32(uint2(lower, upper)));
}

// R8G8
// Two texels (2 bytes each) per thread, as for R16.
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_buffer_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 dst_idx = GetImageDst(uint3(2, 1, 0) * dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
        return;
    }

    uint src_idx = GetBufferSrc16(dispatch_thread_id);

    uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx));

    ImageCopyDstRg[dst_idx] = data.xy;
    // Skip the second texel if it lies past the right edge of the region.
    if (dst_idx.x + 1 < bounds.x) {
        ImageCopyDstRg[dst_idx + uint3(1, 0, 0)] = data.zw;
    }
}

// NOTE(review): same trailing-texel caveat as cs_copy_image2d_r16_buffer.
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_r8g8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint dst_idx = GetBufferDst16(dispatch_thread_id);

    uint2 lower = ImageCopySrc[src_idx].xy;
    uint2 upper = ImageCopySrc[src_idx + uint3(1, 0, 0)].xy;

    BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(uint4(lower.x, lower.y, upper.x, upper.y)));
}

// R8
// Each thread handles one 32-bit buffer word, i.e. four horizontally adjacent
// texels, so the thread's x coordinate is multiplied by four.
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_buffer_image2d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 dst_idx = GetImageDst(uint3(4, 1, 0) * dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (dst_idx.x >= bounds.x || dst_idx.y >= bounds.y) {
        return;
    }

    uint src_idx = GetBufferSrc8(dispatch_thread_id);
    uint4 data = Uint32ToUint8x4(BufferCopySrc.Load(src_idx));

    // Only write the texels that are still inside the copy region; the first
    // one is guaranteed to be by the early-out above.
    [unroll]
    for (uint i = 0; i < 4; ++i) {
        if (dst_idx.x + i < bounds.x) {
            ImageCopyDstR[dst_idx + uint3(i, 0, 0)] = data[i];
        }
    }
}

// NOTE(review): texels past the right edge of the region (when its width is
// not a multiple of four) are still read and packed into the stored word;
// see the caveat on cs_copy_image2d_r16_buffer.
[numthreads(COPY_NUM_THREAD_X, COPY_NUM_THREAD_Y, 1)]
void cs_copy_image2d_r8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint3 src_idx = GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id);
    uint3 bounds = GetDestBounds();
    if (src_idx.x >= bounds.x || src_idx.y >= bounds.y) {
        return;
    }

    uint dst_idx = GetBufferDst8(dispatch_thread_id);

    BufferCopyDst.Store(dst_idx, Uint8x4ToUint32(uint4(
        ImageCopySrc[src_idx].r,
        ImageCopySrc[src_idx + uint3(1, 0, 0)].r,
        ImageCopySrc[src_idx + uint3(2, 0, 0)].r,
        ImageCopySrc[src_idx + uint3(3, 0, 0)].r
    )));
}