1 /* 2 Copyright (c) 2013 yvt 3 4 This file is part of OpenSpades. 5 6 OpenSpades is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 OpenSpades is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with OpenSpades. If not, see <http://www.gnu.org/licenses/>. 18 19 */ 20 21 #include <atomic> 22 #include <cstdlib> 23 24 #include <Client/GameMap.h> 25 #include "GLMapShadowRenderer.h" 26 #include "GLRadiosityRenderer.h" 27 #include "GLRenderer.h" 28 29 #include <Core/ConcurrentDispatch.h> 30 #include <Core/Settings.h> 31 #ifdef __APPLE__ 32 #include <xmmintrin.h> 33 #endif 34 35 #include "GLProfiler.h" 36 37 namespace spades { 38 namespace draw { 39 class GLRadiosityRenderer::UpdateDispatch : public ConcurrentDispatch { 40 GLRadiosityRenderer *renderer; 41 42 public: 43 std::atomic<bool> done {false}; UpdateDispatch(GLRadiosityRenderer * r)44 UpdateDispatch(GLRadiosityRenderer *r) : renderer(r) { } Run()45 void Run() override { 46 SPADES_MARK_FUNCTION(); 47 48 renderer->UpdateDirtyChunks(); 49 50 done = true; 51 } 52 }; 53 GLRadiosityRenderer(GLRenderer * r,client::GameMap * m)54 GLRadiosityRenderer::GLRadiosityRenderer(GLRenderer *r, client::GameMap *m) 55 : renderer(r), device(r->GetGLDevice()), settings(r->GetSettings()), map(m) { 56 SPADES_MARK_FUNCTION(); 57 58 w = map->Width(); 59 h = map->Height(); 60 d = map->Depth(); 61 62 chunkW = w / ChunkSize; 63 chunkH = h / ChunkSize; 64 chunkD = d / ChunkSize; 65 66 chunks = std::vector<Chunk>{static_cast<std::size_t>(chunkW * chunkH * chunkD)}; 67 68 for (size_t i = 0; i < chunks.size(); i++) { 69 Chunk &c = chunks[i]; 70 71 uint32_t *data; 72 73 data = (uint32_t *)c.dataFlat; 74 std::fill(data, data + ChunkSize * ChunkSize * ChunkSize, 0x20080200); 75 76 data = (uint32_t *)c.dataX; 77 std::fill(data, data + ChunkSize * ChunkSize * ChunkSize, 0x20080200); 78 79 data = (uint32_t *)c.dataY; 80 std::fill(data, data + ChunkSize * ChunkSize * ChunkSize, 0x20080200); 81 82 data = (uint32_t *)c.dataZ; 83 std::fill(data, data + ChunkSize * ChunkSize * ChunkSize, 0x20080200); 84 } 85 86 for (int x = 0; x < chunkW; x++) 87 for (int y = 0; y < chunkH; y++) 88 for (int z = 0; z < chunkD; z++) { 89 Chunk &c = GetChunk(x, y, z); 90 c.cx = x; 91 c.cy = y; 92 c.cz = z; 93 } 94 95 SPLog("Chunk buffer allocated (%d bytes)", (int) sizeof(Chunk) * chunkW * chunkH * chunkD); 96 97 // make texture 98 textureFlat = device->GenTexture(); 99 textureX = device->GenTexture(); 100 textureY = device->GenTexture(); 101 textureZ = device->GenTexture(); 102 103 IGLDevice::UInteger texs[] = {textureFlat, textureX, textureY, textureZ}; 104 105 for (int i = 0; i < 4; i++) { 106 107 device->BindTexture(IGLDevice::Texture3D, texs[i]); 108 device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureMagFilter, 109 IGLDevice::Linear); 110 device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureMinFilter, 111 IGLDevice::Linear); 112 device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureWrapS, 113 IGLDevice::Repeat); 114 device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureWrapT, 115 IGLDevice::Repeat); 116 device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureWrapR, 117 IGLDevice::ClampToEdge); 118 device->TexImage3D( 119 IGLDevice::Texture3D, 0, 120 ((int)settings.r_radiosity >= 2) ? IGLDevice::RGB10A2 : IGLDevice::RGB5A1, w, h, 121 d, 0, IGLDevice::BGRA, IGLDevice::UnsignedInt2101010Rev, NULL); 122 } 123 124 SPLog("Chunk texture allocated"); 125 126 std::vector<uint32_t> v; 127 v.resize(w * h); 128 std::fill(v.begin(), v.end(), 0x20080200 /*0x4210 */); 129 130 for (int j = 0; j < 4; j++) { 131 132 device->BindTexture(IGLDevice::Texture3D, texs[j]); 133 for (int i = 0; i < d; i++) { 134 device->TexSubImage3D(IGLDevice::Texture3D, 0, 0, 0, i, w, h, 1, 135 IGLDevice::BGRA, IGLDevice::UnsignedInt2101010Rev, 136 v.data()); 137 } 138 } 139 dispatch = NULL; 140 141 SPLog("Chunk texture initialized"); 142 } 143 ~GLRadiosityRenderer()144 GLRadiosityRenderer::~GLRadiosityRenderer() { 145 SPADES_MARK_FUNCTION(); 146 if (dispatch) { 147 dispatch->Join(); 148 delete dispatch; 149 } 150 SPLog("Releasing textures"); 151 152 device->DeleteTexture(textureFlat); 153 device->DeleteTexture(textureX); 154 device->DeleteTexture(textureY); 155 device->DeleteTexture(textureZ); 156 } 157 Evaluate(IntVector3 ipos)158 GLRadiosityRenderer::Result GLRadiosityRenderer::Evaluate(IntVector3 ipos) { 159 SPADES_MARK_FUNCTION_DEBUG(); 160 161 GLRadiosityRenderer::Result result; 162 result.base = MakeVector3(0, 0, 0); 163 result.x = MakeVector3(0, 0, 0); 164 result.y = MakeVector3(0, 0, 0); 165 result.z = MakeVector3(0, 0, 0); 166 167 Vector3 pos = {ipos.x + .5f, ipos.y + .5f, ipos.z + .5f}; 168 169 GLMapShadowRenderer *shadowmap = renderer->mapShadowRenderer; 170 uint32_t *bitmap = shadowmap->bitmap.data(); 171 int centerX = ipos.x; 172 int centerY = ipos.y - ipos.z; 173 const int yMask = h - 1; 174 const int pitch = w; 175 176 for (int x = -Envelope; x <= Envelope; x++) { 177 uint32_t *column = bitmap + ((centerX + x) & (w - 1)); 178 for (int y = -Envelope; y <= Envelope; y++) { 179 uint32_t pixel = column[pitch * ((centerY + y) & yMask)]; 180 int depth = pixel >> 24; 181 182 // shadowmap pixel's world coord 183 int wx = centerX + x; 184 int wy = centerY + y + depth; 185 int wz = depth; 186 187 // if true, this is negative-y faced plane 188 // if false, this is negative-z faced plane 189 bool isSide = (pixel & 0x80) != 0; 190 191 // direction dependent process 192 Vector3 center; // center of face 193 Vector3 diff; // pos - center 194 float diffDot; // dot(diff, normal) 195 if (isSide) { 196 // normal cull 197 if (wy <= ipos.y) 198 continue; 199 200 center.x = wx + .5f; 201 center.y = wy; 202 center.z = wz - .5f; 203 204 diff = pos - center; 205 diffDot = -diff.y; 206 } else { 207 if (wz <= ipos.z) 208 continue; 209 210 center.x = wx + .5f; 211 center.y = wy + .5f; 212 center.z = wz; 213 214 diff = pos - center; 215 diffDot = -diff.z; 216 } 217 218 SPAssert(diffDot >= 0.f); 219 220 float diffLen = diff.GetLength(); 221 float invDiffLen = 1.f / diffLen; 222 float invDiffLenSmooth = 1.f / ((diffLen) + .4f); 223 224 // fall-off because of direciton 225 float intensity = diffDot * invDiffLen; 226 227 // 1/(r^2) distance fall-off 228 intensity *= invDiffLenSmooth; 229 intensity *= invDiffLenSmooth; 230 231 // smooth envelope cull 232 /* 233 float distFalloff = 1.f - diffLen * diffLen * (1.f / (Envelope * Envelope + 1)); 234 if(distFalloff < 0.f) 235 continue; 236 intensity *= distFalloff; 237 */ 238 239 // normalize 240 Vector3 normDiff = diff * -invDiffLen; 241 242 // extract shadowmap color 243 float red = static_cast<float>((pixel)&0x3f); 244 float green = static_cast<float>((pixel >> 8) & 0x3f); 245 float blue = static_cast<float>((pixel >> 16) & 0x3f); 246 247 Vector3 color = {red, green, blue}; 248 color *= intensity; 249 250 // add to result 251 result.base += color; 252 result.x += color * normDiff.x; 253 result.y += color * normDiff.y; 254 result.z += color * normDiff.z; 255 256 SPAssert(!std::isnan(intensity)); 257 SPAssert(intensity >= 0.f); 258 SPAssert(red >= 0.f && red < 64.f); 259 SPAssert(green >= 0.f && green < 64.f); 260 SPAssert(blue >= 0.f && blue < 64.f); 261 } 262 } 263 264 float scale = 0.1f / 64.f; 265 result.base *= scale; 266 result.x *= scale; 267 result.y *= scale; 268 result.z *= scale; 269 270 return result; 271 } 272 GameMapChanged(int x,int y,int z,client::GameMap * map)273 void GLRadiosityRenderer::GameMapChanged(int x, int y, int z, client::GameMap *map) { 274 SPADES_MARK_FUNCTION_DEBUG(); 275 if (map != this->map) 276 return; 277 278 Invalidate(x - Envelope, y - Envelope, z - Envelope, x + Envelope, y + Envelope, 279 z + Envelope); 280 } 281 Invalidate(int minX,int minY,int minZ,int maxX,int maxY,int maxZ)282 void GLRadiosityRenderer::Invalidate(int minX, int minY, int minZ, int maxX, int maxY, 283 int maxZ) { 284 SPADES_MARK_FUNCTION_DEBUG(); 285 if (minZ < 0) 286 minZ = 0; 287 if (maxZ > d - 1) 288 maxZ = d - 1; 289 if (minX > maxX || minY > maxY || minZ > maxZ) 290 return; 291 292 // these should be floor div 293 int cx1 = minX >> ChunkSizeBits; 294 int cy1 = minY >> ChunkSizeBits; 295 int cz1 = minZ >> ChunkSizeBits; 296 int cx2 = maxX >> ChunkSizeBits; 297 int cy2 = maxY >> ChunkSizeBits; 298 int cz2 = maxZ >> ChunkSizeBits; 299 300 for (int cx = cx1; cx <= cx2; cx++) 301 for (int cy = cy1; cy <= cy2; cy++) 302 for (int cz = cz1; cz <= cz2; cz++) { 303 Chunk &c = GetChunkWrapped(cx, cy, cz); 304 int originX = cx * ChunkSize; 305 int originY = cy * ChunkSize; 306 int originZ = cz * ChunkSize; 307 308 int inMinX = std::max(minX - originX, 0); 309 int inMinY = std::max(minY - originY, 0); 310 int inMinZ = std::max(minZ - originZ, 0); 311 int inMaxX = std::min(maxX - originX, ChunkSize - 1); 312 int inMaxY = std::min(maxY - originY, ChunkSize - 1); 313 int inMaxZ = std::min(maxZ - originZ, ChunkSize - 1); 314 315 if (!c.dirty) { 316 c.dirtyMinX = inMinX; 317 c.dirtyMinY = inMinY; 318 c.dirtyMinZ = inMinZ; 319 c.dirtyMaxX = inMaxX; 320 c.dirtyMaxY = inMaxY; 321 c.dirtyMaxZ = inMaxZ; 322 c.dirty = true; 323 } else { 324 c.dirtyMinX = std::min(inMinX, c.dirtyMinX); 325 c.dirtyMinY = std::min(inMinY, c.dirtyMinY); 326 c.dirtyMinZ = std::min(inMinZ, c.dirtyMinZ); 327 c.dirtyMaxX = std::max(inMaxX, c.dirtyMaxX); 328 c.dirtyMaxY = std::max(inMaxY, c.dirtyMaxY); 329 c.dirtyMaxZ = std::max(inMaxZ, c.dirtyMaxZ); 330 } 331 } 332 } 333 GetNumDirtyChunks()334 int GLRadiosityRenderer::GetNumDirtyChunks() { 335 int cnt = 0; 336 for (size_t i = 0; i < chunks.size(); i++) { 337 Chunk &c = chunks[i]; 338 if (c.dirty) 339 cnt++; 340 } 341 return cnt; 342 } 343 Update()344 void GLRadiosityRenderer::Update() { 345 if (GetNumDirtyChunks() > 0 && (dispatch == NULL || dispatch->done.load())) { 346 if (dispatch) { 347 dispatch->Join(); 348 delete dispatch; 349 } 350 dispatch = new UpdateDispatch(this); 351 dispatch->Start(); 352 } 353 int cnt = 0; 354 for (size_t i = 0; i < chunks.size(); i++) { 355 if (!chunks[i].transferDone.load()) 356 cnt++; 357 } 358 GLProfiler::Context profiler(renderer->GetGLProfiler(), "Radiosity [>= %d chunk(s)]", cnt); 359 for (size_t i = 0; i < chunks.size(); i++) { 360 Chunk &c = chunks[i]; 361 if (!c.transferDone.exchange(true)) { 362 device->BindTexture(IGLDevice::Texture3D, textureFlat); 363 device->TexSubImage3D(IGLDevice::Texture3D, 0, c.cx * ChunkSize, 364 c.cy * ChunkSize, c.cz * ChunkSize, ChunkSize, ChunkSize, 365 ChunkSize, IGLDevice::BGRA, 366 IGLDevice::UnsignedInt2101010Rev, c.dataFlat); 367 368 device->BindTexture(IGLDevice::Texture3D, textureX); 369 device->TexSubImage3D(IGLDevice::Texture3D, 0, c.cx * ChunkSize, 370 c.cy * ChunkSize, c.cz * ChunkSize, ChunkSize, ChunkSize, 371 ChunkSize, IGLDevice::BGRA, 372 IGLDevice::UnsignedInt2101010Rev, c.dataX); 373 374 device->BindTexture(IGLDevice::Texture3D, textureY); 375 device->TexSubImage3D(IGLDevice::Texture3D, 0, c.cx * ChunkSize, 376 c.cy * ChunkSize, c.cz * ChunkSize, ChunkSize, ChunkSize, 377 ChunkSize, IGLDevice::BGRA, 378 IGLDevice::UnsignedInt2101010Rev, c.dataY); 379 380 device->BindTexture(IGLDevice::Texture3D, textureZ); 381 device->TexSubImage3D(IGLDevice::Texture3D, 0, c.cx * ChunkSize, 382 c.cy * ChunkSize, c.cz * ChunkSize, ChunkSize, ChunkSize, 383 ChunkSize, IGLDevice::BGRA, 384 IGLDevice::UnsignedInt2101010Rev, c.dataZ); 385 } 386 } 387 } 388 UpdateDirtyChunks()389 void GLRadiosityRenderer::UpdateDirtyChunks() { 390 int dirtyChunkIds[256]; 391 int numDirtyChunks = 0; 392 int nearDirtyChunks = 0; 393 394 // first, check only chunks in near range 395 Vector3 eyePos = renderer->GetSceneDef().viewOrigin; 396 int eyeX = (int)(eyePos.x) >> ChunkSizeBits; 397 int eyeY = (int)(eyePos.y) >> ChunkSizeBits; 398 int eyeZ = (int)(eyePos.z) >> ChunkSizeBits; 399 400 for (size_t i = 0; i < chunks.size(); i++) { 401 Chunk &c = chunks[i]; 402 int dx = (c.cx - eyeX) & (chunkW - 1); 403 int dy = (c.cy - eyeY) & (chunkH - 1); 404 int dz = (c.cz - eyeZ); 405 if (dx >= 6 && dx <= chunkW - 6) 406 continue; 407 if (dy >= 6 && dy <= chunkW - 6) 408 continue; 409 if (dz >= 6 || dz <= -6) 410 continue; 411 if (c.dirty) { 412 dirtyChunkIds[numDirtyChunks++] = static_cast<int>(i); 413 nearDirtyChunks++; 414 if (numDirtyChunks >= 256) 415 break; 416 } 417 } 418 419 // far chunks 420 if (numDirtyChunks == 0) { 421 for (size_t i = 0; i < chunks.size(); i++) { 422 Chunk &c = chunks[i]; 423 if (c.dirty) { 424 dirtyChunkIds[numDirtyChunks++] = static_cast<int>(i); 425 if (numDirtyChunks >= 256) 426 break; 427 } 428 } 429 } 430 431 // limit update count per frame 432 for (int i = 0; i < 8; i++) { 433 if (numDirtyChunks <= 0) 434 break; 435 int idx = SampleRandomInt(0, numDirtyChunks - 1); 436 Chunk &c = chunks[dirtyChunkIds[idx]]; 437 438 // remove from list (fast) 439 if (idx < numDirtyChunks - 1) { 440 std::swap(dirtyChunkIds[idx], dirtyChunkIds[numDirtyChunks - 1]); 441 } 442 numDirtyChunks--; 443 444 UpdateChunk(c.cx, c.cy, c.cz); 445 } 446 /* 447 printf("%d (%d near) chunk update left\n", 448 GetNumDirtyChunks(), nearDirtyChunks);*/ 449 } 450 CompressDynamicRange(float v)451 float GLRadiosityRenderer::CompressDynamicRange(float v) { 452 if ((int)settings.r_radiosity >= 2) 453 return v; 454 if (v >= 0.f) 455 return sqrtf(v); 456 else 457 return -sqrtf(-v); 458 } 459 EncodeValue(Vector3 vec)460 uint32_t GLRadiosityRenderer::EncodeValue(Vector3 vec) { 461 float v; 462 int iv; 463 unsigned int out = 0xC0000000; 464 465 vec.x = CompressDynamicRange(vec.x); 466 vec.y = CompressDynamicRange(vec.y); 467 vec.z = CompressDynamicRange(vec.z); 468 469 vec *= .5f; 470 vec += .5f; 471 vec *= 1022.f / 1023.f; 472 473 v = vec.x * 1023.f + .5f; 474 if (v > 1023.2f) 475 v = 1023.2f; 476 if (v < 0.f) 477 v = 0.f; 478 iv = (unsigned int)v; 479 if (iv > 1023) 480 iv = 1023; 481 if (iv < 0) 482 iv = 0; 483 out |= iv << 20; 484 485 v = vec.y * 1023.f + .5f; 486 if (v > 1023.2f) 487 v = 1023.2f; 488 if (v < 0.f) 489 v = 0.f; 490 iv = (unsigned int)v; 491 if (iv > 1023) 492 iv = 1023; 493 if (iv < 0) 494 iv = 0; 495 out |= iv << 10; 496 497 v = vec.z * 1023.f + .5f; 498 if (v > 1023.2f) 499 v = 1023.2f; 500 if (v < 0.f) 501 v = 0.f; 502 iv = (unsigned int)v; 503 if (iv > 1023) 504 iv = 1023; 505 if (iv < 0) 506 iv = 0; 507 out |= iv; 508 509 return (uint32_t)out; 510 } 511 UpdateChunk(int cx,int cy,int cz)512 void GLRadiosityRenderer::UpdateChunk(int cx, int cy, int cz) { 513 Chunk &c = GetChunk(cx, cy, cz); 514 if (!c.dirty) 515 return; 516 517 int originX = cx * ChunkSize; 518 int originY = cy * ChunkSize; 519 int originZ = cz * ChunkSize; 520 521 for (int z = c.dirtyMinZ; z <= c.dirtyMaxZ; z++) 522 for (int y = c.dirtyMinY; y <= c.dirtyMaxY; y++) 523 for (int x = c.dirtyMinX; x <= c.dirtyMaxX; x++) { 524 IntVector3 pos; 525 pos.x = (x + originX); 526 pos.y = (y + originY); 527 pos.z = (z + originZ); 528 529 Result res = Evaluate(pos); 530 c.dataFlat[z][y][x] = EncodeValue(res.base); 531 c.dataX[z][y][x] = EncodeValue(res.x); 532 c.dataY[z][y][x] = EncodeValue(res.y); 533 c.dataZ[z][y][x] = EncodeValue(res.z); 534 } 535 536 c.dirty = false; 537 c.transferDone = false; 538 } 539 } 540 } 541