1 /*
2  Copyright (c) 2013 yvt
3 
4  This file is part of OpenSpades.
5 
6  OpenSpades is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  OpenSpades is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with OpenSpades.  If not, see <http://www.gnu.org/licenses/>.
18 
19  */
20 
21 #include <atomic>
22 #include <cstdlib>
23 
24 #include <Client/GameMap.h>
25 #include "GLMapShadowRenderer.h"
26 #include "GLRadiosityRenderer.h"
27 #include "GLRenderer.h"
28 
29 #include <Core/ConcurrentDispatch.h>
30 #include <Core/Settings.h>
31 #ifdef __APPLE__
32 #include <xmmintrin.h>
33 #endif
34 
35 #include "GLProfiler.h"
36 
37 namespace spades {
38 	namespace draw {
39 		class GLRadiosityRenderer::UpdateDispatch : public ConcurrentDispatch {
40 			GLRadiosityRenderer *renderer;
41 
42 		public:
43 			std::atomic<bool> done {false};
UpdateDispatch(GLRadiosityRenderer * r)44 			UpdateDispatch(GLRadiosityRenderer *r) : renderer(r) { }
Run()45 			void Run() override {
46 				SPADES_MARK_FUNCTION();
47 
48 				renderer->UpdateDirtyChunks();
49 
50 				done = true;
51 			}
52 		};
53 
GLRadiosityRenderer(GLRenderer * r,client::GameMap * m)54 		GLRadiosityRenderer::GLRadiosityRenderer(GLRenderer *r, client::GameMap *m)
55 		    : renderer(r), device(r->GetGLDevice()), settings(r->GetSettings()), map(m) {
56 			SPADES_MARK_FUNCTION();
57 
58 			w = map->Width();
59 			h = map->Height();
60 			d = map->Depth();
61 
62 			chunkW = w / ChunkSize;
63 			chunkH = h / ChunkSize;
64 			chunkD = d / ChunkSize;
65 
66 			chunks = std::vector<Chunk>{static_cast<std::size_t>(chunkW * chunkH * chunkD)};
67 
68 			for (size_t i = 0; i < chunks.size(); i++) {
69 				Chunk &c = chunks[i];
70 
71 				uint32_t *data;
72 
73 				data = (uint32_t *)c.dataFlat;
74 				std::fill(data, data + ChunkSize * ChunkSize * ChunkSize, 0x20080200);
75 
76 				data = (uint32_t *)c.dataX;
77 				std::fill(data, data + ChunkSize * ChunkSize * ChunkSize, 0x20080200);
78 
79 				data = (uint32_t *)c.dataY;
80 				std::fill(data, data + ChunkSize * ChunkSize * ChunkSize, 0x20080200);
81 
82 				data = (uint32_t *)c.dataZ;
83 				std::fill(data, data + ChunkSize * ChunkSize * ChunkSize, 0x20080200);
84 			}
85 
86 			for (int x = 0; x < chunkW; x++)
87 				for (int y = 0; y < chunkH; y++)
88 					for (int z = 0; z < chunkD; z++) {
89 						Chunk &c = GetChunk(x, y, z);
90 						c.cx = x;
91 						c.cy = y;
92 						c.cz = z;
93 					}
94 
95 			SPLog("Chunk buffer allocated (%d bytes)", (int) sizeof(Chunk) * chunkW * chunkH * chunkD);
96 
97 			// make texture
98 			textureFlat = device->GenTexture();
99 			textureX = device->GenTexture();
100 			textureY = device->GenTexture();
101 			textureZ = device->GenTexture();
102 
103 			IGLDevice::UInteger texs[] = {textureFlat, textureX, textureY, textureZ};
104 
105 			for (int i = 0; i < 4; i++) {
106 
107 				device->BindTexture(IGLDevice::Texture3D, texs[i]);
108 				device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureMagFilter,
109 				                     IGLDevice::Linear);
110 				device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureMinFilter,
111 				                     IGLDevice::Linear);
112 				device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureWrapS,
113 				                     IGLDevice::Repeat);
114 				device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureWrapT,
115 				                     IGLDevice::Repeat);
116 				device->TexParamater(IGLDevice::Texture3D, IGLDevice::TextureWrapR,
117 				                     IGLDevice::ClampToEdge);
118 				device->TexImage3D(
119 				  IGLDevice::Texture3D, 0,
120 				  ((int)settings.r_radiosity >= 2) ? IGLDevice::RGB10A2 : IGLDevice::RGB5A1, w, h,
121 				  d, 0, IGLDevice::BGRA, IGLDevice::UnsignedInt2101010Rev, NULL);
122 			}
123 
124 			SPLog("Chunk texture allocated");
125 
126 			std::vector<uint32_t> v;
127 			v.resize(w * h);
128 			std::fill(v.begin(), v.end(), 0x20080200 /*0x4210 */);
129 
130 			for (int j = 0; j < 4; j++) {
131 
132 				device->BindTexture(IGLDevice::Texture3D, texs[j]);
133 				for (int i = 0; i < d; i++) {
134 					device->TexSubImage3D(IGLDevice::Texture3D, 0, 0, 0, i, w, h, 1,
135 					                      IGLDevice::BGRA, IGLDevice::UnsignedInt2101010Rev,
136 					                      v.data());
137 				}
138 			}
139 			dispatch = NULL;
140 
141 			SPLog("Chunk texture initialized");
142 		}
143 
~GLRadiosityRenderer()144 		GLRadiosityRenderer::~GLRadiosityRenderer() {
145 			SPADES_MARK_FUNCTION();
146 			if (dispatch) {
147 				dispatch->Join();
148 				delete dispatch;
149 			}
150 			SPLog("Releasing textures");
151 
152 			device->DeleteTexture(textureFlat);
153 			device->DeleteTexture(textureX);
154 			device->DeleteTexture(textureY);
155 			device->DeleteTexture(textureZ);
156 		}
157 
Evaluate(IntVector3 ipos)158 		GLRadiosityRenderer::Result GLRadiosityRenderer::Evaluate(IntVector3 ipos) {
159 			SPADES_MARK_FUNCTION_DEBUG();
160 
161 			GLRadiosityRenderer::Result result;
162 			result.base = MakeVector3(0, 0, 0);
163 			result.x = MakeVector3(0, 0, 0);
164 			result.y = MakeVector3(0, 0, 0);
165 			result.z = MakeVector3(0, 0, 0);
166 
167 			Vector3 pos = {ipos.x + .5f, ipos.y + .5f, ipos.z + .5f};
168 
169 			GLMapShadowRenderer *shadowmap = renderer->mapShadowRenderer;
170 			uint32_t *bitmap = shadowmap->bitmap.data();
171 			int centerX = ipos.x;
172 			int centerY = ipos.y - ipos.z;
173 			const int yMask = h - 1;
174 			const int pitch = w;
175 
176 			for (int x = -Envelope; x <= Envelope; x++) {
177 				uint32_t *column = bitmap + ((centerX + x) & (w - 1));
178 				for (int y = -Envelope; y <= Envelope; y++) {
179 					uint32_t pixel = column[pitch * ((centerY + y) & yMask)];
180 					int depth = pixel >> 24;
181 
182 					// shadowmap pixel's world coord
183 					int wx = centerX + x;
184 					int wy = centerY + y + depth;
185 					int wz = depth;
186 
187 					// if true, this is negative-y faced plane
188 					// if false, this is negative-z faced plane
189 					bool isSide = (pixel & 0x80) != 0;
190 
191 					// direction dependent process
192 					Vector3 center; // center of face
193 					Vector3 diff;   // pos - center
194 					float diffDot;  // dot(diff, normal)
195 					if (isSide) {
196 						// normal cull
197 						if (wy <= ipos.y)
198 							continue;
199 
200 						center.x = wx + .5f;
201 						center.y = wy;
202 						center.z = wz - .5f;
203 
204 						diff = pos - center;
205 						diffDot = -diff.y;
206 					} else {
207 						if (wz <= ipos.z)
208 							continue;
209 
210 						center.x = wx + .5f;
211 						center.y = wy + .5f;
212 						center.z = wz;
213 
214 						diff = pos - center;
215 						diffDot = -diff.z;
216 					}
217 
218 					SPAssert(diffDot >= 0.f);
219 
220 					float diffLen = diff.GetLength();
221 					float invDiffLen = 1.f / diffLen;
222 					float invDiffLenSmooth = 1.f / ((diffLen) + .4f);
223 
224 					// fall-off because of direciton
225 					float intensity = diffDot * invDiffLen;
226 
227 					// 1/(r^2) distance fall-off
228 					intensity *= invDiffLenSmooth;
229 					intensity *= invDiffLenSmooth;
230 
231 					// smooth envelope cull
232 					/*
233 					float distFalloff = 1.f - diffLen * diffLen * (1.f / (Envelope * Envelope + 1));
234 					if(distFalloff < 0.f)
235 					    continue;
236 					intensity *= distFalloff;
237 					*/
238 
239 					// normalize
240 					Vector3 normDiff = diff * -invDiffLen;
241 
242 					// extract shadowmap color
243 					float red = static_cast<float>((pixel)&0x3f);
244 					float green = static_cast<float>((pixel >> 8) & 0x3f);
245 					float blue = static_cast<float>((pixel >> 16) & 0x3f);
246 
247 					Vector3 color = {red, green, blue};
248 					color *= intensity;
249 
250 					// add to result
251 					result.base += color;
252 					result.x += color * normDiff.x;
253 					result.y += color * normDiff.y;
254 					result.z += color * normDiff.z;
255 
256 					SPAssert(!std::isnan(intensity));
257 					SPAssert(intensity >= 0.f);
258 					SPAssert(red >= 0.f && red < 64.f);
259 					SPAssert(green >= 0.f && green < 64.f);
260 					SPAssert(blue >= 0.f && blue < 64.f);
261 				}
262 			}
263 
264 			float scale = 0.1f / 64.f;
265 			result.base *= scale;
266 			result.x *= scale;
267 			result.y *= scale;
268 			result.z *= scale;
269 
270 			return result;
271 		}
272 
GameMapChanged(int x,int y,int z,client::GameMap * map)273 		void GLRadiosityRenderer::GameMapChanged(int x, int y, int z, client::GameMap *map) {
274 			SPADES_MARK_FUNCTION_DEBUG();
275 			if (map != this->map)
276 				return;
277 
278 			Invalidate(x - Envelope, y - Envelope, z - Envelope, x + Envelope, y + Envelope,
279 			           z + Envelope);
280 		}
281 
Invalidate(int minX,int minY,int minZ,int maxX,int maxY,int maxZ)282 		void GLRadiosityRenderer::Invalidate(int minX, int minY, int minZ, int maxX, int maxY,
283 		                                     int maxZ) {
284 			SPADES_MARK_FUNCTION_DEBUG();
285 			if (minZ < 0)
286 				minZ = 0;
287 			if (maxZ > d - 1)
288 				maxZ = d - 1;
289 			if (minX > maxX || minY > maxY || minZ > maxZ)
290 				return;
291 
292 			// these should be floor div
293 			int cx1 = minX >> ChunkSizeBits;
294 			int cy1 = minY >> ChunkSizeBits;
295 			int cz1 = minZ >> ChunkSizeBits;
296 			int cx2 = maxX >> ChunkSizeBits;
297 			int cy2 = maxY >> ChunkSizeBits;
298 			int cz2 = maxZ >> ChunkSizeBits;
299 
300 			for (int cx = cx1; cx <= cx2; cx++)
301 				for (int cy = cy1; cy <= cy2; cy++)
302 					for (int cz = cz1; cz <= cz2; cz++) {
303 						Chunk &c = GetChunkWrapped(cx, cy, cz);
304 						int originX = cx * ChunkSize;
305 						int originY = cy * ChunkSize;
306 						int originZ = cz * ChunkSize;
307 
308 						int inMinX = std::max(minX - originX, 0);
309 						int inMinY = std::max(minY - originY, 0);
310 						int inMinZ = std::max(minZ - originZ, 0);
311 						int inMaxX = std::min(maxX - originX, ChunkSize - 1);
312 						int inMaxY = std::min(maxY - originY, ChunkSize - 1);
313 						int inMaxZ = std::min(maxZ - originZ, ChunkSize - 1);
314 
315 						if (!c.dirty) {
316 							c.dirtyMinX = inMinX;
317 							c.dirtyMinY = inMinY;
318 							c.dirtyMinZ = inMinZ;
319 							c.dirtyMaxX = inMaxX;
320 							c.dirtyMaxY = inMaxY;
321 							c.dirtyMaxZ = inMaxZ;
322 							c.dirty = true;
323 						} else {
324 							c.dirtyMinX = std::min(inMinX, c.dirtyMinX);
325 							c.dirtyMinY = std::min(inMinY, c.dirtyMinY);
326 							c.dirtyMinZ = std::min(inMinZ, c.dirtyMinZ);
327 							c.dirtyMaxX = std::max(inMaxX, c.dirtyMaxX);
328 							c.dirtyMaxY = std::max(inMaxY, c.dirtyMaxY);
329 							c.dirtyMaxZ = std::max(inMaxZ, c.dirtyMaxZ);
330 						}
331 					}
332 		}
333 
GetNumDirtyChunks()334 		int GLRadiosityRenderer::GetNumDirtyChunks() {
335 			int cnt = 0;
336 			for (size_t i = 0; i < chunks.size(); i++) {
337 				Chunk &c = chunks[i];
338 				if (c.dirty)
339 					cnt++;
340 			}
341 			return cnt;
342 		}
343 
Update()344 		void GLRadiosityRenderer::Update() {
345 			if (GetNumDirtyChunks() > 0 && (dispatch == NULL || dispatch->done.load())) {
346 				if (dispatch) {
347 					dispatch->Join();
348 					delete dispatch;
349 				}
350 				dispatch = new UpdateDispatch(this);
351 				dispatch->Start();
352 			}
353 			int cnt = 0;
354 			for (size_t i = 0; i < chunks.size(); i++) {
355 				if (!chunks[i].transferDone.load())
356 					cnt++;
357 			}
358 			GLProfiler::Context profiler(renderer->GetGLProfiler(), "Radiosity [>= %d chunk(s)]", cnt);
359 			for (size_t i = 0; i < chunks.size(); i++) {
360 				Chunk &c = chunks[i];
361 				if (!c.transferDone.exchange(true)) {
362 					device->BindTexture(IGLDevice::Texture3D, textureFlat);
363 					device->TexSubImage3D(IGLDevice::Texture3D, 0, c.cx * ChunkSize,
364 					                      c.cy * ChunkSize, c.cz * ChunkSize, ChunkSize, ChunkSize,
365 					                      ChunkSize, IGLDevice::BGRA,
366 					                      IGLDevice::UnsignedInt2101010Rev, c.dataFlat);
367 
368 					device->BindTexture(IGLDevice::Texture3D, textureX);
369 					device->TexSubImage3D(IGLDevice::Texture3D, 0, c.cx * ChunkSize,
370 					                      c.cy * ChunkSize, c.cz * ChunkSize, ChunkSize, ChunkSize,
371 					                      ChunkSize, IGLDevice::BGRA,
372 					                      IGLDevice::UnsignedInt2101010Rev, c.dataX);
373 
374 					device->BindTexture(IGLDevice::Texture3D, textureY);
375 					device->TexSubImage3D(IGLDevice::Texture3D, 0, c.cx * ChunkSize,
376 					                      c.cy * ChunkSize, c.cz * ChunkSize, ChunkSize, ChunkSize,
377 					                      ChunkSize, IGLDevice::BGRA,
378 					                      IGLDevice::UnsignedInt2101010Rev, c.dataY);
379 
380 					device->BindTexture(IGLDevice::Texture3D, textureZ);
381 					device->TexSubImage3D(IGLDevice::Texture3D, 0, c.cx * ChunkSize,
382 					                      c.cy * ChunkSize, c.cz * ChunkSize, ChunkSize, ChunkSize,
383 					                      ChunkSize, IGLDevice::BGRA,
384 					                      IGLDevice::UnsignedInt2101010Rev, c.dataZ);
385 				}
386 			}
387 		}
388 
UpdateDirtyChunks()389 		void GLRadiosityRenderer::UpdateDirtyChunks() {
390 			int dirtyChunkIds[256];
391 			int numDirtyChunks = 0;
392 			int nearDirtyChunks = 0;
393 
394 			// first, check only chunks in near range
395 			Vector3 eyePos = renderer->GetSceneDef().viewOrigin;
396 			int eyeX = (int)(eyePos.x) >> ChunkSizeBits;
397 			int eyeY = (int)(eyePos.y) >> ChunkSizeBits;
398 			int eyeZ = (int)(eyePos.z) >> ChunkSizeBits;
399 
400 			for (size_t i = 0; i < chunks.size(); i++) {
401 				Chunk &c = chunks[i];
402 				int dx = (c.cx - eyeX) & (chunkW - 1);
403 				int dy = (c.cy - eyeY) & (chunkH - 1);
404 				int dz = (c.cz - eyeZ);
405 				if (dx >= 6 && dx <= chunkW - 6)
406 					continue;
407 				if (dy >= 6 && dy <= chunkW - 6)
408 					continue;
409 				if (dz >= 6 || dz <= -6)
410 					continue;
411 				if (c.dirty) {
412 					dirtyChunkIds[numDirtyChunks++] = static_cast<int>(i);
413 					nearDirtyChunks++;
414 					if (numDirtyChunks >= 256)
415 						break;
416 				}
417 			}
418 
419 			// far chunks
420 			if (numDirtyChunks == 0) {
421 				for (size_t i = 0; i < chunks.size(); i++) {
422 					Chunk &c = chunks[i];
423 					if (c.dirty) {
424 						dirtyChunkIds[numDirtyChunks++] = static_cast<int>(i);
425 						if (numDirtyChunks >= 256)
426 							break;
427 					}
428 				}
429 			}
430 
431 			// limit update count per frame
432 			for (int i = 0; i < 8; i++) {
433 				if (numDirtyChunks <= 0)
434 					break;
435 				int idx = SampleRandomInt(0, numDirtyChunks - 1);
436 				Chunk &c = chunks[dirtyChunkIds[idx]];
437 
438 				// remove from list (fast)
439 				if (idx < numDirtyChunks - 1) {
440 					std::swap(dirtyChunkIds[idx], dirtyChunkIds[numDirtyChunks - 1]);
441 				}
442 				numDirtyChunks--;
443 
444 				UpdateChunk(c.cx, c.cy, c.cz);
445 			}
446 			/*
447 			printf("%d (%d near) chunk update left\n",
448 			       GetNumDirtyChunks(), nearDirtyChunks);*/
449 		}
450 
CompressDynamicRange(float v)451 		float GLRadiosityRenderer::CompressDynamicRange(float v) {
452 			if ((int)settings.r_radiosity >= 2)
453 				return v;
454 			if (v >= 0.f)
455 				return sqrtf(v);
456 			else
457 				return -sqrtf(-v);
458 		}
459 
EncodeValue(Vector3 vec)460 		uint32_t GLRadiosityRenderer::EncodeValue(Vector3 vec) {
461 			float v;
462 			int iv;
463 			unsigned int out = 0xC0000000;
464 
465 			vec.x = CompressDynamicRange(vec.x);
466 			vec.y = CompressDynamicRange(vec.y);
467 			vec.z = CompressDynamicRange(vec.z);
468 
469 			vec *= .5f;
470 			vec += .5f;
471 			vec *= 1022.f / 1023.f;
472 
473 			v = vec.x * 1023.f + .5f;
474 			if (v > 1023.2f)
475 				v = 1023.2f;
476 			if (v < 0.f)
477 				v = 0.f;
478 			iv = (unsigned int)v;
479 			if (iv > 1023)
480 				iv = 1023;
481 			if (iv < 0)
482 				iv = 0;
483 			out |= iv << 20;
484 
485 			v = vec.y * 1023.f + .5f;
486 			if (v > 1023.2f)
487 				v = 1023.2f;
488 			if (v < 0.f)
489 				v = 0.f;
490 			iv = (unsigned int)v;
491 			if (iv > 1023)
492 				iv = 1023;
493 			if (iv < 0)
494 				iv = 0;
495 			out |= iv << 10;
496 
497 			v = vec.z * 1023.f + .5f;
498 			if (v > 1023.2f)
499 				v = 1023.2f;
500 			if (v < 0.f)
501 				v = 0.f;
502 			iv = (unsigned int)v;
503 			if (iv > 1023)
504 				iv = 1023;
505 			if (iv < 0)
506 				iv = 0;
507 			out |= iv;
508 
509 			return (uint32_t)out;
510 		}
511 
UpdateChunk(int cx,int cy,int cz)512 		void GLRadiosityRenderer::UpdateChunk(int cx, int cy, int cz) {
513 			Chunk &c = GetChunk(cx, cy, cz);
514 			if (!c.dirty)
515 				return;
516 
517 			int originX = cx * ChunkSize;
518 			int originY = cy * ChunkSize;
519 			int originZ = cz * ChunkSize;
520 
521 			for (int z = c.dirtyMinZ; z <= c.dirtyMaxZ; z++)
522 				for (int y = c.dirtyMinY; y <= c.dirtyMaxY; y++)
523 					for (int x = c.dirtyMinX; x <= c.dirtyMaxX; x++) {
524 						IntVector3 pos;
525 						pos.x = (x + originX);
526 						pos.y = (y + originY);
527 						pos.z = (z + originZ);
528 
529 						Result res = Evaluate(pos);
530 						c.dataFlat[z][y][x] = EncodeValue(res.base);
531 						c.dataX[z][y][x] = EncodeValue(res.x);
532 						c.dataY[z][y][x] = EncodeValue(res.y);
533 						c.dataZ[z][y][x] = EncodeValue(res.z);
534 					}
535 
536 			c.dirty = false;
537 			c.transferDone = false;
538 		}
539 	}
540 }
541