1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus-rsp-hle - alist.c *
3 * Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
4 * Copyright (C) 2014 Bobby Smiles *
5 * Copyright (C) 2009 Richard Goedeken *
6 * Copyright (C) 2002 Hacktarux *
7 * *
8 * This program is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU General Public License as published by *
10 * the Free Software Foundation; either version 2 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This program is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU General Public License *
19 * along with this program; if not, write to the *
20 * Free Software Foundation, Inc., *
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
22 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
23
24 #include <stdint.h>
25 #include <string.h>
26
27 #include <boolean.h>
28
29 #include "alist.h"
30 #include "arithmetics.h"
31 #include "audio.h"
32 #include "hle_external.h"
33 #include "hle_internal.h"
34 #include "memory.h"
35
/* State of one volume ramp used by the envelope mixers in this file.
 * ramp_step() adds `step` to `value` on each call and pins `value` to
 * `target` once the target has been reached or passed. The envelope mixers
 * initialise `value` and `target` as (16-bit volume << 16) and ramp_step()
 * returns `value >> 16`. */
struct ramp_t
{
    int64_t value;   /* current ramp position */
    int64_t step;    /* increment applied per ramp_step() call; set to 0 when the target is hit */
    int64_t target;  /* position at which the ramp stops */
};
42
43 /* local functions */
/* Exchange the two int16_t pointers referenced by a and b. */
static void swap(int16_t **a, int16_t **b)
{
    int16_t *const first = *a;

    *a = *b;
    *b = first;
}
50
/* Accessors into hle->alist_buffer and the basic mixing primitive.
 *
 * sample(hle, pos)     : pointer to the 16-bit sample at sample index pos
 *                        (index is XORed with S and masked with 0xfff).
 * alist_u8(hle, dmem)  : pointer to the byte at byte offset dmem
 *                        (offset is XORed with S8 and masked with 0xfff).
 * alist_s16(hle, dmem) : pointer to the 16-bit value at byte offset dmem,
 *                        resolved through u16().
 * sample_mix(dst, src, gain) : value of *dst + ((src * gain) >> 15),
 *                        passed through clamp_s16().
 *
 * Every macro argument is parenthesised so that an argument containing an
 * operator of lower precedence than '^' (e.g. `a | b`) is still treated as a
 * single operand. Each argument is evaluated exactly once. */
#define sample(hle, pos) ((int16_t*)(hle)->alist_buffer + (((pos) ^ S) & 0xfff))
#define alist_u8(hle, dmem) ((uint8_t*)((hle)->alist_buffer + (((dmem) ^ S8) & 0xfff)))
#define alist_s16(hle, dmem) ((int16_t*)u16((hle)->alist_buffer, (dmem)))
#define sample_mix(dst, src, gain) (clamp_s16(*(dst) + (((src) * (gain)) >> 15)))
55
/* Mix one input sample into n output samples.
 * For each k in [0, n), the sample pointed to by dst[k] is replaced by
 * sample_mix(dst[k], src, gains[k]). */
static void alist_envmix_mix(size_t n, int16_t** dst, const int16_t* gains, int16_t src)
{
    size_t k = 0;

    while (k < n)
    {
        int16_t *const out = dst[k];
        const int16_t gain = gains[k];

        *out = sample_mix(out, src, gain);
        ++k;
    }
}
63
ramp_step(struct ramp_t * ramp)64 static int16_t ramp_step(struct ramp_t* ramp)
65 {
66 bool target_reached;
67 ramp->value += ramp->step;
68
69 target_reached = (ramp->step <= 0)
70 ? (ramp->value <= ramp->target)
71 : (ramp->value >= ramp->target);
72
73 if (target_reached)
74 {
75 ramp->value = ramp->target;
76 ramp->step = 0;
77 }
78
79 return (int16_t)(ramp->value >> 16);
80 }
81
82 /* global functions */
alist_process(struct hle_t * hle,const acmd_callback_t abi[],unsigned int abi_size)83 void alist_process(struct hle_t* hle, const acmd_callback_t abi[], unsigned int abi_size)
84 {
85 uint32_t addr = *dmem_u32(hle, TASK_DATA_PTR);
86 const uint32_t *alist = dram_u32(hle, addr);
87 const uint32_t *const alist_end = alist + (*dmem_u32(hle, TASK_DATA_SIZE) >> 2);
88
89 while (alist != alist_end)
90 {
91 uint32_t w1 = *(alist++);
92 uint32_t w2 = *(alist++);
93 uint32_t acmd = (w1 >> 24) & 0x7f;
94
95 if (acmd < abi_size)
96 (*abi[acmd])(hle, w1, w2);
97 }
98 }
99
alist_get_address(struct hle_t * hle,uint32_t so,const uint32_t * segments,size_t n)100 uint32_t alist_get_address(struct hle_t* hle, uint32_t so, const uint32_t *segments, size_t n)
101 {
102 uint8_t segment = (so >> 24);
103 uint32_t offset = (so & 0xffffff);
104
105 if (segment >= n) {
106 HleWarnMessage(hle->user_defined, "Invalid segment %u", segment);
107 return offset;
108 }
109
110 return segments[segment] + offset;
111 }
112
alist_set_address(struct hle_t * hle,uint32_t so,uint32_t * segments,size_t n)113 void alist_set_address(struct hle_t* hle, uint32_t so, uint32_t *segments, size_t n)
114 {
115 uint8_t segment = (so >> 24);
116 uint32_t offset = (so & 0xffffff);
117
118 if (segment >= n) {
119 HleWarnMessage(hle->user_defined, "Invalid segment %u", segment);
120 return;
121 }
122
123 segments[segment] = offset;
124 }
125
alist_clear(struct hle_t * hle,uint16_t dmem,uint16_t count)126 void alist_clear(struct hle_t* hle, uint16_t dmem, uint16_t count)
127 {
128 memset(hle->alist_buffer + dmem, 0, count);
129 }
130
alist_load(struct hle_t * hle,uint16_t dmem,uint32_t address,uint16_t count)131 void alist_load(struct hle_t* hle, uint16_t dmem, uint32_t address, uint16_t count)
132 {
133 /* enforce DMA alignment constraints */
134 dmem &= ~3;
135 address &= ~7;
136 count = align(count, 8);
137 memcpy(hle->alist_buffer + dmem, hle->dram + address, count);
138 }
139
alist_save(struct hle_t * hle,uint16_t dmem,uint32_t address,uint16_t count)140 void alist_save(struct hle_t* hle, uint16_t dmem, uint32_t address, uint16_t count)
141 {
142 /* enforce DMA alignment constraints */
143 dmem &= ~3;
144 address &= ~7;
145 count = align(count, 8);
146 memcpy(hle->dram + address, hle->alist_buffer + dmem, count);
147 }
148
alist_move(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count)149 void alist_move(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t count)
150 {
151 while (count)
152 {
153 *alist_u8(hle, dmemo++) = *alist_u8(hle, dmemi++);
154 --count;
155 }
156 }
157
alist_copy_every_other_sample(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count)158 void alist_copy_every_other_sample(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t count)
159 {
160 while (count)
161 {
162 *alist_s16(hle, dmemo) = *alist_s16(hle, dmemi);
163 dmemo += 2;
164 dmemi += 4;
165 --count;
166 }
167 }
168
alist_repeat64(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint8_t count)169 void alist_repeat64(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint8_t count)
170 {
171 uint16_t buffer[64];
172
173 memcpy(buffer, hle->alist_buffer + dmemi, 128);
174
175 while(count)
176 {
177 memcpy(hle->alist_buffer + dmemo, buffer, 128);
178 dmemo += 128;
179 --count;
180 }
181 }
182
alist_copy_blocks(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t block_size,uint8_t count)183 void alist_copy_blocks(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t block_size, uint8_t count)
184 {
185 int block_left = count;
186
187 do
188 {
189 int bytes_left = block_size;
190
191 do
192 {
193 memcpy(hle->alist_buffer + dmemo, hle->alist_buffer + dmemi, 0x20);
194 bytes_left -= 0x20;
195
196 dmemi += 0x20;
197 dmemo += 0x20;
198
199 } while(bytes_left > 0);
200
201 --block_left;
202 } while(block_left > 0);
203 }
204
alist_interleave(struct hle_t * hle,uint16_t dmemo,uint16_t left,uint16_t right,uint16_t count)205 void alist_interleave(struct hle_t* hle, uint16_t dmemo, uint16_t left, uint16_t right, uint16_t count)
206 {
207 uint16_t *dst = (uint16_t*)(hle->alist_buffer + dmemo);
208 const uint16_t *srcL = (uint16_t*)(hle->alist_buffer + left);
209 const uint16_t *srcR = (uint16_t*)(hle->alist_buffer + right);
210
211 count >>= 2;
212
213 while(count)
214 {
215 uint16_t l1 = *(srcL++);
216 uint16_t l2 = *(srcL++);
217 uint16_t r1 = *(srcR++);
218 uint16_t r2 = *(srcR++);
219
220 #ifdef MSB_FIRST
221 *(dst++) = l1;
222 *(dst++) = r1;
223 *(dst++) = l2;
224 *(dst++) = r2;
225 #else
226 *(dst++) = r2;
227 *(dst++) = l2;
228 *(dst++) = r1;
229 *(dst++) = l1;
230 #endif
231 --count;
232 }
233 }
234
235
alist_envmix_exp(struct hle_t * hle,bool init,bool aux,uint16_t dmem_dl,uint16_t dmem_dr,uint16_t dmem_wl,uint16_t dmem_wr,uint16_t dmemi,uint16_t count,int16_t dry,int16_t wet,const int16_t * vol,const int16_t * target,const int32_t * rate,uint32_t address)236 void alist_envmix_exp(
237 struct hle_t* hle,
238 bool init,
239 bool aux,
240 uint16_t dmem_dl, uint16_t dmem_dr,
241 uint16_t dmem_wl, uint16_t dmem_wr,
242 uint16_t dmemi, uint16_t count,
243 int16_t dry, int16_t wet,
244 const int16_t *vol,
245 const int16_t *target,
246 const int32_t *rate,
247 uint32_t address)
248 {
249 struct ramp_t ramps[2];
250 int32_t exp_seq[2];
251 int32_t exp_rates[2];
252 int x, y;
253 size_t n = (aux) ? 4 : 2;
254
255 const int16_t* const in = (int16_t*)(hle->alist_buffer + dmemi);
256 int16_t* const dl = (int16_t*)(hle->alist_buffer + dmem_dl);
257 int16_t* const dr = (int16_t*)(hle->alist_buffer + dmem_dr);
258 int16_t* const wl = (int16_t*)(hle->alist_buffer + dmem_wl);
259 int16_t* const wr = (int16_t*)(hle->alist_buffer + dmem_wr);
260 uint32_t ptr = 0;
261 short *save_buffer = (short*)((uint8_t*)hle->dram + address);
262
263 if (init)
264 {
265 ramps[0].value = (vol[0] << 16);
266 ramps[1].value = (vol[1] << 16);
267 ramps[0].target = (target[0] << 16);
268 ramps[1].target = (target[1] << 16);
269 exp_rates[0] = rate[0];
270 exp_rates[1] = rate[1];
271 exp_seq[0] = (vol[0] * rate[0]);
272 exp_seq[1] = (vol[1] * rate[1]);
273 }
274 else
275 {
276 wet = *(int16_t *)(save_buffer + 0); /* 0-1 */
277 dry = *(int16_t *)(save_buffer + 2); /* 2-3 */
278 ramps[0].target = *(int32_t *)(save_buffer + 4); /* 4-5 */
279 ramps[1].target = *(int32_t *)(save_buffer + 6); /* 6-7 */
280 exp_rates[0] = *(int32_t *)(save_buffer + 8); /* 8-9 (save_buffer is a 16bit pointer) */
281 exp_rates[1] = *(int32_t *)(save_buffer + 10); /* 10-11 */
282 exp_seq[0] = *(int32_t *)(save_buffer + 12); /* 12-13 */
283 exp_seq[1] = *(int32_t *)(save_buffer + 14); /* 14-15 */
284 ramps[0].value = *(int32_t *)(save_buffer + 16); /* 12-13 */
285 ramps[1].value = *(int32_t *)(save_buffer + 18); /* 14-15 */
286 }
287
288 /* init which ensure ramp.step != 0 iff ramp.value == ramp.target */
289 ramps[0].step = ramps[0].target - ramps[0].value;
290 ramps[1].step = ramps[1].target - ramps[1].value;
291
292 for (y = 0; y < count; y += 16)
293 {
294 if (ramps[0].step)
295 {
296 exp_seq[0] = ((int64_t)exp_seq[0]*(int64_t)exp_rates[0]) >> 16;
297 ramps[0].step = (exp_seq[0] - ramps[0].value) >> 3;
298 }
299
300 if (ramps[1].step)
301 {
302 exp_seq[1] = ((int64_t)exp_seq[1]*(int64_t)exp_rates[1]) >> 16;
303 ramps[1].step = (exp_seq[1] - ramps[1].value) >> 3;
304 }
305
306 for (x = 0; x < 8; ++x)
307 {
308 int16_t gains[4];
309 int16_t* buffers[4];
310 int16_t l_vol = ramp_step(&ramps[0]);
311 int16_t r_vol = ramp_step(&ramps[1]);
312
313 buffers[0] = dl + (ptr^S);
314 buffers[1] = dr + (ptr^S);
315 buffers[2] = wl + (ptr^S);
316 buffers[3] = wr + (ptr^S);
317
318 gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15);
319 gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15);
320 gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15);
321 gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15);
322
323 alist_envmix_mix(n, buffers, gains, in[ptr^S]);
324 ++ptr;
325 }
326 }
327
328 *(int16_t *)(save_buffer + 0) = wet; /* 0-1 */
329 *(int16_t *)(save_buffer + 2) = dry; /* 2-3 */
330 *(int32_t *)(save_buffer + 4) = (int32_t)ramps[0].target; /* 4-5 */
331 *(int32_t *)(save_buffer + 6) = (int32_t)ramps[1].target; /* 6-7 */
332 *(int32_t *)(save_buffer + 8) = exp_rates[0]; /* 8-9 (save_buffer is a 16bit pointer) */
333 *(int32_t *)(save_buffer + 10) = exp_rates[1]; /* 10-11 */
334 *(int32_t *)(save_buffer + 12) = exp_seq[0]; /* 12-13 */
335 *(int32_t *)(save_buffer + 14) = exp_seq[1]; /* 14-15 */
336 *(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value; /* 12-13 */
337 *(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value; /* 14-15 */
338 }
339
alist_envmix_ge(struct hle_t * hle,bool init,bool aux,uint16_t dmem_dl,uint16_t dmem_dr,uint16_t dmem_wl,uint16_t dmem_wr,uint16_t dmemi,uint16_t count,int16_t dry,int16_t wet,const int16_t * vol,const int16_t * target,const int32_t * rate,uint32_t address)340 void alist_envmix_ge(
341 struct hle_t* hle,
342 bool init,
343 bool aux,
344 uint16_t dmem_dl, uint16_t dmem_dr,
345 uint16_t dmem_wl, uint16_t dmem_wr,
346 uint16_t dmemi, uint16_t count,
347 int16_t dry, int16_t wet,
348 const int16_t *vol,
349 const int16_t *target,
350 const int32_t *rate,
351 uint32_t address)
352 {
353 unsigned k;
354 struct ramp_t ramps[2];
355 size_t n = (aux) ? 4 : 2;
356
357 const int16_t* const in = (int16_t*)(hle->alist_buffer + dmemi);
358 int16_t* const dl = (int16_t*)(hle->alist_buffer + dmem_dl);
359 int16_t* const dr = (int16_t*)(hle->alist_buffer + dmem_dr);
360 int16_t* const wl = (int16_t*)(hle->alist_buffer + dmem_wl);
361 int16_t* const wr = (int16_t*)(hle->alist_buffer + dmem_wr);
362 short *save_buffer = (short*)((uint8_t*)hle->dram + address);
363
364 if (init)
365 {
366 ramps[0].value = (vol[0] << 16);
367 ramps[1].value = (vol[1] << 16);
368 ramps[0].target = (target[0] << 16);
369 ramps[1].target = (target[1] << 16);
370 ramps[0].step = rate[0] / 8;
371 ramps[1].step = rate[1] / 8;
372 }
373 else
374 {
375 wet = *(int16_t *)(save_buffer + 0); /* 0-1 */
376 dry = *(int16_t *)(save_buffer + 2); /* 2-3 */
377 ramps[0].target = *(int32_t *)(save_buffer + 4); /* 4-5 */
378 ramps[1].target = *(int32_t *)(save_buffer + 6); /* 6-7 */
379 ramps[0].step = *(int32_t *)(save_buffer + 8); /* 8-9 (save_buffer is a 16bit pointer) */
380 ramps[1].step = *(int32_t *)(save_buffer + 10); /* 10-11 */
381 /* *(int32_t *)(save_buffer + 12);*/ /* 12-13 */
382 /* *(int32_t *)(save_buffer + 14);*/ /* 14-15 */
383 ramps[0].value = *(int32_t *)(save_buffer + 16); /* 12-13 */
384 ramps[1].value = *(int32_t *)(save_buffer + 18); /* 14-15 */
385 }
386
387 count >>= 1;
388 for (k = 0; k < count; ++k)
389 {
390 int16_t gains[4];
391 int16_t* buffers[4];
392 int16_t l_vol = ramp_step(&ramps[0]);
393 int16_t r_vol = ramp_step(&ramps[1]);
394
395 buffers[0] = dl + (k^S);
396 buffers[1] = dr + (k^S);
397 buffers[2] = wl + (k^S);
398 buffers[3] = wr + (k^S);
399
400 gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15);
401 gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15);
402 gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15);
403 gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15);
404
405 alist_envmix_mix(n, buffers, gains, in[k^S]);
406 }
407
408 *(int16_t *)(save_buffer + 0) = wet; /* 0-1 */
409 *(int16_t *)(save_buffer + 2) = dry; /* 2-3 */
410 *(int32_t *)(save_buffer + 4) = (int32_t)ramps[0].target; /* 4-5 */
411 *(int32_t *)(save_buffer + 6) = (int32_t)ramps[1].target; /* 6-7 */
412 *(int32_t *)(save_buffer + 8) = (int32_t)ramps[0].step; /* 8-9 (save_buffer is a 16bit pointer) */
413 *(int32_t *)(save_buffer + 10) = (int32_t)ramps[1].step; /* 10-11 */
414 /* *(int32_t *)(save_buffer + 12); */ /* 12-13 */
415 /* *(int32_t *)(save_buffer + 14); */ /* 14-15 */
416 *(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value; /* 12-13 */
417 *(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value; /* 14-15 */
418 }
419
alist_envmix_lin(struct hle_t * hle,bool init,uint16_t dmem_dl,uint16_t dmem_dr,uint16_t dmem_wl,uint16_t dmem_wr,uint16_t dmemi,uint16_t count,int16_t dry,int16_t wet,const int16_t * vol,const int16_t * target,const int32_t * rate,uint32_t address)420 void alist_envmix_lin(
421 struct hle_t* hle,
422 bool init,
423 uint16_t dmem_dl, uint16_t dmem_dr,
424 uint16_t dmem_wl, uint16_t dmem_wr,
425 uint16_t dmemi, uint16_t count,
426 int16_t dry, int16_t wet,
427 const int16_t *vol,
428 const int16_t *target,
429 const int32_t *rate,
430 uint32_t address)
431 {
432 size_t k;
433 struct ramp_t ramps[2];
434 short *save_buffer = (short*)((uint8_t*)hle->dram + address);
435
436 const int16_t * const in = (int16_t*)(hle->alist_buffer + dmemi);
437 int16_t* const dl = (int16_t*)(hle->alist_buffer + dmem_dl);
438 int16_t* const dr = (int16_t*)(hle->alist_buffer + dmem_dr);
439 int16_t* const wl = (int16_t*)(hle->alist_buffer + dmem_wl);
440 int16_t* const wr = (int16_t*)(hle->alist_buffer + dmem_wr);
441
442 if (init)
443 {
444 ramps[0].step = rate[0] / 8;
445 ramps[0].value = (vol[0] << 16);
446 ramps[0].target = (target[0] << 16);
447 ramps[1].step = rate[1] / 8;
448 ramps[1].value = (vol[1] << 16);
449 ramps[1].target = (target[1] << 16);
450 }
451 else
452 {
453 wet = *(int16_t *)(save_buffer + 0); /* 0-1 */
454 dry = *(int16_t *)(save_buffer + 2); /* 2-3 */
455 ramps[0].target = *(int16_t *)(save_buffer + 4) << 16; /* 4-5 */
456 ramps[1].target = *(int16_t *)(save_buffer + 6) << 16; /* 6-7 */
457 ramps[0].step = *(int32_t *)(save_buffer + 8); /* 8-9 (save_buffer is a 16bit pointer) */
458 ramps[1].step = *(int32_t *)(save_buffer + 10); /* 10-11 */
459 ramps[0].value = *(int32_t *)(save_buffer + 16); /* 16-17 */
460 ramps[1].value = *(int32_t *)(save_buffer + 18); /* 16-17 */
461 }
462
463 count >>= 1;
464 for(k = 0; k < count; ++k) {
465 int16_t gains[4];
466 int16_t* buffers[4];
467 int16_t l_vol = ramp_step(&ramps[0]);
468 int16_t r_vol = ramp_step(&ramps[1]);
469
470 buffers[0] = dl + (k^S);
471 buffers[1] = dr + (k^S);
472 buffers[2] = wl + (k^S);
473 buffers[3] = wr + (k^S);
474
475 gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15);
476 gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15);
477 gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15);
478 gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15);
479
480 alist_envmix_mix(4, buffers, gains, in[k^S]);
481 }
482
483 *(int16_t *)(save_buffer + 0) = wet; /* 0-1 */
484 *(int16_t *)(save_buffer + 2) = dry; /* 2-3 */
485 *(int16_t *)(save_buffer + 4) = (ramps[0].target>>16)&0xFFFF; /* 4-5 */
486 *(int16_t *)(save_buffer + 6) = (ramps[1].target>>16)&0xFFFF; /* 6-7 */
487 *(int32_t *)(save_buffer + 8) = (int32_t)ramps[0].step; /* 8-9 (save_buffer is a 16bit pointer) */
488 *(int32_t *)(save_buffer + 10) = (int32_t)ramps[1].step; /* 10-11 */
489 *(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value; /* 16-17 */
490 *(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value; /* 18-19 */
491 }
492
alist_envmix_nead(struct hle_t * hle,bool swap_wet_LR,uint16_t dmem_dl,uint16_t dmem_dr,uint16_t dmem_wl,uint16_t dmem_wr,uint16_t dmemi,unsigned count,uint16_t * env_values,uint16_t * env_steps,const int16_t * xors)493 void alist_envmix_nead(
494 struct hle_t* hle,
495 bool swap_wet_LR,
496 uint16_t dmem_dl,
497 uint16_t dmem_dr,
498 uint16_t dmem_wl,
499 uint16_t dmem_wr,
500 uint16_t dmemi,
501 unsigned count,
502 uint16_t *env_values,
503 uint16_t *env_steps,
504 const int16_t *xors)
505 {
506 int16_t *in = (int16_t*)(hle->alist_buffer + dmemi);
507 int16_t *dl = (int16_t*)(hle->alist_buffer + dmem_dl);
508 int16_t *dr = (int16_t*)(hle->alist_buffer + dmem_dr);
509 int16_t *wl = (int16_t*)(hle->alist_buffer + dmem_wl);
510 int16_t *wr = (int16_t*)(hle->alist_buffer + dmem_wr);
511
512 /* make sure count is a multiple of 8 */
513 count = align(count, 8);
514
515 if (swap_wet_LR)
516 swap(&wl, &wr);
517
518 while (count)
519 {
520 size_t i;
521
522 for(i = 0; i < 8; ++i)
523 {
524 int16_t l = (((int32_t)in[i^S] * (uint32_t)env_values[0]) >> 16) ^ xors[0];
525 int16_t r = (((int32_t)in[i^S] * (uint32_t)env_values[1]) >> 16) ^ xors[1];
526 int16_t l2 = (((int32_t)l * (uint32_t)env_values[2]) >> 16) ^ xors[2];
527 int16_t r2 = (((int32_t)r * (uint32_t)env_values[2]) >> 16) ^ xors[3];
528
529 dl[i^S] = clamp_s16(dl[i^S] + l);
530 dr[i^S] = clamp_s16(dr[i^S] + r);
531 wl[i^S] = clamp_s16(wl[i^S] + l2);
532 wr[i^S] = clamp_s16(wr[i^S] + r2);
533 }
534
535 env_values[0] += env_steps[0];
536 env_values[1] += env_steps[1];
537 env_values[2] += env_steps[2];
538
539 dl += 8;
540 dr += 8;
541 wl += 8;
542 wr += 8;
543 in += 8;
544 count -= 8;
545 }
546 }
547
548
alist_mix(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count,int16_t gain)549 void alist_mix(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t count, int16_t gain)
550 {
551 int16_t *dst = (int16_t*)(hle->alist_buffer + dmemo);
552 const int16_t *src = (int16_t*)(hle->alist_buffer + dmemi);
553
554 count >>= 1;
555
556 while(count)
557 {
558 *dst = sample_mix(dst, *src, gain);
559
560 ++dst;
561 ++src;
562 --count;
563 }
564 }
565
alist_multQ44(struct hle_t * hle,uint16_t dmem,uint16_t count,int8_t gain)566 void alist_multQ44(struct hle_t* hle, uint16_t dmem, uint16_t count, int8_t gain)
567 {
568 int16_t *dst = (int16_t*)(hle->alist_buffer + dmem);
569
570 count >>= 1;
571
572 while(count)
573 {
574 *dst = clamp_s16(*dst * gain >> 4);
575
576 ++dst;
577 --count;
578 }
579 }
580
alist_add(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count)581 void alist_add(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t count)
582 {
583 int16_t *dst = (int16_t*)(hle->alist_buffer + dmemo);
584 const int16_t *src = (int16_t*)(hle->alist_buffer + dmemi);
585
586 count >>= 1;
587
588 while(count)
589 {
590 *dst = clamp_s16(*dst + *src);
591
592 ++dst;
593 ++src;
594 --count;
595 }
596 }
597
alist_resample_reset(struct hle_t * hle,uint16_t pos,uint32_t * pitch_accu)598 static void alist_resample_reset(struct hle_t* hle, uint16_t pos, uint32_t* pitch_accu)
599 {
600 unsigned k;
601
602 for(k = 0; k < 4; ++k)
603 *sample(hle, pos + k) = 0;
604
605 *pitch_accu = 0;
606 }
607
alist_resample_load(struct hle_t * hle,uint32_t address,uint16_t pos,uint32_t * pitch_accu)608 static void alist_resample_load(struct hle_t* hle,
609 uint32_t address, uint16_t pos, uint32_t* pitch_accu)
610 {
611 *sample(hle, pos + 0) = *dram_u16(hle, address + 0);
612 *sample(hle, pos + 1) = *dram_u16(hle, address + 2);
613 *sample(hle, pos + 2) = *dram_u16(hle, address + 4);
614 *sample(hle, pos + 3) = *dram_u16(hle, address + 6);
615
616 *pitch_accu = *dram_u16(hle, address + 8);
617 }
618
alist_resample_save(struct hle_t * hle,uint32_t address,uint16_t pos,uint32_t pitch_accu)619 static void alist_resample_save(struct hle_t* hle,
620 uint32_t address, uint16_t pos, uint32_t pitch_accu)
621 {
622 *dram_u16(hle, address + 0) = *sample(hle, pos + 0);
623 *dram_u16(hle, address + 2) = *sample(hle, pos + 1);
624 *dram_u16(hle, address + 4) = *sample(hle, pos + 2);
625 *dram_u16(hle, address + 6) = *sample(hle, pos + 3);
626
627 *dram_u16(hle, address + 8) = pitch_accu;
628 }
629
alist_resample(struct hle_t * hle,bool init,bool flag2,uint16_t dmemo,uint16_t dmemi,uint16_t count,uint32_t pitch,uint32_t address)630 void alist_resample(
631 struct hle_t* hle,
632 bool init,
633 bool flag2,
634 uint16_t dmemo,
635 uint16_t dmemi,
636 uint16_t count,
637 uint32_t pitch, /* Q16.16 */
638 uint32_t address)
639 {
640 uint32_t pitch_accu;
641 uint16_t ipos = (dmemi >> 1) - 4;
642 uint16_t opos = dmemo >> 1;
643
644 count >>= 1;
645
646 #ifndef NDEBUG
647 if (flag2)
648 HleWarnMessage(hle->user_defined, "alist_resample: flag2 is not implemented");
649 #endif
650
651 if (init)
652 alist_resample_reset(hle, ipos, &pitch_accu);
653 else
654 alist_resample_load(hle, address, ipos, &pitch_accu);
655
656 while (count)
657 {
658 const int16_t* lut = RESAMPLE_LUT + ((pitch_accu & 0xfc00) >> 8);
659
660 *sample(hle, opos++) = clamp_s16( (
661 (*sample(hle, ipos ) * lut[0]) +
662 (*sample(hle, ipos + 1) * lut[1]) +
663 (*sample(hle, ipos + 2) * lut[2]) +
664 (*sample(hle, ipos + 3) * lut[3]) ) >> 15);
665
666 pitch_accu += pitch;
667 ipos += (pitch_accu >> 16);
668 pitch_accu &= 0xffff;
669 --count;
670 }
671
672 alist_resample_save(hle, address, ipos, pitch_accu);
673 }
674
alist_resample_zoh(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count,uint32_t pitch,uint32_t pitch_accu)675 void alist_resample_zoh(
676 struct hle_t* hle,
677 uint16_t dmemo,
678 uint16_t dmemi,
679 uint16_t count,
680 uint32_t pitch,
681 uint32_t pitch_accu)
682 {
683 uint16_t ipos = dmemi >> 1;
684 uint16_t opos = dmemo >> 1;
685 count >>= 1;
686
687 while(count)
688 {
689 *sample(hle, opos++) = *sample(hle, ipos);
690
691 pitch_accu += pitch;
692 ipos += (pitch_accu >> 16);
693 pitch_accu &= 0xffff;
694 --count;
695 }
696 }
697
/* Signature of an ADPCM frame decoder: reads encoded bytes from the audio
 * list buffer starting at byte offset dmemi, writes decoded 16-bit values to
 * dst using the frame's scale, and returns the number of input bytes it
 * consumed. */
typedef unsigned int (*adpcm_predict_frame_t)(struct hle_t* hle,
        int16_t* dst, uint16_t dmemi, unsigned char scale);
700
adpcm_predict_frame_4bits(struct hle_t * hle,int16_t * dst,uint16_t dmemi,unsigned char scale)701 static unsigned int adpcm_predict_frame_4bits(struct hle_t* hle,
702 int16_t* dst, uint16_t dmemi, unsigned char scale)
703 {
704 unsigned int i;
705 unsigned int rshift = (scale < 12) ? 12 - scale : 0;
706
707 for(i = 0; i < 8; ++i)
708 {
709 uint8_t byte = *alist_u8(hle, dmemi++);
710
711 *(dst++) = adpcm_predict_sample(byte, 0xf0, 8, rshift);
712 *(dst++) = adpcm_predict_sample(byte, 0x0f, 12, rshift);
713 }
714
715 return 8;
716 }
717
adpcm_predict_frame_2bits(struct hle_t * hle,int16_t * dst,uint16_t dmemi,unsigned char scale)718 static unsigned int adpcm_predict_frame_2bits(struct hle_t* hle,
719 int16_t* dst, uint16_t dmemi, unsigned char scale)
720 {
721 unsigned int i;
722 unsigned int rshift = (scale < 14) ? 14 - scale : 0;
723
724 for(i = 0; i < 4; ++i)
725 {
726 uint8_t byte = *alist_u8(hle, dmemi++);
727
728 *(dst++) = adpcm_predict_sample(byte, 0xc0, 8, rshift);
729 *(dst++) = adpcm_predict_sample(byte, 0x30, 10, rshift);
730 *(dst++) = adpcm_predict_sample(byte, 0x0c, 12, rshift);
731 *(dst++) = adpcm_predict_sample(byte, 0x03, 14, rshift);
732 }
733
734 return 4;
735 }
736
alist_adpcm(struct hle_t * hle,bool init,bool loop,bool two_bit_per_sample,uint16_t dmemo,uint16_t dmemi,uint16_t count,const int16_t * codebook,uint32_t loop_address,uint32_t last_frame_address)737 void alist_adpcm(
738 struct hle_t* hle,
739 bool init,
740 bool loop,
741 bool two_bit_per_sample,
742 uint16_t dmemo,
743 uint16_t dmemi,
744 uint16_t count,
745 const int16_t* codebook,
746 uint32_t loop_address,
747 uint32_t last_frame_address)
748 {
749 int16_t last_frame[16];
750 size_t i;
751 adpcm_predict_frame_t predict_frame;
752
753 if (!hle || !codebook)
754 return;
755
756 predict_frame = (two_bit_per_sample)
757 ? adpcm_predict_frame_2bits
758 : adpcm_predict_frame_4bits;
759
760 assert((count & 0x1f) == 0);
761
762 if (init)
763 {
764 for (i = 0; i < 16; i++)
765 last_frame[i] = 0;
766 }
767 else
768 dram_load_u16(hle, (uint16_t*)last_frame, (loop) ? loop_address : last_frame_address, 16);
769
770 for(i = 0; i < 16; ++i, dmemo += 2)
771 *alist_s16(hle, dmemo) = last_frame[i];
772
773 while (count)
774 {
775 int16_t frame[16];
776 uint8_t code = *alist_u8(hle, dmemi++);
777 unsigned char scale = (code & 0xf0) >> 4;
778 const int16_t* const cb_entry = codebook + ((code & 0xf) << 4);
779
780 dmemi += predict_frame(hle, frame, dmemi, scale);
781
782 adpcm_compute_residuals(last_frame , frame , cb_entry, last_frame + 14, 8);
783 adpcm_compute_residuals(last_frame + 8, frame + 8, cb_entry, last_frame + 6 , 8);
784
785 for(i = 0; i < 16; ++i, dmemo += 2)
786 *alist_s16(hle, dmemo) = last_frame[i];
787
788 count -= 32;
789 }
790
791 dram_store_u16(hle, (uint16_t*)last_frame, last_frame_address, 16);
792 }
793
794
alist_filter(struct hle_t * hle,uint16_t dmem,uint16_t count,uint32_t address,const uint32_t * lut_address)795 void alist_filter(
796 struct hle_t* hle,
797 uint16_t dmem,
798 uint16_t count,
799 uint32_t address,
800 const uint32_t* lut_address)
801 {
802 int x;
803 int16_t outbuff[0x3c0];
804 int16_t *outp = outbuff;
805
806 int16_t* const lutt6 = (int16_t*)(hle->dram + lut_address[0]);
807 int16_t* const lutt5 = (int16_t*)(hle->dram + lut_address[1]);
808
809 int16_t* in1 = (int16_t*)(hle->dram + address);
810 int16_t* in2 = (int16_t*)(hle->alist_buffer + dmem);
811
812 for (x = 0; x < 8; ++x)
813 {
814 int32_t v = (lutt5[x] + lutt6[x]) >> 1;
815 lutt5[x] = lutt6[x] = v;
816 }
817
818 for (x = 0; x < count; x += 16)
819 {
820 int32_t v[8];
821
822 v[1] = in1[0] * lutt6[6];
823 v[1] += in1[3] * lutt6[7];
824 v[1] += in1[2] * lutt6[4];
825 v[1] += in1[5] * lutt6[5];
826 v[1] += in1[4] * lutt6[2];
827 v[1] += in1[7] * lutt6[3];
828 v[1] += in1[6] * lutt6[0];
829 v[1] += in2[1] * lutt6[1]; /* 1 */
830
831 v[0] = in1[3] * lutt6[6];
832 v[0] += in1[2] * lutt6[7];
833 v[0] += in1[5] * lutt6[4];
834 v[0] += in1[4] * lutt6[5];
835 v[0] += in1[7] * lutt6[2];
836 v[0] += in1[6] * lutt6[3];
837 v[0] += in2[1] * lutt6[0];
838 v[0] += in2[0] * lutt6[1];
839
840 v[3] = in1[2] * lutt6[6];
841 v[3] += in1[5] * lutt6[7];
842 v[3] += in1[4] * lutt6[4];
843 v[3] += in1[7] * lutt6[5];
844 v[3] += in1[6] * lutt6[2];
845 v[3] += in2[1] * lutt6[3];
846 v[3] += in2[0] * lutt6[0];
847 v[3] += in2[3] * lutt6[1];
848
849 v[2] = in1[5] * lutt6[6];
850 v[2] += in1[4] * lutt6[7];
851 v[2] += in1[7] * lutt6[4];
852 v[2] += in1[6] * lutt6[5];
853 v[2] += in2[1] * lutt6[2];
854 v[2] += in2[0] * lutt6[3];
855 v[2] += in2[3] * lutt6[0];
856 v[2] += in2[2] * lutt6[1];
857
858 v[5] = in1[4] * lutt6[6];
859 v[5] += in1[7] * lutt6[7];
860 v[5] += in1[6] * lutt6[4];
861 v[5] += in2[1] * lutt6[5];
862 v[5] += in2[0] * lutt6[2];
863 v[5] += in2[3] * lutt6[3];
864 v[5] += in2[2] * lutt6[0];
865 v[5] += in2[5] * lutt6[1];
866
867 v[4] = in1[7] * lutt6[6];
868 v[4] += in1[6] * lutt6[7];
869 v[4] += in2[1] * lutt6[4];
870 v[4] += in2[0] * lutt6[5];
871 v[4] += in2[3] * lutt6[2];
872 v[4] += in2[2] * lutt6[3];
873 v[4] += in2[5] * lutt6[0];
874 v[4] += in2[4] * lutt6[1];
875
876 v[7] = in1[6] * lutt6[6];
877 v[7] += in2[1] * lutt6[7];
878 v[7] += in2[0] * lutt6[4];
879 v[7] += in2[3] * lutt6[5];
880 v[7] += in2[2] * lutt6[2];
881 v[7] += in2[5] * lutt6[3];
882 v[7] += in2[4] * lutt6[0];
883 v[7] += in2[7] * lutt6[1];
884
885 v[6] = in2[1] * lutt6[6];
886 v[6] += in2[0] * lutt6[7];
887 v[6] += in2[3] * lutt6[4];
888 v[6] += in2[2] * lutt6[5];
889 v[6] += in2[5] * lutt6[2];
890 v[6] += in2[4] * lutt6[3];
891 v[6] += in2[7] * lutt6[0];
892 v[6] += in2[6] * lutt6[1];
893
894 outp[1] = ((v[1] + 0x4000) >> 15);
895 outp[0] = ((v[0] + 0x4000) >> 15);
896 outp[3] = ((v[3] + 0x4000) >> 15);
897 outp[2] = ((v[2] + 0x4000) >> 15);
898 outp[5] = ((v[5] + 0x4000) >> 15);
899 outp[4] = ((v[4] + 0x4000) >> 15);
900 outp[7] = ((v[7] + 0x4000) >> 15);
901 outp[6] = ((v[6] + 0x4000) >> 15);
902 in1 = in2;
903 in2 += 8;
904 outp += 8;
905 }
906
907 memcpy(hle->dram + address, in2 - 8, 16);
908 memcpy(hle->alist_buffer + dmem, outbuff, count);
909 }
910
alist_polef(struct hle_t * hle,bool init,uint16_t dmemo,uint16_t dmemi,uint16_t count,uint16_t gain,int16_t * table,uint32_t address)911 void alist_polef(
912 struct hle_t* hle,
913 bool init,
914 uint16_t dmemo,
915 uint16_t dmemi,
916 uint16_t count,
917 uint16_t gain,
918 int16_t* table,
919 uint32_t address)
920 {
921 unsigned i;
922 int16_t h2_before[8];
923 int16_t l1 = 0;
924 int16_t l2 = 0;
925 int16_t *dst = (int16_t*)(hle->alist_buffer + dmemo);
926 const int16_t* const h1 = table;
927 int16_t* const h2 = table + 8;
928
929 count = align(count, 16);
930
931 if (!init)
932 {
933 l1 = *dram_u16(hle, address + 4);
934 l2 = *dram_u16(hle, address + 6);
935 }
936
937 for(i = 0; i < 8; ++i)
938 {
939 h2_before[i] = h2[i];
940 h2[i] = (((int32_t)h2[i] * gain) >> 14);
941 }
942
943 do
944 {
945 int16_t frame[8];
946
947 for(i = 0; i < 8; ++i, dmemi += 2)
948 frame[i] = *alist_s16(hle, dmemi);
949
950 for(i = 0; i < 8; ++i)
951 {
952 int32_t accu = frame[i] * gain;
953 accu += h1[i]*l1 + h2_before[i]*l2 + rdot(i, h2, frame + i);
954 dst[i^S] = clamp_s16(accu >> 14);
955 }
956
957 l1 = dst[6^S];
958 l2 = dst[7^S];
959
960 dst += 8;
961 count -= 16;
962 }while(count);
963
964 dram_store_u32(hle, (uint32_t*)(dst - 4), address, 2);
965 }
966
alist_iirf(struct hle_t * hle,bool init,uint16_t dmemo,uint16_t dmemi,uint16_t count,int16_t * table,uint32_t address)967 void alist_iirf(
968 struct hle_t* hle,
969 bool init,
970 uint16_t dmemo,
971 uint16_t dmemi,
972 uint16_t count,
973 int16_t* table,
974 uint32_t address)
975 {
976 int32_t i, prev;
977 int16_t frame[8];
978 int16_t ibuf[4];
979 uint16_t index = 7;
980 int16_t *dst = (int16_t*)(hle->alist_buffer + dmemo);
981 count = align(count, 16);
982
983 if(init)
984 {
985 for(i = 0; i < 8; ++i)
986 frame[i] = 0;
987 ibuf[1] = 0;
988 ibuf[2] = 0;
989 }
990 else
991 {
992 frame[6] = *dram_u16(hle, address + 4);
993 frame[7] = *dram_u16(hle, address + 6);
994 ibuf[1] = (int16_t)*dram_u16(hle, address + 8);
995 ibuf[2] = (int16_t)*dram_u16(hle, address + 10);
996 }
997
998 prev = vmulf(table[9], frame[6]) * 2;
999
1000 do
1001 {
1002 for(i = 0; i < 8; ++i)
1003 {
1004 int32_t accu;
1005
1006 ibuf[index&3] = *alist_s16(hle, dmemi);
1007 accu = prev
1008 + vmulf(table[0], ibuf[index&3])
1009 + vmulf(table[1], ibuf[(index-1)&3])
1010 + vmulf(table[0], ibuf[(index-2)&3]);
1011
1012 accu += vmulf(table[8], frame[index]) * 2;
1013 prev = vmulf(table[9], frame[index]) * 2;
1014 dst[i^S] = frame[i] = accu;
1015 index = (index+1)&7;
1016 dmemi += 2;
1017 }
1018 dst += 8;
1019 count -= 0x10;
1020 } while (count > 0);
1021
1022 dram_store_u16(hle, (uint16_t*)&frame[6], address + 4, 4);
1023 dram_store_u16(hle, (uint16_t*)&ibuf[(index-2)&3], address+8, 2);
1024 dram_store_u16(hle, (uint16_t*)&ibuf[(index-1)&3], address+10, 2);
1025 }
1026