1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  *   Mupen64plus-rsp-hle - alist.c                                         *
3  *   Mupen64Plus homepage: http://code.google.com/p/mupen64plus/           *
4  *   Copyright (C) 2014 Bobby Smiles                                       *
5  *   Copyright (C) 2009 Richard Goedeken                                   *
6  *   Copyright (C) 2002 Hacktarux                                          *
7  *                                                                         *
8  *   This program is free software; you can redistribute it and/or modify  *
9  *   it under the terms of the GNU General Public License as published by  *
10  *   the Free Software Foundation; either version 2 of the License, or     *
11  *   (at your option) any later version.                                   *
12  *                                                                         *
13  *   This program is distributed in the hope that it will be useful,       *
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
16  *   GNU General Public License for more details.                          *
17  *                                                                         *
18  *   You should have received a copy of the GNU General Public License     *
19  *   along with this program; if not, write to the                         *
20  *   Free Software Foundation, Inc.,                                       *
21  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
22  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
23 
24 #include <stdint.h>
25 #include <string.h>
26 
27 #include <boolean.h>
28 
29 #include "alist.h"
30 #include "arithmetics.h"
31 #include "audio.h"
32 #include "hle_external.h"
33 #include "hle_internal.h"
34 #include "memory.h"
35 
/* State of one volume ramp, advanced one sample at a time by ramp_step(). */
struct ramp_t
{
    int64_t value;   /* current level; ramp_step() returns (value >> 16) */
    int64_t step;    /* amount added to value on every ramp_step() call */
    int64_t target;  /* level value is pinned to once it is reached or passed */
};
42 
43 /* local functions */
/* Exchange the two int16_t pointers that a and b refer to. */
static void swap(int16_t **a, int16_t **b)
{
    int16_t *const first = *a;

    *a = *b;
    *b = first;
}
50 
/*
 * Accessors into hle->alist_buffer.
 *
 * sample(hle, pos)      pointer to the 16-bit sample at index pos (counted in
 *                       int16_t units); the index is XORed with S and masked
 *                       with 0xfff.
 * alist_u8(hle, dmem)   pointer to the byte at offset dmem; the offset is
 *                       XORed with S8 and masked with 0xfff.
 * alist_s16(hle, dmem)  pointer obtained from u16() for offset dmem, viewed
 *                       as int16_t.
 * sample_mix(dst, src, gain)
 *                       value of *dst plus ((src * gain) >> 15), passed
 *                       through clamp_s16(); does not store the result.
 *
 * Every macro argument is parenthesized so that expressions such as "a | b"
 * keep their grouping once expanded.
 */
#define sample(hle, pos)      ((int16_t*)(hle)->alist_buffer + (((pos) ^ S) & 0xfff))
#define alist_u8(hle, dmem)   ((uint8_t*)((hle)->alist_buffer + (((dmem) ^ S8) & 0xfff)))
#define alist_s16(hle, dmem)  ((int16_t*)u16((hle)->alist_buffer, (dmem)))
#define sample_mix(dst, src, gain)  (clamp_s16(*(dst) + (((src) * (gain)) >> 15)))
55 
/*
 * Mix one source sample into n destination samples.
 *
 * dst[k] points at the sample to update and gains[k] is the gain applied to
 * src for that destination; each destination is replaced by
 * sample_mix(dst[k], src, gains[k]).
 */
static void alist_envmix_mix(size_t n, int16_t** dst, const int16_t* gains, int16_t src)
{
    size_t ch = 0;

    while (ch < n)
    {
        int16_t* const out = dst[ch];

        *out = sample_mix(out, src, gains[ch]);
        ++ch;
    }
}
63 
ramp_step(struct ramp_t * ramp)64 static int16_t ramp_step(struct ramp_t* ramp)
65 {
66 	bool target_reached;
67     ramp->value += ramp->step;
68 
69     target_reached = (ramp->step <= 0)
70         ? (ramp->value <= ramp->target)
71         : (ramp->value >= ramp->target);
72 
73     if (target_reached)
74     {
75         ramp->value = ramp->target;
76         ramp->step  = 0;
77     }
78 
79     return (int16_t)(ramp->value >> 16);
80 }
81 
82 /* global functions */
alist_process(struct hle_t * hle,const acmd_callback_t abi[],unsigned int abi_size)83 void alist_process(struct hle_t* hle, const acmd_callback_t abi[], unsigned int abi_size)
84 {
85    uint32_t addr                    = *dmem_u32(hle, TASK_DATA_PTR);
86    const uint32_t *alist            = dram_u32(hle, addr);
87    const uint32_t *const alist_end = alist + (*dmem_u32(hle, TASK_DATA_SIZE) >> 2);
88 
89    while (alist != alist_end)
90    {
91       uint32_t w1 = *(alist++);
92       uint32_t w2 = *(alist++);
93       uint32_t acmd = (w1 >> 24) & 0x7f;
94 
95       if (acmd < abi_size)
96          (*abi[acmd])(hle, w1, w2);
97    }
98 }
99 
alist_get_address(struct hle_t * hle,uint32_t so,const uint32_t * segments,size_t n)100 uint32_t alist_get_address(struct hle_t* hle, uint32_t so, const uint32_t *segments, size_t n)
101 {
102     uint8_t  segment = (so >> 24);
103     uint32_t offset  = (so & 0xffffff);
104 
105     if (segment >= n) {
106         HleWarnMessage(hle->user_defined, "Invalid segment %u", segment);
107         return offset;
108     }
109 
110     return segments[segment] + offset;
111 }
112 
alist_set_address(struct hle_t * hle,uint32_t so,uint32_t * segments,size_t n)113 void alist_set_address(struct hle_t* hle, uint32_t so, uint32_t *segments, size_t n)
114 {
115     uint8_t  segment = (so >> 24);
116     uint32_t offset  = (so & 0xffffff);
117 
118     if (segment >= n) {
119         HleWarnMessage(hle->user_defined, "Invalid segment %u", segment);
120         return;
121     }
122 
123     segments[segment] = offset;
124 }
125 
alist_clear(struct hle_t * hle,uint16_t dmem,uint16_t count)126 void alist_clear(struct hle_t* hle, uint16_t dmem, uint16_t count)
127 {
128    memset(hle->alist_buffer + dmem, 0, count);
129 }
130 
alist_load(struct hle_t * hle,uint16_t dmem,uint32_t address,uint16_t count)131 void alist_load(struct hle_t* hle, uint16_t dmem, uint32_t address, uint16_t count)
132 {
133     /* enforce DMA alignment constraints */
134     dmem    &= ~3;
135     address &= ~7;
136     count = align(count, 8);
137     memcpy(hle->alist_buffer + dmem, hle->dram + address, count);
138 }
139 
alist_save(struct hle_t * hle,uint16_t dmem,uint32_t address,uint16_t count)140 void alist_save(struct hle_t* hle, uint16_t dmem, uint32_t address, uint16_t count)
141 {
142     /* enforce DMA alignment constraints */
143     dmem    &= ~3;
144     address &= ~7;
145     count = align(count, 8);
146     memcpy(hle->dram + address, hle->alist_buffer + dmem, count);
147 }
148 
alist_move(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count)149 void alist_move(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t count)
150 {
151     while (count)
152     {
153        *alist_u8(hle, dmemo++) = *alist_u8(hle, dmemi++);
154        --count;
155     }
156 }
157 
alist_copy_every_other_sample(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count)158 void alist_copy_every_other_sample(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t count)
159 {
160    while (count)
161    {
162       *alist_s16(hle, dmemo) = *alist_s16(hle, dmemi);
163       dmemo += 2;
164       dmemi += 4;
165       --count;
166    }
167 }
168 
alist_repeat64(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint8_t count)169 void alist_repeat64(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint8_t count)
170 {
171    uint16_t buffer[64];
172 
173    memcpy(buffer, hle->alist_buffer + dmemi, 128);
174 
175    while(count)
176    {
177       memcpy(hle->alist_buffer + dmemo, buffer, 128);
178       dmemo += 128;
179       --count;
180    }
181 }
182 
alist_copy_blocks(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t block_size,uint8_t count)183 void alist_copy_blocks(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t block_size, uint8_t count)
184 {
185    int block_left = count;
186 
187    do
188    {
189       int bytes_left = block_size;
190 
191       do
192       {
193          memcpy(hle->alist_buffer + dmemo, hle->alist_buffer + dmemi, 0x20);
194          bytes_left -= 0x20;
195 
196          dmemi += 0x20;
197          dmemo += 0x20;
198 
199       } while(bytes_left > 0);
200 
201       --block_left;
202    } while(block_left > 0);
203 }
204 
alist_interleave(struct hle_t * hle,uint16_t dmemo,uint16_t left,uint16_t right,uint16_t count)205 void alist_interleave(struct hle_t* hle, uint16_t dmemo, uint16_t left, uint16_t right, uint16_t count)
206 {
207    uint16_t       *dst  = (uint16_t*)(hle->alist_buffer + dmemo);
208    const uint16_t *srcL = (uint16_t*)(hle->alist_buffer + left);
209    const uint16_t *srcR = (uint16_t*)(hle->alist_buffer + right);
210 
211    count >>= 2;
212 
213    while(count)
214    {
215       uint16_t l1 = *(srcL++);
216       uint16_t l2 = *(srcL++);
217       uint16_t r1 = *(srcR++);
218       uint16_t r2 = *(srcR++);
219 
220 #ifdef MSB_FIRST
221       *(dst++) = l1;
222       *(dst++) = r1;
223       *(dst++) = l2;
224       *(dst++) = r2;
225 #else
226       *(dst++) = r2;
227       *(dst++) = l2;
228       *(dst++) = r1;
229       *(dst++) = l1;
230 #endif
231       --count;
232    }
233 }
234 
235 
alist_envmix_exp(struct hle_t * hle,bool init,bool aux,uint16_t dmem_dl,uint16_t dmem_dr,uint16_t dmem_wl,uint16_t dmem_wr,uint16_t dmemi,uint16_t count,int16_t dry,int16_t wet,const int16_t * vol,const int16_t * target,const int32_t * rate,uint32_t address)236 void alist_envmix_exp(
237         struct hle_t* hle,
238         bool init,
239         bool aux,
240         uint16_t dmem_dl, uint16_t dmem_dr,
241         uint16_t dmem_wl, uint16_t dmem_wr,
242         uint16_t dmemi, uint16_t count,
243         int16_t dry, int16_t wet,
244         const int16_t *vol,
245         const int16_t *target,
246         const int32_t *rate,
247         uint32_t address)
248 {
249     struct ramp_t ramps[2];
250     int32_t exp_seq[2];
251     int32_t exp_rates[2];
252     int x, y;
253     size_t n                = (aux) ? 4 : 2;
254 
255     const int16_t* const in = (int16_t*)(hle->alist_buffer + dmemi);
256     int16_t* const dl       = (int16_t*)(hle->alist_buffer + dmem_dl);
257     int16_t* const dr       = (int16_t*)(hle->alist_buffer + dmem_dr);
258     int16_t* const wl       = (int16_t*)(hle->alist_buffer + dmem_wl);
259     int16_t* const wr       = (int16_t*)(hle->alist_buffer + dmem_wr);
260     uint32_t ptr            = 0;
261     short *save_buffer      = (short*)((uint8_t*)hle->dram + address);
262 
263     if (init)
264     {
265         ramps[0].value  = (vol[0] << 16);
266         ramps[1].value  = (vol[1] << 16);
267         ramps[0].target = (target[0] << 16);
268         ramps[1].target = (target[1] << 16);
269         exp_rates[0]    = rate[0];
270         exp_rates[1]    = rate[1];
271         exp_seq[0]      = (vol[0] * rate[0]);
272         exp_seq[1]      = (vol[1] * rate[1]);
273     }
274     else
275     {
276         wet             = *(int16_t *)(save_buffer +  0); /* 0-1 */
277         dry             = *(int16_t *)(save_buffer +  2); /* 2-3 */
278         ramps[0].target = *(int32_t *)(save_buffer +  4); /* 4-5 */
279         ramps[1].target = *(int32_t *)(save_buffer +  6); /* 6-7 */
280         exp_rates[0]    = *(int32_t *)(save_buffer +  8); /* 8-9 (save_buffer is a 16bit pointer) */
281         exp_rates[1]    = *(int32_t *)(save_buffer + 10); /* 10-11 */
282         exp_seq[0]      = *(int32_t *)(save_buffer + 12); /* 12-13 */
283         exp_seq[1]      = *(int32_t *)(save_buffer + 14); /* 14-15 */
284         ramps[0].value  = *(int32_t *)(save_buffer + 16); /* 12-13 */
285         ramps[1].value  = *(int32_t *)(save_buffer + 18); /* 14-15 */
286     }
287 
288     /* init which ensure ramp.step != 0 iff ramp.value == ramp.target */
289     ramps[0].step = ramps[0].target - ramps[0].value;
290     ramps[1].step = ramps[1].target - ramps[1].value;
291 
292     for (y = 0; y < count; y += 16)
293     {
294        if (ramps[0].step)
295        {
296           exp_seq[0] = ((int64_t)exp_seq[0]*(int64_t)exp_rates[0]) >> 16;
297           ramps[0].step = (exp_seq[0] - ramps[0].value) >> 3;
298        }
299 
300        if (ramps[1].step)
301        {
302           exp_seq[1] = ((int64_t)exp_seq[1]*(int64_t)exp_rates[1]) >> 16;
303           ramps[1].step = (exp_seq[1] - ramps[1].value) >> 3;
304        }
305 
306        for (x = 0; x < 8; ++x)
307        {
308           int16_t  gains[4];
309           int16_t* buffers[4];
310           int16_t l_vol = ramp_step(&ramps[0]);
311           int16_t r_vol = ramp_step(&ramps[1]);
312 
313           buffers[0] = dl + (ptr^S);
314           buffers[1] = dr + (ptr^S);
315           buffers[2] = wl + (ptr^S);
316           buffers[3] = wr + (ptr^S);
317 
318           gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15);
319           gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15);
320           gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15);
321           gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15);
322 
323           alist_envmix_mix(n, buffers, gains, in[ptr^S]);
324           ++ptr;
325        }
326     }
327 
328     *(int16_t *)(save_buffer +  0) = wet;                       /* 0-1 */
329     *(int16_t *)(save_buffer +  2) = dry;                       /* 2-3 */
330     *(int32_t *)(save_buffer +  4) = (int32_t)ramps[0].target;  /* 4-5 */
331     *(int32_t *)(save_buffer +  6) = (int32_t)ramps[1].target;  /* 6-7 */
332     *(int32_t *)(save_buffer +  8) = exp_rates[0];              /* 8-9 (save_buffer is a 16bit pointer) */
333     *(int32_t *)(save_buffer + 10) = exp_rates[1];              /* 10-11 */
334     *(int32_t *)(save_buffer + 12) = exp_seq[0];                /* 12-13 */
335     *(int32_t *)(save_buffer + 14) = exp_seq[1];                /* 14-15 */
336     *(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value;   /* 12-13 */
337     *(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value;   /* 14-15 */
338 }
339 
alist_envmix_ge(struct hle_t * hle,bool init,bool aux,uint16_t dmem_dl,uint16_t dmem_dr,uint16_t dmem_wl,uint16_t dmem_wr,uint16_t dmemi,uint16_t count,int16_t dry,int16_t wet,const int16_t * vol,const int16_t * target,const int32_t * rate,uint32_t address)340 void alist_envmix_ge(
341         struct hle_t* hle,
342         bool init,
343         bool aux,
344         uint16_t dmem_dl, uint16_t dmem_dr,
345         uint16_t dmem_wl, uint16_t dmem_wr,
346         uint16_t dmemi, uint16_t count,
347         int16_t dry, int16_t wet,
348         const int16_t *vol,
349         const int16_t *target,
350         const int32_t *rate,
351         uint32_t address)
352 {
353     unsigned k;
354     struct ramp_t ramps[2];
355     size_t n                = (aux) ? 4 : 2;
356 
357     const int16_t* const in = (int16_t*)(hle->alist_buffer + dmemi);
358     int16_t* const dl       = (int16_t*)(hle->alist_buffer + dmem_dl);
359     int16_t* const dr       = (int16_t*)(hle->alist_buffer + dmem_dr);
360     int16_t* const wl       = (int16_t*)(hle->alist_buffer + dmem_wl);
361     int16_t* const wr       = (int16_t*)(hle->alist_buffer + dmem_wr);
362     short *save_buffer      = (short*)((uint8_t*)hle->dram + address);
363 
364     if (init)
365     {
366         ramps[0].value  = (vol[0] << 16);
367         ramps[1].value  = (vol[1] << 16);
368         ramps[0].target = (target[0] << 16);
369         ramps[1].target = (target[1] << 16);
370         ramps[0].step   = rate[0] / 8;
371         ramps[1].step   = rate[1] / 8;
372     }
373     else
374     {
375         wet             = *(int16_t *)(save_buffer +  0);   /* 0-1 */
376         dry             = *(int16_t *)(save_buffer +  2);   /* 2-3 */
377         ramps[0].target = *(int32_t *)(save_buffer +  4);   /* 4-5 */
378         ramps[1].target = *(int32_t *)(save_buffer +  6);   /* 6-7 */
379         ramps[0].step   = *(int32_t *)(save_buffer +  8);   /* 8-9 (save_buffer is a 16bit pointer) */
380         ramps[1].step   = *(int32_t *)(save_buffer + 10);   /* 10-11 */
381         /*                *(int32_t *)(save_buffer + 12);*/ /* 12-13 */
382         /*                *(int32_t *)(save_buffer + 14);*/ /* 14-15 */
383         ramps[0].value  = *(int32_t *)(save_buffer + 16);   /* 12-13 */
384         ramps[1].value  = *(int32_t *)(save_buffer + 18);   /* 14-15 */
385     }
386 
387     count >>= 1;
388     for (k = 0; k < count; ++k)
389     {
390        int16_t  gains[4];
391        int16_t* buffers[4];
392        int16_t l_vol = ramp_step(&ramps[0]);
393        int16_t r_vol = ramp_step(&ramps[1]);
394 
395        buffers[0] = dl + (k^S);
396        buffers[1] = dr + (k^S);
397        buffers[2] = wl + (k^S);
398        buffers[3] = wr + (k^S);
399 
400        gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15);
401        gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15);
402        gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15);
403        gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15);
404 
405        alist_envmix_mix(n, buffers, gains, in[k^S]);
406     }
407 
408     *(int16_t *)(save_buffer +  0) = wet;                       /* 0-1 */
409     *(int16_t *)(save_buffer +  2) = dry;                       /* 2-3 */
410     *(int32_t *)(save_buffer +  4) = (int32_t)ramps[0].target;  /* 4-5 */
411     *(int32_t *)(save_buffer +  6) = (int32_t)ramps[1].target;  /* 6-7 */
412     *(int32_t *)(save_buffer +  8) = (int32_t)ramps[0].step;    /* 8-9 (save_buffer is a 16bit pointer) */
413     *(int32_t *)(save_buffer + 10) = (int32_t)ramps[1].step;    /* 10-11 */
414  /* *(int32_t *)(save_buffer + 12); */                          /* 12-13 */
415  /* *(int32_t *)(save_buffer + 14); */                          /* 14-15 */
416     *(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value;   /* 12-13 */
417     *(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value;   /* 14-15 */
418 }
419 
alist_envmix_lin(struct hle_t * hle,bool init,uint16_t dmem_dl,uint16_t dmem_dr,uint16_t dmem_wl,uint16_t dmem_wr,uint16_t dmemi,uint16_t count,int16_t dry,int16_t wet,const int16_t * vol,const int16_t * target,const int32_t * rate,uint32_t address)420 void alist_envmix_lin(
421         struct hle_t* hle,
422         bool init,
423         uint16_t dmem_dl, uint16_t dmem_dr,
424         uint16_t dmem_wl, uint16_t dmem_wr,
425         uint16_t dmemi, uint16_t count,
426         int16_t dry, int16_t wet,
427         const int16_t *vol,
428         const int16_t *target,
429         const int32_t *rate,
430         uint32_t address)
431 {
432     size_t k;
433     struct ramp_t ramps[2];
434     short *save_buffer = (short*)((uint8_t*)hle->dram + address);
435 
436     const int16_t * const in = (int16_t*)(hle->alist_buffer + dmemi);
437     int16_t* const dl = (int16_t*)(hle->alist_buffer + dmem_dl);
438     int16_t* const dr = (int16_t*)(hle->alist_buffer + dmem_dr);
439     int16_t* const wl = (int16_t*)(hle->alist_buffer + dmem_wl);
440     int16_t* const wr = (int16_t*)(hle->alist_buffer + dmem_wr);
441 
442     if (init)
443     {
444         ramps[0].step   = rate[0] / 8;
445         ramps[0].value  = (vol[0] << 16);
446         ramps[0].target = (target[0] << 16);
447         ramps[1].step   = rate[1] / 8;
448         ramps[1].value  = (vol[1] << 16);
449         ramps[1].target = (target[1] << 16);
450     }
451     else
452     {
453         wet             = *(int16_t *)(save_buffer +  0); /* 0-1 */
454         dry             = *(int16_t *)(save_buffer +  2); /* 2-3 */
455         ramps[0].target = *(int16_t *)(save_buffer +  4) << 16; /* 4-5 */
456         ramps[1].target = *(int16_t *)(save_buffer +  6) << 16; /* 6-7 */
457         ramps[0].step   = *(int32_t *)(save_buffer +  8); /* 8-9 (save_buffer is a 16bit pointer) */
458         ramps[1].step   = *(int32_t *)(save_buffer + 10); /* 10-11 */
459         ramps[0].value  = *(int32_t *)(save_buffer + 16); /* 16-17 */
460         ramps[1].value  = *(int32_t *)(save_buffer + 18); /* 16-17 */
461     }
462 
463     count >>= 1;
464     for(k = 0; k < count; ++k) {
465         int16_t  gains[4];
466         int16_t* buffers[4];
467         int16_t l_vol = ramp_step(&ramps[0]);
468         int16_t r_vol = ramp_step(&ramps[1]);
469 
470         buffers[0] = dl + (k^S);
471         buffers[1] = dr + (k^S);
472         buffers[2] = wl + (k^S);
473         buffers[3] = wr + (k^S);
474 
475         gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15);
476         gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15);
477         gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15);
478         gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15);
479 
480         alist_envmix_mix(4, buffers, gains, in[k^S]);
481     }
482 
483     *(int16_t *)(save_buffer +  0) = wet;                           /* 0-1 */
484     *(int16_t *)(save_buffer +  2) = dry;                           /* 2-3 */
485     *(int16_t *)(save_buffer +  4) = (ramps[0].target>>16)&0xFFFF;  /* 4-5 */
486     *(int16_t *)(save_buffer +  6) = (ramps[1].target>>16)&0xFFFF;  /* 6-7 */
487     *(int32_t *)(save_buffer +  8) = (int32_t)ramps[0].step;        /* 8-9 (save_buffer is a 16bit pointer) */
488     *(int32_t *)(save_buffer + 10) = (int32_t)ramps[1].step;        /* 10-11 */
489     *(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value;       /* 16-17 */
490     *(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value;       /* 18-19 */
491 }
492 
alist_envmix_nead(struct hle_t * hle,bool swap_wet_LR,uint16_t dmem_dl,uint16_t dmem_dr,uint16_t dmem_wl,uint16_t dmem_wr,uint16_t dmemi,unsigned count,uint16_t * env_values,uint16_t * env_steps,const int16_t * xors)493 void alist_envmix_nead(
494         struct hle_t* hle,
495         bool swap_wet_LR,
496         uint16_t dmem_dl,
497         uint16_t dmem_dr,
498         uint16_t dmem_wl,
499         uint16_t dmem_wr,
500         uint16_t dmemi,
501         unsigned count,
502         uint16_t *env_values,
503         uint16_t *env_steps,
504         const int16_t *xors)
505 {
506     int16_t *in = (int16_t*)(hle->alist_buffer + dmemi);
507     int16_t *dl = (int16_t*)(hle->alist_buffer + dmem_dl);
508     int16_t *dr = (int16_t*)(hle->alist_buffer + dmem_dr);
509     int16_t *wl = (int16_t*)(hle->alist_buffer + dmem_wl);
510     int16_t *wr = (int16_t*)(hle->alist_buffer + dmem_wr);
511 
512     /* make sure count is a multiple of 8 */
513     count = align(count, 8);
514 
515     if (swap_wet_LR)
516         swap(&wl, &wr);
517 
518     while (count)
519     {
520        size_t i;
521 
522        for(i = 0; i < 8; ++i)
523        {
524           int16_t l  = (((int32_t)in[i^S] * (uint32_t)env_values[0]) >> 16) ^ xors[0];
525           int16_t r  = (((int32_t)in[i^S] * (uint32_t)env_values[1]) >> 16) ^ xors[1];
526           int16_t l2 = (((int32_t)l * (uint32_t)env_values[2]) >> 16) ^ xors[2];
527           int16_t r2 = (((int32_t)r * (uint32_t)env_values[2]) >> 16) ^ xors[3];
528 
529           dl[i^S] = clamp_s16(dl[i^S] + l);
530           dr[i^S] = clamp_s16(dr[i^S] + r);
531           wl[i^S] = clamp_s16(wl[i^S] + l2);
532           wr[i^S] = clamp_s16(wr[i^S] + r2);
533        }
534 
535        env_values[0] += env_steps[0];
536        env_values[1] += env_steps[1];
537        env_values[2] += env_steps[2];
538 
539        dl += 8;
540        dr += 8;
541        wl += 8;
542        wr += 8;
543        in += 8;
544        count -= 8;
545     }
546 }
547 
548 
alist_mix(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count,int16_t gain)549 void alist_mix(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t count, int16_t gain)
550 {
551    int16_t       *dst = (int16_t*)(hle->alist_buffer + dmemo);
552    const int16_t *src = (int16_t*)(hle->alist_buffer + dmemi);
553 
554    count >>= 1;
555 
556    while(count)
557    {
558       *dst = sample_mix(dst, *src, gain);
559 
560       ++dst;
561       ++src;
562       --count;
563    }
564 }
565 
alist_multQ44(struct hle_t * hle,uint16_t dmem,uint16_t count,int8_t gain)566 void alist_multQ44(struct hle_t* hle, uint16_t dmem, uint16_t count, int8_t gain)
567 {
568    int16_t *dst = (int16_t*)(hle->alist_buffer + dmem);
569 
570    count >>= 1;
571 
572    while(count)
573    {
574       *dst = clamp_s16(*dst * gain >> 4);
575 
576       ++dst;
577       --count;
578    }
579 }
580 
alist_add(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count)581 void alist_add(struct hle_t* hle, uint16_t dmemo, uint16_t dmemi, uint16_t count)
582 {
583    int16_t       *dst = (int16_t*)(hle->alist_buffer + dmemo);
584    const int16_t *src = (int16_t*)(hle->alist_buffer + dmemi);
585 
586    count >>= 1;
587 
588    while(count)
589    {
590       *dst = clamp_s16(*dst + *src);
591 
592       ++dst;
593       ++src;
594       --count;
595    }
596 }
597 
alist_resample_reset(struct hle_t * hle,uint16_t pos,uint32_t * pitch_accu)598 static void alist_resample_reset(struct hle_t* hle, uint16_t pos, uint32_t* pitch_accu)
599 {
600    unsigned k;
601 
602    for(k = 0; k < 4; ++k)
603       *sample(hle, pos + k) = 0;
604 
605    *pitch_accu = 0;
606 }
607 
alist_resample_load(struct hle_t * hle,uint32_t address,uint16_t pos,uint32_t * pitch_accu)608 static void alist_resample_load(struct hle_t* hle,
609       uint32_t address, uint16_t pos, uint32_t* pitch_accu)
610 {
611     *sample(hle, pos + 0) = *dram_u16(hle, address + 0);
612     *sample(hle, pos + 1) = *dram_u16(hle, address + 2);
613     *sample(hle, pos + 2) = *dram_u16(hle, address + 4);
614     *sample(hle, pos + 3) = *dram_u16(hle, address + 6);
615 
616     *pitch_accu = *dram_u16(hle, address + 8);
617 }
618 
alist_resample_save(struct hle_t * hle,uint32_t address,uint16_t pos,uint32_t pitch_accu)619 static void alist_resample_save(struct hle_t* hle,
620       uint32_t address, uint16_t pos, uint32_t pitch_accu)
621 {
622     *dram_u16(hle, address + 0) = *sample(hle, pos + 0);
623     *dram_u16(hle, address + 2) = *sample(hle, pos + 1);
624     *dram_u16(hle, address + 4) = *sample(hle, pos + 2);
625     *dram_u16(hle, address + 6) = *sample(hle, pos + 3);
626 
627     *dram_u16(hle, address + 8) = pitch_accu;
628 }
629 
alist_resample(struct hle_t * hle,bool init,bool flag2,uint16_t dmemo,uint16_t dmemi,uint16_t count,uint32_t pitch,uint32_t address)630 void alist_resample(
631         struct hle_t* hle,
632         bool init,
633         bool flag2,
634         uint16_t dmemo,
635         uint16_t dmemi,
636         uint16_t count,
637         uint32_t pitch,     /* Q16.16 */
638         uint32_t address)
639 {
640    uint32_t pitch_accu;
641    uint16_t ipos = (dmemi >> 1) - 4;
642    uint16_t opos = dmemo >> 1;
643 
644    count >>= 1;
645 
646 #ifndef NDEBUG
647    if (flag2)
648       HleWarnMessage(hle->user_defined, "alist_resample: flag2 is not implemented");
649 #endif
650 
651    if (init)
652       alist_resample_reset(hle, ipos, &pitch_accu);
653    else
654       alist_resample_load(hle, address, ipos, &pitch_accu);
655 
656    while (count)
657    {
658       const int16_t* lut = RESAMPLE_LUT + ((pitch_accu & 0xfc00) >> 8);
659 
660       *sample(hle, opos++) = clamp_s16( (
661                (*sample(hle, ipos    ) * lut[0]) +
662                (*sample(hle, ipos + 1) * lut[1]) +
663                (*sample(hle, ipos + 2) * lut[2]) +
664                (*sample(hle, ipos + 3) * lut[3]) ) >> 15);
665 
666       pitch_accu += pitch;
667       ipos += (pitch_accu >> 16);
668       pitch_accu &= 0xffff;
669       --count;
670    }
671 
672    alist_resample_save(hle, address, ipos, pitch_accu);
673 }
674 
alist_resample_zoh(struct hle_t * hle,uint16_t dmemo,uint16_t dmemi,uint16_t count,uint32_t pitch,uint32_t pitch_accu)675 void alist_resample_zoh(
676         struct hle_t* hle,
677         uint16_t dmemo,
678         uint16_t dmemi,
679         uint16_t count,
680         uint32_t pitch,
681         uint32_t pitch_accu)
682 {
683    uint16_t ipos = dmemi >> 1;
684    uint16_t opos = dmemo >> 1;
685    count >>= 1;
686 
687    while(count)
688    {
689       *sample(hle, opos++) = *sample(hle, ipos);
690 
691       pitch_accu += pitch;
692       ipos += (pitch_accu >> 16);
693       pitch_accu &= 0xffff;
694       --count;
695    }
696 }
697 
/*
 * Frame predictor used by alist_adpcm(): reads packed input starting at
 * alist buffer offset dmemi, writes the unpacked samples to dst, and returns
 * a byte count that alist_adpcm() adds to dmemi before the next frame.
 */
typedef unsigned int (*adpcm_predict_frame_t)(struct hle_t* hle,
      int16_t* dst, uint16_t dmemi, unsigned char scale);
700 
adpcm_predict_frame_4bits(struct hle_t * hle,int16_t * dst,uint16_t dmemi,unsigned char scale)701 static unsigned int adpcm_predict_frame_4bits(struct hle_t* hle,
702       int16_t* dst, uint16_t dmemi, unsigned char scale)
703 {
704    unsigned int i;
705    unsigned int rshift = (scale < 12) ? 12 - scale : 0;
706 
707    for(i = 0; i < 8; ++i)
708    {
709       uint8_t byte = *alist_u8(hle, dmemi++);
710 
711       *(dst++) = adpcm_predict_sample(byte, 0xf0,  8, rshift);
712       *(dst++) = adpcm_predict_sample(byte, 0x0f, 12, rshift);
713    }
714 
715    return 8;
716 }
717 
adpcm_predict_frame_2bits(struct hle_t * hle,int16_t * dst,uint16_t dmemi,unsigned char scale)718 static unsigned int adpcm_predict_frame_2bits(struct hle_t* hle,
719       int16_t* dst, uint16_t dmemi, unsigned char scale)
720 {
721    unsigned int i;
722    unsigned int rshift = (scale < 14) ? 14 - scale : 0;
723 
724    for(i = 0; i < 4; ++i)
725    {
726       uint8_t byte = *alist_u8(hle, dmemi++);
727 
728       *(dst++) = adpcm_predict_sample(byte, 0xc0,  8, rshift);
729       *(dst++) = adpcm_predict_sample(byte, 0x30, 10, rshift);
730       *(dst++) = adpcm_predict_sample(byte, 0x0c, 12, rshift);
731       *(dst++) = adpcm_predict_sample(byte, 0x03, 14, rshift);
732    }
733 
734    return 4;
735 }
736 
alist_adpcm(struct hle_t * hle,bool init,bool loop,bool two_bit_per_sample,uint16_t dmemo,uint16_t dmemi,uint16_t count,const int16_t * codebook,uint32_t loop_address,uint32_t last_frame_address)737 void alist_adpcm(
738         struct hle_t* hle,
739         bool init,
740         bool loop,
741         bool two_bit_per_sample,
742         uint16_t dmemo,
743         uint16_t dmemi,
744         uint16_t count,
745         const int16_t* codebook,
746         uint32_t loop_address,
747         uint32_t last_frame_address)
748 {
749    int16_t last_frame[16];
750    size_t i;
751    adpcm_predict_frame_t predict_frame;
752 
753    if (!hle || !codebook)
754       return;
755 
756    predict_frame = (two_bit_per_sample)
757       ? adpcm_predict_frame_2bits
758       : adpcm_predict_frame_4bits;
759 
760    assert((count & 0x1f) == 0);
761 
762    if (init)
763    {
764       for (i = 0; i < 16; i++)
765          last_frame[i] = 0;
766    }
767    else
768       dram_load_u16(hle, (uint16_t*)last_frame, (loop) ? loop_address : last_frame_address, 16);
769 
770    for(i = 0; i < 16; ++i, dmemo += 2)
771       *alist_s16(hle, dmemo) = last_frame[i];
772 
773    while (count)
774    {
775       int16_t frame[16];
776       uint8_t code = *alist_u8(hle, dmemi++);
777       unsigned char scale = (code & 0xf0) >> 4;
778       const int16_t* const cb_entry = codebook + ((code & 0xf) << 4);
779 
780       dmemi += predict_frame(hle, frame, dmemi, scale);
781 
782       adpcm_compute_residuals(last_frame    , frame    , cb_entry, last_frame + 14, 8);
783       adpcm_compute_residuals(last_frame + 8, frame + 8, cb_entry, last_frame + 6 , 8);
784 
785       for(i = 0; i < 16; ++i, dmemo += 2)
786          *alist_s16(hle, dmemo) = last_frame[i];
787 
788       count -= 32;
789    }
790 
791    dram_store_u16(hle, (uint16_t*)last_frame, last_frame_address, 16);
792 }
793 
794 
alist_filter(struct hle_t * hle,uint16_t dmem,uint16_t count,uint32_t address,const uint32_t * lut_address)795 void alist_filter(
796         struct hle_t* hle,
797         uint16_t dmem,
798         uint16_t count,
799         uint32_t address,
800         const uint32_t* lut_address)
801 {
802    int x;
803    int16_t outbuff[0x3c0];
804    int16_t *outp = outbuff;
805 
806    int16_t* const lutt6 = (int16_t*)(hle->dram + lut_address[0]);
807    int16_t* const lutt5 = (int16_t*)(hle->dram + lut_address[1]);
808 
809    int16_t* in1 = (int16_t*)(hle->dram + address);
810    int16_t* in2 = (int16_t*)(hle->alist_buffer + dmem);
811 
812    for (x = 0; x < 8; ++x)
813    {
814       int32_t v = (lutt5[x] + lutt6[x]) >> 1;
815       lutt5[x] = lutt6[x] = v;
816    }
817 
818    for (x = 0; x < count; x += 16)
819    {
820       int32_t v[8];
821 
822       v[1] =  in1[0] * lutt6[6];
823       v[1] += in1[3] * lutt6[7];
824       v[1] += in1[2] * lutt6[4];
825       v[1] += in1[5] * lutt6[5];
826       v[1] += in1[4] * lutt6[2];
827       v[1] += in1[7] * lutt6[3];
828       v[1] += in1[6] * lutt6[0];
829       v[1] += in2[1] * lutt6[1]; /* 1 */
830 
831       v[0] =  in1[3] * lutt6[6];
832       v[0] += in1[2] * lutt6[7];
833       v[0] += in1[5] * lutt6[4];
834       v[0] += in1[4] * lutt6[5];
835       v[0] += in1[7] * lutt6[2];
836       v[0] += in1[6] * lutt6[3];
837       v[0] += in2[1] * lutt6[0];
838       v[0] += in2[0] * lutt6[1];
839 
840       v[3] =  in1[2] * lutt6[6];
841       v[3] += in1[5] * lutt6[7];
842       v[3] += in1[4] * lutt6[4];
843       v[3] += in1[7] * lutt6[5];
844       v[3] += in1[6] * lutt6[2];
845       v[3] += in2[1] * lutt6[3];
846       v[3] += in2[0] * lutt6[0];
847       v[3] += in2[3] * lutt6[1];
848 
849       v[2] =  in1[5] * lutt6[6];
850       v[2] += in1[4] * lutt6[7];
851       v[2] += in1[7] * lutt6[4];
852       v[2] += in1[6] * lutt6[5];
853       v[2] += in2[1] * lutt6[2];
854       v[2] += in2[0] * lutt6[3];
855       v[2] += in2[3] * lutt6[0];
856       v[2] += in2[2] * lutt6[1];
857 
858       v[5] =  in1[4] * lutt6[6];
859       v[5] += in1[7] * lutt6[7];
860       v[5] += in1[6] * lutt6[4];
861       v[5] += in2[1] * lutt6[5];
862       v[5] += in2[0] * lutt6[2];
863       v[5] += in2[3] * lutt6[3];
864       v[5] += in2[2] * lutt6[0];
865       v[5] += in2[5] * lutt6[1];
866 
867       v[4] =  in1[7] * lutt6[6];
868       v[4] += in1[6] * lutt6[7];
869       v[4] += in2[1] * lutt6[4];
870       v[4] += in2[0] * lutt6[5];
871       v[4] += in2[3] * lutt6[2];
872       v[4] += in2[2] * lutt6[3];
873       v[4] += in2[5] * lutt6[0];
874       v[4] += in2[4] * lutt6[1];
875 
876       v[7] =  in1[6] * lutt6[6];
877       v[7] += in2[1] * lutt6[7];
878       v[7] += in2[0] * lutt6[4];
879       v[7] += in2[3] * lutt6[5];
880       v[7] += in2[2] * lutt6[2];
881       v[7] += in2[5] * lutt6[3];
882       v[7] += in2[4] * lutt6[0];
883       v[7] += in2[7] * lutt6[1];
884 
885       v[6] =  in2[1] * lutt6[6];
886       v[6] += in2[0] * lutt6[7];
887       v[6] += in2[3] * lutt6[4];
888       v[6] += in2[2] * lutt6[5];
889       v[6] += in2[5] * lutt6[2];
890       v[6] += in2[4] * lutt6[3];
891       v[6] += in2[7] * lutt6[0];
892       v[6] += in2[6] * lutt6[1];
893 
894       outp[1] = ((v[1] + 0x4000) >> 15);
895       outp[0] = ((v[0] + 0x4000) >> 15);
896       outp[3] = ((v[3] + 0x4000) >> 15);
897       outp[2] = ((v[2] + 0x4000) >> 15);
898       outp[5] = ((v[5] + 0x4000) >> 15);
899       outp[4] = ((v[4] + 0x4000) >> 15);
900       outp[7] = ((v[7] + 0x4000) >> 15);
901       outp[6] = ((v[6] + 0x4000) >> 15);
902       in1 = in2;
903       in2 += 8;
904       outp += 8;
905    }
906 
907    memcpy(hle->dram + address, in2 - 8, 16);
908    memcpy(hle->alist_buffer + dmem, outbuff, count);
909 }
910 
alist_polef(struct hle_t * hle,bool init,uint16_t dmemo,uint16_t dmemi,uint16_t count,uint16_t gain,int16_t * table,uint32_t address)911 void alist_polef(
912         struct hle_t* hle,
913         bool init,
914         uint16_t dmemo,
915         uint16_t dmemi,
916         uint16_t count,
917         uint16_t gain,
918         int16_t* table,
919         uint32_t address)
920 {
921    unsigned i;
922    int16_t h2_before[8];
923    int16_t l1              = 0;
924    int16_t l2              = 0;
925    int16_t *dst            = (int16_t*)(hle->alist_buffer + dmemo);
926    const int16_t* const h1 = table;
927    int16_t* const h2       = table + 8;
928 
929    count = align(count, 16);
930 
931    if (!init)
932    {
933       l1 = *dram_u16(hle, address + 4);
934       l2 = *dram_u16(hle, address + 6);
935    }
936 
937    for(i = 0; i < 8; ++i)
938    {
939       h2_before[i] = h2[i];
940       h2[i] = (((int32_t)h2[i] * gain) >> 14);
941    }
942 
943    do
944    {
945       int16_t frame[8];
946 
947       for(i = 0; i < 8; ++i, dmemi += 2)
948          frame[i] = *alist_s16(hle, dmemi);
949 
950       for(i = 0; i < 8; ++i)
951       {
952          int32_t accu = frame[i] * gain;
953          accu += h1[i]*l1 + h2_before[i]*l2 + rdot(i, h2, frame + i);
954          dst[i^S] = clamp_s16(accu >> 14);
955       }
956 
957       l1 = dst[6^S];
958       l2 = dst[7^S];
959 
960       dst += 8;
961       count -= 16;
962    }while(count);
963 
964    dram_store_u32(hle, (uint32_t*)(dst - 4), address, 2);
965 }
966 
alist_iirf(struct hle_t * hle,bool init,uint16_t dmemo,uint16_t dmemi,uint16_t count,int16_t * table,uint32_t address)967 void alist_iirf(
968       struct hle_t* hle,
969       bool init,
970       uint16_t dmemo,
971       uint16_t dmemi,
972       uint16_t count,
973       int16_t* table,
974       uint32_t address)
975 {
976    int32_t i, prev;
977    int16_t frame[8];
978    int16_t ibuf[4];
979    uint16_t index = 7;
980    int16_t *dst = (int16_t*)(hle->alist_buffer + dmemo);
981    count        = align(count, 16);
982 
983    if(init)
984    {
985       for(i = 0; i < 8; ++i)
986          frame[i] = 0;
987       ibuf[1] = 0;
988       ibuf[2] = 0;
989    }
990    else
991    {
992       frame[6] = *dram_u16(hle, address + 4);
993       frame[7] = *dram_u16(hle, address + 6);
994       ibuf[1] = (int16_t)*dram_u16(hle, address + 8);
995       ibuf[2] = (int16_t)*dram_u16(hle, address + 10);
996    }
997 
998    prev = vmulf(table[9], frame[6]) * 2;
999 
1000    do
1001    {
1002       for(i = 0; i < 8; ++i)
1003       {
1004          int32_t accu;
1005 
1006          ibuf[index&3] = *alist_s16(hle, dmemi);
1007          accu = prev
1008             + vmulf(table[0], ibuf[index&3])
1009             + vmulf(table[1], ibuf[(index-1)&3])
1010             + vmulf(table[0], ibuf[(index-2)&3]);
1011 
1012          accu         += vmulf(table[8], frame[index]) * 2;
1013          prev          = vmulf(table[9], frame[index]) * 2;
1014          dst[i^S]      = frame[i] = accu;
1015          index         = (index+1)&7;
1016          dmemi        += 2;
1017       }
1018       dst += 8;
1019       count -= 0x10;
1020    } while (count > 0);
1021 
1022    dram_store_u16(hle, (uint16_t*)&frame[6], address + 4, 4);
1023    dram_store_u16(hle, (uint16_t*)&ibuf[(index-2)&3], address+8, 2);
1024    dram_store_u16(hle, (uint16_t*)&ibuf[(index-1)&3], address+10, 2);
1025 }
1026