1 // Copyright 2016 Citra Emulator Project
2 // Licensed under GPLv2 or any later version
3 // Refer to the license.txt file included.
4 
5 #pragma once
6 
7 #include <array>
8 #include <cstddef>
9 #include <memory>
10 #include <type_traits>
11 #include <boost/serialization/access.hpp>
12 #include "audio_core/audio_types.h"
13 #include "audio_core/hle/common.h"
14 #include "common/bit_field.h"
15 #include "common/common_funcs.h"
16 #include "common/common_types.h"
17 #include "common/swap.h"
18 
19 namespace AudioCore::HLE {
20 
21 // The application-accessible region of DSP memory consists of two parts. Both are marked as IO and
22 // have Read/Write permissions.
23 //
24 // First Region:  0x1FF50000 (Size: 0x8000)
25 // Second Region: 0x1FF70000 (Size: 0x8000)
26 //
27 // The DSP reads from each region alternately based on the frame counter for each region much like a
28 // double-buffer. The frame counter is located as the very last u16 of each region and is
29 // incremented each audio tick.
30 
31 constexpr u32 region0_offset = 0x50000;
32 constexpr u32 region1_offset = 0x70000;
33 
34 /**
35  * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from
36  * its memory regions, the higher and lower 16-bit halves are swapped compared to the little-endian
37  * layout of the ARM11. Hence from the ARM11's point of view the memory space appears to be
38  * middle-endian.
39  *
40  * Unusually this does not appear to be an issue for floating point numbers. The DSP makes the more
41  * sensible choice of keeping that little-endian. There are also some exceptions such as the
42  * IntermediateMixSamples structure, which is little-endian.
43  *
44  * This struct implements the conversion to and from this middle-endianness.
45  */
46 struct u32_dsp {
47     u32_dsp() = default;
u32u32_dsp48     operator u32() const {
49         return Convert(storage);
50     }
51     void operator=(u32 new_value) {
52         storage = Convert(new_value);
53     }
54 
55 private:
Convertu32_dsp56     static constexpr u32 Convert(u32 value) {
57         return (value << 16) | (value >> 16);
58     }
59     u32_le storage;
60 
61     template <class Archive>
serializeu32_dsp62     void serialize(Archive& ar, const unsigned int) {
63         ar& storage;
64     }
65     friend class boost::serialization::access;
66 };
67 static_assert(std::is_trivially_copyable<u32_dsp>::value, "u32_dsp isn't trivially copyable");
68 
// There are 15 structures in each memory region. A table of them in the order they appear in memory
// is presented below:
//
//       #           First Region DSP Address   Purpose                               Control
//       5           0x8400                     DSP Status                            DSP
//       9           0x8410                     DSP Debug Info                        DSP
//       6           0x8540                     Final Mix Samples                     DSP
//       2           0x8680                     Source Status [24]                    DSP
//       8           0x8710                     Compressor Table                      Application
//       4           0x9430                     DSP Configuration                     Application
//       7           0x9492                     Intermediate Mix Samples              DSP + App
//       1           0x9E92                     Source Configuration [24]             Application
//       3           0xA792                     Source ADPCM Coefficients [24]        Application
//       10          0xA912                     Surround Sound Related
//       11          0xAA12                     Surround Sound Related
//       12          0xAAD2                     Surround Sound Related
//       13          0xAC52                     Surround Sound Related
//       14          0xAC5C                     Surround Sound Related
//       0           0xBFFF                     Frame Counter                         Application
//
// #: This refers to the order in which they appear in the DspPipe::Audio DSP pipe.
//    See also: HLE::PipeRead.
//
// Note that the above addresses do vary slightly between audio firmwares observed; the addresses
// are not fixed in stone. The addresses above are only an example; they're what this
// implementation does and provides to applications.
//
// Applications request the DSP service to convert DSP addresses into ARM11 virtual addresses using
// the ConvertProcessAddressFromDspDram service call. Applications seem to derive the addresses for
// the second region via:
//     second_region_dsp_addr = first_region_dsp_addr | 0x10000
//
// Applications maintain most of their own audio state; the memory region is used mainly for
// communication and not storage of state.
//
// In the documentation below, filter and effect transfer functions are specified in the z domain.
// (If you are more familiar with the Laplace transform, z = exp(sT). The z domain is the digital
// frequency domain, just like how the s domain is the analog frequency domain.)
107 
/// Inserts padding measured in DSP words; one DSP word is two bytes.
#define INSERT_PADDING_DSPWORDS(num_words) INSERT_PADDING_BYTES(2 * (num_words))

/// Compile-time checks for a DSP structure: it must be standard-layout, trivially copyable and
/// exactly `size` bytes long.
#define ASSERT_DSP_STRUCT(name, size)                                                              \
    static_assert(std::is_standard_layout<name>::value,                                            \
                  "DSP structure " #name " doesn't use standard layout");                          \
    static_assert(std::is_trivially_copyable<name>::value,                                         \
                  "DSP structure " #name " isn't trivially copyable");                             \
    static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name)
116 
117 struct SourceConfiguration {
118     struct Configuration {
119         /// These dirty flags are set by the application when it updates the fields in this struct.
120         /// The DSP clears these each audio frame.
121         union {
122             u32_le dirty_raw;
123 
124             BitField<0, 1, u32> format_dirty;
125             BitField<1, 1, u32> mono_or_stereo_dirty;
126             BitField<2, 1, u32> adpcm_coefficients_dirty;
127             /// Tends to be set when a looped buffer is queued.
128             BitField<3, 1, u32> partial_embedded_buffer_dirty;
129             BitField<4, 1, u32> partial_reset_flag;
130 
131             BitField<16, 1, u32> enable_dirty;
132             BitField<17, 1, u32> interpolation_dirty;
133             BitField<18, 1, u32> rate_multiplier_dirty;
134             BitField<19, 1, u32> buffer_queue_dirty;
135             BitField<20, 1, u32> loop_related_dirty;
136             /// Tends to also be set when embedded buffer is updated.
137             BitField<21, 1, u32> play_position_dirty;
138             BitField<22, 1, u32> filters_enabled_dirty;
139             BitField<23, 1, u32> simple_filter_dirty;
140             BitField<24, 1, u32> biquad_filter_dirty;
141             BitField<25, 1, u32> gain_0_dirty;
142             BitField<26, 1, u32> gain_1_dirty;
143             BitField<27, 1, u32> gain_2_dirty;
144             BitField<28, 1, u32> sync_dirty;
145             BitField<29, 1, u32> reset_flag;
146             BitField<30, 1, u32> embedded_buffer_dirty;
147         };
148 
149         // Gain control
150 
151         /**
152          * Gain is between 0.0-1.0. This determines how much will this source appear on each of the
153          * 12 channels that feed into the intermediate mixers. Each of the three intermediate mixers
154          * is fed two left and two right channels.
155          */
156         float_le gain[3][4];
157 
158         // Interpolation
159 
160         /// Multiplier for sample rate. Resampling occurs with the selected interpolation method.
161         float_le rate_multiplier;
162 
163         enum class InterpolationMode : u8 {
164             Polyphase = 0,
165             Linear = 1,
166             None = 2,
167         };
168 
169         InterpolationMode interpolation_mode;
170         INSERT_PADDING_BYTES(1); ///< Interpolation related
171 
172         // Filters
173 
174         /**
175          * This is the simplest normalized first-order digital recursive filter.
176          * The transfer function of this filter is:
177          *     H(z) = b0 / (1 - a1 z^-1)
178          * Note the feedbackward coefficient is negated.
179          * Values are signed fixed point with 15 fractional bits.
180          */
181         struct SimpleFilter {
182             s16_le b0;
183             s16_le a1;
184         };
185 
186         /**
187          * This is a normalised biquad filter (second-order).
188          * The transfer function of this filter is:
189          *     H(z) = (b0 + b1 z^-1 + b2 z^-2) / (1 - a1 z^-1 - a2 z^-2)
190          * Nintendo chose to negate the feedbackward coefficients. This differs from standard
191          * notation as in: https://ccrma.stanford.edu/~jos/filters/Direct_Form_I.html
192          * Values are signed fixed point with 14 fractional bits.
193          */
194         struct BiquadFilter {
195             s16_le a2;
196             s16_le a1;
197             s16_le b2;
198             s16_le b1;
199             s16_le b0;
200         };
201 
202         union {
203             u16_le filters_enabled;
204             BitField<0, 1, u16> simple_filter_enabled;
205             BitField<1, 1, u16> biquad_filter_enabled;
206         };
207 
208         SimpleFilter simple_filter;
209         BiquadFilter biquad_filter;
210 
211         // Buffer Queue
212 
213         /// A buffer of audio data from the application, along with metadata about it.
214         struct Buffer {
215             /// Physical memory address of the start of the buffer
216             u32_dsp physical_address;
217 
218             /// This is length in terms of samples.
219             /// Note that in different buffer formats a sample takes up different number of bytes.
220             u32_dsp length;
221 
222             /// ADPCM Predictor (4 bits) and Scale (4 bits)
223             union {
224                 u16_le adpcm_ps;
225                 BitField<0, 4, u16> adpcm_scale;
226                 BitField<4, 4, u16> adpcm_predictor;
227             };
228 
229             /// ADPCM Historical Samples (y[n-1] and y[n-2])
230             u16_le adpcm_yn[2];
231 
232             /// This is non-zero when the ADPCM values above are to be updated.
233             u8 adpcm_dirty;
234 
235             /// Is a looping buffer.
236             u8 is_looping;
237 
238             /// This value is shown in SourceStatus::previous_buffer_id when this buffer has
239             /// finished. This allows the emulated application to tell what buffer is currently
240             /// playing.
241             u16_le buffer_id;
242 
243             INSERT_PADDING_DSPWORDS(1);
244         };
245 
246         u16_le buffers_dirty; ///< Bitmap indicating which buffers are dirty (bit i -> buffers[i])
247         Buffer buffers[4];    ///< Queued Buffers
248 
249         // Playback controls
250 
251         u32_dsp loop_related;
252         u8 enable;
253         INSERT_PADDING_BYTES(1);
254         u16_le sync;           ///< Application-side sync (See also: SourceStatus::sync)
255         u32_dsp play_position; ///< Position. (Units: number of samples)
256         INSERT_PADDING_DSPWORDS(2);
257 
258         // Embedded Buffer
259         // This buffer is often the first buffer to be used when initiating audio playback,
260         // after which the buffer queue is used.
261 
262         u32_dsp physical_address;
263 
264         /// This is length in terms of samples.
265         /// Note a sample takes up different number of bytes in different buffer formats.
266         u32_dsp length;
267 
268         enum class MonoOrStereo : u16_le {
269             Mono = 1,
270             Stereo = 2,
271         };
272 
273         enum class Format : u16_le {
274             PCM8 = 0,
275             PCM16 = 1,
276             ADPCM = 2,
277         };
278 
279         union {
280             u16_le flags1_raw;
281             BitField<0, 2, MonoOrStereo> mono_or_stereo;
282             BitField<2, 2, Format> format;
283             BitField<5, 1, u16> fade_in;
284         };
285 
286         /// ADPCM Predictor (4 bit) and Scale (4 bit)
287         union {
288             u16_le adpcm_ps;
289             BitField<0, 4, u16> adpcm_scale;
290             BitField<4, 4, u16> adpcm_predictor;
291         };
292 
293         /// ADPCM Historical Samples (y[n-1] and y[n-2])
294         u16_le adpcm_yn[2];
295 
296         union {
297             u16_le flags2_raw;
298             BitField<0, 1, u16> adpcm_dirty; ///< Has the ADPCM info above been changed?
299             BitField<1, 1, u16> is_looping;  ///< Is this a looping buffer?
300         };
301 
302         /// Buffer id of embedded buffer (used as a buffer id in SourceStatus to reference this
303         /// buffer).
304         u16_le buffer_id;
305     };
306 
307     Configuration config[num_sources];
308 };
309 ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192);
310 ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20);
311 
312 struct SourceStatus {
313     struct Status {
314         u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
315         u8 current_buffer_id_dirty; ///< Non-zero when current_buffer_id changes
316         u16_le sync;                ///< Is set by the DSP to the value of SourceConfiguration::sync
317         u32_dsp buffer_position;    ///< Number of samples into the current buffer
318         u16_le current_buffer_id;   ///< Updated when a buffer finishes playing
319         INSERT_PADDING_DSPWORDS(1);
320     };
321 
322     Status status[num_sources];
323 };
324 ASSERT_DSP_STRUCT(SourceStatus::Status, 12);
325 
326 struct DspConfiguration {
327     /// These dirty flags are set by the application when it updates the fields in this struct.
328     /// The DSP clears these each audio frame.
329     union {
330         u32_le dirty_raw;
331 
332         BitField<8, 1, u32> mixer1_enabled_dirty;
333         BitField<9, 1, u32> mixer2_enabled_dirty;
334         BitField<10, 1, u32> delay_effect_0_dirty;
335         BitField<11, 1, u32> delay_effect_1_dirty;
336         BitField<12, 1, u32> reverb_effect_0_dirty;
337         BitField<13, 1, u32> reverb_effect_1_dirty;
338 
339         BitField<16, 1, u32> volume_0_dirty;
340 
341         BitField<24, 1, u32> volume_1_dirty;
342         BitField<25, 1, u32> volume_2_dirty;
343         BitField<26, 1, u32> output_format_dirty;
344         BitField<27, 1, u32> limiter_enabled_dirty;
345         BitField<28, 1, u32> headphones_connected_dirty;
346     };
347 
348     /// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for
349     /// each at the final mixer.
350     float_le volume[3];
351 
352     INSERT_PADDING_DSPWORDS(3);
353 
354     enum class OutputFormat : u16_le {
355         Mono = 0,
356         Stereo = 1,
357         Surround = 2,
358     };
359 
360     OutputFormat output_format;
361 
362     u16_le limiter_enabled;      ///< Not sure of the exact gain equation for the limiter.
363     u16_le headphones_connected; ///< Application updates the DSP on headphone status.
364     INSERT_PADDING_DSPWORDS(4);  ///< TODO: Surround sound related
365     INSERT_PADDING_DSPWORDS(2);  ///< TODO: Intermediate mixer 1/2 related
366     u16_le mixer1_enabled;
367     u16_le mixer2_enabled;
368 
369     /**
370      * This is delay with feedback.
371      * Transfer function:
372      *     H(z) = a z^-N / (1 - b z^-1 + a g z^-N)
373      *   where
374      *     N = frame_count * samples_per_frame
375      * g, a and b are fixed point with 7 fractional bits
376      */
377     struct DelayEffect {
378         /// These dirty flags are set by the application when it updates the fields in this struct.
379         /// The DSP clears these each audio frame.
380         union {
381             u16_le dirty_raw;
382             BitField<0, 1, u16> enable_dirty;
383             BitField<1, 1, u16> work_buffer_address_dirty;
384             BitField<2, 1, u16> other_dirty; ///< Set when anything else has been changed
385         };
386 
387         u16_le enable;
388         INSERT_PADDING_DSPWORDS(1);
389         u16_le outputs;
390         /// The application allocates a block of memory for the DSP to use as a work buffer.
391         u32_dsp work_buffer_address;
392         /// Frames to delay by
393         u16_le frame_count;
394 
395         // Coefficients
396         s16_le g; ///< Fixed point with 7 fractional bits
397         s16_le a; ///< Fixed point with 7 fractional bits
398         s16_le b; ///< Fixed point with 7 fractional bits
399     };
400 
401     DelayEffect delay_effect[2];
402 
403     struct ReverbEffect {
404         INSERT_PADDING_DSPWORDS(26); ///< TODO
405     };
406 
407     ReverbEffect reverb_effect[2];
408 
409     INSERT_PADDING_DSPWORDS(4);
410 };
411 ASSERT_DSP_STRUCT(DspConfiguration, 196);
412 ASSERT_DSP_STRUCT(DspConfiguration::DelayEffect, 20);
413 ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52);
414 
415 struct AdpcmCoefficients {
416     /// Coefficients are signed fixed point with 11 fractional bits.
417     /// Each source has 16 coefficients associated with it.
418     s16_le coeff[num_sources][16];
419 };
420 ASSERT_DSP_STRUCT(AdpcmCoefficients, 768);
421 
422 struct DspStatus {
423     u16_le unknown;
424     u16_le dropped_frames;
425     INSERT_PADDING_DSPWORDS(0xE);
426 };
427 ASSERT_DSP_STRUCT(DspStatus, 32);
428 
429 /// Final mixed output in PCM16 stereo format, what you hear out of the speakers.
430 /// When the application writes to this region it has no effect.
431 struct FinalMixSamples {
432     s16_le pcm16[samples_per_frame][2];
433 };
434 ASSERT_DSP_STRUCT(FinalMixSamples, 640);
435 
436 /// DSP writes output of intermediate mixers 1 and 2 here.
437 /// Writes to this region by the application edits the output of the intermediate mixers.
438 /// This seems to be intended to allow the application to do custom effects on the ARM11.
439 /// Values that exceed s16 range will be clipped by the DSP after further processing.
440 struct IntermediateMixSamples {
441     struct Samples {
442         s32_le pcm32[4][samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian.
443     };
444 
445     Samples mix1;
446     Samples mix2;
447 };
448 ASSERT_DSP_STRUCT(IntermediateMixSamples, 5120);
449 
450 /// Compressor table
451 struct Compressor {
452     INSERT_PADDING_DSPWORDS(0xD20); ///< TODO
453 };
454 
455 /// There is no easy way to implement this in a HLE implementation.
456 struct DspDebug {
457     INSERT_PADDING_DSPWORDS(0x130);
458 };
459 ASSERT_DSP_STRUCT(DspDebug, 0x260);
460 
461 struct SharedMemory {
462     /// Padding
463     INSERT_PADDING_DSPWORDS(0x400);
464 
465     DspStatus dsp_status;
466 
467     DspDebug dsp_debug;
468 
469     FinalMixSamples final_samples;
470 
471     SourceStatus source_statuses;
472 
473     Compressor compressor;
474 
475     DspConfiguration dsp_configuration;
476 
477     IntermediateMixSamples intermediate_mix_samples;
478 
479     SourceConfiguration source_configurations;
480 
481     AdpcmCoefficients adpcm_coefficients;
482 
483     struct {
484         INSERT_PADDING_DSPWORDS(0x100);
485     } unknown10;
486 
487     struct {
488         INSERT_PADDING_DSPWORDS(0xC0);
489     } unknown11;
490 
491     struct {
492         INSERT_PADDING_DSPWORDS(0x180);
493     } unknown12;
494 
495     struct {
496         INSERT_PADDING_DSPWORDS(0xA);
497     } unknown13;
498 
499     struct {
500         INSERT_PADDING_DSPWORDS(0x13A3);
501     } unknown14;
502 
503     u16_le frame_counter;
504 };
505 ASSERT_DSP_STRUCT(SharedMemory, 0x8000);
506 
507 union DspMemory {
508     std::array<u8, 0x80000> raw_memory{};
509     struct {
510         u8 unused_0[0x50000];
511         SharedMemory region_0;
512         u8 unused_1[0x18000];
513         SharedMemory region_1;
514         u8 unused_2[0x8000];
515     };
516 };
517 static_assert(offsetof(DspMemory, region_0) == region0_offset,
518               "DSP region 0 is at the wrong offset");
519 static_assert(offsetof(DspMemory, region_1) == region1_offset,
520               "DSP region 1 is at the wrong offset");
521 
522 // Structures must have an offset that is a multiple of two.
523 static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0,
524               "Structures in HLE::SharedMemory must be 2-byte aligned");
525 static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0,
526               "Structures in HLE::SharedMemory must be 2-byte aligned");
527 static_assert(offsetof(SharedMemory, source_statuses) % 2 == 0,
528               "Structures in HLE::SharedMemory must be 2-byte aligned");
529 static_assert(offsetof(SharedMemory, adpcm_coefficients) % 2 == 0,
530               "Structures in HLE::SharedMemory must be 2-byte aligned");
531 static_assert(offsetof(SharedMemory, dsp_configuration) % 2 == 0,
532               "Structures in HLE::SharedMemory must be 2-byte aligned");
533 static_assert(offsetof(SharedMemory, dsp_status) % 2 == 0,
534               "Structures in HLE::SharedMemory must be 2-byte aligned");
535 static_assert(offsetof(SharedMemory, final_samples) % 2 == 0,
536               "Structures in HLE::SharedMemory must be 2-byte aligned");
537 static_assert(offsetof(SharedMemory, intermediate_mix_samples) % 2 == 0,
538               "Structures in HLE::SharedMemory must be 2-byte aligned");
539 static_assert(offsetof(SharedMemory, compressor) % 2 == 0,
540               "Structures in HLE::SharedMemory must be 2-byte aligned");
541 static_assert(offsetof(SharedMemory, dsp_debug) % 2 == 0,
542               "Structures in HLE::SharedMemory must be 2-byte aligned");
543 static_assert(offsetof(SharedMemory, unknown10) % 2 == 0,
544               "Structures in HLE::SharedMemory must be 2-byte aligned");
545 static_assert(offsetof(SharedMemory, unknown11) % 2 == 0,
546               "Structures in HLE::SharedMemory must be 2-byte aligned");
547 static_assert(offsetof(SharedMemory, unknown12) % 2 == 0,
548               "Structures in HLE::SharedMemory must be 2-byte aligned");
549 static_assert(offsetof(SharedMemory, unknown13) % 2 == 0,
550               "Structures in HLE::SharedMemory must be 2-byte aligned");
551 static_assert(offsetof(SharedMemory, unknown14) % 2 == 0,
552               "Structures in HLE::SharedMemory must be 2-byte aligned");
553 
554 #undef INSERT_PADDING_DSPWORDS
555 #undef ASSERT_DSP_STRUCT
556 
557 } // namespace AudioCore::HLE
558