1 // Copyright 2016 Citra Emulator Project 2 // Licensed under GPLv2 or any later version 3 // Refer to the license.txt file included. 4 5 #pragma once 6 7 #include <array> 8 #include <cstddef> 9 #include <memory> 10 #include <type_traits> 11 #include <boost/serialization/access.hpp> 12 #include "audio_core/audio_types.h" 13 #include "audio_core/hle/common.h" 14 #include "common/bit_field.h" 15 #include "common/common_funcs.h" 16 #include "common/common_types.h" 17 #include "common/swap.h" 18 19 namespace AudioCore::HLE { 20 21 // The application-accessible region of DSP memory consists of two parts. Both are marked as IO and 22 // have Read/Write permissions. 23 // 24 // First Region: 0x1FF50000 (Size: 0x8000) 25 // Second Region: 0x1FF70000 (Size: 0x8000) 26 // 27 // The DSP reads from each region alternately based on the frame counter for each region much like a 28 // double-buffer. The frame counter is located as the very last u16 of each region and is 29 // incremented each audio tick. 30 31 constexpr u32 region0_offset = 0x50000; 32 constexpr u32 region1_offset = 0x70000; 33 34 /** 35 * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from 36 * its memory regions, the higher and lower 16-bit halves are swapped compared to the little-endian 37 * layout of the ARM11. Hence from the ARM11's point of view the memory space appears to be 38 * middle-endian. 39 * 40 * Unusually this does not appear to be an issue for floating point numbers. The DSP makes the more 41 * sensible choice of keeping that little-endian. There are also some exceptions such as the 42 * IntermediateMixSamples structure, which is little-endian. 43 * 44 * This struct implements the conversion to and from this middle-endianness. 
45 */ 46 struct u32_dsp { 47 u32_dsp() = default; u32u32_dsp48 operator u32() const { 49 return Convert(storage); 50 } 51 void operator=(u32 new_value) { 52 storage = Convert(new_value); 53 } 54 55 private: Convertu32_dsp56 static constexpr u32 Convert(u32 value) { 57 return (value << 16) | (value >> 16); 58 } 59 u32_le storage; 60 61 template <class Archive> serializeu32_dsp62 void serialize(Archive& ar, const unsigned int) { 63 ar& storage; 64 } 65 friend class boost::serialization::access; 66 }; 67 static_assert(std::is_trivially_copyable<u32_dsp>::value, "u32_dsp isn't trivially copyable"); 68 69 // There are 15 structures in each memory region. A table of them in the order they appear in memory 70 // is presented below: 71 // 72 // # First Region DSP Address Purpose Control 73 // 5 0x8400 DSP Status DSP 74 // 9 0x8410 DSP Debug Info DSP 75 // 6 0x8540 Final Mix Samples DSP 76 // 2 0x8680 Source Status [24] DSP 77 // 8 0x8710 Compressor Table Application 78 // 4 0x9430 DSP Configuration Application 79 // 7 0x9492 Intermediate Mix Samples DSP + App 80 // 1 0x9E92 Source Configuration [24] Application 81 // 3 0xA792 Source ADPCM Coefficients [24] Application 82 // 10 0xA912 Surround Sound Related 83 // 11 0xAA12 Surround Sound Related 84 // 12 0xAAD2 Surround Sound Related 85 // 13 0xAC52 Surround Sound Related 86 // 14 0xAC5C Surround Sound Related 87 // 0 0xBFFF Frame Counter Application 88 // 89 // #: This refers to the order in which they appear in the DspPipe::Audio DSP pipe. 90 // See also: HLE::PipeRead. 91 // 92 // Note that the above addresses do vary slightly between audio firmwares observed; the addresses 93 // are not fixed in stone. The addresses above are only an examplar; they're what this 94 // implementation does and provides to applications. 95 // 96 // Application requests the DSP service to convert DSP addresses into ARM11 virtual addresses using 97 // the ConvertProcessAddressFromDspDram service call. 
Applications seem to derive the addresses for 98 // the second region via: 99 // second_region_dsp_addr = first_region_dsp_addr | 0x10000 100 // 101 // Applications maintain most of its own audio state, the memory region is used mainly for 102 // communication and not storage of state. 103 // 104 // In the documentation below, filter and effect transfer functions are specified in the z domain. 105 // (If you are more familiar with the Laplace transform, z = exp(sT). The z domain is the digital 106 // frequency domain, just like how the s domain is the analog frequency domain.) 107 108 #define INSERT_PADDING_DSPWORDS(num_words) INSERT_PADDING_BYTES(2 * (num_words)) 109 110 #define ASSERT_DSP_STRUCT(name, size) \ 111 static_assert(std::is_standard_layout<name>::value, \ 112 "DSP structure " #name " doesn't use standard layout"); \ 113 static_assert(std::is_trivially_copyable<name>::value, \ 114 "DSP structure " #name " isn't trivially copyable"); \ 115 static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name) 116 117 struct SourceConfiguration { 118 struct Configuration { 119 /// These dirty flags are set by the application when it updates the fields in this struct. 120 /// The DSP clears these each audio frame. 121 union { 122 u32_le dirty_raw; 123 124 BitField<0, 1, u32> format_dirty; 125 BitField<1, 1, u32> mono_or_stereo_dirty; 126 BitField<2, 1, u32> adpcm_coefficients_dirty; 127 /// Tends to be set when a looped buffer is queued. 128 BitField<3, 1, u32> partial_embedded_buffer_dirty; 129 BitField<4, 1, u32> partial_reset_flag; 130 131 BitField<16, 1, u32> enable_dirty; 132 BitField<17, 1, u32> interpolation_dirty; 133 BitField<18, 1, u32> rate_multiplier_dirty; 134 BitField<19, 1, u32> buffer_queue_dirty; 135 BitField<20, 1, u32> loop_related_dirty; 136 /// Tends to also be set when embedded buffer is updated. 
137 BitField<21, 1, u32> play_position_dirty; 138 BitField<22, 1, u32> filters_enabled_dirty; 139 BitField<23, 1, u32> simple_filter_dirty; 140 BitField<24, 1, u32> biquad_filter_dirty; 141 BitField<25, 1, u32> gain_0_dirty; 142 BitField<26, 1, u32> gain_1_dirty; 143 BitField<27, 1, u32> gain_2_dirty; 144 BitField<28, 1, u32> sync_dirty; 145 BitField<29, 1, u32> reset_flag; 146 BitField<30, 1, u32> embedded_buffer_dirty; 147 }; 148 149 // Gain control 150 151 /** 152 * Gain is between 0.0-1.0. This determines how much will this source appear on each of the 153 * 12 channels that feed into the intermediate mixers. Each of the three intermediate mixers 154 * is fed two left and two right channels. 155 */ 156 float_le gain[3][4]; 157 158 // Interpolation 159 160 /// Multiplier for sample rate. Resampling occurs with the selected interpolation method. 161 float_le rate_multiplier; 162 163 enum class InterpolationMode : u8 { 164 Polyphase = 0, 165 Linear = 1, 166 None = 2, 167 }; 168 169 InterpolationMode interpolation_mode; 170 INSERT_PADDING_BYTES(1); ///< Interpolation related 171 172 // Filters 173 174 /** 175 * This is the simplest normalized first-order digital recursive filter. 176 * The transfer function of this filter is: 177 * H(z) = b0 / (1 - a1 z^-1) 178 * Note the feedbackward coefficient is negated. 179 * Values are signed fixed point with 15 fractional bits. 180 */ 181 struct SimpleFilter { 182 s16_le b0; 183 s16_le a1; 184 }; 185 186 /** 187 * This is a normalised biquad filter (second-order). 188 * The transfer function of this filter is: 189 * H(z) = (b0 + b1 z^-1 + b2 z^-2) / (1 - a1 z^-1 - a2 z^-2) 190 * Nintendo chose to negate the feedbackward coefficients. This differs from standard 191 * notation as in: https://ccrma.stanford.edu/~jos/filters/Direct_Form_I.html 192 * Values are signed fixed point with 14 fractional bits. 
193 */ 194 struct BiquadFilter { 195 s16_le a2; 196 s16_le a1; 197 s16_le b2; 198 s16_le b1; 199 s16_le b0; 200 }; 201 202 union { 203 u16_le filters_enabled; 204 BitField<0, 1, u16> simple_filter_enabled; 205 BitField<1, 1, u16> biquad_filter_enabled; 206 }; 207 208 SimpleFilter simple_filter; 209 BiquadFilter biquad_filter; 210 211 // Buffer Queue 212 213 /// A buffer of audio data from the application, along with metadata about it. 214 struct Buffer { 215 /// Physical memory address of the start of the buffer 216 u32_dsp physical_address; 217 218 /// This is length in terms of samples. 219 /// Note that in different buffer formats a sample takes up different number of bytes. 220 u32_dsp length; 221 222 /// ADPCM Predictor (4 bits) and Scale (4 bits) 223 union { 224 u16_le adpcm_ps; 225 BitField<0, 4, u16> adpcm_scale; 226 BitField<4, 4, u16> adpcm_predictor; 227 }; 228 229 /// ADPCM Historical Samples (y[n-1] and y[n-2]) 230 u16_le adpcm_yn[2]; 231 232 /// This is non-zero when the ADPCM values above are to be updated. 233 u8 adpcm_dirty; 234 235 /// Is a looping buffer. 236 u8 is_looping; 237 238 /// This value is shown in SourceStatus::previous_buffer_id when this buffer has 239 /// finished. This allows the emulated application to tell what buffer is currently 240 /// playing. 241 u16_le buffer_id; 242 243 INSERT_PADDING_DSPWORDS(1); 244 }; 245 246 u16_le buffers_dirty; ///< Bitmap indicating which buffers are dirty (bit i -> buffers[i]) 247 Buffer buffers[4]; ///< Queued Buffers 248 249 // Playback controls 250 251 u32_dsp loop_related; 252 u8 enable; 253 INSERT_PADDING_BYTES(1); 254 u16_le sync; ///< Application-side sync (See also: SourceStatus::sync) 255 u32_dsp play_position; ///< Position. (Units: number of samples) 256 INSERT_PADDING_DSPWORDS(2); 257 258 // Embedded Buffer 259 // This buffer is often the first buffer to be used when initiating audio playback, 260 // after which the buffer queue is used. 
261 262 u32_dsp physical_address; 263 264 /// This is length in terms of samples. 265 /// Note a sample takes up different number of bytes in different buffer formats. 266 u32_dsp length; 267 268 enum class MonoOrStereo : u16_le { 269 Mono = 1, 270 Stereo = 2, 271 }; 272 273 enum class Format : u16_le { 274 PCM8 = 0, 275 PCM16 = 1, 276 ADPCM = 2, 277 }; 278 279 union { 280 u16_le flags1_raw; 281 BitField<0, 2, MonoOrStereo> mono_or_stereo; 282 BitField<2, 2, Format> format; 283 BitField<5, 1, u16> fade_in; 284 }; 285 286 /// ADPCM Predictor (4 bit) and Scale (4 bit) 287 union { 288 u16_le adpcm_ps; 289 BitField<0, 4, u16> adpcm_scale; 290 BitField<4, 4, u16> adpcm_predictor; 291 }; 292 293 /// ADPCM Historical Samples (y[n-1] and y[n-2]) 294 u16_le adpcm_yn[2]; 295 296 union { 297 u16_le flags2_raw; 298 BitField<0, 1, u16> adpcm_dirty; ///< Has the ADPCM info above been changed? 299 BitField<1, 1, u16> is_looping; ///< Is this a looping buffer? 300 }; 301 302 /// Buffer id of embedded buffer (used as a buffer id in SourceStatus to reference this 303 /// buffer). 304 u16_le buffer_id; 305 }; 306 307 Configuration config[num_sources]; 308 }; 309 ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192); 310 ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); 311 312 struct SourceStatus { 313 struct Status { 314 u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) 
315 u8 current_buffer_id_dirty; ///< Non-zero when current_buffer_id changes 316 u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync 317 u32_dsp buffer_position; ///< Number of samples into the current buffer 318 u16_le current_buffer_id; ///< Updated when a buffer finishes playing 319 INSERT_PADDING_DSPWORDS(1); 320 }; 321 322 Status status[num_sources]; 323 }; 324 ASSERT_DSP_STRUCT(SourceStatus::Status, 12); 325 326 struct DspConfiguration { 327 /// These dirty flags are set by the application when it updates the fields in this struct. 328 /// The DSP clears these each audio frame. 329 union { 330 u32_le dirty_raw; 331 332 BitField<8, 1, u32> mixer1_enabled_dirty; 333 BitField<9, 1, u32> mixer2_enabled_dirty; 334 BitField<10, 1, u32> delay_effect_0_dirty; 335 BitField<11, 1, u32> delay_effect_1_dirty; 336 BitField<12, 1, u32> reverb_effect_0_dirty; 337 BitField<13, 1, u32> reverb_effect_1_dirty; 338 339 BitField<16, 1, u32> volume_0_dirty; 340 341 BitField<24, 1, u32> volume_1_dirty; 342 BitField<25, 1, u32> volume_2_dirty; 343 BitField<26, 1, u32> output_format_dirty; 344 BitField<27, 1, u32> limiter_enabled_dirty; 345 BitField<28, 1, u32> headphones_connected_dirty; 346 }; 347 348 /// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for 349 /// each at the final mixer. 350 float_le volume[3]; 351 352 INSERT_PADDING_DSPWORDS(3); 353 354 enum class OutputFormat : u16_le { 355 Mono = 0, 356 Stereo = 1, 357 Surround = 2, 358 }; 359 360 OutputFormat output_format; 361 362 u16_le limiter_enabled; ///< Not sure of the exact gain equation for the limiter. 363 u16_le headphones_connected; ///< Application updates the DSP on headphone status. 364 INSERT_PADDING_DSPWORDS(4); ///< TODO: Surround sound related 365 INSERT_PADDING_DSPWORDS(2); ///< TODO: Intermediate mixer 1/2 related 366 u16_le mixer1_enabled; 367 u16_le mixer2_enabled; 368 369 /** 370 * This is delay with feedback. 
371 * Transfer function: 372 * H(z) = a z^-N / (1 - b z^-1 + a g z^-N) 373 * where 374 * N = frame_count * samples_per_frame 375 * g, a and b are fixed point with 7 fractional bits 376 */ 377 struct DelayEffect { 378 /// These dirty flags are set by the application when it updates the fields in this struct. 379 /// The DSP clears these each audio frame. 380 union { 381 u16_le dirty_raw; 382 BitField<0, 1, u16> enable_dirty; 383 BitField<1, 1, u16> work_buffer_address_dirty; 384 BitField<2, 1, u16> other_dirty; ///< Set when anything else has been changed 385 }; 386 387 u16_le enable; 388 INSERT_PADDING_DSPWORDS(1); 389 u16_le outputs; 390 /// The application allocates a block of memory for the DSP to use as a work buffer. 391 u32_dsp work_buffer_address; 392 /// Frames to delay by 393 u16_le frame_count; 394 395 // Coefficients 396 s16_le g; ///< Fixed point with 7 fractional bits 397 s16_le a; ///< Fixed point with 7 fractional bits 398 s16_le b; ///< Fixed point with 7 fractional bits 399 }; 400 401 DelayEffect delay_effect[2]; 402 403 struct ReverbEffect { 404 INSERT_PADDING_DSPWORDS(26); ///< TODO 405 }; 406 407 ReverbEffect reverb_effect[2]; 408 409 INSERT_PADDING_DSPWORDS(4); 410 }; 411 ASSERT_DSP_STRUCT(DspConfiguration, 196); 412 ASSERT_DSP_STRUCT(DspConfiguration::DelayEffect, 20); 413 ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52); 414 415 struct AdpcmCoefficients { 416 /// Coefficients are signed fixed point with 11 fractional bits. 417 /// Each source has 16 coefficients associated with it. 418 s16_le coeff[num_sources][16]; 419 }; 420 ASSERT_DSP_STRUCT(AdpcmCoefficients, 768); 421 422 struct DspStatus { 423 u16_le unknown; 424 u16_le dropped_frames; 425 INSERT_PADDING_DSPWORDS(0xE); 426 }; 427 ASSERT_DSP_STRUCT(DspStatus, 32); 428 429 /// Final mixed output in PCM16 stereo format, what you hear out of the speakers. 430 /// When the application writes to this region it has no effect. 
431 struct FinalMixSamples { 432 s16_le pcm16[samples_per_frame][2]; 433 }; 434 ASSERT_DSP_STRUCT(FinalMixSamples, 640); 435 436 /// DSP writes output of intermediate mixers 1 and 2 here. 437 /// Writes to this region by the application edits the output of the intermediate mixers. 438 /// This seems to be intended to allow the application to do custom effects on the ARM11. 439 /// Values that exceed s16 range will be clipped by the DSP after further processing. 440 struct IntermediateMixSamples { 441 struct Samples { 442 s32_le pcm32[4][samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian. 443 }; 444 445 Samples mix1; 446 Samples mix2; 447 }; 448 ASSERT_DSP_STRUCT(IntermediateMixSamples, 5120); 449 450 /// Compressor table 451 struct Compressor { 452 INSERT_PADDING_DSPWORDS(0xD20); ///< TODO 453 }; 454 455 /// There is no easy way to implement this in a HLE implementation. 456 struct DspDebug { 457 INSERT_PADDING_DSPWORDS(0x130); 458 }; 459 ASSERT_DSP_STRUCT(DspDebug, 0x260); 460 461 struct SharedMemory { 462 /// Padding 463 INSERT_PADDING_DSPWORDS(0x400); 464 465 DspStatus dsp_status; 466 467 DspDebug dsp_debug; 468 469 FinalMixSamples final_samples; 470 471 SourceStatus source_statuses; 472 473 Compressor compressor; 474 475 DspConfiguration dsp_configuration; 476 477 IntermediateMixSamples intermediate_mix_samples; 478 479 SourceConfiguration source_configurations; 480 481 AdpcmCoefficients adpcm_coefficients; 482 483 struct { 484 INSERT_PADDING_DSPWORDS(0x100); 485 } unknown10; 486 487 struct { 488 INSERT_PADDING_DSPWORDS(0xC0); 489 } unknown11; 490 491 struct { 492 INSERT_PADDING_DSPWORDS(0x180); 493 } unknown12; 494 495 struct { 496 INSERT_PADDING_DSPWORDS(0xA); 497 } unknown13; 498 499 struct { 500 INSERT_PADDING_DSPWORDS(0x13A3); 501 } unknown14; 502 503 u16_le frame_counter; 504 }; 505 ASSERT_DSP_STRUCT(SharedMemory, 0x8000); 506 507 union DspMemory { 508 std::array<u8, 0x80000> raw_memory{}; 509 struct { 510 u8 unused_0[0x50000]; 511 
SharedMemory region_0; 512 u8 unused_1[0x18000]; 513 SharedMemory region_1; 514 u8 unused_2[0x8000]; 515 }; 516 }; 517 static_assert(offsetof(DspMemory, region_0) == region0_offset, 518 "DSP region 0 is at the wrong offset"); 519 static_assert(offsetof(DspMemory, region_1) == region1_offset, 520 "DSP region 1 is at the wrong offset"); 521 522 // Structures must have an offset that is a multiple of two. 523 static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, 524 "Structures in HLE::SharedMemory must be 2-byte aligned"); 525 static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, 526 "Structures in HLE::SharedMemory must be 2-byte aligned"); 527 static_assert(offsetof(SharedMemory, source_statuses) % 2 == 0, 528 "Structures in HLE::SharedMemory must be 2-byte aligned"); 529 static_assert(offsetof(SharedMemory, adpcm_coefficients) % 2 == 0, 530 "Structures in HLE::SharedMemory must be 2-byte aligned"); 531 static_assert(offsetof(SharedMemory, dsp_configuration) % 2 == 0, 532 "Structures in HLE::SharedMemory must be 2-byte aligned"); 533 static_assert(offsetof(SharedMemory, dsp_status) % 2 == 0, 534 "Structures in HLE::SharedMemory must be 2-byte aligned"); 535 static_assert(offsetof(SharedMemory, final_samples) % 2 == 0, 536 "Structures in HLE::SharedMemory must be 2-byte aligned"); 537 static_assert(offsetof(SharedMemory, intermediate_mix_samples) % 2 == 0, 538 "Structures in HLE::SharedMemory must be 2-byte aligned"); 539 static_assert(offsetof(SharedMemory, compressor) % 2 == 0, 540 "Structures in HLE::SharedMemory must be 2-byte aligned"); 541 static_assert(offsetof(SharedMemory, dsp_debug) % 2 == 0, 542 "Structures in HLE::SharedMemory must be 2-byte aligned"); 543 static_assert(offsetof(SharedMemory, unknown10) % 2 == 0, 544 "Structures in HLE::SharedMemory must be 2-byte aligned"); 545 static_assert(offsetof(SharedMemory, unknown11) % 2 == 0, 546 "Structures in HLE::SharedMemory must be 2-byte aligned"); 547 
static_assert(offsetof(SharedMemory, unknown12) % 2 == 0,
              "Structures in HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, unknown13) % 2 == 0,
              "Structures in HLE::SharedMemory must be 2-byte aligned");
static_assert(offsetof(SharedMemory, unknown14) % 2 == 0,
              "Structures in HLE::SharedMemory must be 2-byte aligned");

// The helper macros are only meant for the structure definitions in this header.
#undef INSERT_PADDING_DSPWORDS
#undef ASSERT_DSP_STRUCT

} // namespace AudioCore::HLE