1 /* Sound_extensions.cpp
2 *
3 * Copyright (C) 1993-2021 David Weenink, 2017 Paul Boersma
4 *
5 * This code is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or (at
8 * your option) any later version.
9 *
10 * This code is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this work. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "Formula.h"
20 #include "Intensity_extensions.h"
21 #include "Sound_extensions.h"
22 #include "Sound_and_Spectrum.h"
23 #include "Spectrum_extensions.h"
24 #include "Sound_and_Spectrogram.h"
25 #include "Spectrogram_extensions.h"
26 #include "Sound_to_Intensity.h"
27 #include "Sound_to_Pitch.h"
28 #include "Vector.h"
29 #include "Pitch_extensions.h"
30 #include "Pitch_to_PitchTier.h"
31 #include "Pitch_to_PointProcess.h"
32 #include "Polygon_extensions.h"
33 #include "TextGrid_extensions.h"
34 #include "DurationTier.h"
35 #include "Ltas.h"
36 #include "Manipulation.h"
37 #include "NUMcomplex.h"
38 #include "../external/vorbis/vorbis_codec.h"
39 #include "../external/vorbis/vorbisfile.h"
40 #include "../external/opusfile/opusfile.h"
41
42 #include "enums_getText.h"
43 #include "Sound_extensions_enums.h"
44 #include "enums_getValue.h"
45 #include "Sound_extensions_enums.h"
46
47 #define MAX_T 0.02000000001 /* Maximum interval between two voice pulses (otherwise voiceless). */
48
PitchTier_modifyExcursionRange(PitchTier me,double tmin,double tmax,double multiplier,double fref_Hz)49 static void PitchTier_modifyExcursionRange (PitchTier me, double tmin, double tmax, double multiplier, double fref_Hz) {
50 if (fref_Hz <= 0)
51 return;
52 const double fref_st = 12.0 * log (fref_Hz / 100.0) / NUMln2;
53 for (integer i = 1; i <= my points.size; i ++) {
54 const RealPoint point = my points.at [i];
55 const double f = point -> value;
56 if (point -> number < tmin || point -> number > tmax)
57 continue;
58 if (f > 0.0) {
59 const double f_st = fref_st + 12.0 * log2 (f / fref_Hz) * multiplier;
60 point -> value = 100.0 * exp (f_st * (NUMln2 / 12.0));
61 }
62 }
63 }
64
Pitch_scaleDuration(Pitch me,double multiplier)65 static void Pitch_scaleDuration (Pitch me, double multiplier) {
66 if (multiplier != 1.0) {
67 // keep xmin at the same value
68 my dx *= multiplier;
69 my x1 = my xmin + (my x1 - my xmin) * multiplier;
70 my xmax = my xmin + (my xmax - my xmin) * multiplier;
71 }
72 }
73
Pitch_scalePitch(Pitch me,double multiplier)74 static void Pitch_scalePitch (Pitch me, double multiplier) {
75 for (integer i = 1; i <= my nx; i ++) {
76 double f = my frames [i].candidates [1].frequency;
77 f *= multiplier;
78 if (f < my ceiling)
79 my frames [i].candidates [1].frequency = f;
80 }
81 }
82
i1write(Sound me,FILE * f,integer * nClip)83 static void i1write (Sound me, FILE *f, integer *nClip) {
84 const double min = -128.0, max = 127.0;
85 *nClip = 0;
86 for (integer i = 1; i <= my nx; i ++) {
87 double sample = round (my z [1] [i] * 128.0);
88 if (sample > max) {
89 sample = max;
90 (*nClip) ++;
91 } else if (sample < min) {
92 sample = min;
93 (*nClip) ++;
94 }
95 binputi8 ((int) sample, f);
96 }
97 }
98
i1read(Sound me,FILE * f)99 static void i1read (Sound me, FILE *f) {
100 for (integer i = 1; i <= my nx; i ++)
101 my z [1] [i] = bingeti8 (f) / 128.0;
102 }
103
u1write(Sound me,FILE * f,integer * nClip)104 static void u1write (Sound me, FILE *f, integer *nClip) {
105 const double min = 0.0, max = 255.0;
106 *nClip = 0;
107 for (integer i = 1; i <= my nx; i ++) {
108 double sample = round ((my z [1] [i] + 1.0) * 255.0 / 2.0);
109 if (sample > max) {
110 sample = max;
111 (*nClip) ++;
112 } else if (sample < min) {
113 sample = min;
114 (*nClip) ++;
115 }
116 binputu8 ((unsigned int) sample, f);
117 }
118 }
119
u1read(Sound me,FILE * f)120 static void u1read (Sound me, FILE *f) {
121 for (integer i = 1; i <= my nx; i ++)
122 my z [1] [i] = bingetu8 (f) / 128.0 - 1.0;
123 }
124
i2write(Sound me,FILE * f,bool littleEndian,integer * nClip)125 static void i2write (Sound me, FILE *f, bool littleEndian, integer *nClip) {
126 const double min = -32768.0, max = 32767.0;
127 void (*put) (int16, FILE *) = littleEndian ? binputi16LE : binputi16;
128 *nClip = 0;
129 for (integer i = 1; i <= my nx; i ++) {
130 double sample = round (my z [1] [i] * 32768.0);
131 if (sample > max) {
132 sample = max;
133 (*nClip) ++;
134 } else if (sample < min) {
135 sample = min;
136 (*nClip) ++;
137 }
138 put ((int16) sample, f);
139 }
140 }
141
i2read(Sound me,FILE * f,bool littleEndian)142 static void i2read (Sound me, FILE *f, bool littleEndian) {
143 int16 (*get) (FILE *) = littleEndian ? bingeti16LE : bingeti16;
144 for (integer i = 1; i <= my nx; i ++)
145 my z [1] [i] = get (f) / 32768.0;
146 }
147
u2write(Sound me,FILE * f,bool littleEndian,integer * nClip)148 static void u2write (Sound me, FILE *f, bool littleEndian, integer *nClip) {
149 const double min = 0.0, max = 65535.0;
150 void (*put) (uint16, FILE *) = littleEndian ? binputu16LE : binputu16;
151 *nClip = 0;
152 for (integer i = 1; i <= my nx; i ++) {
153 double sample = round ((my z [1] [i] + 1.0) * 65535.0 / 2.0);
154 if (sample > max) {
155 sample = max;
156 (*nClip) ++;
157 } else if (sample < min) {
158 sample = min;
159 (*nClip) ++;
160 }
161 put ((uint16) sample, f);
162 }
163 }
164
u2read(Sound me,FILE * f,bool littleEndian)165 static void u2read (Sound me, FILE *f, bool littleEndian) {
166 uint16 (*get) (FILE *) = littleEndian ? bingetu16LE : bingetu16;
167 for (integer i = 1; i <= my nx; i ++)
168 my z [1] [i] = get (f) / 32768.0 - 1.0;
169 }
170
i4write(Sound me,FILE * f,bool littleEndian,integer * nClip)171 static void i4write (Sound me, FILE *f, bool littleEndian, integer *nClip) {
172 const double min = -2147483648.0, max = 2147483647.0;
173 void (*put) (int32, FILE *) = littleEndian ? binputi32LE : binputi32;
174 *nClip = 0;
175 for (integer i = 1; i <= my nx; i ++) {
176 double sample = round (my z [1] [i] * 2147483648.0);
177 if (sample > max) {
178 sample = max;
179 (*nClip) ++;
180 } else if (sample < min) {
181 sample = min;
182 (*nClip) ++;
183 }
184 put ((int32) sample, f);
185 }
186 }
187
i4read(Sound me,FILE * f,bool littleEndian)188 static void i4read (Sound me, FILE *f, bool littleEndian) {
189 int32 (*get) (FILE *) = littleEndian ? bingeti32LE : bingeti32;
190 for (integer i = 1; i <= my nx; i ++)
191 my z [1] [i] = get (f) / 2147483648.0;
192 }
193
194
u4write(Sound me,FILE * f,bool littleEndian,integer * nClip)195 static void u4write (Sound me, FILE *f, bool littleEndian, integer *nClip) {
196 const double min = 0.0, max = 4294967295.0;
197 void (*put) (uint32, FILE *) = littleEndian ? binputu32LE : binputu32;
198 *nClip = 0;
199 for (integer i = 1; i <= my nx; i ++) {
200 double sample = Melder_round_tieUp (my z [1] [i] * 4294967295.0);
201 if (sample > max) {
202 sample = max;
203 (*nClip) ++;
204 } else if (sample < min) {
205 sample = min;
206 (*nClip) ++;
207 }
208 put ((uint32) sample, f);
209 }
210 }
211
u4read(Sound me,FILE * f,bool littleEndian)212 static void u4read (Sound me, FILE *f, bool littleEndian) {
213 int32 (*get) (FILE *) = littleEndian ? bingeti32LE : bingeti32;
214 for (integer i = 1; i <= my nx; i ++)
215 my z [1] [i] = get (f) / 2147483648.0 - 1.0;
216 }
217
218
r4write(Sound me,FILE * f)219 static void r4write (Sound me, FILE *f) {
220 for (integer i = 1; i <= my nx; i ++)
221 binputr32 (my z [1] [i], f);
222 }
223
r4read(Sound me,FILE * f)224 static void r4read (Sound me, FILE *f) {
225 for (integer i = 1; i <= my nx; i ++)
226 my z [1] [i] = bingetr32 (f);
227 }
228
229 /* Old TIMIT sound-file format */
Sound_readFromCmuAudioFile(MelderFile file)230 autoSound Sound_readFromCmuAudioFile (MelderFile file) {
231 try {
232 constexpr bool littleEndian = true;
233 autofile f = Melder_fopen (file, "rb");
234 Melder_require (bingeti16LE (f) == 6,
235 U"Incorrect header size.");
236
237 bingeti16LE (f);
238 const short nChannels = bingeti16LE (f);
239 Melder_require (nChannels == 1,
240 U"Incorrect number of channels.");
241 Melder_require (bingeti16LE (f) > 0,
242 U"Incorrect sampling frequency.");
243
244 const integer nSamples = bingeti32LE (f);
245 Melder_require (nSamples > 0,
246 U"Incorrect number of samples.");
247
248 autoSound me = Sound_createSimple (1, nSamples / 16000.0, 16000);
249 i2read (me.get(), f, littleEndian);
250 f.close (file);
251 return me;
252 } catch (MelderError) {
253 Melder_throw (U"Sound not read from CMU audio file ", MelderFile_messageName (file), U".");
254 }
255 }
256
Sound_readFromRawFile(MelderFile file,const char * format,int nBitsCoding,bool littleEndian,bool unSigned,integer skipNBytes,double samplingFrequency)257 autoSound Sound_readFromRawFile (MelderFile file, const char *format, int nBitsCoding, bool littleEndian, bool unSigned, integer skipNBytes, double samplingFrequency) {
258 try {
259 autofile f = Melder_fopen (file, "rb");
260 if (! format)
261 format = "integer";
262 if (nBitsCoding <= 0)
263 nBitsCoding = 16;
264 integer nBytesPerSample = (nBitsCoding + 7) / 8;
265 if (strequ (format, "float"))
266 nBytesPerSample = 4;
267 Melder_require (! (nBytesPerSample == 3),
268 U"Number of bytes per sample should be 1, 2 or 4.");
269
270 if (skipNBytes <= 0)
271 skipNBytes = 0;
272 const integer nSamples = (MelderFile_length (file) - skipNBytes) / nBytesPerSample;
273 Melder_require (nSamples > 0,
274 U"No samples left to read.");
275
276 autoSound me = Sound_createSimple (1, nSamples / samplingFrequency, samplingFrequency);
277 fseek (f, skipNBytes, SEEK_SET);
278 if (nBytesPerSample == 1 && unSigned)
279 u1read (me.get(), f);
280 else if (nBytesPerSample == 1 && ! unSigned)
281 i1read (me.get(), f);
282 else if (nBytesPerSample == 2 && unSigned)
283 u2read (me.get(), f, littleEndian);
284 else if (nBytesPerSample == 2 && ! unSigned)
285 i2read (me.get(), f, littleEndian);
286 else if (nBytesPerSample == 4 && unSigned)
287 u4read (me.get(), f, littleEndian);
288 else if (nBytesPerSample == 4 && ! unSigned)
289 i4read (me.get(), f, littleEndian);
290 else if (nBytesPerSample == 4 && strequ (format, "float"))
291 r4read (me.get(), f);
292 f.close (file);
293 return me;
294 } catch (MelderError) {
295 Melder_throw (U"Sound not read from raw audio file ", MelderFile_messageName (file), U".");
296 }
297 }
298
Sound_writeToRawFile(Sound me,MelderFile file,const char * format,bool littleEndian,int nBitsCoding,bool unSigned)299 void Sound_writeToRawFile (Sound me, MelderFile file, const char *format, bool littleEndian, int nBitsCoding, bool unSigned) {
300 try {
301 integer nClip = 0;
302 autofile f = Melder_fopen (file, "wb");
303 if (! format)
304 format = "integer";
305 if (nBitsCoding <= 0)
306 nBitsCoding = 16;
307 integer nBytesPerSample = (nBitsCoding + 7) / 8;
308 if (strequ (format, "float"))
309 nBytesPerSample = 4;
310 Melder_require (! (nBytesPerSample == 3),
311 U"number of bytes per sample should be 1, 2 or 4.");
312
313 if (nBytesPerSample == 1 && unSigned)
314 u1write (me, f, & nClip);
315 else if (nBytesPerSample == 1 && ! unSigned)
316 i1write (me, f, & nClip);
317 else if (nBytesPerSample == 2 && unSigned)
318 u2write (me, f, littleEndian, & nClip);
319 else if (nBytesPerSample == 2 && ! unSigned)
320 i2write (me, f, littleEndian, & nClip);
321 else if (nBytesPerSample == 4 && unSigned)
322 u4write (me, f, littleEndian, & nClip);
323 else if (nBytesPerSample == 4 && ! unSigned)
324 i4write (me, f, littleEndian, & nClip);
325 else if (nBytesPerSample == 4 && strequ (format, "float"))
326 r4write (me, f);
327 if (nClip > 0)
328 Melder_warning (nClip, U" from ", my nx, U" samples have been clipped.\nAdvice: you could scale the amplitudes or save as a binary file.");
329
330 Melder_require (feof ((FILE *) f) == 0 && ferror ((FILE *) f) == 0,
331 U"Sound_writeToRawFile: not completed");
332
333 f.close (file);
334 } catch (MelderError) {
335 Melder_throw (me, U": saving as raw file not performed.");
336 }
337 }
338
339 struct dialogic_adpcm {
340 char code;
341 short last, index;
342 short step_size [49];
343 short adjust [8];
344 };
345
dialogic_adpcm_init(struct dialogic_adpcm * adpcm)346 static void dialogic_adpcm_init (struct dialogic_adpcm *adpcm) {
347 constexpr short step_size [49] = {
348 16, 17, 19, 21, 23, 25, 28, 31, 34, 37,
349 41, 45, 50, 55, 60, 66, 73, 80, 88, 97,
350 107, 118, 130, 143, 157, 173, 190, 209, 230, 253,
351 279, 307, 337, 371, 408, 449, 494, 544, 598, 658,
352 724, 796, 876, 963, 1060, 1166, 1282, 1411, 1552
353 };
354 constexpr short adjust [8] = { -1, -1, -1, -1, 2, 4, 6, 8 };
355
356 adpcm -> last = 0;
357 adpcm -> index = 0;
358 for (integer i = 0; i < 49; i ++)
359 adpcm -> step_size [i] = step_size [i];
360 for (integer i = 0; i < 8; i ++)
361 adpcm -> adjust [i] = adjust [i];
362 }
363
364 /*
365 The code is adapted from:
366 Bob Edgar (), "PC Telephony - The complete guide to designing,
367 building and programming systems using Dialogic and Related
368 Hardware", 272-276.
369 */
dialogic_adpcm_decode(struct dialogic_adpcm * adpcm)370 static float dialogic_adpcm_decode (struct dialogic_adpcm *adpcm) {
371 constexpr float scale = 32767.0 / 32768.0 / 2048.0;
372 /*
373 nibble = B3 B2 B1 B0 (4 lower bits)
374 d(n) = ss(n)*B2 + ss(n)/2 *B1 + ss(n)/4*B0 + ss(n)/8
375 */
376 const short ss = adpcm -> step_size [adpcm -> index];
377 short e = ss / 8;
378 if (adpcm -> code & 0x01)
379 e += ss / 4;
380 if (adpcm -> code & 0x02)
381 e += ss / 2;
382 if (adpcm -> code & 0x04)
383 e += ss;
384 /*
385 If B3==1 then d(n) = -d(n);
386 */
387 const short diff = (adpcm -> code & 0x08) ? -e : e;
388 /*
389 x(n) = x(n-1)+d(n)
390 */
391 short s = adpcm -> last + diff;
392 if (s > 2048)
393 s = 2048;
394 if (s < -2048)
395 s = -2048;
396 adpcm -> last = s;
397 /*
398 ss(n+1) = ss(n) * 1.1*M(L(n)) via lookup table
399 */
400 adpcm -> index += adpcm -> adjust [adpcm -> code & 0x07];
401 if (adpcm -> index < 0)
402 adpcm -> index = 0;
403 if (adpcm -> index > 48)
404 adpcm -> index = 48;
405 return scale * s;
406 }
407
Sound_readFromDialogicADPCMFile(MelderFile file,double sampleRate)408 autoSound Sound_readFromDialogicADPCMFile (MelderFile file, double sampleRate) {
409 try {
410 autofile f = Melder_fopen (file, "rb");
411
412 const integer filelength = MelderFile_length (file);
413
414 Melder_require (filelength > 0,
415 U"File should not be empty.");
416
417 const integer numberOfSamples = 2 * filelength; // Two samples in each byte
418
419 autoSound me = Sound_createSimple (1, numberOfSamples / sampleRate, sampleRate);
420 /*
421 Read all bytes and decode
422 */
423 struct dialogic_adpcm adpcm;
424 dialogic_adpcm_init (& adpcm);
425
426 integer n = 1;
427 for (integer i = 1; i <= filelength; i ++) {
428 unsigned char sc;
429 const integer nread = fread (& sc, 1, 1, f);
430 Melder_require (nread == 1,
431 U"Error: trying to read byte number ", i, U".");
432 adpcm.code = (char) ((sc >> 4) & 0x0f);
433 my z [1] [n ++] = dialogic_adpcm_decode (& adpcm);
434 adpcm.code = (char) (sc & 0x0f);
435 my z [1] [n ++] = dialogic_adpcm_decode (& adpcm);
436 }
437 f.close (file);
438 return me;
439 } catch (MelderError) {
440 Melder_throw (U"Sound not read from Dialogic ADPCM file ", MelderFile_messageName (file), U".");
441 }
442 }
443
444 /*
445 The code in this Ogg Vorbis file reader was modeled after code in vorbisfile.c and vorbis_decoder.c.
446 */
Sound_readFromOggVorbisFile(MelderFile file)447 autoSound Sound_readFromOggVorbisFile (MelderFile file) {
448 try {
449 autofile f = Melder_fopen (file, "rb");
450
451 /*
452 We open the file as a vorbis file as was done in the vorbisfile_example.c in libvorbis-1.3.7/examples
453 and get the data necessary to create the Sound object.
454 Then we rewind the file and use code from decode_example.c because the code in the vorbisfile_example
455 does not read the file completely but skips some samples just after the start of the sound.
456 */
457 OggVorbis_File vorbisFile;
458 if (ov_open_callbacks (f, & vorbisFile, nullptr, 0, OV_CALLBACKS_NOCLOSE) < 0)
459 Melder_throw (U"Input does not appear to be an Ogg Vorbis bitstream.");
460 vorbis_info *vorbInfo = ov_info (& vorbisFile, -1);
461 const integer numberOfChannels = vorbInfo -> channels;
462 const long samplingFrequency_asLong = vorbInfo -> rate;
463 const double samplingFrequency = samplingFrequency_asLong;
464 const double samplingTime = 1.0 / samplingFrequency;
465 const integer numberOfSamples = ov_pcm_total (& vorbisFile, -1);
466 double xmin = 0.0; // the start time in the file can be > 0!!!
467 const double xmax = numberOfSamples * samplingTime;
468 autoSound me = Sound_create (numberOfChannels, xmin, xmax, numberOfSamples, samplingTime, 0.5 * samplingTime);
469
470 ov_clear (& vorbisFile);
471 rewind (f);
472
473 const long readBufferSize = 4096;
474 integer numberOfPCMValuesProcessed = 0;
475
476 ogg_sync_state oggSyncState; // sync and verify incoming physical bitstream
477 ogg_sync_init (& oggSyncState); // Now we can read pages
478
479 integer vorbisChainNumber = 0;
480 while (true) { // we repeat if the bitstream is chained
481 vorbisChainNumber ++;
482 /*
483 Grab some data at the head of the stream. We want the first page
484 (which is guaranteed to be small and only contain the Vorbis
485 stream initial header) We need the first page to get the stream
486 serialno.
487
488 Submit a 4k block to libvorbis' Ogg layer
489 */
490 char *readBuffer = ogg_sync_buffer (& oggSyncState, readBufferSize);
491 size_t bytesRead = fread (readBuffer, 1, readBufferSize, f);
492 ogg_sync_wrote (& oggSyncState, bytesRead);
493
494 /* Get the first page. */
495 ogg_page oggPage; // one Ogg bitstream page. Vorbis packets are inside
496 if (ogg_sync_pageout (& oggSyncState, & oggPage) != 1) {
497 if (bytesRead < readBufferSize) // have we simply run out of data? If so, we're done.
498 break;
499 Melder_throw (U"Input does not appear to be an Ogg Vorbis file.");
500 }
501 /*
502 Get the serial number and set up the rest of decode.
503 serialno first; use it to set up a logical stream
504 */
505 ogg_stream_state oggStream; // take physical pages, weld into a logical stream of packets
506 ogg_stream_init (& oggStream, ogg_page_serialno (& oggPage));
507 /*
508 Eextract the initial header from the first page and verify that the
509 Ogg bitstream is in fact Vorbis data
510
511 I handle the initial header first instead of just having the code
512 read all three Vorbis headers at once because reading the initial
513 header is an easy way to identify a Vorbis bitstream and it's
514 useful to see that functionality seperated out.
515 */
516 vorbis_info vorbisInfo; // struct that stores all the static vorbis bitstream settings
517 vorbis_info_init (& vorbisInfo);
518 vorbis_comment vorbisComment; // struct that stores all the bitstream user comments
519 vorbis_comment_init (& vorbisComment);
520 if (ogg_stream_pagein (& oggStream, & oggPage) < 0)
521 Melder_throw (U"Error reading first page of Ogg Vorbis bitstream data.");
522 ogg_packet oggPacket; // one raw packet of data for decode
523 if (ogg_stream_packetout (& oggStream, & oggPacket) != 1)
524 Melder_throw (U"Error reading initial header packet.");
525 if (vorbis_synthesis_headerin (& vorbisInfo, & vorbisComment, & oggPacket) < 0)
526 Melder_throw (U"This Ogg bitstream does not contain Vorbis audio data.");
527 /*
528 At this point, we're sure we're Vorbis. We've set up the logical
529 (Ogg) bitstream decoder. Get the comment and codebook headers and
530 set up the Vorbis decoder
531
532 The next two packets in order are the comment and codebook headers.
533 They're likely large and may span multiple pages. Thus we read
534 and submit data until we get our two packets, watching that no
535 pages are missing. If a page is missing, error out; losing a
536 header page is the only place where missing data is fatal.
537 */
538 integer i = 0;
539 bool eos = false;
540 while (i < 2) {
541 while(i < 2) {
542 int result = ogg_sync_pageout (& oggSyncState, & oggPage);
543 if (result == 0)
544 break; // Need more data
545 /*
546 Don't complain about missing or corrupt data yet.
547 We'll catch it at the packet output phase
548 */
549 if (result == 1) {
550 ogg_stream_pagein (& oggStream, & oggPage);
551 // we can ignore any errors here as they'll also become apparent at packetout
552 while (i < 2) {
553 result = ogg_stream_packetout (& oggStream, & oggPacket);
554 if (result == 0)
555 break;
556 if (result < 0)
557 Melder_throw (U"Corrupt secondary header.");
558 result = vorbis_synthesis_headerin (& vorbisInfo, & vorbisComment, & oggPacket);
559 if (result < 0)
560 Melder_throw (U"Corrupt secondary header.");
561 i ++;
562 }
563 }
564 }
565 readBuffer = ogg_sync_buffer (& oggSyncState, readBufferSize);
566 bytesRead = fread (readBuffer, 1, readBufferSize, f);
567 if (bytesRead == 0 && i < 2)
568 Melder_throw (U"End of file before finding all Vorbis headers");
569 ogg_sync_wrote (& oggSyncState, bytesRead);
570 }
571 Melder_require (vorbisInfo.channels == numberOfChannels,
572 U"The number of channels in all chains should be equal. It changed from ", numberOfChannels, U" to ",
573 vorbisInfo.channels, U" in chain ", vorbisChainNumber, U".");
574 Melder_require (samplingFrequency_asLong == vorbisInfo.rate,
575 U"The sampling frequency in all chains should be equal. It changed from ", samplingFrequency_asLong, U" to ",
576 vorbisInfo.rate, U" in chain ", vorbisChainNumber, U".");
577 /*
578 Parsed all three headers. Initialize the Vorbis packet->PCM decoder.
579 */
580 vorbis_dsp_state vorbisDspState; // central working state for the packet->PCM decoder
581 vorbis_block vorbisBlock; // local working space for packet->PCM decode
582 if (vorbis_synthesis_init (& vorbisDspState, & vorbisInfo) == 0) { // central decode state
583 vorbis_block_init (& vorbisDspState, & vorbisBlock);
584 /*
585 local state for most of the decode so multiple block decodes can proceed in parallel.
586 We could init multiple vorbis_block structures for vorbisDspState here
587
588 The rest is just a straight decode loop until end of stream
589 */
590 while (! eos) {
591 while (! eos) {
592 int result = ogg_sync_pageout (& oggSyncState, & oggPage);
593 if (result == 0)
594 break; // need more data
595 if (result < 0)
596 Melder_casual (U"Corrupt or missing data in Vorbis bitstream; continuing...");
597 else {
598 ogg_stream_pagein (& oggStream, & oggPage); // can safely ignore errors at this point
599 while (true) {
600 result = ogg_stream_packetout (& oggStream, & oggPacket);
601 if (result == 0)
602 break; // need more data
603 if (result < 0){
604 /*
605 missing or corrupt data at this page position
606 no reason to complain; already complained above
607 */
608 } else {
609 /*
610 we have a packet. Decode it
611 */
612 if (vorbis_synthesis (& vorbisBlock, & oggPacket) == 0)
613 vorbis_synthesis_blockin (& vorbisDspState, & vorbisBlock);
614 float **pcmOutFloats;
615 integer numberOfSamplesDecoded;
616 /*
617 The output from vorbis_synthesis_pcmout is a multichannel float vector. In stereo, for
618 example, pcmOutFloats [0] is the left channel, and pcmOutFloats [1] is the right.
619 The numberOfSamplesDecoded is the size of each channel, where all
620 floats are in the interval [-1.0, 1.0].
621 */
622 while ((numberOfSamplesDecoded = vorbis_synthesis_pcmout (& vorbisDspState, & pcmOutFloats)) > 0) {
623 Melder_require (numberOfPCMValuesProcessed + numberOfSamplesDecoded <= numberOfSamples,
624 U"The number of samples read is too large.");
625 for (integer ichan = 1; ichan <= vorbisInfo.channels; ichan ++){
626 float *oneChannelFloats = pcmOutFloats [ichan - 1];
627 for (integer j = 1; j <= numberOfSamplesDecoded; j ++)
628 my z [ichan] [numberOfPCMValuesProcessed + j] = oneChannelFloats [j - 1];
629 }
630 numberOfPCMValuesProcessed += numberOfSamplesDecoded;
631 /*
632 Tell libvorbis how many samples we actually consumed
633 */
634 vorbis_synthesis_read (& vorbisDspState, numberOfSamplesDecoded);
635 }
636 }
637 }
638 if (ogg_page_eos (& oggPage))
639 eos = true;
640 }
641 }
642 if (! eos) {
643 readBuffer = ogg_sync_buffer (& oggSyncState, readBufferSize);
644 bytesRead = fread (readBuffer, 1, readBufferSize, f);
645 ogg_sync_wrote (& oggSyncState, bytesRead);
646 if (bytesRead == 0)
647 eos = true;
648 }
649 }
650 /*
651 ogg_page and ogg_packet structs always point to storage in
652 libvorbis. They're never freed or manipulated directly
653 */
654 vorbis_block_clear (& vorbisBlock);
655 vorbis_dsp_clear (& vorbisDspState);
656 } else {
657 Melder_throw (U"Corrupt header during playback initialization");
658 }
659 /*
660 Clean up this logical bitstream; before exit we see if we're
661 followed by another [chained]
662 */
663 ogg_stream_clear (& oggStream);
664 vorbis_comment_clear (& vorbisComment);
665 vorbis_info_clear (& vorbisInfo); // must be called last
666 }
667 ogg_sync_clear (& oggSyncState);
668 return me;
669 } catch (MelderError) {
670 Melder_throw (U"Sound not read from Ogg Vorbis file ", MelderFile_messageName (file), U".");
671 }
672 }
673
Sound_readFromOggOpusFile(MelderFile file)674 autoSound Sound_readFromOggOpusFile (MelderFile file) {
675 try {
676 conststring32 path = Melder_fileToPath (file);
677 int error;
678 OggOpusFile *opusFile = op_open_file (Melder_peek32to8 (path), & error);
679 if (error != 0) {
680 if (error == OP_EREAD)
681 Melder_throw (U"Reading error.");
682 else if (error == OP_EFAULT)
683 Melder_throw (U"Memory error.");
684 else if (error == OP_EIMPL)
685 Melder_throw (U"Feature is not implemented.");
686 else if (error == OP_EINVAL)
687 Melder_throw (U"Seek function error.");
688 else if (error == OP_ENOTFORMAT)
689 Melder_throw (U"Link doea not have any logical Opus streams.");
690 else if (error == OP_EBADHEADER)
691 Melder_throw (U"Malformed header.");
692 else if (error == OP_EVERSION)
693 Melder_throw (U"Unrecognised version number.");
694 else if (error == OP_EBADLINK)
695 Melder_throw (U"Failed to find data.");
696 else if (error == OP_EBADTIMESTAMP)
697 Melder_throw (U"invalid time stamp.");
698 }
699 const OpusHead *head = op_head (opusFile, 0);
700 integer samplingFrequency_asLong = head -> input_sample_rate;
701 if (samplingFrequency_asLong == 0)
702 samplingFrequency_asLong = 44100;
703 const integer numberOfChannels = head -> channel_count;
704 const double samplingTime = 1.0 / 48000.0; // fixed decoding rate
705 const integer numberOfSamples = op_pcm_total (opusFile, -1);
706 double xmin = 0.0; // the start time in the file can be > 0!!!
707 const double xmax = numberOfSamples * samplingTime;
708 autoSound me = Sound_create (numberOfChannels, xmin, xmax, numberOfSamples, samplingTime, 0.5 * samplingTime);
709 const integer maximumBufferSize = 5760 * numberOfChannels; // 0.12 s at 48 kHz * numberOfChannels
710 autovector<float> multiChannelFloats = autovector<float> (maximumBufferSize, MelderArray::kInitializationType::RAW);
711 integer numberOfPCMValuesProcessed = 0;
712 int previousLinkIndex = -1;
713 integer opusChainNumber = 0;
714 while (true) {
715 int linkIndex;
716
717 integer numberOfSamplesDecoded = op_read_float (opusFile, multiChannelFloats.asArgumentToFunctionThatExpectsZeroBasedArray(), maximumBufferSize, & linkIndex);
718 if (numberOfSamplesDecoded < 0) {
719 if (numberOfSamplesDecoded == OP_HOLE) {
720 Melder_casual (U"Warning: Hole in data. Some samples may be skipped. "); // should we throw instead?
721 continue;
722 } else
723 Melder_throw (U"Decoding error.");
724 }
725 if (numberOfSamplesDecoded == 0)
726 break; // we're done
727
728 if (linkIndex != previousLinkIndex) {
729 opusChainNumber ++;
730
731 head = op_head (opusFile, linkIndex);
732 Melder_require (head -> channel_count == numberOfChannels,
733 U"The number of channels in all chains should be equal. It changed from ", numberOfChannels, U" to ",
734 head -> channel_count, U" in chain ", opusChainNumber, U".");
735 Melder_require (samplingFrequency_asLong == head -> input_sample_rate,
736 U"The sampling frequency in all chains should be equal. It changed from ", samplingFrequency_asLong, U" to ",
737 head -> input_sample_rate, U" in chain ", opusChainNumber, U".");
738 }
739 previousLinkIndex = linkIndex;
740 Melder_require (numberOfPCMValuesProcessed + numberOfSamplesDecoded <= numberOfSamples,
741 U"The number of samples read is too large.");
742 integer ifloat = 1;
743 for (integer j = 1; j <= numberOfSamplesDecoded; j ++)
744 for (integer ichan = 1; ichan <= numberOfChannels; ichan ++, ifloat ++){
745 my z [ichan] [numberOfPCMValuesProcessed + j] = multiChannelFloats [ifloat];
746 }
747 numberOfPCMValuesProcessed += numberOfSamplesDecoded;
748 }
749 if (samplingFrequency_asLong != 48000)
750 me = Sound_resample (me.get(), samplingFrequency_asLong, 50);
751 return me;
752 } catch (MelderError) {
753 Melder_throw (U"Sound not read from Ogg Opus file ", MelderFile_messageName (file), U".");
754 }
755 }
756
Sound_preEmphasis(Sound me,double preEmphasisFrequency)757 void Sound_preEmphasis (Sound me, double preEmphasisFrequency) {
758 if (preEmphasisFrequency >= 0.5 / my dx)
759 return; // above Nyquist?
760 const double preEmphasis = exp (- NUM2pi * preEmphasisFrequency * my dx);
761 for (integer channel = 1; channel <= my ny; channel ++) {
762 VEC s = my z.row (channel);
763 for (integer i = my nx; i >= 2; i --)
764 s [i] -= preEmphasis * s [i - 1];
765 }
766 }
767
Sound_deEmphasis(Sound me,double deEmphasisFrequency)768 void Sound_deEmphasis (Sound me, double deEmphasisFrequency) {
769 const double deEmphasis = exp (- NUM2pi * deEmphasisFrequency * my dx);
770 for (integer channel = 1; channel <= my ny; channel ++) {
771 VEC s = my z.row (channel);
772 for (integer i = 2; i <= my nx; i ++)
773 s [i] += deEmphasis * s [i - 1];
774 }
775 }
776
Sound_createGaussian(double windowDuration,double samplingFrequency)777 autoSound Sound_createGaussian (double windowDuration, double samplingFrequency) {
778 try {
779 autoSound me = Sound_createSimple (1, windowDuration, samplingFrequency);
780 VEC s = my z.row (1);
781 const double imid = 0.5 * (my nx + 1), edge = exp (-12.0);
782 for (integer i = 1; i <= my nx; i ++)
783 s [i] = (exp (-48.0 * (i - imid) * (i - imid) / (my nx + 1) / (my nx + 1)) - edge) / (1 - edge);
784 return me;
785 } catch (MelderError) {
786 Melder_throw (U"Sound not created from Gaussian function.");
787 }
788 }
789
Sound_createHamming(double windowDuration,double samplingFrequency)790 autoSound Sound_createHamming (double windowDuration, double samplingFrequency) {
791 try {
792 autoSound me = Sound_createSimple (1, windowDuration, samplingFrequency);
793 const double p = NUM2pi / (my nx - 1);
794 for (integer i = 1; i <= my nx; i ++)
795 my z [1] [i] = 0.54 - 0.46 * cos ((i - 1) * p);
796 return me;
797 } catch (MelderError) {
798 Melder_throw (U"Sound not created from Hamming function.");
799 };
800 }
801
Sound_create2(double minimumTime,double maximumTime,double samplingFrequency)802 static autoSound Sound_create2 (double minimumTime, double maximumTime, double samplingFrequency) {
803 return Sound_create (1, minimumTime, maximumTime, Melder_iround ( (maximumTime - minimumTime) * samplingFrequency),
804 1.0 / samplingFrequency, minimumTime + 0.5 / samplingFrequency);
805 }
806
807 /*
808 Trig functions whose arguments form a linear sequence x = x1 + n.dx,
809 for n=0,1,2,... are efficiently calculated by the following recurrence:
810 cos(a+dx) = cos(a) - (alpha . cos(a) + beta . sin(a))
811 sin(a+dx) = sin(a) - (alpha . sin(a) - beta . sin(a))
812 where alpha and beta are precomputed coefficients
813 alpha = 2 sin^2(dx/2) and beta = sin(dx)
814 In this way aplha and beta do not lose significance if the increment
815 dx is small.
816 */
817
Sound_createToneComplex(double minimumTime,double maximumTime,double samplingFrequency,double firstFrequency,integer numberOfComponents,double frequencyDistance,integer mistunedComponent,double mistuningFraction,bool scaleAmplitudes)818 static autoSound Sound_createToneComplex (double minimumTime, double maximumTime, double samplingFrequency, double firstFrequency,
819 integer numberOfComponents, double frequencyDistance, integer mistunedComponent, double mistuningFraction, bool scaleAmplitudes)
820 {
821 try {
822 autoSound me = Sound_create2 (minimumTime, maximumTime, samplingFrequency);
823 for (integer j = 1; j <= numberOfComponents; j ++) {
824 const double fraction = ( j == mistunedComponent ? mistuningFraction : 0.0 );
825 const double w = NUM2pi * (firstFrequency + (j - 1 + fraction) * frequencyDistance);
826 const double delta = w * my dx;
827 const double alpha = 2.0 * sin (delta / 2.0) * sin (delta / 2.0);
828 const double beta = sin (delta);
829 double sint = sin (w * my x1);
830 double cost = cos (w * my x1);
831 my z [1] [1] += sint;
832 for (integer i = 2; i <= my nx; i ++) {
833 const double costd = cost - (alpha * cost + beta * sint);
834 const double sintd = sint - (alpha * sint - beta * cost);
835 my z [1] [i] += sintd;
836 cost = costd;
837 sint = sintd;
838 }
839 }
840 if (scaleAmplitudes)
841 Vector_scale (me.get(), 0.99996948);
842 return me;
843 } catch (MelderError) {
844 Melder_throw (U"Sound not created from tone complex.");
845 }
846 }
847
848
Sound_createSimpleToneComplex(double minimumTime,double maximumTime,double samplingFrequency,double firstFrequency,integer numberOfComponents,double frequencyDistance,bool scaleAmplitudes)849 autoSound Sound_createSimpleToneComplex (double minimumTime, double maximumTime, double samplingFrequency, double firstFrequency,
850 integer numberOfComponents, double frequencyDistance, bool scaleAmplitudes)
851 {
852 if (firstFrequency + (numberOfComponents - 1) * frequencyDistance > samplingFrequency / 2) {
853 Melder_warning (U"Sound_createSimpleToneComplex: frequency of (some) components too high.");
854 numberOfComponents = Melder_ifloor (1.0 + (0.5 * samplingFrequency - firstFrequency) / frequencyDistance);
855 }
856 return Sound_createToneComplex (minimumTime, maximumTime, samplingFrequency,
857 firstFrequency, numberOfComponents, frequencyDistance, 0, 0, scaleAmplitudes);
858 }
859
Sound_createMistunedHarmonicComplex(double minimumTime,double maximumTime,double samplingFrequency,double firstFrequency,integer numberOfComponents,integer mistunedComponent,double mistuningFraction,bool scaleAmplitudes)860 autoSound Sound_createMistunedHarmonicComplex (double minimumTime, double maximumTime, double samplingFrequency, double firstFrequency,
861 integer numberOfComponents, integer mistunedComponent, double mistuningFraction, bool scaleAmplitudes)
862 {
863 if (firstFrequency + (numberOfComponents - 1) * firstFrequency > samplingFrequency / 2) {
864 Melder_warning (U"Sound_createMistunedHarmonicComplex: frequency of (some) components too high.");
865 numberOfComponents = Melder_ifloor (1.0 + (0.5 * samplingFrequency - firstFrequency) / firstFrequency);
866 }
867 if (mistunedComponent > numberOfComponents) {
868 Melder_warning (U"Sound_createMistunedHarmonicComplex: mistuned component too high.");
869 }
870 return Sound_createToneComplex (minimumTime, maximumTime, samplingFrequency, firstFrequency, numberOfComponents, firstFrequency, mistunedComponent, mistuningFraction, scaleAmplitudes);
871 }
872
873 /*
874 The gammachirp is a "chirp tone" with a gamma-function envelope:
875 f(t) = t^(n-1) exp (-2 pi b t) cos (2 pi f0 t + c ln (t) + p0)
876 = t^(n-1) exp (-2 pi b t) cos (phi(t))
877 Instantaneous frequency f is defined as f = d phi(t) / dt / (2 pi)
878 and so: f = f0 + c /(2 pi t)
879 Irino: bandwidth = (frequency * (6.23e-6 * frequency + 93.39e-3) + 28.52)
880 */
Sound_createGammaTone(double minimumTime,double maximumTime,double samplingFrequency,double gamma,double frequency,double bandwidth,double initialPhase,double addition,bool scaleAmplitudes)881 autoSound Sound_createGammaTone (double minimumTime, double maximumTime, double samplingFrequency, double gamma, double frequency, double bandwidth, double initialPhase, double addition, bool scaleAmplitudes) {
882 try {
883 autoSound me = Sound_create2 (minimumTime, maximumTime, samplingFrequency);
884 for (integer i = 1; i <= my nx; i ++) {
885 const double t = (i - 0.5) * my dx;
886 const double f = frequency + addition / (NUM2pi * t);
887 if (f > 0 && f < samplingFrequency / 2)
888 my z [1] [i] = pow (t, gamma - 1.0) * exp (- NUM2pi * bandwidth * t) *
889 cos (NUM2pi * frequency * t + addition * log (t) + initialPhase);
890 }
891 if (scaleAmplitudes)
892 Vector_scale (me.get(), 0.99996948);
893 return me;
894 } catch (MelderError) {
895 Melder_throw (U"Sound not created from gammatone function.");
896 }
897 }
898
899 #if 0
900 // This routine is unstable for small values of f and large b. Better to use cross-correlation of gammatone with sound.
901 static void NUMgammatoneFilter4 (double *x, double *y, integer n, double centre_frequency, double bandwidth, double samplingFrequency) {
902 double a [5], b [9], dt = 1.0 / samplingFrequency, wt = NUMpi * centre_frequency * dt;
903 double bt = 2 * NUMpi * bandwidth * dt, dt2 = dt * dt, dt4 = dt2 * dt2;
904
905 Melder_assert (n > 0 && centre_frequency > 0 && bandwidth >= 0 && samplingFrequency > 0);
906
907 /*
908 The filter function is:
909 H(z) = sum (i=0..4, a [i] z^-i) / sum (j=0..4, b [j] z^-j)
910 Coefficients a & b according to:
911 Slaney (1993), An efficient implementation of the Patterson-Holdsworth
912 auditory filterbank, Apple Computer Technical Report 35, 41 pages.
913 For the a's we have left out an overal scale factor of dt^4.
914 This makes a [0] = 1.
915 */
916
917 a [0] = dt4;
918 a [1] = -4 * dt4 * cos (2 * wt) * exp (- bt);
919 a [2] = 6 * dt4 * cos (4 * wt) * exp (-2 * bt);
920 a [3] = -4 * dt4 * cos (6 * wt) * exp (-3 * bt);
921 a [4] = dt4 * cos (8 * wt) * exp (-4 * bt);
922
923 b [0] = 1;
924 b [1] = -8 * cos (2 * wt) * exp (- bt);
925 b [2] = (16 + 12 * cos (4 * wt)) * exp (-2 * bt);
926 b [3] = (-48 * cos (2 * wt) - 8 * cos (6 * wt)) * exp (-3 * bt);
927 b [4] = (36 + 32 * cos (4 * wt) + 2 * cos (8 * wt)) * exp (-4 * bt);
928 b [5] = (-48 * cos (2 * wt) - 8 * cos (6 * wt)) * exp (-5 * bt);
929 b [6] = (16 + 12 * cos (4 * wt)) * exp (-6 * bt);
930 b [7] = -8 * cos (2 * wt) * exp (-7 * bt);
931 b [8] = exp (-8 * bt);
932
933 // Calculate gain (= Abs (H(z); f=fc) and scale a [0-4] with it.
934
935 double zr = cos (2 * wt), zi = -sin (2 * wt);
936 double dr = a [4], di = 0, tr, ti, nr, ni;
937
938 for (integer j = 1; j <= 4; j ++) {
939 tr = a [4 - j] + zr * dr - zi * di;
940 ti = zi * dr + zr * di;
941 dr = tr; di = ti;
942 }
943
944 dr = b [8];
945 di = 0;
946 for (integer j = 1; j <= 8; j ++) {
947 nr = b [8 - j] + zr * dr - zi * di;
948 ni = zi * dr + zr * di;
949 dr = nr; di = ni;
950 }
951
952 double n2 = nr * nr + ni * ni;
953 double gr = tr * nr + ti * ni;
954 double gi = ti * nr - tr * ni;
955 double gain = hypot (gr, gi) / n2;
956
957 for (integer j = 0; j <= 4; j ++) {
958 a [j] /= gain;
959 }
960
961 if (Melder_debug == -1) {
962 Melder_casual (
963 U" --gammatonefilter4 --\nF = ", centre_frequency,
964 U", B = ", bandwidth,
965 U", T = ", dt,
966 U"\nGain = ", gain
967 );
968 for (integer i = 0; i <= 4; i ++) {
969 Melder_casual (U"a [", i, U"] = ", a [i]);
970 }
971 for (integer i = 0; i <= 8; i ++) {
972 Melder_casual (U"b [", i, U"] = ", b [i]);
973 }
974 }
975
976 /* Perform the filtering. For the first 8 samples we must do some extra work.
977 y [1] = a [0] * x [1];
978 if (n > 1) {
979 y [2] = a [0] * x [2];
980 y [2] += a [1] * x [1] - b [1] * y [1];
981 }
982 if (n > 2) {
983 y [2] = a [0] * x [2];
984 y [2] += a [2] * x [i - 2] - b [2] * y [i - 2];
985 }
986 */
987 integer n8 = n < 8 ? n : 8;
988 for (integer i = 1; i <= n8; i ++) {
989 y [i] = a [0] * x [i];
990 if (i > 1) {
991 y [i] += a [1] * x [i - 1] - b [1] * y [i - 1];
992 } else {
993 continue;
994 }
995 if (i > 2) {
996 y [i] += a [2] * x [i - 2] - b [2] * y [i - 2];
997 } else {
998 continue;
999 }
1000 if (i > 3) {
1001 y [i] += a [3] * x [i - 3] - b [3] * y [i - 3];
1002 } else {
1003 continue;
1004 }
1005 if (i > 4) {
1006 y [i] += a [4] * x [i - 4] - b [4] * y [i - 4];
1007 } else {
1008 continue;
1009 }
1010 if (i > 5) {
1011 y [i] -= b [5] * y [i - 5];
1012 } else {
1013 continue;
1014 }
1015 if (i > 6) {
1016 y [i] -= b [6] * y [i - 6];
1017 } else {
1018 continue;
1019 }
1020 if (i > 7) {
1021 y [i] -= b [7] * y [i - 7];
1022 }
1023 }
1024
1025 for (integer i = n8 + 1; i <= n; i ++) {
1026 // y [i] = a [0] * x [i];
1027 // y [i] += a [1] * x [i-1] + a [2] * x [i-2] + a [3] * x [i-3] + a [4] * x [i-4];
1028 // y [i] -= b [1] * y [i-1] + b [2] * y [i-2] + b [3] * y [i-3] + b [4] * y [i-4];
1029 // y [i] -= b [5] * y [i-5] + b [6] * y [i-6] + b [7] * y [i-7] + b [8] * y [i-8];
1030 y [i] = a [0] * x [i] + a [1] * x [i - 1] + a [2] * x [i - 2] + a [3] * x [i - 3] + a [4] * x [i - 4]
1031 - b [1] * y [i - 1] - b [2] * y [i - 2] - b [3] * y [i - 3] - b [4] * y [i - 4]
1032 - b [5] * y [i - 5] - b [6] * y [i - 6] - b [7] * y [i - 7] - b [8] * y [i - 8];
1033 }
1034 }
1035
1036 autoSound Sound_filterByGammaToneFilter4 (Sound me, double centre_frequency, double bandwidth) {
1037 try {
1038 Meldr_require (centre_frequency > 0,
1039 U"Centre frequency should be positive.");
1040 Melder_require (bandwidth > 0,
1041 U"Bandwidth should be positive.");
1042
1043 autoSound thee = Sound_create (my ny, my xmin, my xmax, my nx, my dx, my x1);
1044 autoVEC y = zero_VEC (my nx);
1045 autoVEC x = zero_VEC (my nx);
1046
1047 const double fs = 1.0 / my dx;
1048 for (integer channel = 1; channel <= my ny; channel ++) {
1049 for (integer i = 1; i <= my nx; i ++)
1050 x [i] = my z [channel] [i];
1051 NUMgammatoneFilter4 (x.asArgumentToFunctionThatExpectsOneBasedArray(), y.asArgumentToFunctionThatExpectsOneBasedArray(),
1052 my nx, centre_frequency, bandwidth, fs);
1053 for (integer i = 1; i <= my nx; i ++)
1054 thy z [channel] [i] = y [i];
1055 }
1056 return thee;
1057 } catch (MelderError) {
1058 Melder_throw (U"Sound not filtered by gammatone filter4.");
1059 }
1060 }
1061 #endif
1062
1063
Sound_filterByGammaToneFilter4(Sound me,double centre_frequency,double bandwidth)1064 autoSound Sound_filterByGammaToneFilter4 (Sound me, double centre_frequency, double bandwidth) {
1065 return Sound_filterByGammaToneFilter (me, centre_frequency, bandwidth, 4.0, 0.0);
1066 }
1067
Sound_filterByGammaToneFilter(Sound me,double centre_frequency,double bandwidth,double gamma,double initialPhase)1068 autoSound Sound_filterByGammaToneFilter (Sound me, double centre_frequency, double bandwidth, double gamma, double initialPhase) {
1069 try {
1070 autoSound gammaTone = Sound_createGammaTone (my xmin, my xmax, 1.0 / my dx, gamma, centre_frequency, bandwidth, initialPhase, 0.0, 0);
1071 // kSounds_convolve_scaling_INTEGRAL, SUM, NORMALIZE, PEAK_099
1072 autoSound thee = Sounds_convolve (me, gammaTone.get(), kSounds_convolve_scaling::INTEGRAL, kSounds_convolve_signalOutsideTimeDomain::ZERO);
1073
1074 const dcomplex r = gammaToneFilterResponseAtCentreFrequency (centre_frequency, bandwidth, gamma, initialPhase, my xmax - my xmin);
1075
1076 const double scale = 1.0 / sqrt (r.real() * r.real() + r.imag() * r.imag());
1077 thy z.row (1) *= scale;
1078
1079 return thee;
1080 } catch (MelderError) {
1081 Melder_throw (U"Sound not filtered by gammatone filter4.");
1082 }
1083 }
1084
1085
1086 /*
1087 Sound Sound_createShepardTone (double minimumTime, double maximumTime, double samplingFrequency,
1088 double baseFrequency, double frequencyShiftFraction, double maximumFrequency, double amplitudeRange)
1089 {
1090 Sound me; integer i, j, nComponents = 1 + log2 (maximumFrequency / 2 / baseFrequency);
1091 double lmin = pow (10, - amplitudeRange / 10);
1092 double twoPi = NUM2pi, f = baseFrequency * (1 + frequencyShiftFraction);
1093 if (nComponents < 2) Melder_warning (U"Sound_createShepardTone: only 1 component.");
1094 Melder_casual (U"Sound_createShepardTone: ", nComponents, U" components.");
1095 if (! (me = Sound_create2 (minimumTime, maximumTime, samplingFrequency))) return nullptr;
1096
1097 for (j=1; j <= nComponents; j ++)
1098 {
1099 double fj = f * pow (2, j-1), wj = twoPi * fj;
1100 double amplitude = lmin + (1 - lmin) *
1101 (1 - cos (twoPi * log (fj + 1) / log (maximumFrequency + 1))) / 2;
1102 for (i=1; i <= my nx; i ++)
1103 {
1104 my z [1] [i] += amplitude * sin (wj * (i - 0.5) * my dx);
1105 }
1106 }
1107 Vector_scale (me, 0.99996948);
1108 return me;
1109 }
1110 */
1111
Sound_createShepardToneComplex(double minimumTime,double maximumTime,double samplingFrequency,double lowestFrequency,integer numberOfComponents,double frequencyChange_st,double amplitudeRange_dB,double octaveShiftFraction)1112 autoSound Sound_createShepardToneComplex (double minimumTime, double maximumTime, double samplingFrequency, double lowestFrequency, integer numberOfComponents, double frequencyChange_st, double amplitudeRange_dB, double octaveShiftFraction) {
1113 try {
1114 const double highestFrequency = lowestFrequency * pow (2, numberOfComponents);
1115 const double lmax_db = 0, lmin_db = lmax_db - fabs (amplitudeRange_dB);
1116
1117 Melder_require (highestFrequency <= samplingFrequency / 2.0,
1118 U"The highest frequency you want to generate is "
1119 U"above the Nyquist frequency. Choose a larger value for \"Sampling frequency\", or lower values for "
1120 U"\"Number of components\" or \"Lowest frequency\".");
1121 Melder_require (octaveShiftFraction >= 0.0 && octaveShiftFraction < 1.0,
1122 U"Octave offset fraction should be greater or equal zero and smaller than one.");
1123
1124 double octaveTime, sweeptime;
1125 if (frequencyChange_st != 0.0) {
1126 octaveTime = 12.0 / fabs (frequencyChange_st);
1127 sweeptime = numberOfComponents * octaveTime;
1128 } else {
1129 octaveTime = sweeptime = 1e308;
1130 }
1131 autoSound me = Sound_create2 (minimumTime, maximumTime, samplingFrequency);
1132
1133 const double a = frequencyChange_st / 12.0;
1134 for (integer ifreq = 1; ifreq <= numberOfComponents; ifreq ++) {
1135 double freqi = lowestFrequency * pow (2.0, ifreq - 1 + octaveShiftFraction);
1136 double b1, b2, tswitch;
1137 double phase1 = 0, phasejm1 = 0;
1138 /*
1139 The frequency is f(t) = lowestFrequency * 2^tone(t)
1140 The tone is parametrized with a straight line: tone(t) = a * t + b
1141 where a = frequencyChange_st / 12 and b depends on the component
1142 If frequencyChange_st >=0
1143 The tone rises until highest frequency at t=tswich, then falls to lowest and starts rising again.
1144 The slope is always the same. The offsets are b1 and b2 respectively.
1145 We count octaveShiftFraction as distance from tone base
1146 else if frequencyChange_st < 0
1147 The tone falls until the lowest frequency at t=tswich, then jumps to highest and starts falling again
1148 All tones start one octave higher as in rising case.
1149 We also count octaveShiftFraction down from this tone base.
1150 else
1151 No changes in frequency of the components.
1152 endif
1153 */
1154 if (frequencyChange_st >= 0) {
1155 b1 = ifreq - 1 + octaveShiftFraction;
1156 b2 = 0.0;
1157 tswitch = (numberOfComponents - b1) * octaveTime;
1158 } else {
1159 freqi *= 2.0;
1160 b1 = ifreq - octaveShiftFraction;
1161 b2 = numberOfComponents;
1162 tswitch = b1 * octaveTime;
1163 }
1164 for (integer j = 1; j <= my nx; j ++) {
1165 const double t = Sampled_indexToX (me.get(), j);
1166 const double tmod = fmod (t, sweeptime);
1167 const double tone = tmod <= tswitch ? b1 + a * tmod : b2 + a * (tmod - tswitch);
1168 const double f = lowestFrequency * pow (2, tone);
1169 /* double theta = 2 * NUMpi * log2 (f / lowestFrequency) / numberOfComponents; */
1170 const double theta = 2 * NUMpi * tone / numberOfComponents;
1171 const double level = pow (10.0, (lmin_db + (lmax_db - lmin_db) * (1.0 - cos (theta)) / 2.0) / 20.0);
1172 const double phasej = phasejm1 + 2.0 * NUMpi * f * my dx; /* Integrate 2*pi*f(t) */
1173
1174 if (j == 1)
1175 phase1 = phasej;
1176 my z [1] [j] += level * sin (phasej - phase1); // si
1177 phasejm1 = phasej;
1178 }
1179 }
1180 Vector_scale (me.get(), 0.99996948);
1181 return me;
1182 } catch (MelderError) {
1183 Melder_throw (U"Sound not created from Shepard tone complex.");
1184 }
1185 }
1186
1187 /* can be implemented more efficiently with sin recurrence? */
1188 /* amplitude(f) = min + (1-min)*(1-cos(2*pi*(ln(f/f1) / ln(fn/f1)))/2 */
Sound_createShepardTone(double minimumTime,double maximumTime,double samplingFrequency,double lowestFrequency,integer numberOfComponents,double frequencyChange_st,double amplitudeRange)1189 autoSound Sound_createShepardTone (double minimumTime, double maximumTime, double samplingFrequency, double lowestFrequency, integer numberOfComponents, double frequencyChange_st, double amplitudeRange) {
1190 try {
1191 const double scale = pow (2.0, numberOfComponents);
1192 const double maximumFrequency = lowestFrequency * scale;
1193 const double lmin = pow (10.0, - amplitudeRange / 10.0), twoPi = NUM2pi;
1194 const double ln2t0 = log (2.0) * frequencyChange_st / 12.0;
1195 const double lnf1 = log (lowestFrequency + 1.0);
1196 const double amplarg = twoPi / log ((maximumFrequency + 1.0) / (lowestFrequency + 1.0));
1197
1198 Melder_require (lowestFrequency <= 0.5 * samplingFrequency,
1199 U"Sound_createShepardTone: lowest frequency too high.");
1200 Melder_require (maximumFrequency <= 0.5 * samplingFrequency,
1201 U"Sound_createShepardTone: frequency of highest component too high.");
1202 autoSound me = Sound_create2 (minimumTime, maximumTime, samplingFrequency);
1203
1204 for (integer i = 1; i <= my nx; i ++) {
1205 const double t = (i - 0.5) * my dx;
1206 double argt, ft = lowestFrequency;
1207 if (frequencyChange_st != 0.0) {
1208 const double expt = exp (ln2t0 * t);
1209 argt = twoPi * lowestFrequency * (expt - 1.0) / ln2t0;
1210 ft *= expt;
1211 } else {
1212 argt = twoPi * ft * t;
1213 }
1214 for (integer j = 1; j <= numberOfComponents; j ++) {
1215 while (ft >= maximumFrequency) {
1216 ft /= scale;
1217 argt /= scale;
1218 }
1219 //amplitude = lmin + (1 - lmin) * (1 - cos (twoPi * log (ft + 1) / log (maximumFrequency + 1))) / 2;
1220 const double amplitude = lmin + (1 - lmin) * (1 - cos (amplarg * (log (ft + 1) - lnf1))) / 2.0;
1221 my z [1] [i] += amplitude * sin (argt);
1222 ft *= 2.0;
1223 argt *= 2.0;
1224 }
1225 }
1226 Vector_scale (me.get(), 0.99996948);
1227 return me;
1228 } catch (MelderError) {
1229 Melder_throw (U"Sound not created from Shepard tone.");
1230 }
1231 }
1232
Sound_createPattersonWightmanTone(double minimumTime,double maximumTime,double samplingFrequency,double baseFrequency,double frequencyShiftRatio,integer numberOfComponents)1233 autoSound Sound_createPattersonWightmanTone (double minimumTime, double maximumTime, double samplingFrequency, double baseFrequency, double frequencyShiftRatio, integer numberOfComponents) {
1234 try {
1235 Melder_require ((numberOfComponents - 1 + frequencyShiftRatio) * baseFrequency <= samplingFrequency / 2.0,
1236 U"Frequency of one or more components too large.");
1237 autoSound me = Sound_create2 (minimumTime, maximumTime, samplingFrequency);
1238 const double w0 = NUM2pi * baseFrequency;
1239 for (integer i = 1; i <= my nx; i ++) {
1240 const double t = (i - 0.5) * my dx;
1241 double a = 0.0;
1242 for (integer j = 1; j <= numberOfComponents; j ++)
1243 a += sin ( (j + frequencyShiftRatio) * w0 * t);
1244 my z [1] [i] = a;
1245 }
1246 Vector_scale (me.get(), 0.99996948);
1247 return me;
1248 } catch (MelderError) {
1249 Melder_throw (U"Sound not created from Patterson Wightman tone.");
1250 }
1251 }
1252
Sound_createPlompTone(double minimumTime,double maximumTime,double samplingFrequency,double baseFrequency,double frequencyFraction,integer m)1253 autoSound Sound_createPlompTone (double minimumTime, double maximumTime, double samplingFrequency, double baseFrequency, double frequencyFraction, integer m) {
1254 try {
1255 Melder_require (12.0 * (1.0 + frequencyFraction) * baseFrequency <= samplingFrequency / 2.0,
1256 U"Sound_createPlompTone: frequency of one or more components too large.");
1257
1258 const double w1 = NUM2pi * (1.0 - frequencyFraction) * baseFrequency;
1259 const double w2 = NUM2pi * (1.0 + frequencyFraction) * baseFrequency;
1260 autoSound me = Sound_create2 (minimumTime, maximumTime, samplingFrequency);
1261 for (integer i = 1; i <= my nx; i ++) {
1262 const double t = (i - 0.5) * my dx;
1263 double a = 0.0;
1264 for (integer j = 1; j <= m; j ++)
1265 a += sin (j * w1 * t);
1266 for (integer j = m + 1; j <= 12; j ++)
1267 a += sin (j * w2 * t);
1268 my z [1] [i] = a;
1269 }
1270 Vector_scale (me.get(), 0.99996948);
1271 return me;
1272 } catch (MelderError) {
1273 Melder_throw (U"Sound not created from Plomp tone.");
1274 }
1275 }
1276
Sounds_multiply(Sound me,Sound thee)1277 void Sounds_multiply (Sound me, Sound thee) {
1278 const integer n = std::min (my nx, thy nx );
1279 my z.row (1).part(1, n) *= thy z.row (1).part(1, n);
1280 }
1281
Sound_power(Sound me)1282 double Sound_power (Sound me) {
1283 const double sumSq = NUMsum2 (my z.row (1));
1284 return sqrt (sumSq) * my dx / (my xmax - my xmin);
1285 }
1286
Sound_correlateParts(Sound me,double tx,double ty,double duration)1287 double Sound_correlateParts (Sound me, double tx, double ty, double duration) {
1288 if (ty < tx)
1289 std::swap (tx, ty);
1290 const integer nbx = Sampled_xToNearestIndex (me, tx);
1291 const integer nby = Sampled_xToNearestIndex (me, ty);
1292 const integer ney = Sampled_xToNearestIndex (me, ty + duration);
1293 const integer increment = nbx < 1 ? 1 - nbx : 0;
1294 const integer decrement = ney > my nx ? ney - my nx : 0;
1295 const integer ns = Melder_ifloor (duration / my dx) - increment - decrement;
1296 if (ns < 1)
1297 return 0.0;
1298
1299 const double *x = & my z [1] [nbx + increment - 1];
1300 const double *y = & my z [1] [nby + increment - 1];
1301 double xm = 0.0, ym = 0.0;
1302 for (integer i = 1; i <= ns; i ++) {
1303 xm += x [i];
1304 ym += y [i];
1305 }
1306 xm /= ns;
1307 ym /= ns;
1308 double sxx = 0.0, syy = 0.0, sxy = 0.0;
1309 for (integer i = 1; i <= ns; i ++) {
1310 const double xt = x [i] - xm, yt = y [i] - ym;
1311 sxx += xt * xt;
1312 syy += yt * yt;
1313 sxy += xt * yt;
1314 }
1315 const double denum = sxx * syy;
1316 const double rxy = ( denum > 0.0 ? sxy / sqrt (denum) : 0.0 );
1317 return rxy;
1318 }
1319
1320
interpolate(Sound me,integer i1,integer channel,double level)1321 static double interpolate (Sound me, integer i1, integer channel, double level)
1322 /* Precondition: my z [1] [i1] != my z [1] [i1 + 1]; */
1323 {
1324 const integer i2 = i1 + 1;
1325 const double x1 = Sampled_indexToX (me, i1), x2 = Sampled_indexToX (me, i2);
1326 const double y1 = my z [channel] [i1], y2 = my z [channel] [i2];
1327 return x1 + (x2 - x1) * (y1 - level) / (y1 - y2); // linear
1328 }
1329
Sound_getNearestLevelCrossing(Sound me,integer channel,double position,double level,kSoundSearchDirection searchDirection)1330 double Sound_getNearestLevelCrossing (Sound me, integer channel, double position, double level, kSoundSearchDirection searchDirection) {
1331 const double *amplitude = & my z [channel] [0];
1332 const integer leftSample = Sampled_xToLowIndex (me, position);
1333 if (leftSample > my nx)
1334 return undefined;
1335 const integer rightSample = leftSample + 1;
1336 /*
1337 Are we already at a level crossing?
1338 */
1339 if (leftSample >= 1 && rightSample <= my nx &&
1340 (amplitude [leftSample] >= level) != (amplitude [rightSample] >= level))
1341 {
1342 const double crossing = interpolate (me, leftSample, channel, level);
1343 return searchDirection == kSoundSearchDirection::LEFT ?
1344 ( crossing <= position ? crossing : undefined ) :
1345 ( crossing >= position ? crossing : undefined );
1346 }
1347
1348 double leftCrossing = undefined;
1349 if (searchDirection == kSoundSearchDirection::LEFT || searchDirection == kSoundSearchDirection::NEAREST) {
1350 for (integer ileft = leftSample - 1; ileft >= 1; ileft --)
1351 if ((amplitude [ileft] >= level) != (amplitude [ileft + 1] >= level)) {
1352 leftCrossing = interpolate (me, ileft, channel, level);
1353 break;
1354 }
1355 if (searchDirection == kSoundSearchDirection::LEFT)
1356 return leftCrossing;
1357 }
1358
1359 if (rightSample < 1)
1360 return undefined;
1361 double rightCrossing = undefined;
1362 if (searchDirection == kSoundSearchDirection::RIGHT || searchDirection == kSoundSearchDirection::NEAREST) {
1363 for (integer iright = rightSample + 1; iright <= my nx; iright ++)
1364 if ((amplitude [iright] >= level) != (amplitude [iright - 1] >= level)) {
1365 rightCrossing = interpolate (me, iright - 1, channel, level);
1366 break;
1367 }
1368 if (searchDirection == kSoundSearchDirection::RIGHT)
1369 return rightCrossing;
1370 }
1371
1372 return
1373 isdefined (leftCrossing) && isdefined (rightCrossing) ?
1374 ( position - leftCrossing < rightCrossing - position ? leftCrossing : rightCrossing )
1375 : isdefined (leftCrossing) ? leftCrossing
1376 : isdefined (rightCrossing) ? rightCrossing
1377 : undefined;
1378 }
1379
Sound_localPeak(Sound me,double fromTime,double toTime,double reference)1380 double Sound_localPeak (Sound me, double fromTime, double toTime, double reference) {
1381 integer n1 = Sampled_xToNearestIndex (me, fromTime);
1382 integer n2 = Sampled_xToNearestIndex (me, toTime);
1383 const double *s = & my z [1] [0];
1384 double peak = -1e308;
1385 if (fromTime <= toTime) {
1386 if (n1 < 1)
1387 n1 = 1;
1388 if (n2 > my nx)
1389 n2 = my nx;
1390 for (integer i = n1; i <= n2; i ++) {
1391 const double ds = fabs (s [i] - reference);
1392 if (ds > peak)
1393 peak = ds;
1394 }
1395 }
1396 return peak;
1397 }
1398
Sound_into_Sound(Sound me,Sound to,double startTime)1399 void Sound_into_Sound (Sound me, Sound to, double startTime) {
1400 const integer index = Sampled_xToNearestIndex (me, startTime);
1401 for (integer i = 1; i <= to -> nx; i ++) {
1402 const integer j = index - 1 + i;
1403 to -> z [1] [i] = (j < 1 || j > my nx ? 0.0 : my z [1] [j]);
1404 }
1405 }
1406
1407 /*
1408 IntervalTier Sound_PointProcess_to_IntervalTier (Sound me, PointProcess thee, double window);
1409 IntervalTier Sound_PointProcess_to_IntervalTier (Sound me, PointProcess thee, double window)
1410 {
1411 double window2 = window / 2;
1412 double t1 = thy t [1] - window2;
1413 if (t1 < my xmin) t1 = my xmin;
1414 double t2 = t1 + window2;
1415 if (t2 > my xmax) t2 = my xmax;
1416 autoIntervalTier him = IntervalTier_create (my xmin, my xmax);
1417 autoTextInterval interval = TextInterval_create (t1, t2, "yes");
1418 his intervals -> addItem_move (interval.move());
1419
1420 for (integer i = 2; i <= thy nt; i ++)
1421 {
1422 double t = thy t [i];
1423
1424 if (t <= t2)
1425 {
1426 integer index = his points.size;
1427 RealPoint point = his points->at [index];
1428 t2 = t + window2;
1429 if (t2 > my xmax) t2 = my xmax;
1430 point -> value = t2;
1431 }
1432 else
1433 {
1434 t2 = t + window2;
1435 if (t2 > my xmax) t2 = my xmax;
1436 RealTier_addPoint (him, t, t2);
1437 }
1438 }
1439 return him;
1440 }
1441 */
1442
1443
1444 /*
1445 First approximation on the basis of differences in the sampled signal.
1446 The underlying analog signal still could have jumps undetected by this algorithm.
1447 We could get a better approximation by first upsampling the signal.
1448 */
Sound_to_PointProcess_getJumps(Sound me,integer channel,double minimumJump,double maximumDuration)1449 autoPointProcess Sound_to_PointProcess_getJumps (Sound me, integer channel, double minimumJump, double maximumDuration) {
1450 try {
1451 Melder_require (channel >= 1 && channel <= my ny,
1452 U"The channel number should be in the interval from 1 to ", my ny, U".");
1453 autoPointProcess thee = PointProcess_create (my xmin, my xmax, 10);
1454 integer index = 1, intervalSize_samples = Melder_ifloor (maximumDuration / my dx);
1455 if (intervalSize_samples < 1)
1456 intervalSize_samples = 1;
1457 constVEC samples = my z.row (channel);
1458 while (index < my nx) {
1459 integer nextIndex = index + 1, step = 1;
1460 while (nextIndex <= index + intervalSize_samples && nextIndex <= my nx) {
1461 if (fabs (samples [index] - samples [nextIndex]) > minimumJump) {
1462 const double t = Sampled_indexToX (me, index);
1463 PointProcess_addPoint (thee.get(), t);
1464 step = nextIndex - index + 1;
1465 break;
1466 }
1467 nextIndex ++;
1468 }
1469 index += step;
1470 }
1471 return thee;
1472 } catch (MelderError) {
1473 Melder_throw (me, U": no PointProcess created.");
1474 }
1475 }
1476
1477 /* Internal pitch representation in semitones */
Sound_Pitch_changeSpeaker(Sound me,Pitch him,double formantMultiplier,double pitchMultiplier,double pitchRangeMultiplier,double durationMultiplier)1478 autoSound Sound_Pitch_changeSpeaker (Sound me, Pitch him, double formantMultiplier, double pitchMultiplier, double pitchRangeMultiplier, double durationMultiplier) {
1479 try {
1480 const double samplingFrequency_old = 1.0 / my dx;
1481
1482 Melder_require (my xmin == his xmin && my xmax == his xmax,
1483 U"The Pitch and the Sound object should have the same domain.");
1484
1485 autoSound sound = Data_copy (me);
1486 Vector_subtractMean (sound.get());
1487
1488 if (formantMultiplier != 1.0) // Shift all frequencies (inclusive pitch!)
1489 Sound_overrideSamplingFrequency (sound.get(), samplingFrequency_old * formantMultiplier);
1490
1491 autoPitch pitch = Data_copy (him);
1492 Pitch_scaleDuration (pitch.get(), 1.0 / formantMultiplier); //
1493 Pitch_scalePitch (pitch.get(), formantMultiplier);
1494
1495 autoPointProcess pulses = Sound_Pitch_to_PointProcess_cc (sound.get(), pitch.get());
1496 autoPitchTier pitchTier = Pitch_to_PitchTier (pitch.get());
1497
1498 const double median = Pitch_getQuantile (pitch.get(), 0.0, 0.0, 0.5, kPitch_unit::HERTZ);
1499 if (isdefined (median) && median != 0.0) {
1500 /*
1501 Incorporate pitch shift from overriding the sampling frequency
1502 */
1503 PitchTier_multiplyFrequencies (pitchTier.get(), sound -> xmin, sound -> xmax, pitchMultiplier / formantMultiplier);
1504 PitchTier_modifyExcursionRange (pitchTier.get(), sound -> xmin, sound -> xmax, pitchRangeMultiplier, median);
1505 } else if (pitchMultiplier != 1) {
1506 Melder_warning (U"Pitch has not been changed because the sound was entirely voiceless.");
1507 }
1508 autoDurationTier duration = DurationTier_create (my xmin, my xmax);
1509 RealTier_addPoint (duration.get(), (my xmin + my xmax) / 2, formantMultiplier * durationMultiplier);
1510
1511 autoSound thee = Sound_Point_Pitch_Duration_to_Sound (sound.get(), pulses.get(), pitchTier.get(), duration.get(), MAX_T);
1512 /*
1513 Resample to the original sampling frequency
1514 */
1515 if (formantMultiplier != 1.0)
1516 thee = Sound_resample (thee.get(), samplingFrequency_old, 10);
1517
1518 return thee;
1519 } catch (MelderError) {
1520 Melder_throw (U"Sound not created from Pitch & Sound.");
1521 }
1522 }
1523
Sound_changeSpeaker(Sound me,double pitchMin,double pitchMax,double formantMultiplier,double pitchMultiplier,double pitchRangeMultiplier,double durationMultiplier)1524 autoSound Sound_changeSpeaker (Sound me, double pitchMin, double pitchMax, double formantMultiplier, double pitchMultiplier, double pitchRangeMultiplier, double durationMultiplier) { // > 0
1525 try {
1526 autoPitch pitch = Sound_to_Pitch (me, 0.8 / pitchMin, pitchMin, pitchMax);
1527 autoSound thee = Sound_Pitch_changeSpeaker (me, pitch.get(), formantMultiplier, pitchMultiplier, pitchRangeMultiplier, durationMultiplier);
1528 return thee;
1529 } catch (MelderError) {
1530 Melder_throw (me, U": speaker not changed.");
1531 }
1532 }
1533
IntervalTier_addBoundaryUnsorted(IntervalTier me,integer iinterval,double time,conststring32 leftLabel)1534 static void IntervalTier_addBoundaryUnsorted (IntervalTier me, integer iinterval, double time, conststring32 leftLabel) {
1535 Melder_require (time > my xmin && time <= my xmax,
1536 U"Time is outside interval.");
1537 /*
1538 Find interval to split.
1539 */
1540 if (iinterval <= 0)
1541 iinterval = IntervalTier_timeToLowIndex (me, time);
1542 /*
1543 Modify end time of left label.
1544 */
1545 const TextInterval ti = my intervals.at [iinterval];
1546 ti -> xmax = time;
1547 TextInterval_setText (ti, leftLabel);
1548 if (time != my xmax) {
1549 autoTextInterval ti_new = TextInterval_create (time, my xmax, U"");
1550 my intervals. addItem_unsorted_move (ti_new.move());
1551 }
1552 }
1553
Spectrogram_to_Intensity_silenceDetection(Spectrogram me)1554 static autoIntensity Spectrogram_to_Intensity_silenceDetection (Spectrogram me) {
1555 try {
1556 autoIntensity thee = Intensity_create (my xmin, my xmax, my nx, my dx, my x1);
1557 VEC intensityBins = thy z.row (1);
1558 /*
1559 Add the values in the frequency bins, they are power (~amplitude squared)
1560 Don't use frequencies below 80 Hz and above 8000 Hz.
1561 */
1562 constexpr double fmin = 80.0, fmax = 8000.0;
1563 const integer iFreqFrom = std::max (1_integer, Melder_iroundDown (fmin / my dy));
1564 const integer iFreqTo = std::min (Melder_iroundUp (fmax / my dy), my ny);
1565 for (integer ifreq = iFreqFrom; ifreq <= iFreqTo; ifreq ++)
1566 intensityBins += my z.row (ifreq);
1567 /*
1568 Scale intensity.
1569 */
1570 intensityBins /= 4.0e-10; // threshold of hearing
1571 const double halfWindow = 3.2 * 0.01; // as if minimumPitch were 100 Hz
1572 autoIntensity him = Intensity_create (my xmin, my xmax, my nx, my dx, my x1);
1573 for (integer iframe = 1; iframe <= my nx; iframe ++) {
1574 const double xmid = Sampled_indexToX (thee.get(), iframe);
1575 const double intensity = Sampled_getMean (thee.get(), xmid - halfWindow, xmid + halfWindow, 1, 0, true);
1576 his z [1] [iframe] = (intensity < 1.0e-30 ? -300.0 : 10.0 * log10 (intensity) );
1577 }
1578 return him;
1579 } catch (MelderError) {
1580 Melder_throw (me, U": could not determine Intensity.");
1581 }
1582 }
1583
Sound_to_TextGrid_detectVoiceActivity_lsfm(Sound me,double timeStep,double longTermWindow,double shorttimeWindow,double fmin,double fmax,double lsfmThreshold,double silenceThreshold_dB,double minSilenceDuration,double minSoundingDuration,conststring32 novoiceActivityLabel,conststring32 voiceActivityLabel)1584 autoTextGrid Sound_to_TextGrid_detectVoiceActivity_lsfm (Sound me, double timeStep, double longTermWindow, double shorttimeWindow, double fmin, double fmax,
1585 double lsfmThreshold, double silenceThreshold_dB, double minSilenceDuration, double minSoundingDuration,
1586 conststring32 novoiceActivityLabel, conststring32 voiceActivityLabel) {
1587 try {
1588 if (timeStep <= 0.0)
1589 timeStep = 0.01;
1590 Melder_require (fmin < fmax,
1591 U"The minimum frequency of the range should be smaller than the maximum frequency.");
1592 const double nyquistFrequency = 0.5 / my dx;
1593 Melder_clipRight (& fmax, nyquistFrequency);
1594 const double effectiveAnalysisWidth = std::max (0.02, 2.0 * timeStep);
1595 const double minimumFreqStep = 20.0;
1596 const double maximumTimeOversampling = 8.0, maximumFreqOversampling = 8.0;
1597 autoSpectrogram spectrogram = Sound_to_Spectrogram (me, effectiveAnalysisWidth, fmax, timeStep, minimumFreqStep,
1598 kSound_to_Spectrogram_windowShape::HANNING, maximumTimeOversampling, maximumFreqOversampling);
1599 autoMatrix lsfmMatrix = Spectrogram_getLongtermSpectralFlatnessMeasure (spectrogram.get(), longTermWindow, shorttimeWindow, fmin, fmax);
1600 autoTextGrid thee = TextGrid_create (my xmin, my xmax, U"VAD", U"");
1601 const IntervalTier vadTier = (IntervalTier) thy tiers->at [1];
1602 TextInterval_setText (vadTier -> intervals.at [1], voiceActivityLabel);
1603 if (minSilenceDuration > my xmax - my xmin)
1604 return thee;
1605 /*
1606 Step 1. Find activity intervals
1607 */
1608 VEC lsfm = lsfmMatrix -> z.row (1);
1609 conststring32 label;
1610 integer iinterval = 1;
1611 bool activityInterval = ( lsfm [1] < lsfmThreshold );
1612 for (integer index = 2; index <= lsfm.size; index ++) {
1613 bool addBoundary = false;
1614 if (lsfm [index] < lsfmThreshold) {
1615 if (! activityInterval) { // start of activity
1616 addBoundary = true;
1617 activityInterval = true;
1618 label = novoiceActivityLabel;
1619 }
1620 } else {
1621 if (activityInterval) { // end of activity
1622 addBoundary = true;
1623 activityInterval = false;
1624 label = voiceActivityLabel;
1625 }
1626 }
1627
1628 if (addBoundary) {
1629 const double time = Sampled_indexToX (lsfmMatrix.get(), index);
1630 IntervalTier_addBoundaryUnsorted (vadTier, iinterval, time, label);
1631 iinterval ++;
1632 }
1633 }
1634 /*
1635 Set the label of the last interval.
1636 */
1637 label = activityInterval ? voiceActivityLabel : novoiceActivityLabel;
1638 TextInterval_setText (vadTier -> intervals.at [iinterval], label);
1639 vadTier -> intervals. sort ();
1640 /*
1641 Step 2: remove intervals that are too short.
1642 First remove short activity intervals in-between noActivity intervals and
1643 then remove the remaining short silence intervals.
1644 This works much better than first removing short silence intervals and
1645 then short non-silence intervals.
1646 */
1647 if (minSoundingDuration > 0.0) {
1648 IntervalTier_cutIntervals_minimumDuration (vadTier, voiceActivityLabel, minSoundingDuration);
1649 IntervalTier_combineIntervalsOnLabelMatch (vadTier, novoiceActivityLabel);
1650 }
1651 if (minSilenceDuration > 0.0) {
1652 IntervalTier_cutIntervals_minimumDuration (vadTier, novoiceActivityLabel, minSilenceDuration);
1653 IntervalTier_combineIntervalsOnLabelMatch (vadTier, voiceActivityLabel);
1654 }
1655 /*
1656 Step 3: Find silences, because the VAD doesn't
1657 */
1658 if (silenceThreshold_dB > -50.0) {
1659 /*
1660 Step 3: Find silences, because the VAD doesn't
1661 */
1662 autoIntensity intensity = Spectrogram_to_Intensity_silenceDetection (spectrogram.get());
1663 autoTextGrid silences = Intensity_to_TextGrid_detectSilences (intensity.get(), silenceThreshold_dB, minSilenceDuration, minSoundingDuration, novoiceActivityLabel, voiceActivityLabel);
1664 /*
1665 Step 4: Union of the two VAD and the silences intervals.
1666 */
1667 autoTextGrid unionTextGrid = TextGrid_create (my xmin, my xmax, U"union", U"");
1668 integer unionIndex = 1;
1669 const double timeMargin = std::max (0.02, std::min (0.02, std::min (minSilenceDuration, minSoundingDuration)));
1670 const IntervalTier silenceTier = (IntervalTier) silences -> tiers -> at [1];
1671 const IntervalTier unionTier = (IntervalTier) unionTextGrid -> tiers -> at [1];
1672 const integer silenceNumberOfIntervals = silenceTier -> intervals.size;
1673 const integer vadNumberOfIntervals = vadTier -> intervals.size;
1674 for (integer silenceIndex = 1; silenceIndex <= silenceNumberOfIntervals; silenceIndex ++) {
1675 const TextInterval silenceTextInterval = silenceTier -> intervals.at [silenceIndex];
1676 const double silenceStartTime = silenceTextInterval -> xmin;
1677 const conststring32 silenceLabel = silenceTextInterval -> text.get();
1678 const double silenceEndTime = silenceTextInterval -> xmax;
1679 if (Melder_stringMatchesCriterion (silenceLabel, kMelder_string::EQUAL_TO, novoiceActivityLabel, false)) {
1680 /*
1681 Silent interval. Simply add it.
1682 */
1683 IntervalTier_addBoundaryUnsorted (unionTier, unionIndex, silenceEndTime, novoiceActivityLabel);
1684 unionIndex ++;
1685 } else {
1686 /*
1687 Non silent interval. Do we have to split it into VAD intervals?
1688 */
1689 integer vadIndex = IntervalTier_timeToIndex (vadTier, silenceStartTime);
1690 bool unionContinues = true;
1691 while (unionContinues && vadIndex <= vadNumberOfIntervals) {
1692 const TextInterval vadTextInterval = vadTier -> intervals.at [vadIndex];
1693 const double vadStartTime = vadTextInterval -> xmin;
1694 const conststring32 vadLabel = vadTextInterval -> text.get();
1695 const double vadEndTime = vadTextInterval -> xmax;
1696 if (vadEndTime > silenceEndTime - timeMargin) {
1697 // extends beyound the end
1698 IntervalTier_addBoundaryUnsorted (unionTier, unionIndex, silenceEndTime, vadLabel);
1699 unionIndex ++;
1700 unionContinues = false;
1701 } else {
1702 IntervalTier_addBoundaryUnsorted (unionTier, unionIndex, vadEndTime, vadLabel);
1703 unionIndex ++;
1704 }
1705 vadIndex ++;
1706 }
1707 }
1708 }
1709 unionTier -> intervals. sort ();
1710 IntervalTier_combineIntervalsOnLabelMatch (unionTier, novoiceActivityLabel);
1711 TextGrid_addTier_copy (unionTextGrid.get(), silenceTier);
1712 TextGrid_addTier_copy (unionTextGrid.get(), vadTier);
1713 return unionTextGrid;
1714 }
1715 return thee;
1716 } catch (MelderError) {
1717 Melder_throw (me, U": could not detect voice activity.");
1718 }
1719 }
1720
Sound_to_TextGrid_detectSilences(Sound me,double minPitch,double timeStep,double silenceThreshold,double minSilenceDuration,double minSoundingDuration,conststring32 silentLabel,conststring32 soundingLabel)1721 autoTextGrid Sound_to_TextGrid_detectSilences (Sound me, double minPitch, double timeStep,
1722 double silenceThreshold, double minSilenceDuration, double minSoundingDuration,
1723 conststring32 silentLabel, conststring32 soundingLabel) {
1724 try {
1725 const bool subtractMeanPressure = true;
1726 autoSound filtered = Sound_filter_passHannBand (me, 80.0, 8000.0, 80.0);
1727 autoIntensity thee = Sound_to_Intensity (filtered.get(), minPitch, timeStep, subtractMeanPressure);
1728 autoTextGrid him = Intensity_to_TextGrid_detectSilences (thee.get(), silenceThreshold, minSilenceDuration, minSoundingDuration, silentLabel, soundingLabel);
1729 return him;
1730 } catch (MelderError) {
1731 Melder_throw (me, U": no TextGrid with silences created.");
1732 }
1733 }
1734
Sound_getStartAndEndTimesOfSounding(Sound me,double minPitch,double timeStep,double silenceThreshold,double minSilenceDuration,double minSoundingDuration,double * out_t1,double * out_t2)1735 void Sound_getStartAndEndTimesOfSounding (Sound me, double minPitch, double timeStep, double silenceThreshold, double minSilenceDuration, double minSoundingDuration, double *out_t1, double *out_t2) {
1736 try {
1737 const conststring32 silentLabel = U"-", soundingLabel = U"+";
1738 autoTextGrid dbs = Sound_to_TextGrid_detectSilences (me, minPitch, timeStep, silenceThreshold, minSilenceDuration, minSoundingDuration, silentLabel, soundingLabel);
1739 const IntervalTier tier = (IntervalTier) dbs -> tiers->at [1];
1740 Melder_assert (tier -> intervals.size > 0);
1741 TextInterval interval = tier -> intervals.at [1];
1742 if (out_t1) {
1743 *out_t1 = my xmin;
1744 if (Melder_equ (interval -> text.get(), silentLabel))
1745 *out_t1 = interval -> xmax;
1746 }
1747 if (out_t2) {
1748 *out_t2 = my xmax;
1749 interval = tier -> intervals.at [tier -> intervals.size];
1750 if (Melder_equ (interval -> text.get(), silentLabel))
1751 *out_t2 = interval -> xmin;
1752 }
1753 } catch (MelderError) {
1754 Melder_throw (U"Sounding times not found.");
1755 }
1756 }
1757
Sound_IntervalTier_cutPartsMatchingLabel(Sound me,IntervalTier thee,conststring32 match)1758 autoSound Sound_IntervalTier_cutPartsMatchingLabel (Sound me, IntervalTier thee, conststring32 match) {
1759 try {
1760 /*
1761 Count samples of the trimmed sound
1762 */
1763 integer ixmin, ixmax, numberOfSamples = 0, previous_ixmax = 0;
1764 double xmin = my xmin; // start time of output sound is start time of input sound
1765 for (integer iint = 1; iint <= thy intervals.size; iint ++) {
1766 TextInterval interval = thy intervals.at [iint];
1767 if (! Melder_equ (interval -> text.get(), match)) {
1768 numberOfSamples += Sampled_getWindowSamples (me, interval -> xmin, interval -> xmax, & ixmin, & ixmax);
1769 /*
1770 If two contiguous intervals have to be copied then the last sample of previous interval
1771 and first sample of current interval might sometimes be equal
1772 */
1773 if (ixmin == previous_ixmax)
1774 -- numberOfSamples;
1775 previous_ixmax = ixmax;
1776 } else { // matches label
1777 if (iint == 1) // Start time of output sound is end time of first interval
1778 xmin = interval -> xmax;
1779 }
1780 }
1781 /*
1782 Now copy the parts. The output sound starts at xmin
1783 */
1784 autoSound him = Sound_create (my ny, xmin, xmin + numberOfSamples * my dx, numberOfSamples, my dx, xmin + 0.5 * my dx);
1785 numberOfSamples = 0;
1786 previous_ixmax = 0;
1787 for (integer iint = 1; iint <= thy intervals.size; iint ++) {
1788 const TextInterval interval = thy intervals.at [iint];
1789 if (! Melder_equ (interval -> text.get(), match)) {
1790 Sampled_getWindowSamples (me, interval -> xmin, interval -> xmax, & ixmin, & ixmax);
1791 if (ixmin == previous_ixmax)
1792 ixmin ++;
1793 previous_ixmax = ixmax;
1794 integer numberOfSamplesToCopy = ixmax - ixmin + 1;
1795 his z.part (1, my ny, numberOfSamples + 1, numberOfSamples + numberOfSamplesToCopy) <<= my z.part (1, my ny, ixmin, ixmax);
1796 numberOfSamples += numberOfSamplesToCopy;
1797 }
1798 }
1799 Melder_assert (numberOfSamples == his nx);
1800 return him;
1801 } catch (MelderError) {
1802 Melder_throw (me, U": intervals not trimmed.");
1803 }
1804 }
1805
Sound_trimSilences(Sound me,double trimDuration,bool onlyAtStartAndEnd,double minPitch,double timeStep,double silenceThreshold,double minSilenceDuration,double minSoundingDuration,autoTextGrid * p_tg,conststring32 trimLabel)1806 autoSound Sound_trimSilences (Sound me, double trimDuration, bool onlyAtStartAndEnd, double minPitch, double timeStep, double silenceThreshold, double minSilenceDuration, double minSoundingDuration, autoTextGrid *p_tg, conststring32 trimLabel) {
1807 try {
1808 Melder_require (my ny == 1,
1809 U"The sound should be a mono sound.");
1810
1811 const conststring32 silentLabel = U"silent", soundingLabel = U"sounding";
1812 const conststring32 copyLabel = U"";
1813 autoTextGrid tg = Sound_to_TextGrid_detectSilences (me, minPitch, timeStep, silenceThreshold, minSilenceDuration, minSoundingDuration, silentLabel, soundingLabel);
1814 autoIntervalTier itg = Data_copy ((IntervalTier) tg -> tiers->at [1]);
1815 IntervalTier tier = (IntervalTier) tg -> tiers->at [1];
1816 for (integer iint = 1; iint <= tier -> intervals.size; iint ++) {
1817 const TextInterval ti = tier -> intervals.at [iint];
1818 const TextInterval ati = itg -> intervals.at [iint];
1819 const double duration = ti -> xmax - ti -> xmin;
1820 if (duration > trimDuration && Melder_equ (ti -> text.get(), silentLabel)) { // silent
1821 conststring32 label = trimLabel;
1822 if (iint == 1) { // first is special
1823 const double trim_t = ti -> xmax - trimDuration;
1824 IntervalTier_moveBoundary (itg.get(), iint, false, trim_t);
1825 } else if (iint == tier -> intervals.size) { // last is special
1826 const double trim_t = ti -> xmin + trimDuration;
1827 IntervalTier_moveBoundary (itg.get(), iint, true, trim_t);
1828 } else {
1829 if (onlyAtStartAndEnd) {
1830 label = ati -> text.get();
1831 } else {
1832 double trim_t = ti -> xmin + 0.5 * trimDuration;
1833 IntervalTier_moveBoundary (itg.get(), iint, true, trim_t);
1834 trim_t = ti -> xmax - 0.5 * trimDuration;
1835 IntervalTier_moveBoundary (itg.get(), iint, false, trim_t);
1836 }
1837 }
1838 TextInterval_setText (ati, label);
1839 } else { // sounding
1840 TextInterval_setText (ati, copyLabel);
1841 }
1842 }
1843 autoSound thee = Sound_IntervalTier_cutPartsMatchingLabel (me, itg.get(), trimLabel);
1844 if (p_tg) {
1845 TextGrid_addTier_copy (tg.get(), itg.get());
1846 *p_tg = tg.move();
1847 }
1848 return thee;
1849 } catch (MelderError) {
1850 Melder_throw (me, U": silences not trimmed.");
1851 }
1852 }
1853
Sound_trimSilencesAtStartAndEnd(Sound me,double trimDuration,double minPitch,double timeStep,double silenceThreshold,double minSilenceDuration,double minSoundingDuration,double * startTimeOfSounding,double * endTimeOfSounding)1854 autoSound Sound_trimSilencesAtStartAndEnd (Sound me, double trimDuration, double minPitch, double timeStep,
1855 double silenceThreshold, double minSilenceDuration, double minSoundingDuration, double *startTimeOfSounding, double *endTimeOfSounding)
1856 {
1857 try {
1858 autoTextGrid tg;
1859 autoSound thee = Sound_trimSilences (me, trimDuration, true, minPitch, timeStep, silenceThreshold,
1860 minSilenceDuration, minSoundingDuration, & tg, U"trimmed");
1861 const IntervalTier trim = (IntervalTier) tg -> tiers->at [2];
1862 const TextInterval ti1 = trim -> intervals.at [1];
1863 if (startTimeOfSounding) {
1864 *startTimeOfSounding = my xmin;
1865 if (Melder_equ (ti1 -> text.get(), U"trimmed"))
1866 *startTimeOfSounding = ti1 -> xmax;
1867 }
1868 const TextInterval ti2 = trim -> intervals.at [trim -> intervals.size];
1869 if (endTimeOfSounding) {
1870 *endTimeOfSounding = my xmax;
1871 if (Melder_equ (ti2 -> text.get(), U"trimmed"))
1872 *endTimeOfSounding = ti2 -> xmin;
1873 }
1874 return thee;
1875 } catch (MelderError) {
1876 Melder_throw (me, U": silences not trimmed.");
1877 }
1878 }
1879
1880 /* Compatibility with old Sound(&pitch)_changeGender ***********************************/
1881
PitchTier_modifyRange_old(PitchTier me,double tmin,double tmax,double factor,double fmid)1882 static void PitchTier_modifyRange_old (PitchTier me, double tmin, double tmax, double factor, double fmid) {
1883 for (integer i = 1; i <= my points.size; i ++) {
1884 const RealPoint point = my points.at [i];
1885 const double f = point -> value;
1886 if (point -> number < tmin || point -> number > tmax)
1887 continue;
1888 point -> value = fmid + (f - fmid) * factor;
1889 /*
1890 This scaling could lead to a negative value.
1891 Negative pitch values have no meaning,
1892 and even a zero pitch value cannot be handled by Sound_Point_Pitch_Duration_to_Sound.
1893 So in such a case we bail out.
1894 */
1895 if (point -> value < 0.0)
1896 Melder_throw (U"Change gender: your pitch manipulation would lead to negative pitch values.");
1897 }
1898 }
1899
Pitch_scaleTime_old(Pitch me,double scaleFactor)1900 static autoPitch Pitch_scaleTime_old (Pitch me, double scaleFactor) {
1901 try {
1902 double dx = my dx, x1 = my x1, xmax = my xmax;
1903 if (scaleFactor != 1.0) {
1904 dx = my dx * scaleFactor;
1905 x1 = my xmin + 0.5 * dx;
1906 xmax = my xmin + my nx * dx;
1907 }
1908 autoPitch thee = Pitch_create (my xmin, xmax, my nx, dx, x1, my ceiling, 2);
1909
1910 for (integer i = 1; i <= my nx; i ++) {
1911 double f = my frames [i].candidates [1].frequency;
1912 thy frames [i]. candidates [1].strength = my frames [i]. candidates [1].strength;
1913 f /= scaleFactor;
1914 if (f < my ceiling)
1915 thy frames [i]. candidates [1]. frequency = f;
1916 }
1917 return thee;
1918 } catch (MelderError) {
1919 Melder_throw (U"Pitch not scaled.");
1920 }
1921 }
1922
Sound_Pitch_changeGender_old(Sound me,Pitch him,double formantRatio,double new_pitch,double pitchRangeFactor,double durationFactor)1923 autoSound Sound_Pitch_changeGender_old (Sound me, Pitch him, double formantRatio, double new_pitch, double pitchRangeFactor, double durationFactor) {
1924 try {
1925 const double samplingFrequency_old = 1.0 / my dx;
1926
1927 Melder_require (my ny == 1,
1928 U"Change Gender works only on mono sounds.");
1929 Melder_require (my xmin == his xmin && my xmax == his xmax,
1930 U"The Pitch and the Sound object should have the same domain.");
1931 Melder_require (new_pitch >= 0,
1932 U"The new pitch median should not be negative.");
1933
1934 autoSound sound = Data_copy (me);
1935 Vector_subtractMean (sound.get());
1936
1937 if (formantRatio != 1.0)
1938 // Shift all frequencies (including pitch!)
1939 Sound_overrideSamplingFrequency (sound.get(), samplingFrequency_old * formantRatio);
1940
1941 autoPitch pitch = Pitch_scaleTime_old (him, 1 / formantRatio);
1942 autoPointProcess pulses = Sound_Pitch_to_PointProcess_cc (sound.get(), pitch.get());
1943 autoPitchTier pitchTier = Pitch_to_PitchTier (pitch.get());
1944
1945 const double median = Pitch_getQuantile (pitch.get(), 0, 0, 0.5, kPitch_unit::HERTZ);
1946 if (isdefined (median) && median != 0.0) {
1947 // Incorporate pitch shift from overriding the sampling frequency
1948 if (new_pitch == 0.0)
1949 new_pitch = median / formantRatio;
1950 const double factor = new_pitch / median;
1951 PitchTier_multiplyFrequencies (pitchTier.get(), sound -> xmin, sound -> xmax, factor);
1952 PitchTier_modifyRange_old (pitchTier.get(), sound -> xmin, sound -> xmax, pitchRangeFactor, new_pitch);
1953 } else {
1954 Melder_warning (U"There were no voiced segments found.");
1955 }
1956 autoDurationTier duration = DurationTier_create (my xmin, my xmax);
1957 RealTier_addPoint (duration.get(), (my xmin + my xmax) / 2, formantRatio * durationFactor);
1958
1959 autoSound thee = Sound_Point_Pitch_Duration_to_Sound (sound.get(), pulses.get(), pitchTier.get(), duration.get(), 1.25 / Pitch_getMinimum (pitch.get(), 0.0, 0.0, kPitch_unit::HERTZ, false));
1960 /*
1961 Resample to the original sampling frequency
1962 */
1963 if (formantRatio != 1.0)
1964 thee = Sound_resample (thee.get(), samplingFrequency_old, 10);
1965 return thee;
1966 } catch (MelderError) {
1967 Melder_throw (U"Sound not created from Pitch & Sound.");
1968 }
1969 }
1970
Sound_changeGender_old(Sound me,double fmin,double fmax,double formantRatio,double new_pitch,double pitchRangeFactor,double durationFactor)1971 autoSound Sound_changeGender_old (Sound me, double fmin, double fmax, double formantRatio, double new_pitch, double pitchRangeFactor, double durationFactor) {
1972 try {
1973 autoPitch pitch = Sound_to_Pitch (me, 0.8 / fmin, fmin, fmax);
1974 autoSound thee = Sound_Pitch_changeGender_old (me, pitch.get(), formantRatio, new_pitch, pitchRangeFactor, durationFactor);
1975 return thee;
1976 } catch (MelderError) {
1977 Melder_throw (U"Sound not created for gender change.");
1978 }
1979 }
1980
1981 /* End of compatibility with Sound_changeGender and Sound_Pitch_changeGender ***********************************/
1982
1983 /* Draw a sound vertically, from bottom to top */
Sound_draw_btlr(Sound me,Graphics g,double tmin,double tmax,double amin,double amax,kSoundDrawingDirection drawingDirection,bool garnish)1984 void Sound_draw_btlr (Sound me, Graphics g, double tmin, double tmax, double amin, double amax, kSoundDrawingDirection drawingDirection, bool garnish) {
1985 double xmin, xmax, ymin, ymax;
1986
1987 if (tmin == tmax) {
1988 tmin = my xmin;
1989 tmax = my xmax;
1990 }
1991 integer itmin, itmax;
1992 Matrix_getWindowSamplesX (me, tmin, tmax, & itmin, & itmax);
1993 if (amin == amax) {
1994 Matrix_getWindowExtrema (me, itmin, itmax, 1, my ny, & amin, & amax);
1995 if (amin == amax) {
1996 amin -= 1.0;
1997 amax += 1.0;
1998 }
1999 }
2000 /*
2001 In bottom-to-top-drawing, the maximum amplitude is on the left, the minimum on the right.
2002 */
2003 if (drawingDirection == kSoundDrawingDirection::BOTTOM_TO_TOP) {
2004 xmin = amax;
2005 xmax = amin;
2006 ymin = tmin;
2007 ymax = tmax;
2008 } else if (drawingDirection == kSoundDrawingDirection::TOP_TO_BOTTOM) {
2009 xmin = amin;
2010 xmax = amax;
2011 ymin = tmax;
2012 ymax = tmin;
2013 } else if (drawingDirection == kSoundDrawingDirection::RIGHT_TO_LEFT) {
2014 xmin = tmax;
2015 xmax = tmin;
2016 ymin = amin;
2017 ymax = amax;
2018 } else { //if (drawingDirection == kSoundDrawingDirection::LEFT_TO_RIGHT)
2019 xmin = tmin;
2020 xmax = tmax;
2021 ymin = amin;
2022 ymax = amax;
2023 }
2024 Graphics_setWindow (g, xmin, xmax, ymin, ymax);
2025 double a1 = my z [1] [itmin];
2026 double t1 = Sampled_indexToX (me, itmin);
2027 for (integer it = itmin + 1; it <= itmax; it ++) {
2028 const double t2 = Sampled_indexToX (me, it);
2029 const double a2 = my z [1] [it];
2030 if (drawingDirection == kSoundDrawingDirection::BOTTOM_TO_TOP ||
2031 drawingDirection == kSoundDrawingDirection::TOP_TO_BOTTOM) {
2032 Graphics_line (g, a1, t1, a2, t2);
2033 } else {
2034 Graphics_line (g, t1, a1, t2, a2);
2035 }
2036 a1 = a2;
2037 t1 = t2;
2038 }
2039 if (garnish) {
2040 if (drawingDirection == kSoundDrawingDirection::BOTTOM_TO_TOP) {
2041 if (amin * amax < 0.0)
2042 Graphics_markBottom (g, 0.0, false, true, true, nullptr);
2043 } else if (drawingDirection == kSoundDrawingDirection::TOP_TO_BOTTOM) {
2044 if (amin * amax < 0.0)
2045 Graphics_markTop (g, 0.0, false, true, true, nullptr);
2046 } else if (drawingDirection == kSoundDrawingDirection::RIGHT_TO_LEFT) {
2047 if (amin * amax < 0.0)
2048 Graphics_markRight (g, 0.0, false, true, true, nullptr);
2049 } else { //if (drawingDirection == kSoundDrawingDirection::LEFT_TO_RIGHT)
2050 if (amin * amax < 0.0)
2051 Graphics_markLeft (g, 0.0, false, true, true, nullptr);
2052 }
2053 Graphics_rectangle (g, xmin, xmax, ymin, ymax);
2054 }
2055 }
2056
Sound_fadeIn_general(Sound me,int channel,double time,double fadeTime,bool fromStart)2057 static void Sound_fadeIn_general (Sound me, int channel, double time, double fadeTime, bool fromStart) {
2058 Melder_require (channel >= 0 && channel <= my ny,
2059 U"Invalid channel number: ", channel, U".");
2060 const integer channelFrom = channel == 0 ? 1 : channel;
2061 const integer channelTo = channel == 0 ? my ny : channel;
2062
2063 double startTime = time > my xmax ? my xmax : ( time < my xmin ? my xmin : time );
2064 double endTime = startTime + fadeTime;
2065 if (startTime > endTime)
2066 std::swap (startTime, endTime);
2067
2068 Melder_require (startTime < my xmax,
2069 U"The start time for fade-in should earlier than the end time of the sound.");
2070
2071 const integer numberOfSamplesFade = Melder_ifloor (fabs (fadeTime) / my dx);
2072 autoVEC fadeWindow = raw_VEC (numberOfSamplesFade);
2073
2074 for (integer isamp = 1; isamp <= numberOfSamplesFade; isamp ++)
2075 fadeWindow [isamp] = 0.5 * (1.0 + cos (NUMpi*(1.0 + (isamp - 1.0)/ (numberOfSamplesFade - 1))));
2076
2077 const integer startSample = Sampled_xToNearestIndex (me, startTime);
2078 integer endSample = startSample + numberOfSamplesFade - 1;
2079 endSample = std::min (endSample, my nx);
2080
2081 for (integer ichannel = channelFrom; ichannel <= channelTo; ichannel ++) {
2082 my z [channel].part (startSample, endSample) *= fadeWindow.part (1, endSample - startSample + 1);
2083 if (fromStart && startSample > 1)
2084 my z [channel].part (1, startSample - 1) <<= 0.0;
2085 }
2086 }
2087
Sound_fadeOut_general(Sound me,int channel,double time,double fadeTime,bool toEnd)2088 static void Sound_fadeOut_general (Sound me, int channel, double time, double fadeTime, bool toEnd) {
2089 Melder_require (channel >= 0 && channel <= my ny,
2090 U"Invalid channel number: ", channel, U".");
2091 const integer channelFrom = channel == 0 ? 1 : channel;
2092 const integer channelTo = channel == 0 ? my ny : channel;
2093
2094 Melder_assert (my xmax >= my xmin); // for Melder_clipped
2095 double startTime = Melder_clipped (my xmin, time, my xmax);
2096 double endTime = startTime + fadeTime;
2097 if (startTime > endTime)
2098 std::swap (startTime, endTime);
2099
2100 Melder_require (endTime > my xmin,
2101 U"The end time for fade-out should not be earlier than the start time of the sound.");
2102
2103 const integer numberOfSamplesFade = Melder_ifloor (fabs (fadeTime) / my dx);
2104 autoVEC fadeWindow = raw_VEC (numberOfSamplesFade);
2105
2106 for (integer isamp = 1; isamp <= numberOfSamplesFade; isamp ++)
2107 fadeWindow [isamp] = 0.5 * (1.0 + cos (NUMpi*((isamp - 1.0)/ (numberOfSamplesFade - 1))));
2108
2109 const integer startSample = Sampled_xToNearestIndex (me, startTime);
2110 integer endSample = startSample + numberOfSamplesFade - 1;
2111 endSample = std::min (endSample, my nx);
2112 Melder_require (endSample > 0,
2113 U"The fade-out interval should not be located before the start time of the sound.");
2114
2115 for (integer ichannel = channelFrom; ichannel <= channelTo; ichannel ++) {
2116 my z [channel].part (startSample, endSample) *= fadeWindow.part (1, endSample - startSample + 1);
2117 if (toEnd && endSample < my nx)
2118 my z [channel].part (endSample + 1, my nx) <<= 0.0;
2119 }
2120 }
2121
Sound_fade(Sound me,int channel,double t,double fadeTime,bool fadeOut,bool fadeGlobal)2122 void Sound_fade (Sound me, int channel, double t, double fadeTime, bool fadeOut, bool fadeGlobal) {
2123 integer numberOfSamples = Melder_ifloor (fabs (fadeTime) / my dx);
2124 double t1 = t, t2 = t1 + fadeTime;
2125 bool fadeIn = ! fadeOut;
2126 const conststring32 fade_string = ( fadeOut ? U"out" : U"in" );
2127
2128 Melder_require (channel >= 0 && channel <= my ny,
2129 U"Invalid channel number: ", channel, U".");
2130
2131 if (t > my xmax) {
2132 t = my xmax;
2133 if (fadeIn) {
2134 Melder_warning (U"The start time of the fade-in is after the end time of the sound. The fade-in will not happen.");
2135 return;
2136 }
2137 } else if (t < my xmin) {
2138 t = my xmin;
2139 if (fadeOut) {
2140 Melder_warning (U"The start time of the fade-out is before the start time of the sound. The fade-out will not happen.");
2141 return;
2142 }
2143 }
2144 if (fadeTime < 0.0) {
2145 t1 = t + fadeTime;
2146 t2 = t;
2147 } else if (fadeTime > 0.0) {
2148 t1 = t;
2149 t2 = t + fadeTime;
2150 } else {
2151 Melder_warning (U"You have given a \"Fade time\" of zero seconds. The fade-", fade_string, U" will not happen.");
2152 return;
2153 }
2154 integer i0 = 0, iystart, iyend;
2155 if (channel == 0) { // all
2156 iystart = 1;
2157 iyend = my ny;
2158 } else {
2159 iystart = iyend = channel;
2160 }
2161
2162 integer istart = Sampled_xToNearestIndex (me, t1);
2163 if (istart < 1)
2164 istart = 1;
2165 if (istart >= my nx) {
2166 Melder_warning (U"The part to fade ", fade_string, U" lies after the end time of the sound. The fade-", fade_string, U" will not happen.");
2167 return;
2168 }
2169 integer iend = Sampled_xToNearestIndex (me, t2);
2170 if (iend <= 1) {
2171 Melder_warning (U"The part to fade ", fade_string, U" lies before the start time of the sound. Fade-", fade_string, U" will be incomplete.");
2172 return;
2173 }
2174 if (iend > my nx)
2175 iend = my nx;
2176 if (iend - istart + 1 >= numberOfSamples) {
2177 numberOfSamples = iend - istart + 1;
2178 } else {
2179 /*
2180 If the start of the fade is before xmin, arrange starting phase.
2181 The end of the fade after xmax presents no problems (i0 = 0).
2182 */
2183 if (fadeTime < 0)
2184 i0 = numberOfSamples - (iend - istart + 1);
2185 Melder_warning (U"The fade time is larger than the part of the sound to fade ", fade_string, U". Fade-", fade_string, U" will be incomplete.");
2186 }
2187 for (integer ichannel = iystart; ichannel <= iyend; ichannel ++) {
2188 for (integer i = istart; i <= iend; i ++) {
2189 double cosp = cos (NUMpi * (i0 + i - istart) / (numberOfSamples - 1));
2190 if (fadeIn)
2191 cosp = -cosp; // fade-in
2192 my z [ichannel] [i] *= 0.5 * (1.0 + cosp);
2193 }
2194 if (fadeGlobal) {
2195 if (fadeIn) {
2196 if (istart > 1)
2197 my z [ichannel].part (1, istart - 1) <<= 0.0;
2198 } else {
2199 if (iend < my nx)
2200 my z [ichannel].part (iend, my nx) <<= 0.0;
2201 }
2202 }
2203 }
2204 }
2205
2206 /* 1; rect 2:hamming 3: bartlet 4: welch 5: hanning 6:gaussian */
Sound_createFromWindowFunction(double windowDuration,double samplingFrequency,int windowType)2207 autoSound Sound_createFromWindowFunction (double windowDuration, double samplingFrequency, int windowType) {
2208 try {
2209 autoSound me = Sound_createSimple (1, windowDuration, samplingFrequency);
2210
2211 for (integer i = 1; i <= my nx; i ++) {
2212 double phase = (my x1 + (i - 1) * my dx) / windowDuration;
2213 double value = 1.0;
2214 switch (windowType) {
2215 case 1:
2216 value = 1.0;
2217 break;
2218 case 2: /* Hamming */
2219 value = 0.54 - 0.46 * cos (NUM2pi * phase);
2220 break;
2221 case 3: /* Bartlett */
2222 value = 1.0 - fabs ( (2.0 * phase - 1.0));
2223 break;
2224 case 4: /* Welch */
2225 value = 1.0 - (2.0 * phase - 1.0) * (2.0 * phase - 1.0);
2226 break;
2227 case 5: /* Hanning */
2228 value = 0.5 * (1.0 - cos (NUM2pi * phase));
2229 break;
2230 case 6: { /* Gaussian */
2231 const double edge = exp (-12.0);
2232 phase -= 0.5; /* -0.5 .. +0.5 */
2233 value = (exp (-48.0 * phase * phase) - edge) / (1.0 - edge);
2234 break;
2235 }
2236 break;
2237 }
2238 my z [1] [i] = value;
2239 }
2240 return me;
2241 } catch (MelderError) {
2242 Melder_throw (U"Sound not created from window function.");
2243 }
2244 }
2245
2246 /* y [n] = sum(i=-n, i=n, x [n+mi])/(2*n+1) */
Sound_localAverage(Sound me,double averagingInterval,int windowType)2247 autoSound Sound_localAverage (Sound me, double averagingInterval, int windowType) {
2248 try {
2249 const double windowDuration = windowType == 6 ? 2 * averagingInterval : averagingInterval;
2250 autoSound thee = Data_copy (me);
2251 autoSound window = Sound_createFromWindowFunction (windowDuration, 1 / my dx, windowType);
2252
2253 const integer nswindow2 = window -> nx / 2;
2254 const integer nswindow2p = (window -> nx - 1) / 2; // nx is odd: one sample less in the forward direction
2255 if (nswindow2 < 1) {
2256 return thee;
2257 }
2258 const double *w = & window -> z [1] [0];
2259
2260 for (integer k = 1; k <= thy ny; k ++) {
2261 for (integer i = 1; i <= my nx; i ++) {
2262 longdouble sum = 0.0, wsum = 0.0;
2263 integer m = (nswindow2 + 1 - i + 1) < 1 ? 1 : (nswindow2 + 1 - i + 1);
2264 const integer jfrom = (i - nswindow2) < 1 ? 1 : (i - nswindow2);
2265 const integer jto = (i + nswindow2p) > my nx ? my nx : (i + nswindow2p);
2266 for (integer j = jfrom; j <= jto; j ++, m ++) {
2267 sum += my z [k] [j] * w [m];
2268 wsum += w [m];
2269 }
2270 thy z [k] [i] = double (sum / wsum);
2271 }
2272 }
2273 return thee;
2274 } catch (MelderError) {
2275 Melder_throw (me, U": no Sound (local average) created.");
2276 }
2277 }
2278
_Sound_garnish(Sound me,Graphics g,double tmin,double tmax,double minimum,double maximum)2279 static void _Sound_garnish (Sound me, Graphics g, double tmin, double tmax, double minimum, double maximum) {
2280 Graphics_drawInnerBox (g);
2281 Graphics_textBottom (g, true, U"Time (s)");
2282 Graphics_marksBottom (g, 2, true, true, false);
2283 Graphics_setWindow (g, tmin, tmax, minimum - (my ny - 1) * (maximum - minimum), maximum);
2284 Graphics_markLeft (g, minimum, true, true, false, nullptr);
2285 Graphics_markLeft (g, maximum, true, true, false, nullptr);
2286 if (minimum != 0.0 && maximum != 0.0 && (minimum > 0.0) != (maximum > 0.0))
2287 Graphics_markLeft (g, 0.0, true, true, true, nullptr);
2288 if (my ny == 2) {
2289 Graphics_setWindow (g, tmin, tmax, minimum, maximum + (my ny - 1) * (maximum - minimum));
2290 Graphics_markRight (g, minimum, true, true, false, nullptr);
2291 Graphics_markRight (g, maximum, true, true, false, nullptr);
2292 if (minimum != 0.0 && maximum != 0.0 && (minimum > 0.0) != (maximum > 0.0))
2293 Graphics_markRight (g, 0.0, true, true, true, nullptr);
2294 }
2295 }
2296
autowindowAndGetWindowSamplesAndAutoscale(Sound me,double * tmin,double * tmax,double * minimum,double * maximum,integer * ixmin,integer * ixmax)2297 static void autowindowAndGetWindowSamplesAndAutoscale (Sound me, double *tmin, double *tmax, double *minimum, double *maximum, integer *ixmin, integer *ixmax) {
2298 if (*tmin == *tmax) {
2299 *tmin = my xmin;
2300 *tmax = my xmax;
2301 }
2302
2303 Matrix_getWindowSamplesX (me, *tmin, *tmax, ixmin, ixmax);
2304
2305 if (*minimum == *maximum) {
2306 Matrix_getWindowExtrema (me, *ixmin, *ixmax, 1, my ny, minimum, maximum);
2307 if (*minimum == *maximum) {
2308 *minimum -= 1.0;
2309 *maximum += 1.0;
2310 }
2311 }
2312 }
2313
2314 /*
2315 Given sample numbers ileft and ileft+1, where the formula evaluates to the booleans left and right, respectively.
2316 We want to find the x value in this interval where the formula switches from true to false.
2317 The x-value of the best point is approximated by a number of bisections.
2318 It is essential that the intermediate interpolated y-values are always between the values at samples ileft and ileft+1.
2319 We cannot use a sinc-interpolation because at strong amplitude changes high-frequency oscillations may occur.
2320 */
Sound_findIntermediatePoint_bs(Sound me,integer ichannel,integer ileft,bool left,bool right,conststring32 formula,Interpreter interpreter,integer numberOfBisections,double * x,double * y)2321 static void Sound_findIntermediatePoint_bs (Sound me, integer ichannel, integer ileft, bool left, bool right,
2322 conststring32 formula, Interpreter interpreter, integer numberOfBisections, double *x, double *y)
2323 {
2324 Melder_require (left != right,
2325 U"Invalid situation.");
2326
2327 if (left) {
2328 *x = Matrix_columnToX (me, ileft);
2329 *y = my z [ichannel] [ileft];
2330 } else {
2331 *x = Matrix_columnToX (me, ileft + 1);
2332 *y = my z [ichannel] [ileft + 1];
2333 }
2334
2335 if (numberOfBisections < 1)
2336 return;
2337 /*
2338 For the bisection we create a Sound with only 3 samples in it which is sufficient for doing linear interpolation.
2339 The domain needs to be the same as the sound otherwise the formula might give wrong answers because
2340 it might contains references to other matrix objects!
2341 We also need all the channels because the formula might involve relations between the
2342 sample values in these channels too!
2343 */
2344 double xleft = Matrix_columnToX (me, ileft);
2345 autoSound thee = Sound_create (my ny, my xmin, my xmax, 3, 0.5 * my dx, xleft);
2346
2347 for (integer channel = 1; channel <= my ny; channel ++) {
2348 thy z [channel] [1] = my z [channel] [ileft];
2349 thy z [channel] [3] = my z [channel] [ileft + 1];
2350 }
2351 integer istep = 1;
2352 double xright = xleft + my dx, xmid; // !!
2353 do {
2354 xmid = 0.5 * (xleft + xright); // the bisection
2355
2356 for (integer channel = 1; channel <= my ny; channel ++)
2357 thy z [channel] [2] = Vector_getValueAtX (me, xmid, channel, kVector_valueInterpolation :: LINEAR);
2358 Formula_compile (interpreter, thee.get(), formula, kFormula_EXPRESSION_TYPE_NUMERIC, true);
2359 Formula_Result result;
2360 Formula_run (ichannel, 2, & result);
2361 const bool mid = ( result. numericResult != 0.0 );
2362
2363 thy dx *= 0.5;
2364 if (left == mid) {
2365 xleft = xmid;
2366 for (integer channel = 1; channel <= my ny; channel ++)
2367 thy z [channel] [1] = thy z [channel] [2];
2368 thy x1 = xleft;
2369 } else {
2370 xright = xmid;
2371 for (integer channel = 1; channel <= my ny; channel ++)
2372 thy z [channel] [3] = thy z [channel] [2];
2373 }
2374 istep ++;
2375 } while (istep < numberOfBisections);
2376
2377 *x = xmid;
2378 *y = thy z [ichannel] [2];
2379 }
2380
Sound_drawWhere(Sound me,Graphics g,double tmin,double tmax,double minimum,double maximum,bool garnish,conststring32 method,integer numberOfBisections,conststring32 formula,Interpreter interpreter)2381 void Sound_drawWhere (Sound me, Graphics g, double tmin, double tmax, double minimum, double maximum,
2382 bool garnish, conststring32 method, integer numberOfBisections, conststring32 formula, Interpreter interpreter) {
2383
2384 Formula_compile (interpreter, me, formula, kFormula_EXPRESSION_TYPE_NUMERIC, true);
2385 Formula_Result result;
2386
2387 integer ixmin, ixmax;
2388 autowindowAndGetWindowSamplesAndAutoscale (me, & tmin, & tmax, & minimum, & maximum, & ixmin, & ixmax);
2389 /*
2390 Set coordinates for drawing.
2391 */
2392 Graphics_setInner (g);
2393 for (integer channel = 1; channel <= my ny; channel ++) {
2394 Graphics_setWindow (g, tmin, tmax, minimum - (my ny - channel) * (maximum - minimum), maximum + (channel - 1) * (maximum - minimum));
2395 if (str32str (method, U"bars") || str32str (method, U"Bars")) {
2396 for (integer ix = ixmin; ix <= ixmax; ix ++) {
2397 Formula_run (channel, ix, & result);
2398 if (result. numericResult != 0.0) {
2399 const double x = Sampled_indexToX (me, ix);
2400 double y = my z [channel] [ix];
2401 double left = x - 0.5 * my dx, right = x + 0.5 * my dx;
2402 if (y > maximum)
2403 y = maximum;
2404 if (left < tmin)
2405 left = tmin;
2406 if (right > tmax)
2407 right = tmax;
2408 Graphics_line (g, left, y, right, y);
2409 Graphics_line (g, left, y, left, minimum);
2410 Graphics_line (g, right, y, right, minimum);
2411 }
2412 }
2413 } else if (str32str (method, U"poles") || str32str (method, U"Poles")) {
2414 for (integer ix = ixmin; ix <= ixmax; ix ++) {
2415 Formula_run (channel, ix, & result);
2416 if (result. numericResult != 0.0) {
2417 const double x = Sampled_indexToX (me, ix);
2418 double y = my z [channel] [ix];
2419 if (y > maximum)
2420 y = maximum;
2421 if (y < minimum)
2422 y = minimum;
2423 Graphics_line (g, x, 0.0, x, y);
2424 }
2425 }
2426 } else if (str32str (method, U"speckles") || str32str (method, U"Speckles")) {
2427 for (integer ix = ixmin; ix <= ixmax; ix ++) {
2428 Formula_run (channel, ix, & result);
2429 if (result. numericResult != 0.0) {
2430 const double x = Sampled_indexToX (me, ix);
2431 Graphics_speckle (g, x, my z [channel] [ix]);
2432 }
2433 }
2434 } else {
2435 /*
2436 The default: draw as a curve.
2437 */
2438 Formula_run (channel, 1, & result);
2439 bool previous = (result. numericResult != 0.0); // numericResult == 0.0 means false!
2440 integer istart = ixmin; // first sample of segment to be drawn
2441 double xb, yb, xe, ye;
2442 for (integer ix = ixmin + 1; ix <= ixmax; ix ++) {
2443 Formula_run (channel, ix, & result);
2444 const bool current = (result. numericResult != 0.0); // numericResult == 0.0 means false!
2445 if (previous && not current) {
2446 /*
2447 T to F change: we are leaving a segment to be drawn
2448 1. Draw the curve between the sample numbers from istart to ix-1 (previous).
2449 2. Find the (x,y) in the interval between sample numbers ix-1 and ix (current) where the change from
2450 T to F occurs and draw the line between the previous point and (x,y).
2451 3. Compile the formula again because it has been changed during the interpolation
2452 */
2453 xb = Matrix_columnToX (me, ix - 1);
2454 yb = my z [channel] [ix - 1];
2455 if (ix - istart > 1) {
2456 const double x1 = Matrix_columnToX (me, istart);
2457 Graphics_function (g, & my z [channel] [0], istart, ix - 1, x1, xb);
2458 }
2459 Sound_findIntermediatePoint_bs (me, channel, ix - 1, previous, current, formula, interpreter, numberOfBisections, & xe, & ye);
2460 Graphics_line (g, xb, yb, xe, ye);
2461 Formula_compile (interpreter, me, formula, kFormula_EXPRESSION_TYPE_NUMERIC, true);
2462 } else if (not previous && current ) {
2463 /*
2464 F to T change: we are entering a segment to be drawn.
2465 1. Find the (x,y) where the F changes to T and then draw the line from that (x,y) to the current point.
2466 2. Compile the formula again
2467 */
2468 istart = ix;
2469 Sound_findIntermediatePoint_bs (me, channel, ix - 1, previous, current, formula, interpreter, numberOfBisections, & xb, & yb);
2470 xe = Sampled_indexToX (me, ix);
2471 ye = my z [channel] [ix];
2472 Graphics_line (g, xb, yb, xe, ye);
2473 Formula_compile (interpreter, me, formula, kFormula_EXPRESSION_TYPE_NUMERIC, true);
2474 }
2475 previous = current;
2476 }
2477 if (previous && ixmax - istart > 0) {
2478 xb = Matrix_columnToX (me, istart);
2479 xe = Matrix_columnToX (me, ixmax);
2480 Graphics_function (g, & my z [channel] [0], istart, ixmax, xb, xe);
2481 }
2482 }
2483 }
2484
2485 Graphics_setWindow (g, tmin, tmax, minimum, maximum);
2486 if (garnish && my ny == 2)
2487 Graphics_line (g, tmin, 0.5 * (minimum + maximum), tmax, 0.5 * (minimum + maximum));
2488 Graphics_unsetInner (g);
2489
2490 if (garnish)
2491 _Sound_garnish (me, g, tmin, tmax, minimum, maximum);
2492 }
2493
Sound_paintWhere(Sound me,Graphics g,MelderColour colour,double tmin,double tmax,double minimum,double maximum,double level,bool garnish,integer numberOfBisections,conststring32 formula,Interpreter interpreter)2494 void Sound_paintWhere (Sound me, Graphics g, MelderColour colour, double tmin, double tmax,
2495 double minimum, double maximum, double level, bool garnish,
2496 integer numberOfBisections, conststring32 formula, Interpreter interpreter)
2497 {
2498 try {
2499 Formula_compile (interpreter, me, formula, kFormula_EXPRESSION_TYPE_NUMERIC, true);
2500 Formula_Result result;
2501
2502 integer ixmin, ixmax;
2503 autowindowAndGetWindowSamplesAndAutoscale (me, & tmin, & tmax, & minimum, & maximum, & ixmin, & ixmax);
2504
2505 Graphics_setColour (g, colour);
2506 Graphics_setInner (g);
2507 for (integer channel = 1; channel <= my ny; channel ++) {
2508 Graphics_setWindow (g, tmin, tmax, minimum - (my ny - channel) * (maximum - minimum), maximum + (channel - 1) * (maximum - minimum));
2509 bool current, previous = true, fill = false; // fill only when leaving area
2510 double tmini = tmin, tmaxi = tmax, xe, ye;
2511 integer ix = ixmin;
2512 do {
2513 Formula_run (channel, ix, & result);
2514 current = ( result. numericResult != 0.0 );
2515 if (ix == ixmin)
2516 previous = current;
2517 if (previous != current) {
2518 Sound_findIntermediatePoint_bs (me, channel, ix - 1, previous, current, formula, interpreter, numberOfBisections, & xe, & ye);
2519 if (current) { // entering painting area
2520 tmini = xe;
2521 } else { //leaving painting area
2522 tmaxi = xe;
2523 fill = true;
2524 }
2525 Formula_compile (interpreter, me, formula, kFormula_EXPRESSION_TYPE_NUMERIC, true);
2526 }
2527 if (ix == ixmax && current) {
2528 tmaxi = tmax;
2529 fill = true;
2530 }
2531 if (fill) {
2532 autoPolygon him = Sound_to_Polygon (me, channel, tmini, tmaxi, minimum, maximum, level);
2533 Graphics_fillArea (g, his numberOfPoints, & his x [1], & his y [1]);
2534 fill = false;
2535 }
2536 previous = current;
2537 } while ( ++ix <= ixmax);
2538 }
2539 Graphics_setWindow (g, tmin, tmax, minimum, maximum);
2540 if (garnish && my ny == 2)
2541 Graphics_line (g, tmin, 0.5 * (minimum + maximum), tmax, 0.5 * (minimum + maximum));
2542 Graphics_unsetInner (g);
2543 if (garnish)
2544 _Sound_garnish (me, g, tmin, tmax, minimum, maximum);
2545 } catch (MelderError) {
2546 Melder_clearError ();
2547 }
2548 }
2549
Sounds_paintEnclosed(Sound me,Sound thee,Graphics g,MelderColour colour,double tmin,double tmax,double minimum,double maximum,bool garnish)2550 void Sounds_paintEnclosed (Sound me, Sound thee, Graphics g, MelderColour colour, double tmin, double tmax, double minimum, double maximum, bool garnish) {
2551 try {
2552 integer ixmin, ixmax;
2553 const integer numberOfChannels = std::max (my ny, thy ny);
2554 double min1 = minimum, max1 = maximum, tmin1 = tmin, tmax1 = tmax;
2555 double min2 = min1, max2 = max1, tmin2 = tmin1, tmax2 = tmax1;
2556 const double xmin = std::max (my xmin, thy xmin);
2557 const double xmax = std::min (my xmax, thy xmax);
2558 if (xmax <= xmin)
2559 return;
2560 if (tmin >= tmax) { // ppgb: why this, if autoscaling occurs anyway?
2561 tmin = xmin;
2562 tmax = xmax;
2563 }
2564 autowindowAndGetWindowSamplesAndAutoscale (thee, & tmin1, & tmax1, & min1, & max1, & ixmin, & ixmax);
2565 autowindowAndGetWindowSamplesAndAutoscale (me, & tmin2, & tmax2, & min2, & max2, & ixmin, & ixmax);
2566 minimum = std::min (min1, min2);
2567 maximum = std::max (max1, max2);
2568
2569 Graphics_setColour (g, colour);
2570 Graphics_setInner (g);
2571 for (integer channel = 1; channel <= numberOfChannels; channel ++) {
2572 autoPolygon him = Sounds_to_Polygon_enclosed (me, thee, channel, tmin, tmax, minimum, maximum);
2573 Graphics_setWindow (g, tmin, tmax, minimum - (numberOfChannels - channel) * (maximum - minimum), maximum + (channel - 1) * (maximum - minimum));
2574 Graphics_fillArea (g, his numberOfPoints, &his x [1], &his y [1]);
2575 }
2576 Graphics_setWindow (g, tmin, tmax, minimum, maximum);
2577 if (garnish && (my ny == 2 || thy ny == 2))
2578 Graphics_line (g, tmin, 0.5 * (minimum + maximum), tmax, 0.5 * (minimum + maximum));
2579 Graphics_unsetInner (g);
2580 if (garnish)
2581 _Sound_garnish (my ny == 2 ? me : thee, g, tmin, tmax, minimum, maximum);
2582 } catch (MelderError) {
2583 Melder_clearError ();
2584 }
2585 }
2586
2587 /* After a script by Ton Wempe */
Sound_reduceNoiseBySpectralSubtraction_mono(Sound me,Sound noise,double windowLength,double noiseReduction_dB)2588 static autoSound Sound_reduceNoiseBySpectralSubtraction_mono (Sound me, Sound noise, double windowLength, double noiseReduction_dB) {
2589 try {
2590 Melder_require (my dx == noise -> dx,
2591 U"The sound and the noise should have the same sampling frequency.");
2592 Melder_require (noise -> ny == 1 && noise -> ny == 1,
2593 U"The number of channels in the noise and the sound should equal 1.");
2594
2595 const double samplingFrequency = 1.0 / my dx, nyquistFrequency = 0.5 / my dx;
2596 autoSound denoised = Sound_create (1, my xmin, my xmax, my nx, my dx, my x1);
2597 autoSound const analysisWindow = Sound_createSimple (1, windowLength, samplingFrequency);
2598 const integer windowSamples = analysisWindow -> nx;
2599 const integer wantedNumberOfFrequencyBins = windowSamples / 2 + 1;
2600 autoSound const noise_copy = Data_copy (noise);
2601 Sound_multiplyByWindow (noise_copy.get(), kSound_windowShape::HANNING);
2602 /*
2603 The number of bands in the noise Ltas and the number of frequencies in the
2604 sound spectra preferably should to be equal.
2605 numberOfBands = Melder_iceiling (nyquistFrequency / bandwidth)
2606 wantedNumberOfFrequencyBins = windowSamples / 2 + 1;
2607 We can calculate the bandwidth to make numberOfBands == wantedNumberOfFrequencyBins by applying
2608 the following two conditions
2609 (1) nyquistFrequency / b > wantedNumberOfFrequencyBins - 1 && (2) nyquistFrequency / b < wantedNumberOfFrequencyBins
2610 (1) gives b1 < nyquistFrequency / (wantedNumberOfFrequencyBins - 1)
2611 (2) gives b2 > nyquistFrequency / wantedNumberOfFrequencyBins
2612 Take b = (b1 + b2) / 2
2613 */
2614 double bandwidth = nyquistFrequency * (wantedNumberOfFrequencyBins - 0.5) / (wantedNumberOfFrequencyBins * (wantedNumberOfFrequencyBins - 1));
2615 autoLtas const noiseLtas = Sound_to_Ltas (noise_copy.get(), bandwidth);
2616 Melder_assert (noiseLtas -> nx == wantedNumberOfFrequencyBins);
2617 autoVEC const noiseAmplitudes = raw_VEC (noiseLtas -> nx);
2618 for (integer iband = 1; iband <= noiseLtas -> nx; iband ++) {
2619 const double powerDensity = 4e-10 * pow (10.0, noiseLtas -> z [1] [iband] / 10.0);
2620 noiseAmplitudes [iband] = sqrt (0.5 * powerDensity);
2621 }
2622 autoMelderProgress progress (U"Remove noise");
2623
2624 const double noiseAmplitudeSubtractionScaleFactor = 1.0 - pow (10.0, noiseReduction_dB / 20.0);
2625 const integer stepSizeSamples = windowSamples / 4;
2626 const integer numberOfSteps = my nx / stepSizeSamples;
2627 for (integer istep = 1; istep <= numberOfSteps; istep ++) {
2628 const integer istart = (istep - 1) * stepSizeSamples + 1;
2629
2630 if (istart >= my nx)
2631 break; // finished
2632 const integer nsamples = std::min (my nx - istart + 1, windowSamples);
2633
2634 analysisWindow -> z.row (1).part (1, nsamples) <<= my z.row (1).part (istart, istart + nsamples - 1);
2635 if (nsamples < windowSamples)
2636 analysisWindow -> z.row (1).part (nsamples + 1, windowSamples) <<= 0.0;
2637
2638 // Sound_multiplyByWindow (analysisWindow.get(), kSound_windowShape::HANNING);
2639 autoSpectrum const analysisSpectrum = Sound_to_Spectrum (analysisWindow.get(), false);
2640 /*
2641 Suppress noise in the analysisSpectrum by subtracting the noise spectrum
2642 */
2643 VEC const re = analysisSpectrum -> z.row (1), im = analysisSpectrum -> z.row (2);
2644 for (integer ifreq = 1; ifreq <= analysisSpectrum -> nx; ifreq ++) {
2645 const double amp = hypot (re [ifreq], im [ifreq]);
2646 const double factor = std::max (1.0 - noiseAmplitudeSubtractionScaleFactor * noiseAmplitudes [ifreq] / amp, 1e-6);
2647 re [ifreq] *= factor;
2648 im [ifreq] *= factor;
2649 }
2650 autoSound const suppressed = Spectrum_to_Sound (analysisSpectrum.get());
2651 Sound_multiplyByWindow (suppressed.get(), kSound_windowShape::HANNING);
2652 denoised -> z.row (1).part (istart, istart + nsamples - 1) += 0.5 * suppressed -> z.row (1).part (1, nsamples); // 0.5 because of 2-fold
2653 if (istep % 10 == 1)
2654 Melder_progress (double (istep) / numberOfSteps,
2655 U"Remove noise: frame ", istep, U" out of ", numberOfSteps, U".");
2656 }
2657 return denoised;
2658 } catch (MelderError) {
2659 Melder_throw (me, U": noise not subtracted.");
2660 }
2661 }
2662
2663 //TODO improve?
Sound_findNoise(Sound me,double minimumNoiseDuration,double * noiseStart,double * noiseEnd)2664 static void Sound_findNoise (Sound me, double minimumNoiseDuration, double *noiseStart, double *noiseEnd) {
2665 try {
2666 *noiseStart = undefined;
2667 *noiseEnd = undefined;
2668 autoIntensity const intensity = Sound_to_Intensity (me, 20.0, 0.005, true);
2669 double tmin = Vector_getXOfMinimum (intensity.get(), intensity -> xmin, intensity -> xmax, kVector_peakInterpolation :: PARABOLIC) - minimumNoiseDuration / 2.0;
2670 double tmax = tmin + minimumNoiseDuration;
2671 if (tmin < my xmin) {
2672 tmin = my xmin;
2673 tmax = tmin + minimumNoiseDuration;
2674 }
2675 if (tmax > my xmax) {
2676 tmax = my xmax;
2677 tmin = tmax - minimumNoiseDuration;
2678 }
2679 Melder_require (tmin >= my xmin,
2680 U"Sound too short, or window length too long.");
2681
2682 *noiseStart = tmin;
2683 *noiseEnd = tmax;
2684 } catch (MelderError) {
2685 Melder_throw (me, U": noise not found.");
2686 }
2687 }
2688
Sound_removeNoise(Sound me,double noiseStart,double noiseEnd,double windowLength,double minBandFilterFrequency,double maxBandFilterFrequency,double smoothing,kSoundNoiseReductionMethod method)2689 autoSound Sound_removeNoise (Sound me, double noiseStart, double noiseEnd, double windowLength, double minBandFilterFrequency, double maxBandFilterFrequency, double smoothing, kSoundNoiseReductionMethod method) {
2690 return Sound_reduceNoise (me, noiseStart, noiseEnd, windowLength, minBandFilterFrequency, maxBandFilterFrequency, smoothing, 0.0, method);
2691 }
2692
Sound_reduceNoise(Sound me,double noiseStart,double noiseEnd,double windowLength,double minBandFilterFrequency,double maxBandFilterFrequency,double smoothing,double noiseReduction_dB,kSoundNoiseReductionMethod method)2693 autoSound Sound_reduceNoise (Sound me, double noiseStart, double noiseEnd, double windowLength, double minBandFilterFrequency, double maxBandFilterFrequency, double smoothing, double noiseReduction_dB, kSoundNoiseReductionMethod method) {
2694 try {
2695 autoSound const filtered = Sound_filter_passHannBand (me, minBandFilterFrequency, maxBandFilterFrequency, smoothing);
2696 autoSound denoised = Sound_create (my ny, my xmin, my xmax, my nx, my dx, my x1);
2697 const bool findNoise = ( noiseEnd <= noiseStart );
2698 const double minimumNoiseDuration = 2.0 * windowLength;
2699 for (integer ichannel = 1; ichannel <= my ny; ichannel ++) {
2700 autoSound denoisedi, channeli = Sound_extractChannel (filtered.get(), ichannel);
2701 if (findNoise)
2702 Sound_findNoise (channeli.get(), minimumNoiseDuration, & noiseStart, & noiseEnd);
2703 autoSound noise = Sound_extractPart (channeli.get(), noiseStart, noiseEnd, kSound_windowShape::RECTANGULAR, 1.0, false);
2704 if (method == kSoundNoiseReductionMethod::SPECTRAL_SUBTRACTION) { // spectral subtraction
2705 denoisedi = Sound_reduceNoiseBySpectralSubtraction_mono (filtered.get(), noise.get(), windowLength, noiseReduction_dB);
2706 } else {
2707 Melder_fatal (U"Unknown method in Sound_reduceNoise.");
2708 }
2709 denoised -> z.row (ichannel) <<= denoisedi -> z.row (1);
2710 }
2711 return denoised;
2712 } catch (MelderError) {
2713 Melder_throw (me, U": not denoised.");
2714 }
2715 }
2716
Sound_playAsFrequencyShifted(Sound me,double shiftBy,double newSamplingFrequency,integer precision)2717 void Sound_playAsFrequencyShifted (Sound me, double shiftBy, double newSamplingFrequency, integer precision) {
2718 try {
2719 autoSpectrum const spectrum = Sound_to_Spectrum (me, true);
2720 autoSpectrum const shifted = Spectrum_shiftFrequencies (spectrum.get(), shiftBy, newSamplingFrequency / 2, precision);
2721 autoSound const thee = Spectrum_to_Sound (shifted.get());
2722 Sound_playPart (thee.get(), my xmin, my xmax, nullptr, nullptr);
2723 } catch (MelderError) {
2724 Melder_throw (me, U" not played with frequencies shifted.");
2725 }
2726 }
2727
2728 /* End of file Sound_extensions.cpp */
2729