1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* */
34 /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35 /* Date: Fri May 9 1997 */
36 /* ------------------------------------------------------------------- */
37 /* Example of declaration and use of tracks. */
38 /* */
39 /*************************************************************************/
40
41
42 #include <iostream>
43 #include <cstdlib>
44 #include "EST_Track.h"
45 #include "EST_Wave.h"
46 #include "EST_sigpr.h"
47 #include "EST_error.h"
48
49
50 /** @name EST_Track class example code
51 * @toc
52 * Some examples of track manipulations.
53 *
54 */
55 //@{
56
main(void)57 int main(void)
58
59 {
60 int i, j;
61
62 /* This program is designed as an example not as something to run
63 so for testing purpose it simply exists */
64 exit(0);
65 /**@name Initialising and Resizing a Track
66
67 The constructor functions can be used to create a track with
68 zero frames and channels or a track with a specified number of
69 frames and channels
70 */
71
72 //@{
73 //@{ code
74 EST_Track tr; // default track declaration
75 EST_Track tra(500, 10); // allocate track with 500 frames and 10 channels
76 //@} code
77
78 /** tracks can be resized at any time:
79 */
80 //@{ code
81 tr.resize(10, 500); // resize track to have 10 frames and 500 channels
82 tr.resize(500, 10); // resize track to have 500 frames and 10 channels
83 //@} code
84
85 /** by default, resizing preserves values in the track. This
86 may involve copying some information, so if the existing values
87 are not needed, a flag can be set which usually results in
88 quicker resizing
89 */
90 //@{ code
91 tr.resize(250, 5, 0); // throw away any existing values
92 //@} code
93 /** If only the number of channels or the number of frames needs
94 to be changed, this an be done with the following functions:
95 */
96
97 //@{ code
98 tr.set_num_channels(10); // makes 10 channels, keeps same no of frames
99
100 tr.set_num_frames(400); // makes 400 frames, keeps same no of channels
101 //@} code
102 /** The preserve flag works in the same way with these functions
103 */
104 //@}
105
106 /** @name Simple Access
107
108 Values in the track can be accessed and set by frame
109 number and channel number.
110
111 The following resizes a track to have 500 frames and 10 channels
112 and fills every position with -5.
113 */
114 //@{
115 //@{ code
116 tr.resize(500, 10);
117
118 for (i = 0; i < tr.num_frames(); ++i)
119 for (j = 0; j < tr.num_channels(); ++j)
120 tr.a(i, j) = -5.0;
121
122 //@} code
123
124 /** A well formed track will have a time value, specified in seconds,
125 for every frame. The time array can be filled directly:
126 */
127 //@{ code
128 for (i = 0; i < tr.num_frames(); ++i)
129 tr.t(i) = (float) i * 0.01;
130 //@} code
131 /** which fills the time array with values 0.01, 0.02,
132 0.03... 5.0. However, A shortcut function is provided for fixed
133 frame spacing:
134 */
135 //@{ code
136 tr.fill_time(0.1);
137
138 //@} code
139 /** which performs the same operation as above. Frames do not have
140 to be evenly spaced, in pitch synchronous processing the time
141 array holds the time position of each pitch period. In such
142 cases each position in the time array must obviously be set
143 individually.</para><para>
144
145 Some representations have undefined values during certain
146 sections of the track, for example the F0 value during
147 unvoiced speech.</para><para>
148
149 The break/value array can be used to specify if a frame has an
150 undefined value.<para></para>. If a frame in this array is 1,
151 that means the amplitude is defined at that point. If 0, the
152 amplitude is undefined. By default, every frame has a value.
153 </para><para>
154
155 Breaks (undefined values) can be set by <method>set_break()
156 </method>. The following sets every frame from 50 to 99 as a
157 break:
158 */
159 //@{ code
160 for (i = 50; i < 100; ++i)
161 tr.set_break(i);
162 //@} code
163 /** frames can be turned back to values as follows:
164 */
165 //@{ code
166 for (i = 50; i < 100; ++i)
167 tr.set_value(i);
168 //@} code
169 /** It is up to individual functions to decide how to interpret breaks.
170 </para><para>
171 A frame's status can be checked as follows:
172 */
173 //@{ code
174 if (tr.val(60))
175 cout << "Frame 60 is not a break\n";
176
177 if (tr.track_break(60))
178 cout << "Frame 60 is a break\n";
179 //@} code
180 //@}
181
182 /** @name Naming Channels
183 @id tr-example-naming-channels
184
185 While channels can be accessed by their index, it is often useful
186 to give them names and refer to them by those names.
187
188 The set_channel_name() function sets the name of a single channel:
189 */
190 //@{
191 //@{ code
192 tr.set_channel_name("F0", 0);
193 tr.set_channel_name("energy", 1);
194 //@} code
195
196 /** An alternative is to use a predefined set of channel names
197 stored in a <emphasis>map</emphasis>.A track map
198 is simply a String List strings which describe a channel name
199 configuration. The <method>resize</method> function can take
200 this and resize the number of channels to the number of channels
201 indicated in the map, and give each channel its name from the
202 map. For example:
203 */
204 //@{ code
205 EST_StrList map;
206 map.append("F0");
207 map.append("energy");
208
209 tr.resize(500, map); // this makes a 2 channel track and sets the names to F0 and energy
210 //@} code
211
212 /** A convention is used for channels which comprise
213 components of a multi-dimensional analysis such as
214 cepstra. In such cases the channels are named
215 <replaceable>TYPE_I</replaceable>. The last coefficient is
216 always names <replaceable>TYPE_N</replaceable> regardless of
217 the number of coefficients. This is very useful in extracting
218 a set of related channels without needing to know the order
219 of the analysis.
220
221 For example, a track map might look like:
222
223 */
224 //@{ code
225
226 map.clear();
227 map.append("F0");
228 map.append("energy");
229
230 map.append("cep_0");
231 map.append("cep_1");
232 map.append("cep_2");
233 map.append("cep_3");
234 map.append("cep_4");
235 map.append("cep_5");
236 map.append("cep_6");
237 map.append("cep_7");
238 map.append("cep_N");
239
240 tr.resize(500, map); // makes a 11 channel track and sets the names
241 //@} code
242
243 /** This obviously gets unwieldy quite quickly, so the mapping
244 mechanism provides a short hand for multi-dimensional data.
245
246 */
247
248 //@{ code
249 map.clear();
250 map.append("F0");
251 map.append("energy");
252
253 map.append("$cep-0+8");
254
255 tr.resize(500, map); // does exactly as above
256 //@} code
257
258 /** Here $ indicates the special status, "cep" the name of the
259 coefficients, "-0" that the first is number 0 and "+8" that
260 there are 8 more to follow.
261 */
262
263 //@}
264
265
266 /** @name Access single frames or single channels.
267
268 @id tr-example-frames-and-channels
269
270 Often functions perform their operations on only a single
271 frame or channel, and the track class provides a general
272 mechanism for doing this.
273
274 Single frames or channels can be accessed as EST_FVectors:
275 Given a track with 500 frames and 10 channels, the 50th frame
276 can be accessed as:
277 */
278 //@{
279 //@{ code
280 EST_FVector tmp_frame;
281
282 tr.frame(tmp_frame, 50);
283 //@} code
284 /** now tmp_frame is 10 element vector, which is
285 a window into tr: any changes to the contents of tmp_frame will
286 change tr. tmp_frame cannot be resized. (This operation can
287 be thought in standard C terms as tmp_frame being a pointer
288 to the 5th frame of tr).
289 </para> <para>
290 Likewise with channels:
291 */
292 //@{ code
293 EST_FVector tmp_channel;
294
295 tr.channel(tmp_channel, 5);
296 //@} code
297 /** Again, tmp_channel is 500 element vector, which is
298 a window into tr: any changes to the contents of tmp_channel will
299 change tr. tmp_channel cannot be resized.
300 </para><para>
301 Channels can also be extracted by name:
302 */
303 //@{ code
304 tr.channel(tmp_channel, "energy");
305 //@} code
306 /** not all the channels need be put into the temporary frame.
307 Imagine we have a track with a F0 channel,a energy channel and
308 10 cepstrum channels. The following makes a frame from the
309 50th frame, which only includes the cepstral information in
310 channels 2 through 11 */
311 //@{ code
312 tr.frame(tmp_frame, 50, 2, 9);
313 //@} code
314 /** Likewise, the 5th channel with only the last 100 frames can be set up
315 as: */
316 //@{ code
317 tr.channel(tmp_channel, 5, 400, 100);
318 //@} code
319 //@}
320 /** @name Access multiple frames or channels.
321 @id tr-example-sub-tracks
322 In addition to extracting single frames and channels, multiple
323 frame and channel portions can be extracted in a similar
324 way. In the following example, we make a sub-track sub, which
325 points to the entire cepstrum portion of a track (channels 2
326 through 11)
327 */
328 //@{
329 //@{ code
330 EST_Track sub;
331
332 tr.sub_track(sub, 0, EST_ALL, 2, 9);
333
334 //@} code
335
336 /** <parameter>sub</parameter> behaves exactly like a normal
337 track in every way, except that it cannot be resized. Its
338 contents behave like a point into the designated portion of
339 <parameter>tr</parameter>, so changing
340 <parameter>sub</parameter> will change<parameter>
341 tr</parameter>.
342
343 </para><para> The first argument is the
344 <parameter>sub</parameter> track. The second states the start
345 frame and the total number of frames required. EST_ALL is a
346 special constant that specifies that all the frames are
347 required here. The next argument is the start channel number
348 (remember channels are numbered from 0), and the last argument
349 is the total number of channels required. </para><para>
350
351 This facility is particularly useful for using standard
352 signal processing functions efficiently. For example,
353 the <function>melcep</function> in the signal processing library
354 takes a waveform and produces a mel-scale cepstrum. It determines
355 the order of the cepstral analysis by the number of channels in
356 the track it is given, which has already been allocated to have
357 the correct number of frames and channels.
358
359 </para><para> The following will process the waveform
360 <parameter>sig</parameter>, produce a 10th order mel cepstrum
361 and place the output in <parameter>sub</parameter>. (For
362 explanation of the other options see
363 <function>melcep</function> */
364 //@{ code
365 EST_Wave sig;
366
367 melcep(sig, sub, 1.0, 20, 22);
368 //@} code
369
370 /** because we have made<parameter>sub</parameter> a window
371 into<parameter> tr</parameter>, the melcep function writes its
372 output into the correct location, i.e. channels 2-11 of tr. If
373 it were no for the sub_track facility, either a separate track
374 of the right size would be passed into melcep and then it
375 would be copied into tr (wasteful), or else tr would be passed
376 in and other arguments would have to specify which channels
377 should be written to (messy). </para><para>
378
379 Sub-tracks can also be set using channel names. The
380 following example does exactly as above, but is referenced by
381 the name of the first channel required and the number of
382 channels to follow: */
383 //@{ code
384
385 tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
386 //@} code
387 /** and this specifies the end by a string also:
388 */
389 //@{ code
390 tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
391 //@} code
392 /** sub_tracks can be any set of continuous frames and
393 channels. For example if a word started at frame 43 and ended
394 and frame 86, the following would set a sub track to that
395 portion: */
396 //@{ code
397
398 tr.sub_track(sub, 47, 39, "cep_0", "cep_N");
399
400 //@} code
401
402 /** We can step through the frames of a Track using a standard
403 * iterator. The frames are returned as one-frame sub-tracks.
404 */
405
406 //@{ code
407 EST_Track::Entries frames;
408
409 // print out the time of every 50th track
410 cout << "Times:";
411
412 for (frames.begin(tr); frames; ++frames)
413 {
414 const EST_Track &frame = *frames;
415 if (frames.n() % 50 ==0)
416 cout << " " << frames.n() << "[" << frame.t() << "]";
417 }
418 cout << "\n";
419
420 //@} code
421
422 /** The <function>channel</function>, <function>frame</function>
423 and <function>sub_track</function> functions are most commonly
424 used to write into a track using a convenient
425 sub-portion. Sometimes, however a simple copy is required
426 whose contents can be written without affecting the original.
427
428 The <member>copy_cub_track</member> function does this */
429 //@{ code
430 EST_Track tr_copy;
431
432 // tr.copy_sub_track(tr_copy, 47, 39, "cep_0", "cep_N");
433 //@} code
434
435 /** Individual frames and channels can be copied out into
436 pre-allocated float * arrays as follows:
437 */
438 //@{ code
439 float *channel_buf, *frame_buf;
440 channel_buf = new float[tr.num_frames()];
441 frame_buf = new float[tr.num_channels()];
442
443 tr.copy_channel_out(5, channel_buf); // copy channel 5 into channel_buf
444 tr.copy_frame_out(43, frame_buf); // copy frame 4 into frame_buf
445 //@} code
446
447 /** Individual frames and channels can be copied into the track
448 from float * arrays as follows:
449 */
450 //@{ code
451 tr.copy_channel_in(5, channel_buf); // copy channel_buf into channel 5
452 tr.copy_frame_in(43, frame_buf); // copy frame_buf into frame 4
453 //@} code
454 //@}
455
456
457 /** @name Auxiliary Channels
458 Auxiliary channels are used for storing frame information other than
459 amplitude coefficients, for example voicing decisions and points of
460 interest in the track.
461
462 Auxiliary channels always have the same number of frames as the
463 amplitude channels. They are resized by assigning names to the
464 channels that need to be created:
465 */
466 //@{
467 //@{ code
468
469
470 EST_StrList aux_names;
471
472 aux_names.append("voicing");
473 aux_names.append("join_points");
474 aux_names.append("cost");
475
476 tr.resize_aux(aux_names);
477
478 //@} code
479 /** The following fills in these three channels with some values:
480 */
481 //@{ code
482
483 for (i = 0; i < 500; ++i)
484 {
485 tr.aux(i, "voicing") = i;
486 tr.aux(i, "join_points") = EST_String("stuff");
487 tr.aux(i, "cost") = 0.111;
488 }
489 //@} code
490 //@}
491
492 /** @name File I/O
493 Tracks in various formats can be saved and loaded:
494
495 Save as a HTK file:
496 */
497 //@{
498 //@{ code
499 if (tr.save("tmp/track.htk", "htk") != write_ok)
500 EST_error("can't save htk file\n");
501 //@} code
502 /** Save as a EST file:
503 */
504 //@{ code
505 if (tr.save("tmp/track.est", "est") != write_ok)
506 EST_error("can't save est file\n");
507 //@} code
508 /** Save as an ascii file:
509 */
510 //@{ code
511 if (tr.save("tmp/track.ascii", "ascii") != write_ok)
512 EST_error("can't save ascii file\n");
513 //@} code
514 /** The file type is automatically determined from the file's
515 header during loading:
516 */
517 //@{ code
518
519 EST_Track tr2;
520 if (tr2.load("tmp/track.htk") != read_ok)
521 EST_error("can't reload htk\n");
522 //@} code
523
524 /** If no header is found, the function assumes the
525 file is ascii data, with a fixed frame shift, arranged with rows
526 representing frames and columns channels. In this case, the
527 frame shift must be specified as an argument to this function:
528 */
529 //@{ code
530 if (tr.load("tmp/track.ascii", 0.01) != read_ok)
531 EST_error("can't reload ascii file\n");
532 //@} code
533 //@}
534
535 exit(0);
536 }
537
538 //@}
539
540
541
542
543
544
545
546