1  /************************************************************************/
2  /*                                                                      */
3  /*                Centre for Speech Technology Research                 */
4  /*                     University of Edinburgh, UK                      */
5  /*                       Copyright (c) 1996,1997                        */
6  /*                        All Rights Reserved.                          */
7  /*                                                                      */
8  /*  Permission is hereby granted, free of charge, to use and distribute */
9  /*  this software and its documentation without restriction, including  */
10  /*  without limitation the rights to use, copy, modify, merge, publish, */
11  /*  distribute, sublicense, and/or sell copies of this work, and to     */
12  /*  permit persons to whom this work is furnished to do so, subject to  */
13  /*  the following conditions:                                           */
14  /*   1. The code must retain the above copyright notice, this list of   */
15  /*      conditions and the following disclaimer.                        */
16  /*   2. Any modifications must be clearly marked as such.               */
17  /*   3. Original authors' names are not deleted.                        */
18  /*   4. The authors' names are not used to endorse or promote products  */
19  /*      derived from this software without specific prior written       */
20  /*      permission.                                                     */
21  /*                                                                      */
22  /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
23  /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
24  /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
25  /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
26  /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
27  /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
28  /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
29  /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
30  /*  THIS SOFTWARE.                                                      */
31  /*                                                                      */
32  /*************************************************************************/
33  /*                                                                       */
34  /*                 Author: Richard Caley (rjc@cstr.ed.ac.uk)             */
35  /*                   Date: Fri May  9 1997                               */
36  /* -------------------------------------------------------------------   */
37  /* Example of declaration and use of tracks.                             */
38  /*                                                                       */
39  /*************************************************************************/
40 
41 
42 #include <iostream>
43 #include <cstdlib>
44 #include "EST_Track.h"
45 #include "EST_Wave.h"
46 #include "EST_sigpr.h"
47 #include "EST_error.h"
48 
49 
50 /** @name EST_Track class example code
51   * @toc
52   * Some examples of track manipulations.
53   *
54   */
55 //@{
56 
main(void)57 int main(void)
58 
59 {
60     int i, j;
61 
62     /* This program is designed as an example not as something to run
63        so for testing purpose it simply exists */
64     exit(0);
65     /**@name Initialising and Resizing a Track
66 
67        The constructor functions can be used to create a track with
68        zero frames and channels or a track with a specified number of
69        frames and channels
70     */
71 
72     //@{
73     //@{ code
74     EST_Track tr;           // default track declaration
75     EST_Track tra(500, 10); // allocate track with 500 frames and 10 channels
76     //@} code
77 
78     /** tracks can be resized at any time:
79      */
80     //@{ code
81     tr.resize(10, 500); // resize track to have 10 frames and 500 channels
82     tr.resize(500, 10); // resize track to have 500 frames and 10 channels
83     //@} code
84 
85     /** by default, resizing preserves values in the track. This
86 	may involve copying some information, so if the existing values
87 	are not needed, a flag can be set which usually results in
88 	quicker resizing
89     */
90     //@{ code
91     tr.resize(250, 5, 0);  // throw away any existing values
92     //@} code
93     /** If only the number of channels or the number of frames needs
94 	to be changed, this an be done with the following functions:
95     */
96 
97     //@{ code
98     tr.set_num_channels(10);   // makes 10 channels, keeps same no of frames
99 
100     tr.set_num_frames(400);    // makes 400 frames, keeps same no of channels
101     //@} code
102     /** The preserve flag works in the same way with these functions
103      */
104     //@}
105 
106     /** @name Simple Access
107 
108 	Values in the track can be accessed and set by frame
109 	number and channel number.
110 
111 	The following resizes a track to have 500 frames and 10 channels
112 	and fills every position with -5.
113     */
114     //@{
115     //@{ code
116     tr.resize(500, 10);
117 
118     for (i = 0; i < tr.num_frames(); ++i)
119 	for (j = 0; j < tr.num_channels(); ++j)
120 	    tr.a(i, j) = -5.0;
121 
122     //@} code
123 
124     /** A well formed track will have a time value, specified in seconds,
125 	for every frame. The time array can be filled directly:
126     */
127     //@{ code
128     for (i = 0; i < tr.num_frames(); ++i)
129 	tr.t(i) = (float) i * 0.01;
130     //@} code
131     /** which fills the time array with values 0.01, 0.02,
132 	0.03... 5.0. However, A shortcut function is provided for fixed
133 	frame spacing:
134     */
135     //@{ code
136     tr.fill_time(0.1);
137 
138     //@} code
139     /** which performs the same operation as above. Frames do not have
140 	to be evenly spaced, in pitch synchronous processing the time
141 	array holds the time position of each pitch period. In such
142 	cases each position in the time array must obviously be set
143 	individually.</para><para>
144 
145 	Some representations have undefined values during certain
146 	sections of the track, for example the F0 value during
147 	unvoiced speech.</para><para>
148 
149 	The break/value array can be used to specify if a frame has an
150 	undefined value.<para></para>. If a frame in this array is 1,
151 	that means the amplitude is defined at that point. If 0, the
152 	amplitude is undefined. By default, every frame has a value.
153 	</para><para>
154 
155 	Breaks (undefined values) can be set by <method>set_break()
156 	</method>. The following sets every frame from 50 to 99 as a
157 	break:
158     */
159     //@{ code
160     for (i = 50; i < 100; ++i)
161 	tr.set_break(i);
162     //@} code
163     /** frames can be turned back to values as follows:
164      */
165     //@{ code
166     for (i = 50; i < 100; ++i)
167 	tr.set_value(i);
168     //@} code
169     /** It is up to individual functions to decide how to interpret breaks.
170 	</para><para>
171 	A frame's status can be checked as follows:
172     */
173     //@{ code
174     if (tr.val(60))
175 	cout << "Frame 60 is not a break\n";
176 
177     if (tr.track_break(60))
178 	cout << "Frame 60 is a break\n";
179     //@} code
180     //@}
181 
182     /** @name Naming Channels
183 	@id tr-example-naming-channels
184 
185 	While channels can be accessed by their index, it is often useful
186 	to give them names and refer to them by those names.
187 
188 	The set_channel_name() function sets the name of a single channel:
189     */
190     //@{
191     //@{ code
192     tr.set_channel_name("F0", 0);
193     tr.set_channel_name("energy", 1);
194     //@} code
195 
196     /** An alternative is to use a predefined set of channel names
197 	stored in a <emphasis>map</emphasis>.A track map
198 	is simply a String List strings which describe a channel name
199 	configuration. The <method>resize</method> function can take
200 	this and resize the number of channels to the number of channels
201 	indicated in the map, and give each channel its name from the
202 	map. For example:
203     */
204     //@{ code
205     EST_StrList map;
206     map.append("F0");
207     map.append("energy");
208 
209     tr.resize(500, map); // this makes a 2 channel track and sets the names to F0 and energy
210     //@} code
211 
212     /** A convention is used for channels which comprise
213 	components of a multi-dimensional analysis such as
214 	cepstra. In such cases the channels are named
215 	<replaceable>TYPE_I</replaceable>.  The last coefficient is
216 	always names <replaceable>TYPE_N</replaceable> regardless of
217 	the number of coefficients. This is very useful in extracting
218 	a set of related  channels without needing to know the order
219 	of the analysis.
220 
221 	For example, a track map might look like:
222 
223     */
224     //@{ code
225 
226     map.clear();
227     map.append("F0");
228     map.append("energy");
229 
230     map.append("cep_0");
231     map.append("cep_1");
232     map.append("cep_2");
233     map.append("cep_3");
234     map.append("cep_4");
235     map.append("cep_5");
236     map.append("cep_6");
237     map.append("cep_7");
238     map.append("cep_N");
239 
240     tr.resize(500, map); // makes a 11 channel track and sets the names
241     //@} code
242 
243     /** This obviously gets unwieldy quite quickly, so the mapping
244 	mechanism provides a short hand for multi-dimensional data.
245 
246     */
247 
248     //@{ code
249     map.clear();
250     map.append("F0");
251     map.append("energy");
252 
253     map.append("$cep-0+8");
254 
255     tr.resize(500, map); // does exactly as above
256     //@} code
257 
258     /** Here $ indicates the special status, "cep" the name of the
259 	coefficients, "-0" that the first is number 0 and "+8" that
260 	there are 8 more to follow.
261     */
262 
263     //@}
264 
265 
266     /** @name Access single frames or single channels.
267 
268 	@id tr-example-frames-and-channels
269 
270 	Often functions perform their operations on only a single
271 	frame or channel, and the track class provides a general
272 	mechanism for doing this.
273 
274 	Single frames or channels can be accessed as EST_FVectors:
275 	Given a track with 500 frames and 10 channels, the 50th frame
276 	can be accessed as:
277     */
278     //@{
279     //@{ code
280     EST_FVector tmp_frame;
281 
282     tr.frame(tmp_frame, 50);
283     //@} code
284     /** now tmp_frame is 10 element vector, which is
285 	a window into tr: any changes to the contents of tmp_frame will
286 	change tr. tmp_frame cannot be resized. (This operation can
287 	be thought in standard C terms as tmp_frame being a pointer
288 	to the 5th frame of tr).
289 	</para>	<para>
290 	Likewise with channels:
291     */
292     //@{ code
293     EST_FVector tmp_channel;
294 
295     tr.channel(tmp_channel, 5);
296     //@} code
297     /** Again, tmp_channel is 500 element vector, which is
298 	a window into tr: any changes to the contents of tmp_channel will
299 	change tr. tmp_channel cannot be resized.
300 	</para><para>
301 	Channels can also be extracted by name:
302     */
303     //@{ code
304     tr.channel(tmp_channel, "energy");
305     //@} code
306     /** not all the channels need be put into the temporary frame.
307 	Imagine we have a track with a F0 channel,a energy channel and
308 	10 cepstrum channels. The following makes a frame from the
309 	50th frame, which only includes the cepstral information in
310 	channels 2 through 11 */
311     //@{ code
312     tr.frame(tmp_frame, 50, 2, 9);
313     //@} code
314     /** Likewise, the 5th channel with only the last 100 frames can be set up
315 	as: */
316     //@{ code
317     tr.channel(tmp_channel, 5, 400, 100);
318     //@} code
319     //@}
320     /** @name Access multiple frames or channels.
321 	@id tr-example-sub-tracks
322 	In addition to extracting single frames and channels, multiple
323 	frame and channel portions can be extracted in a similar
324 	way. In the following example, we make a sub-track sub, which
325 	points to the entire cepstrum portion of a track (channels 2
326 	through 11)
327     */
328     //@{
329     //@{ code
330     EST_Track sub;
331 
332     tr.sub_track(sub, 0, EST_ALL, 2, 9);
333 
334     //@} code
335 
336     /** <parameter>sub</parameter> behaves exactly like a normal
337 	track in every way, except that it cannot be resized. Its
338 	contents behave like a point into the designated portion of
339 	<parameter>tr</parameter>, so changing
340 	<parameter>sub</parameter> will change<parameter>
341 	tr</parameter>.
342 
343 	</para><para> The first argument is the
344 	<parameter>sub</parameter> track. The second states the start
345 	frame and the total number of frames required. EST_ALL is a
346 	special constant that specifies that all the frames are
347 	required here. The next argument is the start channel number
348 	(remember channels are numbered from 0), and the last argument
349 	is the total number of channels required.  </para><para>
350 
351 	This facility is particularly useful for using standard
352 	signal processing functions efficiently. For example,
353 	the <function>melcep</function> in the signal processing library
354 	takes a waveform and produces a mel-scale cepstrum. It determines
355 	the order of the cepstral analysis by the number of channels in
356 	the track it is given, which has already been allocated to have
357 	the correct number of frames and channels.
358 
359 	</para><para> The following will process the waveform
360 	<parameter>sig</parameter>, produce a 10th order mel cepstrum
361 	and place the output in <parameter>sub</parameter>. (For
362 	explanation of the other options see
363 	<function>melcep</function> */
364     //@{ code
365     EST_Wave sig;
366 
367     melcep(sig, sub, 1.0, 20, 22);
368     //@} code
369 
370     /** because we have made<parameter>sub</parameter> a window
371 	into<parameter> tr</parameter>, the melcep function writes its
372 	output into the correct location, i.e. channels 2-11 of tr. If
373 	it were no for the sub_track facility, either a separate track
374 	of the right size would be passed into melcep and then it
375 	would be copied into tr (wasteful), or else tr would be passed
376 	in and other arguments would have to specify which channels
377 	should be written to (messy).  </para><para>
378 
379 	Sub-tracks can also be set using channel names. The
380 	following example does exactly as above, but is referenced by
381 	the name of the first channel required and the number of
382 	channels to follow: */
383     //@{ code
384 
385     tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
386     //@} code
387     /** and this specifies the end by a string also:
388      */
389     //@{ code
390     tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
391     //@} code
392     /** sub_tracks can be any set of continuous frames and
393         channels. For example if a word started at frame 43 and ended
394         and frame 86, the following would set a sub track to that
395         portion: */
396     //@{ code
397 
398     tr.sub_track(sub, 47, 39, "cep_0", "cep_N");
399 
400     //@} code
401 
402     /** We can step through the frames of a Track using a standard
403      * iterator. The frames are returned as one-frame sub-tracks.
404      */
405 
406     //@{ code
407     EST_Track::Entries frames;
408 
409     // print out the time of every 50th track
410     cout << "Times:";
411 
412     for (frames.begin(tr); frames; ++frames)
413       {
414 	const EST_Track &frame = *frames;
415 	if (frames.n() % 50 ==0)
416 	    cout << " " << frames.n() << "[" << frame.t() << "]";
417     }
418     cout << "\n";
419 
420     //@} code
421 
422     /** The <function>channel</function>, <function>frame</function>
423 	and <function>sub_track</function> functions are most commonly
424 	used to write into a track using a convenient
425 	sub-portion. Sometimes, however a simple copy is required
426 	whose contents can be written without affecting the original.
427 
428 	The <member>copy_cub_track</member> function does this */
429     //@{ code
430     EST_Track tr_copy;
431 
432 //    tr.copy_sub_track(tr_copy, 47, 39, "cep_0", "cep_N");
433     //@} code
434 
435     /** Individual frames and channels can be copied out into
436 	pre-allocated float * arrays as follows:
437     */
438     //@{ code
439     float *channel_buf, *frame_buf;
440     channel_buf = new float[tr.num_frames()];
441     frame_buf = new float[tr.num_channels()];
442 
443     tr.copy_channel_out(5, channel_buf);   // copy channel 5 into channel_buf
444     tr.copy_frame_out(43, frame_buf);      // copy frame 4 into frame_buf
445     //@} code
446 
447     /** Individual frames and channels can be copied into the track
448 	from float * arrays as follows:
449     */
450     //@{ code
451     tr.copy_channel_in(5, channel_buf);    // copy channel_buf into channel 5
452     tr.copy_frame_in(43, frame_buf);       // copy frame_buf into frame 4
453     //@} code
454     //@}
455 
456 
457     /** @name Auxiliary Channels
458 	Auxiliary channels are used for storing frame information other than
459 	amplitude coefficients, for example voicing decisions and points of
460 	interest in the track.
461 
462 	Auxiliary channels always have the same number of frames as the
463 	amplitude channels. They are resized by assigning names to the
464 	channels that need to be created:
465     */
466     //@{
467     //@{ code
468 
469 
470     EST_StrList aux_names;
471 
472     aux_names.append("voicing");
473     aux_names.append("join_points");
474     aux_names.append("cost");
475 
476     tr.resize_aux(aux_names);
477 
478     //@} code
479     /** The following fills in these three channels with some values:
480      */
481     //@{ code
482 
483     for (i = 0; i < 500; ++i)
484     {
485 	tr.aux(i, "voicing") = i;
486 	tr.aux(i, "join_points") = EST_String("stuff");
487 	tr.aux(i, "cost") =  0.111;
488     }
489     //@} code
490     //@}
491 
492     /** @name File I/O
493 	Tracks in various formats can be saved and loaded:
494 
495 	Save as a HTK file:
496     */
497     //@{
498     //@{ code
499     if (tr.save("tmp/track.htk", "htk") != write_ok)
500 	EST_error("can't save htk file\n");
501     //@} code
502     /** Save as a EST file:
503      */
504     //@{ code
505     if (tr.save("tmp/track.est", "est") != write_ok)
506 	EST_error("can't save est file\n");
507     //@} code
508     /** Save as an ascii file:
509      */
510     //@{ code
511     if (tr.save("tmp/track.ascii", "ascii") != write_ok)
512 	EST_error("can't save ascii file\n");
513     //@} code
514     /** The file type is automatically determined from the file's
515 	header during loading:
516     */
517     //@{ code
518 
519     EST_Track tr2;
520     if (tr2.load("tmp/track.htk") != read_ok)
521 	EST_error("can't reload htk\n");
522     //@} code
523 
524     /** If no header is found, the function assumes the
525 	file is ascii data, with a fixed frame shift, arranged with rows
526 	representing frames and columns channels. In this case, the
527 	frame shift must be specified as an argument to this function:
528     */
529     //@{ code
530     if (tr.load("tmp/track.ascii", 0.01) != read_ok)
531 	EST_error("can't reload ascii file\n");
532     //@} code
533     //@}
534 
535     exit(0);
536 }
537 
538 //@}
539 
540 
541 
542 
543 
544 
545 
546