1  /************************************************************************/
2  /*                                                                      */
3  /*                Centre for Speech Technology Research                 */
4  /*                     University of Edinburgh, UK                      */
5  /*                       Copyright (c) 1996,1997                        */
6  /*                        All Rights Reserved.                          */
7  /*                                                                      */
8  /*  Permission is hereby granted, free of charge, to use and distribute */
9  /*  this software and its documentation without restriction, including  */
10  /*  without limitation the rights to use, copy, modify, merge, publish, */
11  /*  distribute, sublicense, and/or sell copies of this work, and to     */
12  /*  permit persons to whom this work is furnished to do so, subject to  */
13  /*  the following conditions:                                           */
14  /*   1. The code must retain the above copyright notice, this list of   */
15  /*      conditions and the following disclaimer.                        */
16  /*   2. Any modifications must be clearly marked as such.               */
17  /*   3. Original authors' names are not deleted.                        */
18  /*   4. The authors' names are not used to endorse or promote products  */
19  /*      derived from this software without specific prior written       */
20  /*      permission.                                                     */
21  /*                                                                      */
22  /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
23  /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
24  /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
25  /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
26  /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
27  /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
28  /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
29  /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
30  /*  THIS SOFTWARE.                                                      */
31  /*                                                                      */
32  /*************************************************************************/
33  /*                                                                       */
34  /*                 Author: Richard Caley (rjc@cstr.ed.ac.uk)             */
35  /* --------------------------------------------------------------------  */
36  /* Functions to load and save utterances in various formats.             */
37  /*                                                                       */
38  /*************************************************************************/
39 
40 #include <cstdlib>
41 #include <cstdio>
42 #include <iostream>
43 #include <fstream>
44 #include "EST_string_aux.h"
45 #include "EST_FileType.h"
46 #include "EST_Token.h"
47 #include "ling_class/EST_Utterance.h"
48 #include "EST_UtteranceFile.h"
49 
50 static EST_read_status load_all_contents(EST_TokenStream &ts,
51 //					 EST_THash<int,EST_Val> &sitems,
52 					 EST_TVector < EST_Item_Content * > &sitems,
53 					 int &max_id);
54 static EST_read_status load_relations(EST_TokenStream &ts,
55 				      EST_Utterance &utt,
56 				      const EST_TVector < EST_Item_Content * > &sitems
57 //				      const EST_THash<int,EST_Val> &sitems
58 				      );
59 // static EST_write_status save_est_ascii(ostream &outf,const EST_Utterance &utt);
60 static EST_write_status utt_save_all_contents(ostream &outf,
61 					      const EST_Utterance &utt,
62 					      EST_TKVL<void *,int> &sinames);
63 static EST_write_status utt_save_all_contents(ostream &outf,
64 					      EST_Item *n,
65 					      EST_TKVL<void *,int> &sinames,
66 					      int &si_count);
67 static EST_write_status utt_save_ling_content(ostream &outf,
68 					      EST_Item *si,
69 					      EST_TKVL<void *,int> &sinames,
70 					      int &si_count);
71 
node_tidy_up(int & k,EST_Item_Content * node)72 static void node_tidy_up(int &k, EST_Item_Content *node)
73 {
74     // Called to delete the nodes in the hash table when a load
75     (void)k;
76 
77     if (node->unref_relation("__READ__"))
78       delete node;
79 }
80 
load_est_ascii(EST_TokenStream & ts,EST_Utterance & u,int & max_id)81 EST_read_status EST_UtteranceFile::load_est_ascii(EST_TokenStream &ts,
82 						  EST_Utterance &u,
83 						  int &max_id)
84 {
85     EST_Option hinfo;
86     bool ascii;
87     EST_EstFileType t;
88     EST_read_status r;
89     //    EST_THash<int,EST_Val> sitems(100);
90 
91     EST_TVector< EST_Item_Content * > sitems(100);
92 
93     // set up the character constant values for this stream
94     ts.set_SingleCharSymbols(";()");
95     ts.set_quotes('"','\\');
96 
97     if ((r = read_est_header(ts, hinfo, ascii, t)) != format_ok)
98 	return r;
99     if (t != est_file_utterance)
100 	return misc_read_error;
101     if (hinfo.ival("version") != 2)
102     {
103       if (hinfo.ival("version") == 3)
104 	EST_warning("Loading est utterance format version 3, ladders will not be understood");
105       else
106 	{
107 	  EST_error("utt_load: %s  wrong version of utterance format expected 2 (or 3) but found %d",
108 		    (const char *)ts.pos_description(), hinfo.ival("version"));
109 	}
110     }
111 
112     // Utterance features
113     if (ts.get() != "Features")
114     {
115 	cerr << "utt_load: " << ts.pos_description() <<
116 	    " missing utterance features section" << endl;
117 	return misc_read_error;
118     }
119     else
120 	u.f.load(ts);
121     // items
122     if (ts.get() != "Stream_Items")
123     {
124 	cerr << "utt_load: " << ts.pos_description() <<
125 	    " missing Items section" << endl;
126 	return misc_read_error;
127     }
128     max_id = 0;
129     r = load_all_contents(ts, sitems, max_id);
130 
131     // Only exist in older form utterances so soon wont be necessary
132     if (ts.peek() == "Streams")
133     {
134 	cerr << "utt.load: streams found in utterance file, " <<
135 	    "no longer supported" << endl;
136 	return misc_read_error;
137     }
138 
139     // Relations
140     if ((r == format_ok) && (ts.get() != "Relations"))
141     {
142 	cerr << "utt_load: " << ts.pos_description() <<
143 	    " missing Relations section" << endl;
144 	return misc_read_error;
145     }
146 
147     r = load_relations(ts, u, sitems);
148 
149     if ((r == format_ok) && (ts.get() != "End_of_Utterance"))
150     {
151 	cerr << "utt_load: " << ts.pos_description() <<
152 	    " End_of_Utterance expected but not found" << endl;
153 	return misc_read_error;
154     }
155 
156     //    if (r != format_ok)
157     //    {
158 	// This works because even if some of these si's have been
159 	// linked to nodes they will be unlink when the si is destroyed
160     for(int ni=0; ni < sitems.length(); ni++)
161       {
162 	EST_Item_Content *c = sitems[ni];
163 	if (c != NULL)
164 	  node_tidy_up(ni, c);
165       }
166 	//    }
167 
168     return r;
169 
170 }
171 
load_all_contents(EST_TokenStream & ts,EST_TVector<EST_Item_Content * > & sitems,int & max_id)172 static EST_read_status load_all_contents(EST_TokenStream &ts,
173 //					 EST_THash<int,EST_Val> &sitems,
174 					 EST_TVector < EST_Item_Content * > &sitems,
175 					 int &max_id)
176 {
177     // Load items into table with names for later reference
178     // by relations
179     EST_String Sid;
180     bool ok;
181     int id,idval;
182 
183     while (ts.peek() != "End_of_Stream_Items")
184     {
185 	EST_Item_Content *si = new EST_Item_Content;
186 
187 	si->relations.add_item("__READ__", est_val((EST_Item *)NULL), 1);
188 
189 	id = 0;
190 
191 	Sid = ts.get().string();
192 
193 	id = Sid.Int(ok);
194 	if (!ok)
195 	{
196 	    cerr << "utt_load: " << ts.pos_description() <<
197 		" Item name not a number: " << Sid << endl;
198 	    return misc_read_error;
199 	}
200 	if (id >= sitems.length())
201 	  {
202 	    sitems.resize(id*2, 1);
203 	  }
204 	sitems[id] = si;
205 	//	sitems.add_item(id,est_val(si));
206 	if (si->f.load(ts) != format_ok)
207 	    return misc_read_error;
208 	idval = si->f.I("id",0);
209 	if (idval > max_id)
210 	    max_id = idval;
211 	if (ts.eof())
212 	    return misc_read_error;  // just in case this happens
213     }
214 
215     ts.get(); // skip "End_of_Stream_Items"
216 
217     return format_ok;
218 }
219 
load_relations(EST_TokenStream & ts,EST_Utterance & utt,const EST_TVector<EST_Item_Content * > & sitems)220 static EST_read_status load_relations(EST_TokenStream &ts,
221 				      EST_Utterance &utt,
222 				      const EST_TVector < EST_Item_Content * > &sitems
223 //				      const EST_THash<int,EST_Val> &sitems
224 				      )
225 {
226     // Load relations
227 
228     while (ts.peek() != "End_of_Relations")
229     {
230 	// can't use create relation as we don't know its name until
231 	// after its loaded
232 	EST_Relation *r = new EST_Relation;
233 
234 	if (r->load(ts,sitems) != format_ok)
235 	    return misc_read_error;
236 
237 	r->set_utt(&utt);
238 	utt.relations.set_val(r->name(),est_val(r));
239 
240 	if (ts.eof())
241 	    return misc_read_error;
242     }
243 
244     ts.get();  // Skip "End_of_Relations"
245 
246     return format_ok;
247 }
248 
249 
save_est_ascii(ostream & outf,const EST_Utterance & utt)250 EST_write_status EST_UtteranceFile::save_est_ascii(ostream &outf,const EST_Utterance &utt)
251 {
252     EST_write_status v = write_ok;
253 
254     outf.precision(8);
255     outf.setf(ios::fixed, ios::floatfield);
256     outf.width(8);
257 
258     outf << "EST_File utterance\n"; // EST header identifier.
259     outf << "DataType ascii\n";
260     outf << "version 2\n";
261     outf << "EST_Header_End\n"; // EST end of header identifier.
262 
263     // Utterance features
264     outf << "Features ";
265     utt.f.save(outf);
266     outf << endl;
267 
268     outf << "Stream_Items\n";
269     EST_TKVL<void *,int> sinames;
270     v = utt_save_all_contents(outf,utt,sinames);
271     if (v == write_fail) return v;
272     outf << "End_of_Stream_Items\n";
273 
274     // Relations
275     outf << "Relations\n";
276     EST_Features::Entries p;
277     for (p.begin(utt.relations); p; p++)
278     {
279 	v = relation(p->v)->save(outf,sinames);
280 	if (v == write_fail) return v;
281     }
282     outf << "End_of_Relations\n";
283 
284     outf << "End_of_Utterance\n";
285     return write_ok;
286 }
287 
utt_save_all_contents(ostream & outf,const EST_Utterance & utt,EST_TKVL<void *,int> & sinames)288 static EST_write_status utt_save_all_contents(ostream &outf,
289 					      const EST_Utterance &utt,
290 					      EST_TKVL<void *,int> &sinames)
291 {
292     // Write out all stream items in the utterance, as they may appear in
293     // various places in an utterance keep a record of which ones
294     // have been printed and related them to names for reference by
295     // the Relations (and older Stream architecture).
296     int si_count = 1;
297     EST_write_status v = write_ok;
298 
299     // Find the stream items in the relations
300     EST_Features::Entries p;
301     for (p.begin(utt.relations); p; p++)
302     {
303 	v = utt_save_all_contents(outf,relation(p->v)->head(),
304 				  sinames,si_count);
305 	if (v == write_fail) return v;
306     }
307 
308     return v;
309 }
310 
utt_save_all_contents(ostream & outf,EST_Item * n,EST_TKVL<void *,int> & sinames,int & si_count)311 static EST_write_status utt_save_all_contents(ostream &outf,
312 					      EST_Item *n,
313 					      EST_TKVL<void *,int> &sinames,
314 					      int &si_count)
315 {
316     if (n == 0)
317 	return write_ok;
318     else
319     {
320 	utt_save_ling_content(outf,n,sinames,si_count);
321 	// As we have more complex structures this will need to
322 	// be updated (i.e. we'll need a marking method for nodes)
323 	utt_save_all_contents(outf,n->next(),sinames,si_count);
324 	utt_save_all_contents(outf,n->down(),sinames,si_count);
325     }
326     return write_ok;
327 }
328 
utt_save_ling_content(ostream & outf,EST_Item * si,EST_TKVL<void *,int> & sinames,int & si_count)329 static EST_write_status utt_save_ling_content(ostream &outf,
330 					      EST_Item *si,
331 					      EST_TKVL<void *,int> &sinames,
332 					      int &si_count)
333 {
334     // Save item and features if not already saved
335 
336     if ((si != 0) && (!sinames.present(si->contents())))
337     {
338 	sinames.add_item(si->contents(),si_count);
339 	outf << si_count << " ";
340 	si->features().save(outf);
341 	outf << endl;
342 	si_count++;
343     }
344     return write_ok;
345 }
346 
load_xlabel(EST_TokenStream & ts,EST_Utterance & u,int & max_id)347 EST_read_status EST_UtteranceFile::load_xlabel(EST_TokenStream &ts,
348 					       EST_Utterance &u,
349 					       int &max_id)
350 {
351   (void)max_id;
352   EST_read_status status = read_ok;
353 
354   u.clear();
355 
356   EST_Relation *rel = u.create_relation("labels");
357 
358   status = rel->load("", ts, "esps");
359 
360   EST_Item *i = rel->head();
361   float t=0.0;
362 
363   while (i != NULL)
364     {
365       i->set("start", t);
366       t = i->F("end");
367       i = i->next();
368     }
369 
370   return status;
371 }
372 
save_xlabel(ostream & outf,const EST_Utterance & utt)373 EST_write_status EST_UtteranceFile::save_xlabel(ostream &outf,
374 						const EST_Utterance &utt)
375 {
376   EST_write_status status = write_error;
377 
378   EST_Relation *rel;
379 
380   EST_Features::Entries p;
381 
382   for (p.begin(utt.relations); p; p++)
383     {
384       rel = ::relation(p->v);
385 
386       EST_Item * hd = rel->head();
387 
388 
389       while (hd)
390 	{
391 	  if (hd->up() || hd->down())
392 	    break;
393 	  hd=hd->next();
394 	}
395 
396       // didn't find anything => this is linear
397       if(!hd)
398 	  return rel->save(outf, "esps", 0);
399     }
400 
401   // Found no linear relations
402 
403   return status;
404 }
405 
406 #if defined(INCLUDE_XML_FORMATS)
407 
408 #include "genxml.h"
409 #include "apml.h"
410 
411 // APML support
load_apml(EST_TokenStream & ts,EST_Utterance & u,int & max_id)412 EST_read_status EST_UtteranceFile::load_apml(EST_TokenStream &ts,
413 						EST_Utterance &u,
414 						int &max_id)
415 {
416   FILE *stream;
417 
418   if ((stream=ts.filedescriptor())==NULL)
419     return read_error;
420 
421   long pos=ftell(stream);
422 
423   {
424   char buf[80];
425 
426   fgets(buf, 80, stream);
427 
428   if (strncmp(buf, "<?xml", 5) != 0)
429     return read_format_error;
430 
431   fgets(buf, 80, stream);
432 
433   if (strncmp(buf, "<!DOCTYPE apml", 14) != 0)
434     return read_format_error;
435   }
436 
437   fseek(stream, pos, 0);
438 
439   EST_read_status stat = apml_read(stream, ts.filename(),u, max_id);
440 
441   if (stat != read_ok)
442     fseek(stream, pos, 0);
443 
444   return stat;
445 }
446 
447 
448 // GenXML support
449 
load_genxml(EST_TokenStream & ts,EST_Utterance & u,int & max_id)450 EST_read_status EST_UtteranceFile::load_genxml(EST_TokenStream &ts,
451 						EST_Utterance &u,
452 						int &max_id)
453 {
454   FILE *stream;
455 
456   if ((stream=ts.filedescriptor())==NULL)
457     return read_error;
458 
459   long pos=ftell(stream);
460 
461   {
462   char buf[80];
463 
464   fgets(buf, 80, stream);
465 
466   if (strncmp(buf, "<?xml", 5) != 0)
467     return read_format_error;
468   }
469 
470   fseek(stream, pos, 0);
471 
472   EST_read_status stat = EST_GenXML::read_xml(stream, ts.filename(),u, max_id);
473 
474   if (stat != read_ok)
475     fseek(stream, pos, 0);
476 
477   return stat;
478 }
479 
save_genxml(ostream & outf,const EST_Utterance & utt)480 EST_write_status EST_UtteranceFile::save_genxml(ostream &outf,
481 						const EST_Utterance &utt)
482 {
483   EST_write_status status=write_ok;
484 
485   EST_TStringHash<int> features(20);
486 
487   EST_Features::Entries p;
488 
489   for (p.begin(utt.relations); p; ++p)
490     {
491       EST_Relation *rel = ::relation(p->v);
492 
493       EST_Item * hd = rel->head();
494 
495       while (hd)
496 	{
497 	  EST_Features::Entries fp;
498 	  for (fp.begin(hd->features()); fp; ++fp)
499 	    features.add_item(fp->k, 1);
500 	  hd=hd->next();
501 	}
502     }
503 
504   outf << "<?xml version='1.0'?>\n";
505 
506   outf << "<!DOCTYPE utterance PUBLIC '//CSTR EST//DTD cstrutt//EN' 'cstrutt.dtd'\n\t[\n";
507 
508   EST_TStringHash<int>::Entries f;
509 
510   outf << "\t<!ATTLIST item\n";
511   for (f.begin(features); f; ++f)
512     {
513       if (f->k != "id")
514 	{
515 	  outf << "\t\t" << f->k << "\tCDATA #IMPLIED\n";
516 	}
517     }
518 
519   outf << "\t\t>\n";
520 
521   outf << "\t]>\n";
522 
523   outf << "<utterance>\n";
524 
525   outf << "<language name='unknown'/>\n";
526 
527   for (p.begin(utt.relations); p; ++p)
528     {
529       EST_Relation *rel = ::relation(p->v);
530 
531       EST_Item * hd = rel->head();
532 
533 
534       while (hd)
535 	{
536 	  if (hd->up() || hd->down())
537 	    break;
538 	  hd=hd->next();
539 	}
540 
541       // didn't find anything => this is linear
542       if(!hd)
543 	{
544 	  outf << "<relation name='"<< rel->name()<< "' structure-type='list'>\n";
545 
546 	  hd = rel->head();
547 	  while (hd)
548 	    {
549 	      outf << "    <item\n";
550 
551 	      EST_Features::Entries p;
552 	      for (p.begin(hd->features()); p; ++p)
553 		if (p->k != "estContentFeature")
554 		  outf << "         " << p->k << "='" << p->v << "'\n";
555 
556 	      outf << "         />\n";
557 
558 	      hd=hd->next();
559 	    }
560 
561 	  outf << "</relation>\n";
562 	}
563       else // for now give an error for non-linear relations
564 	status=write_partial;
565     }
566 
567 
568   outf << "</utterance>\n";
569 
570   return status;
571 ;
572 }
573 #endif
574 
options_short(void)575 EST_String EST_UtteranceFile::options_short(void)
576 {
577     EST_String s("");
578 
579     for(int n=0; n< EST_UtteranceFile::map.n() ; n++)
580     {
581       EST_UtteranceFileType type = EST_UtteranceFile::map.nth_token(n);
582       if (type != uff_none)
583 	{
584 	  for(int ni=0; ni<NAMED_ENUM_MAX_SYNONYMS; ni++)
585 	    {
586 	      const char *nm = EST_UtteranceFile::map.name(type, ni);
587 	      if (nm==NULL)
588 		break;
589 
590 	      if (s != "")
591 		s += ", ";
592 
593 	      s += nm;
594 	    }
595 	}
596     }
597     return s;
598 }
599 
options_supported(void)600 EST_String EST_UtteranceFile::options_supported(void)
601 {
602     EST_String s("Available utterance file formats:\n");
603 
604     for(int n=0; n< EST_UtteranceFile::map.n() ; n++)
605     {
606       EST_UtteranceFileType type = EST_UtteranceFile::map.nth_token(n);
607       if (type != uff_none)
608 	{
609 	  const char *d = EST_UtteranceFile::map.info(type).description;
610 	  for(int ni=0; ni<NAMED_ENUM_MAX_SYNONYMS; ni++)
611 	    {
612 	      const char *nm = EST_UtteranceFile::map.name(type, ni);
613 	      if (nm==NULL)
614 		break;
615 
616 	      s += EST_String::cat("        ", (nm?nm:"NULL"), EST_String(" ")*(12-strlen((nm?nm:"NULL"))), (d?d:"NULL"), "\n");
617 	    }
618 	}
619     }
620     return s;
621 }
622 
623 
624 
625 // note the order here defines the order in which loads are tried.
Start_TNamedEnumI_T(EST_UtteranceFileType,EST_UtteranceFile::Info,EST_UtteranceFile::map,utterancefile)626 Start_TNamedEnumI_T(EST_UtteranceFileType, EST_UtteranceFile::Info, EST_UtteranceFile::map, utterancefile)
627   { uff_none,		{ NULL },
628     { FALSE, NULL, NULL, "unknown utterance file type"} },
629   { uff_est,		{ "est", "est_ascii"},
630     { TRUE, EST_UtteranceFile::load_est_ascii,  EST_UtteranceFile::save_est_ascii, "Standard EST Utterance File" } },
631 #if defined(INCLUDE_XML_FORMATS)
632   { uff_apml,		{ "apml", "xml"},
633     { TRUE, EST_UtteranceFile::load_apml,  NULL, "Utterance in APML" } },
634   { uff_genxml,		{ "genxml", "xml"},
635     { TRUE, EST_UtteranceFile::load_genxml,  EST_UtteranceFile::save_genxml, "Utterance in XML, Any DTD" } },
636 #endif
637   { uff_xlabel,	{ "xlabel"},
638     { TRUE, EST_UtteranceFile::load_xlabel,  EST_UtteranceFile::save_xlabel, "Xwaves Label File" } },
639   { uff_none,		{NULL},
640       { FALSE, NULL, NULL, "unknown utterance file type"} }
641 
642 End_TNamedEnumI_T(EST_UtteranceFileType, EST_UtteranceFile::Info, EST_UtteranceFile::map, utterancefile)
643 
// Template support for the named enum used by EST_UtteranceFile::map
// (enum type EST_UtteranceFileType, info type EST_UtteranceFile::Info).
Declare_TNamedEnumI(EST_UtteranceFileType, EST_UtteranceFile::Info)

// When INSTANTIATE_TEMPLATES is defined the template source is included
// here and the named enum is instantiated explicitly.
#if defined(INSTANTIATE_TEMPLATES)
#include "../base_class/EST_TNamedEnum.cc"
Instantiate_TNamedEnumI(EST_UtteranceFileType, EST_UtteranceFile::Info)
#endif

// Template support for the vector of EST_Item_Content pointers used by
// the EST ascii loader above.
// NOTE(review): the two NULL arguments are presumably the vector's
// default and error values -- confirm against the macro definition.
Declare_TVector_Base_T(EST_Item_Content *, NULL, NULL, EST_Item_ContentP)

// As above: explicit instantiation, only when INSTANTIATE_TEMPLATES is
// defined.
#if defined(INSTANTIATE_TEMPLATES)

#include "../base_class/EST_TSimpleVector.cc"
#include "../base_class/EST_TVector.cc"
#include "../base_class/EST_Tvectlist.cc"

Instantiate_TVector_T(EST_Item_Content *, EST_Item_ContentP)

#endif
662 
663