1 /*************************************************************************/
2 /*                                                                       */
3 /*                Centre for Speech Technology Research                  */
4 /*                     University of Edinburgh, UK                       */
5 /*                       Copyright (c) 1996,1997                         */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*                                                                       */
34 /*                 Author: Paul Taylor                                   */
35 /*                   Date: 6 Jan 1998                                    */
36 /* --------------------------------------------------------------------- */
37 /*                 Acoustic Unit Concatenation                           */
38 /*                                                                       */
39 /*************************************************************************/
40 
41 
42 #include "siod.h"
43 #include "EST_sigpr.h"
44 #include "EST_wave_aux.h"
45 #include "EST_track_aux.h"
46 #include "EST_ling_class.h"
47 #include "us_synthesis.h"
48 #include <cmath>
49 
50 #include "Phone.h"
51 
52 void merge_features(EST_Item *from, EST_Item *to, int keep_id);
53 
54 void dp_time_align(EST_Utterance &utt, const EST_String &source_name,
55 		   const EST_String &target_name,
56 		   const EST_String &time_name,
57 		   bool do_start);
58 
59 void concatenate_unit_coefs(EST_Relation &unit_stream, EST_Track &source_lpc);
60 void us_unit_raw_concat(EST_Utterance &utt);
61 
62 void window_units(EST_Relation &unit_stream,
63 		  EST_TVector<EST_Wave> &frames,
64 		  float window_factor,
65 		  EST_String window_name,
66 		  bool window_symmetric,
67 		  EST_IVector *pm_indices=0);
68 
69 bool dp_match(const EST_Relation &lexical,
70 	      const EST_Relation &surface,
71 	      EST_Relation &match,
72 	      float ins, float del, float sub);
73 
74 void map_match_times(EST_Relation &target, const EST_String &match_name,
75 	       const EST_String &time_name, bool do_start);
76 
77 
window_frame(EST_Wave & frame,EST_Wave & whole,float scale,int start,int end,EST_WindowFunc * window_function,int centre_index=-1)78 static void window_frame(EST_Wave &frame, EST_Wave &whole, float scale,
79 			 int start, int end, EST_WindowFunc *window_function,
80 			 int centre_index=-1)
81 {
82   int i, j, send;
83   EST_TBuffer<float> window;
84   int window_length = (end-start)+1;
85 
86   if (frame.num_samples() != (window_length))
87     frame.resize(window_length);
88   frame.set_sample_rate(whole.sample_rate());
89   // Ensure we have a safe end
90   if (end < whole.num_samples())
91     send = end;
92   else
93     send = whole.num_samples();
94 
95 
96   int print_centre;
97   if ( centre_index < 0 ){
98     window_function( window_length, window, -1 );
99     print_centre = (window_length-1)/2+start;
100   }
101   else{
102     window_function( window_length, window, (centre_index-start));
103     print_centre = centre_index;
104   }
105 
106 
107 #if defined(EST_DEBUGGING)
108   cerr << "(start centre end window_length wholewavelen) "
109        << start << " "
110        << print_centre << " "
111        << end   << " "
112        << window_length << " "
113        << whole.num_samples() << endl;
114 #endif
115 
116 
117   // To allow a_no_check access we do this in three stages
118   for (i = 0, j = start; j < 0; ++i, ++j)
119     frame.a_no_check(i) = 0;
120   for ( ; j < send; ++i, ++j)
121     frame.a_no_check(i) = (int)((float)whole.a_no_check(j) * window(i) * scale);
122   for ( ; j < end; ++j,++i)
123     frame.a_no_check(i) = 0;
124 
125 
126 #if defined(EST_DEBUGGING)
127   // It's not always very nice to resynthesise speech from
128   // inserted zeros!  These checks should alert the user (me ;)
129   if( start<0 )
130     EST_warning( "padded start of pitch period with zeros (index %d)", i );
131 
132   if( end>whole.num_samples() )
133     EST_warning( "padded end of pitch period with zeros (frame %d)", i );
134 #endif
135 }
136 
137 
138 // The window_signal function has been changed in several ways:
139 //
140 // *) The function now has an asymmetric window mode.
141 //
142 //    In this mode, asymmetric windows are used from pitchmark at t-1
143 //    to pitchmark at time t+1, with the maximum value of 1.0 at
144 //    pitchmark at time t.
145 //
146 // *) In the original symmetric mode:
147 //
148 //    The first change is to ensure the window frames always have an
149 //    odd number of samples (a convention for how to handle rounding
150 //    problems when converting from times (float) to sample numbers
151 //    (int)).  The centre sample corresponds to the pitch mark time.
152 //
153 //    The second change is that the estimate of local pitch period is
154 //    always based in current and *previous* pitchmark.  In the case
155 //    of the first pitch mark in track pm, the previous pitchmark is
156 //    assumed to be at zero time.  Hopefully, this won't break much.
157 //    However, if this convention is not used everywhere else that
158 //    it's needed and some things break, then arguably those
159 //    things need to be fixed to adhere to this same convention...
window_signal(EST_Wave & sig,EST_Track & pm,EST_WaveVector & frames,int & i,float scale,float window_factor,EST_WindowFunc * window_function,bool window_symmetric,EST_IVector * pm_indices=0)160 void window_signal(EST_Wave &sig, EST_Track &pm,
161 		   EST_WaveVector &frames, int &i, float scale,
162 		   float window_factor,
163 		   EST_WindowFunc *window_function,
164 		   bool window_symmetric,
165 		   EST_IVector *pm_indices=0)
166 {
167     float first_pos, period=0.0;
168     float prev_pm, current_pm;
169     int first_sample, centre_sample, last_sample;
170     int sample_rate = sig.sample_rate();
171     int pm_num_frames = pm.num_frames();
172 
173     // estimate first period as pitchmark time itself (i.e. assume a previous
174     // pitchmark at 0.0 time, waveform sample 0)
175     prev_pm = 0.0;
176 
177 
178     if( window_symmetric )
179       {
180 	if (pm_num_frames < 1 )
181 	  EST_error( "Attempted to Window around less than 1 pitchmark" );
182 
183 	for( int j=0; j<pm_num_frames; ++j, ++i ){
184 	  current_pm = pm.t(j);
185 	  period = current_pm - prev_pm;
186 	  centre_sample = (int)rint( current_pm*(float)sample_rate );
187 
188 	  first_pos = prev_pm - (period * (window_factor-1.0));
189 	  first_sample = (int)rint( first_pos*(float)sample_rate );
190 
191 	  last_sample  = (2*centre_sample)-first_sample;
192 
193 	  window_frame(frames[i], sig, scale, first_sample, last_sample, window_function);
194 
195 	  prev_pm = current_pm;
196 	}
197       }
198     else{
199       if( pm_indices == 0 )
200 	EST_error( "required pitchmark indices EST_IVector is null" );
201 
202       int j;
203 
204       // Rob's experiment to see if we can handle small bits of speech with no pitchmarks.
205       // We just 0 the frames in this case.
206 
207       if (pm_num_frames < 1 )
208 	{
209 	  EST_warning( "Attempted to Window around less than 1 pitchmark" );
210 	}
211       else
212 	{
213 	  for( j=0; j<pm_num_frames-1; ++j, ++i ){
214 	    current_pm = pm.t(j);
215 	    period = current_pm - prev_pm;
216 	    centre_sample = (int)rint( current_pm*(float)sample_rate );
217 
218 	    first_pos = prev_pm - (period * (window_factor-1.0));
219 	    first_sample = (int)rint( first_pos*(float)sample_rate );
220 
221 	    float next_pm = pm.t(j+1);
222 	    float last_pos = next_pm + ((next_pm-current_pm)*(window_factor-1.0));
223 	    last_sample = (int)rint( last_pos*(float)sample_rate );
224 
225 	    window_frame(frames[i], sig, scale, first_sample,
226 			 last_sample, window_function, centre_sample);
227 	    (*pm_indices)[i] = centre_sample - first_sample;
228 
229 	    prev_pm = current_pm;
230 	  }
231 
232       //last frame window size is set according to pm.t(end) and the number
233       //of samples in the waveform (it is presumed the waveform begins at the
234       //preceeding pitchmark and ends at the pitchmark following the current
235       //unit...)
236 
237 	  current_pm = pm.t(j);
238 	  centre_sample = (int)rint( current_pm*(float)sample_rate );
239 	  first_pos = prev_pm - (period * (window_factor-1.0));
240 	  first_sample = (int)rint( first_pos*(float)sample_rate );
241 	  last_sample = sig.num_samples()-1;
242 	  window_frame(frames[i], sig, scale, first_sample,
243 		       last_sample, window_function);
244 	  (*pm_indices)[i] = centre_sample - first_sample;
245 
246 #if defined(EST_DEBUGGING)
247 	  cerr << "changed: " << i << " " << pm_indices->n() << endl;
248 #endif
249 
250 	  ++i;
251 	}
252     }
253 }
254 
window_units(EST_Relation & unit_stream,EST_TVector<EST_Wave> & frames,float window_factor,EST_String window_name,bool window_symmetric,EST_IVector * pm_indices)255 void window_units( EST_Relation &unit_stream,
256 		   EST_TVector<EST_Wave> &frames,
257 		   float window_factor,
258 		   EST_String window_name,
259 		   bool window_symmetric,
260 		   EST_IVector *pm_indices )
261 {
262     int i;
263     EST_Wave *sig;
264     EST_Item *u;
265     EST_Track *coefs;
266     int num = 0;
267     float scale;
268     EST_WindowFunc *window_function;
269 
270     for (u = unit_stream.head(); u; u = u->next())
271 	num += track(u->f("coefs"))->num_frames();
272     frames.resize(num);
273 
274     if( pm_indices != 0 )
275       pm_indices->resize(num);
276 
277     if (window_name == "")
278       window_name = "hanning";
279 
280     window_function =  EST_Window::creator(window_name);
281 
282     for (i = 0, u = unit_stream.head(); u; u = u->next())
283     {
284 	sig = wave(u->f("sig"));
285 	coefs = track(u->f("coefs"));
286 	scale = (u->f_present("scale") ? u->F("scale") : 1.0);
287 
288 	window_signal(*sig, *coefs, frames, i, scale, window_factor,
289 		      window_function, window_symmetric, pm_indices);
290     }
291 }
292 
293 
us_unit_concat(EST_Utterance & utt,float window_factor,const EST_String & window_name,bool no_waveform=false,bool window_symmetric=true)294 void us_unit_concat(EST_Utterance &utt, float window_factor,
295 		    const EST_String &window_name,
296 		    bool no_waveform=false,
297 		    bool window_symmetric=true)
298 
299 {
300   EST_Relation *unit_stream;
301   EST_Track *source_coef = new EST_Track;
302   EST_WaveVector *frames = new EST_WaveVector;
303   EST_IVector *pm_indices = 0;
304 
305   unit_stream = utt.relation("Unit", 1);
306 
307   concatenate_unit_coefs(*unit_stream, *source_coef);
308 
309   utt.create_relation("SourceCoef");
310   EST_Item *item = utt.relation("SourceCoef")->append();
311   item->set("name", "coef");
312   item->set_val("coefs", est_val(source_coef));
313 
314   if (!no_waveform){
315     if( !window_symmetric )
316       pm_indices = new EST_IVector;
317 
318     window_units(*unit_stream, *frames,
319 		 window_factor, window_name, window_symmetric, pm_indices);
320 
321     item->set_val("frame", est_val(frames));
322 
323     if( !window_symmetric )
324       item->set_val("pm_indices", est_val(pm_indices));
325   }
326 }
327 
328 
us_get_copy_wave(EST_Utterance & utt,EST_Wave & source_sig,EST_Track & source_coefs,EST_Relation & source_seg)329 void us_get_copy_wave(EST_Utterance &utt, EST_Wave &source_sig,
330 		       EST_Track &source_coefs, EST_Relation &source_seg)
331 {
332     EST_Item *s, *n;
333 
334     if (!utt.relation_present("Segment"))
335 	EST_error("utterance must have \"Segment\" relation\n");
336 
337     utt.create_relation("TmpSegment");
338 
339     for (s = source_seg.head(); s; s = s->next())
340     {
341 	n = utt.relation("TmpSegment")->append();
342 	merge_features(n, s, 0);
343     }
344 
345     utt.relation("Segment")->remove_item_feature("source_end");
346 
347     dp_time_align(utt, "TmpSegment", "Segment", "source_", 0);
348 
349     utt.create_relation("Unit");
350     EST_Item *d = utt.relation("Unit")->append();
351 
352 
353     EST_Wave *ss = new EST_Wave;
354     *ss = source_sig;
355 
356     EST_Track *c = new EST_Track;
357     *c = source_coefs;
358 
359     d->set_val("sig", est_val(ss));
360     d->set_val("coefs", est_val(c));
361 
362     utt.remove_relation("TmpSegment");
363 }
364 
365 
us_energy_normalise(EST_Relation & unit)366 void us_energy_normalise(EST_Relation &unit)
367 {
368     EST_Wave *sig;
369 
370     for (EST_Item *s = unit.head(); s; s = s->next())
371     {
372 	sig = wave(s->f("sig"));
373 	if (s->f_present("energy_factor"))
374 	    sig->rescale(s->F("energy_factor"));
375     }
376 }
377 
us_unit_raw_concat(EST_Utterance & utt)378 void us_unit_raw_concat(EST_Utterance &utt)
379 {
380     EST_Wave *sig, *unit_sig;
381     EST_Track *unit_coefs=0;
382     float window_factor;
383     int i, j, k;
384     int first_pm, last_pm, last_length;
385     float first_pos, last_pos;
386 
387     window_factor = get_c_float(siod_get_lval("window_factor",
388 					      "UniSyn: no window_factor"));
389     sig = new EST_Wave;
390 
391     sig->resize(1000000);
392     sig->fill(0);
393     j = 0;
394 
395     for (EST_Item *s = utt.relation("Unit", 1)->head(); s; s = s->next())
396     {
397 	unit_sig = wave(s->f("sig"));
398 	unit_coefs = track(s->f("coefs"));
399 
400 	first_pos = unit_coefs->t(1);
401 	first_pm = (int)(first_pos * (float)unit_sig->sample_rate());
402 
403 	last_pos = unit_coefs->t(unit_coefs->num_frames()-2);
404 	last_pm = (int)(last_pos * (float)unit_sig->sample_rate());
405 	last_length = unit_sig->num_samples() - last_pm;
406 
407 //	cout << "first pm: " << first_pm << endl;
408 //	cout << "last pm: " << last_pm << endl;
409 //	cout << "last length: " << last_length << endl;
410 
411 	j -= first_pm;
412 
413 	for (i = 0; i < first_pm; ++i, ++j)
414 	    sig->a_safe(j) += (short)((((float) i)/ (float)first_pm) *(float)unit_sig->a_safe(i)+0.5);
415 
416 	for (; i < last_pm; ++i, ++j)
417 	    sig->a(j) = unit_sig->a(i);
418 
419 	for (k = 0; i < unit_sig->num_samples(); ++i, ++j, ++k)
420 	    sig->a_safe(j) += (short)((1.0 - (((float) k) / (float) last_length))
421 	      * (float)unit_sig->a_safe(i) + 0.5);
422 
423 //	j -= last_length;
424 //	j += 2000;
425     }
426 
427     sig->resize(j);
428     sig->set_sample_rate(16000);
429 
430     add_wave_to_utterance(utt, *sig, "Wave");
431 }
432 
433 
concatenate_unit_coefs(EST_Relation & unit_stream,EST_Track & source_lpc)434 void concatenate_unit_coefs(EST_Relation &unit_stream, EST_Track &source_lpc)
435 {
436     int num_source_frames   = 0;
437     int num_source_channels = 0;;
438     float prev_time, abs_offset, rel_offset, period, offset;
439     int i, j, k, l;
440     EST_Track *coefs;
441 
442     EST_Item *u = unit_stream.head();
443     if( u == 0 ){
444       //sometimes we are just asked to synthesise empty utterances, and
445       //code elsewhere wants us to continue...
446       source_lpc.resize(0,0);
447     }
448     else{
449       EST_Track *t = 0;
450       for ( ; u; u = u->next())
451 	{
452 	  t = track(u->f("coefs"));
453 	  num_source_frames += t->num_frames();
454 	}
455 
456       num_source_channels = t->num_channels();
457 
458       source_lpc.resize(num_source_frames, num_source_channels);
459       source_lpc.copy_setup(*t);
460 
461       prev_time = 0.0;
462       // copy basic information
463       for (i = 0, l = 0, u = unit_stream.head(); u; u = u->next())
464 	{
465 	  coefs = track(u->f("coefs"));
466 
467 	  for (j = 0; j < coefs->num_frames(); ++j, ++i)
468 	    {
469 	      for (k = 0; k < coefs->num_channels(); ++k)
470 		source_lpc.a_no_check(i, k) = coefs->a_no_check(j, k);
471 	      source_lpc.t(i) = coefs->t(j) + prev_time;
472 	    }
473 
474 	  prev_time = source_lpc.t(i - 1);
475 	  u->set("end", prev_time);
476 	  u->set("num_frames", coefs->num_frames());
477 	}
478     }
479 
480     // adjust pitchmarks
481     abs_offset = 0.0;
482     rel_offset = 0.0;
483     // absolute offset in seconds
484     abs_offset = get_c_float(siod_get_lval("us_abs_offset", "zz"));
485     // relative offset as a function of local pitch period
486     rel_offset = get_c_float(siod_get_lval("us_rel_offset", "zz"));
487 
488     if( abs_offset!=0.0 || rel_offset!=0.0 ){
489       cerr << "Adjusting pitchmarks" << endl;
490       for (i = 0; i < source_lpc.num_frames(); ++i){
491 	period = get_time_frame_size(source_lpc, (i));
492 	offset = abs_offset + (rel_offset * period);
493 	source_lpc.t(i) = source_lpc.t(i) + offset;
494       }
495     }
496 }
497 
498 // jointimes specifies centre of last pitch period in each
499 // concatenated unit
500 // void us_linear_smooth_amplitude( EST_Wave *w,
501 // 				 const EST_Track &pm,
502 // 				 const EST_FVector &jointimes)
503 // {
504 //   int num_joins = jointimes.length();
505 
506 //   EST_Track *factor_contour = new EST_Track( num_joins );
507 
508 //   for( int i=0; i<num_joins; ++i ){
509 //     float join_t = jointimes(i);
510 //     int join_indx = pm.index_below( join_t );
511 
512 //     // estimate local short-time energy function either side of join
513 //     int left_start = rount(pm.t(join_indx-2)*(float)16000);
514 //     int left_end   = rount(pm.t(join_indx)*(float)16000);
515 //     float left_power = 0.0 ;
516 //     for( int j=left_start; j<left_end; ++j )
517 //       left_power += pow( w[j], 2 );
518 
519 //     left_power /= (left_end - left_start); //normalise for frame length
520 
521 //     int right_start = rount(pm.t(join_indx+1)*(float)16000);
522 //     int right_end   = rount(pm.t(join_indx+3)*(float)16000);
523 //     float right_power = 0.0;
524 //     for( int j=right_start; j<right_end; ++j )
525 //       right_power += pow( w[j], 2 );
526 
527 //     right_power /= (right_end - right_start); //normalise for frame length
528 
529 //     float mean_power = (left_power+right_power)/2.0;
530 
531 //     float left_factor  = left_power/mean_power;
532 //     float right_factor = right_power/mean_power;
533 
534 //     (*factor_contour)[i]   =  left_factor;
535 //     (*factor_contour)[i+1] = right_factor;
536 //   }
537 
538 // }
539 
// Build a one-channel track holding the RMS amplitude of every pitch
// period frame in pp.  Frame i of the result is stamped with time
// pm.t(i).  A frame without samples gets an RMS of 0.0 rather than the
// NaN that 0/0 would give.
//
// The caller owns the returned track and must delete it.
// NOTE(review): pm is assumed to have at least as many frames as pp -
// confirm at the call site.
static EST_Track* us_pitch_period_energy_contour( const EST_WaveVector &pp,
						  const EST_Track &pm )
{
  const int pp_length = pp.length();

  EST_Track *contour = new EST_Track;
  contour->resize( pp_length, 1 );

  for( int i=0; i<pp_length; ++i ){
    const EST_Wave &frame = pp(i);
    const int frame_length = frame.length();

    // sum of squared samples, accumulated directly in the track cell
    contour->a_no_check(i,0) = 0.0;
    for( int j=0; j<frame_length; ++j )
      contour->a_no_check( i, 0 ) += pow( float(frame.a_no_check( j )), float(2.0) );

    // root of the mean; the cell is already 0.0 for an empty frame
    if( frame_length > 0 )
      contour->a_no_check(i,0) =
	sqrt( contour->a_no_check(i,0) / (float)frame_length );

    contour->t(i) = pm.t(i);
  }

  return contour;
}
563 
564 EST_Val ffeature(EST_Item *item,const EST_String &fname);
565 
us_linear_smooth_amplitude(EST_Utterance * utt)566 void us_linear_smooth_amplitude( EST_Utterance *utt )
567 {
568   EST_WaveVector *pp = wavevector(utt->relation("SourceCoef")->first()->f("frame"));
569   EST_Track *pm = track(utt->relation("SourceCoef")->first()->f("coefs"));
570 
571   EST_Track *energy = us_pitch_period_energy_contour( *pp, *pm );
572   energy->save( "./energy_track.est", "est" );
573 
574   FILE *ofile = fopen( "./join_times.est", "w" );
575   EST_Relation *units = utt->relation("Unit");
576   for( EST_Item *u=units->head(); u; u=u->next() ){
577 
578     EST_Item *diphone_left = u;
579     //    EST_Item *diphone_right = u->next();
580 
581     fprintf( ofile, "%s\t%f\n", diphone_left->S("name").str(), diphone_left->F("end"));
582 
583     EST_Item *join_phone_left = item(diphone_left->f("ph1"))->next();
584     EST_String phone_name = join_phone_left->S("name");
585     if( ph_is_sonorant( phone_name ) && !ph_is_silence( phone_name )){
586 
587       //if( (ffeature(join_phone_left, "ph_vc")).S() == "+"){ // ideally for sonorants
588 
589       cerr << "smoothing phone " << join_phone_left->S("name") << "\n";
590 
591       //      EST_Item *join_phone_right = item(diphone_right->f("ph1"));
592 
593       int left_end_index = energy->index(diphone_left->F("end"));
594       int right_start_index = left_end_index + 1;
595       float left_power  = energy->a(left_end_index,0);
596       float right_power = energy->a(right_start_index,0);
597 
598       float mean_power = (left_power+right_power)/2.0;
599       float left_factor  = left_power/mean_power;
600       float right_factor = right_power/mean_power;
601 
602       int smooth_start_index = left_end_index-5;
603       int smooth_end_index   = right_start_index+5;
604 
605 
606       // rescale left pitch periods
607       float factor = 1.0;
608       float factor_incr = (left_factor-1.0)/(float)(left_end_index - smooth_start_index);
609       for( int i=smooth_start_index; i<=left_end_index; ++i, factor+=factor_incr ){
610 	(*pp)[i].rescale( factor, 0 );
611 	cerr << "rescaled frame " << i << "(factor " << factor << ")\n";
612       }
613 
614       // rescale right pitch periods
615       factor = right_factor;
616       factor_incr = (1.0-right_factor)/(float)(smooth_end_index-right_start_index);
617       for( int i=right_start_index; i<=smooth_end_index; ++i, factor+=factor_incr){
618 	(*pp)[i].rescale( factor, 0 );
619 	cerr << "rescaled frame " << i << "(factor " << factor << ")\n";
620       }
621     }
622     else
623       cerr << "no smoothing for " << join_phone_left->S("name") << "\n";
624 
625     cerr <<endl;
626   }
627 
628   fclose( ofile );
629   delete energy;
630 }
631 
632