1 /*
2  *  LAME MP3 encoder for DirectShow
3  *  LAME encoder wrapper
4  *
5  *  Copyright (c) 2000-2005 Marie Orlova, Peter Gubanov, Vitaly Ivanov, Elecard Ltd.
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Library General Public License for more details.
16  *
17  * You should have received a copy of the GNU Library General Public
18  * License along with this library; if not, write to the
19  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20  * Boston, MA 02111-1307, USA.
21  */
22 
23 #include <streams.h>
24 #include "Encoder.h"
25 
26 
27 //////////////////////////////////////////////////////////////////////
28 // Construction/Destruction
29 //////////////////////////////////////////////////////////////////////
CEncoder()30 CEncoder::CEncoder() :
31     m_bInpuTypeSet(FALSE),
32     m_bOutpuTypeSet(FALSE),
33     m_bFinished(FALSE),
34     m_outOffset(0),
35     m_outReadOffset(0),
36     m_frameCount(0),
37     pgf(NULL)
38 {
39     m_outFrameBuf = new unsigned char[OUT_BUFFER_SIZE];
40 }
41 
~CEncoder()42 CEncoder::~CEncoder()
43 {
44     Close(NULL);
45 
46     if (m_outFrameBuf)
47         delete [] m_outFrameBuf;
48 }
49 
50 //////////////////////////////////////////////////////////////////////
51 // SetInputType - check if given input type is supported
52 //////////////////////////////////////////////////////////////////////
SetInputType(LPWAVEFORMATEX lpwfex,bool bJustCheck)53 HRESULT CEncoder::SetInputType(LPWAVEFORMATEX lpwfex, bool bJustCheck)
54 {
55     CAutoLock l(&m_lock);
56 
57     if (lpwfex->wFormatTag == WAVE_FORMAT_PCM)
58     {
59         if (lpwfex->nChannels == 1 || lpwfex->nChannels == 2)
60         {
61             if (lpwfex->nSamplesPerSec  == 48000 ||
62                 lpwfex->nSamplesPerSec  == 44100 ||
63                 lpwfex->nSamplesPerSec  == 32000 ||
64                 lpwfex->nSamplesPerSec  == 24000 ||
65                 lpwfex->nSamplesPerSec  == 22050 ||
66                 lpwfex->nSamplesPerSec  == 16000 ||
67                 lpwfex->nSamplesPerSec  == 12000 ||
68                 lpwfex->nSamplesPerSec  == 11025 ||
69                 lpwfex->nSamplesPerSec  ==  8000)
70             {
71                 if (lpwfex->wBitsPerSample == 16)
72                 {
73                     if (!bJustCheck)
74                     {
75                         memcpy(&m_wfex, lpwfex, sizeof(WAVEFORMATEX));
76                         m_bInpuTypeSet = true;
77                     }
78 
79                     return S_OK;
80                 }
81             }
82         }
83     }
84 
85     if (!bJustCheck)
86         m_bInpuTypeSet = false;
87 
88     return E_INVALIDARG;
89 }
90 
91 //////////////////////////////////////////////////////////////////////
92 // SetOutputType - try to initialize encoder with given output type
93 //////////////////////////////////////////////////////////////////////
SetOutputType(MPEG_ENCODER_CONFIG & mabsi)94 HRESULT CEncoder::SetOutputType(MPEG_ENCODER_CONFIG &mabsi)
95 {
96     CAutoLock l(&m_lock);
97 
98     m_mabsi = mabsi;
99     m_bOutpuTypeSet = true;
100 
101     return S_OK;
102 }
103 
104 //////////////////////////////////////////////////////////////////////
105 // SetDefaultOutputType - sets default MPEG audio properties according
106 // to input type
107 //////////////////////////////////////////////////////////////////////
SetDefaultOutputType(LPWAVEFORMATEX lpwfex)108 HRESULT CEncoder::SetDefaultOutputType(LPWAVEFORMATEX lpwfex)
109 {
110     CAutoLock l(&m_lock);
111 
112     if(lpwfex->nChannels == 1 || m_mabsi.bForceMono)
113         m_mabsi.ChMode = MONO;
114 
115     if((lpwfex->nSamplesPerSec < m_mabsi.dwSampleRate) || (lpwfex->nSamplesPerSec % m_mabsi.dwSampleRate != 0))
116         m_mabsi.dwSampleRate = lpwfex->nSamplesPerSec;
117 
118     return S_OK;
119 }
120 
121 //////////////////////////////////////////////////////////////////////
// Init - initializes or reinitializes the encoder SDK with the given input
// and output settings
124 //////////////////////////////////////////////////////////////////////
Init()125 HRESULT CEncoder::Init()
126 {
127     CAutoLock l(&m_lock);
128 
129     m_outOffset     = 0;
130     m_outReadOffset = 0;
131 
132     m_bFinished     = FALSE;
133 
134     m_frameCount    = 0;
135 
136     if (!pgf)
137     {
138         if (!m_bInpuTypeSet || !m_bOutpuTypeSet)
139             return E_UNEXPECTED;
140 
141         // Init Lame library
142         // note: newer, safer interface which doesn't
143         // allow or require direct access to 'gf' struct is being written
144         // see the file 'API' included with LAME.
145         if (pgf = lame_init())
146         {
147             lame_set_num_channels(pgf, m_wfex.nChannels);
148             lame_set_in_samplerate(pgf, m_wfex.nSamplesPerSec);
149             lame_set_out_samplerate(pgf, m_mabsi.dwSampleRate);
150             if ((lame_get_out_samplerate(pgf) >= 32000) && (m_mabsi.dwBitrate < 32))
151                 lame_set_brate(pgf, 32);
152             else
153                 lame_set_brate(pgf, m_mabsi.dwBitrate);
154             lame_set_VBR(pgf, m_mabsi.vmVariable);
155             lame_set_VBR_min_bitrate_kbps(pgf, m_mabsi.dwVariableMin);
156             lame_set_VBR_max_bitrate_kbps(pgf, m_mabsi.dwVariableMax);
157 
158             lame_set_copyright(pgf, m_mabsi.bCopyright);
159             lame_set_original(pgf, m_mabsi.bOriginal);
160             lame_set_error_protection(pgf, m_mabsi.bCRCProtect);
161 
162             lame_set_bWriteVbrTag(pgf, m_mabsi.dwXingTag);
163             lame_set_strict_ISO(pgf, m_mabsi.dwStrictISO);
164             lame_set_VBR_hard_min(pgf, m_mabsi.dwEnforceVBRmin);
165 
166             if (lame_get_num_channels(pgf) == 2 && !m_mabsi.bForceMono)
167             {
168                 //int act_br = pgf->VBR ? pgf->VBR_min_bitrate_kbps + pgf->VBR_max_bitrate_kbps / 2 : pgf->brate;
169 
170                 // Disabled. It's for user's consideration now
171                 //int rel = pgf->out_samplerate / (act_br + 1);
172                 //pgf->mode = rel < 200 ? m_mabsi.ChMode : JOINT_STEREO;
173 
174                 lame_set_mode(pgf, m_mabsi.ChMode);
175             }
176             else
177                 lame_set_mode(pgf, MONO);
178 
179             if (lame_get_mode(pgf) == JOINT_STEREO)
180                 lame_set_force_ms(pgf, m_mabsi.dwForceMS);
181             else
182                 lame_set_force_ms(pgf, 0);
183 
184 //            pgf->mode_fixed = m_mabsi.dwModeFixed;
185 
186             if (m_mabsi.dwVoiceMode != 0)
187             {
188                 lame_set_lowpassfreq(pgf,12000);
189                 ///pgf->VBR_max_bitrate_kbps = 160;
190             }
191 
192             if (m_mabsi.dwKeepAllFreq != 0)
193             {
194                 ///pgf->lowpassfreq = -1;
195                 ///pgf->highpassfreq = -1;
196                 /// not available anymore
197             }
198 
199             lame_set_quality(pgf, m_mabsi.dwQuality);
200             lame_set_VBR_q(pgf, m_mabsi.dwVBRq);
201 
202             lame_init_params(pgf);
203 
204             // encoder delay compensation
205             {
206                 int const nch = lame_get_num_channels(pgf);
207                 short * start_padd = (short *)calloc(48, nch * sizeof(short));
208 
209 				int out_bytes = 0;
210 
211                 if (nch == 2)
212                     out_bytes = lame_encode_buffer_interleaved(pgf, start_padd, 48, m_outFrameBuf, OUT_BUFFER_SIZE);
213                 else
214                     out_bytes = lame_encode_buffer(pgf, start_padd, start_padd, 48, m_outFrameBuf, OUT_BUFFER_SIZE);
215 
216 				if (out_bytes > 0)
217 					m_outOffset += out_bytes;
218 
219                 free(start_padd);
220             }
221 
222             return S_OK;
223         }
224 
225         return E_FAIL;
226     }
227 
228     return S_OK;
229 }
230 
231 //////////////////////////////////////////////////////////////////////
232 // Close - closes encoder
233 //////////////////////////////////////////////////////////////////////
Close(IStream * pStream)234 HRESULT CEncoder::Close(IStream* pStream)
235 {
236 	CAutoLock l(&m_lock);
237     if (pgf)
238     {
239 		if(lame_get_bWriteVbrTag(pgf) && pStream)
240 		{
241 			updateLameTagFrame(pStream);
242 		}
243 
244         lame_close(pgf);
245         pgf = NULL;
246     }
247 
248     return S_OK;
249 }
250 
251 //////////////////////////////////////////////////////////////////////
252 // Encode - encodes data placed on pdata and returns
253 // the number of processed bytes
254 //////////////////////////////////////////////////////////////////////
Encode(const short * pdata,int data_size)255 int CEncoder::Encode(const short * pdata, int data_size)
256 {
257     CAutoLock l(&m_lock);
258 
259     if (!pgf || !m_outFrameBuf || !pdata || data_size < 0 || (data_size & (sizeof(short) - 1)))
260         return -1;
261 
262     // some data left in the buffer, shift to start
263     if (m_outReadOffset > 0)
264     {
265         if (m_outOffset > m_outReadOffset)
266             memmove(m_outFrameBuf, m_outFrameBuf + m_outReadOffset, m_outOffset - m_outReadOffset);
267 
268         m_outOffset -= m_outReadOffset;
269     }
270 
271     m_outReadOffset = 0;
272 
273 
274 
275     m_bFinished = FALSE;
276 
277     int bytes_processed = 0;
278     int const nch = lame_get_num_channels(pgf);
279 
280     while (1)
281     {
282         int nsamples = (data_size - bytes_processed) / (sizeof(short) * nch);
283 
284         if (nsamples <= 0)
285             break;
286 
287         if (nsamples > 1152)
288             nsamples = 1152;
289 
290         if (m_outOffset >= OUT_BUFFER_MAX)
291             break;
292 
293         int out_bytes = 0;
294 
295         if (nch == 2)
296             out_bytes = lame_encode_buffer_interleaved(
297                                             pgf,
298                                             (short *)(pdata + (bytes_processed / sizeof(short))),
299                                             nsamples,
300                                             m_outFrameBuf + m_outOffset,
301                                             OUT_BUFFER_SIZE - m_outOffset);
302         else
303             out_bytes = lame_encode_buffer(
304                                             pgf,
305                                             pdata + (bytes_processed / sizeof(short)),
306                                             pdata + (bytes_processed / sizeof(short)),
307                                             nsamples,
308                                             m_outFrameBuf + m_outOffset,
309                                             OUT_BUFFER_SIZE - m_outOffset);
310 
311         if (out_bytes < 0)
312             return -1;
313 
314         m_outOffset     += out_bytes;
315         bytes_processed += nsamples * nch * sizeof(short);
316     }
317 
318     return bytes_processed;
319 }
320 
321 //
// Finish - flush the buffered samples
323 //
Finish()324 HRESULT CEncoder::Finish()
325 {
326     CAutoLock l(&m_lock);
327 
328     if (!pgf || !m_outFrameBuf || (m_outOffset >= OUT_BUFFER_MAX))
329         return E_FAIL;
330 
331     m_outOffset += lame_encode_flush(pgf, m_outFrameBuf + m_outOffset, OUT_BUFFER_SIZE - m_outOffset);
332 
333     m_bFinished = TRUE;
334 
335     return S_OK;
336 }
337 
338 
getFrameLength(const unsigned char * pdata)339 int getFrameLength(const unsigned char * pdata)
340 {
341     if (!pdata || pdata[0] != 0xff || (pdata[1] & 0xe0) != 0xe0)
342         return -1;
343 
344     const int sample_rate_tab[4][4] =
345     {
346         {11025,12000,8000,1},
347         {1,1,1,1},
348         {22050,24000,16000,1},
349         {44100,48000,32000,1}
350     };
351 
352 #define MPEG_VERSION_RESERVED   1
353 #define MPEG_VERSION_1          3
354 
355 #define LAYER_III               1
356 
357 #define BITRATE_FREE            0
358 #define BITRATE_RESERVED        15
359 
360 #define SRATE_RESERVED          3
361 
362 #define EMPHASIS_RESERVED       2
363 
364     int version_id      = (pdata[1] & 0x18) >> 3;
365     int layer           = (pdata[1] & 0x06) >> 1;
366     int bitrate_id      = (pdata[2] & 0xF0) >> 4;
367     int sample_rate_id  = (pdata[2] & 0x0C) >> 2;
368     int padding         = (pdata[2] & 0x02) >> 1;
369     int emphasis        =  pdata[3] & 0x03;
370 
371     if (version_id      != MPEG_VERSION_RESERVED &&
372         layer           == LAYER_III &&
373         bitrate_id      != BITRATE_FREE &&
374         bitrate_id      != BITRATE_RESERVED &&
375         sample_rate_id  != SRATE_RESERVED &&
376         emphasis        != EMPHASIS_RESERVED)
377     {
378         int spf         = (version_id == MPEG_VERSION_1) ? 1152 : 576;
379         int sample_rate = sample_rate_tab[version_id][sample_rate_id];
380         int bitrate     = dwBitRateValue[version_id != MPEG_VERSION_1][bitrate_id - 1] * 1000;
381 
382         return (bitrate * spf) / (8 * sample_rate) + padding;
383     }
384 
385     return -1;
386 }
387 
388 
GetFrame(const unsigned char ** pframe)389 int CEncoder::GetFrame(const unsigned char ** pframe)
390 {
391     if (!pgf || !m_outFrameBuf || !pframe)
392         return -1;
393 
394 	while ((m_outOffset - m_outReadOffset) > 4)
395     {
396         int frame_length = getFrameLength(m_outFrameBuf + m_outReadOffset);
397 
398         if (frame_length < 0)
399         {
400             m_outReadOffset++;
401         }
402         else if (frame_length <= (m_outOffset - m_outReadOffset))
403         {
404             *pframe = m_outFrameBuf + m_outReadOffset;
405             m_outReadOffset += frame_length;
406 
407             m_frameCount++;
408 
409             // don't deliver the first and the last frames
410             if (m_frameCount != 1 && !(m_bFinished && (m_outOffset - m_outReadOffset) < 5))
411                 return frame_length;
412         }
413         else
414             break;
415     }
416 
417     return 0;
418 }
419 
420 ////////////////////////////////////////////////////////////////////////////////
// Returns a block of the MP3 stream whose size is an integer multiple of
// cbAlign, or whatever remains (unaligned) once encoding has finished
423 ////////////////////////////////////////////////////////////////////////////////
GetBlockAligned(const unsigned char ** pblock,int * piBufferSize,const long & cbAlign)424 int CEncoder::GetBlockAligned(const unsigned char ** pblock, int* piBufferSize, const long& cbAlign)
425 {
426 	ASSERT(piBufferSize);
427     if (!pgf || !m_outFrameBuf || !pblock)
428         return -1;
429 
430 	int iBlockLen = m_outOffset - m_outReadOffset;
431 	ASSERT(iBlockLen >= 0);
432 
433 	if(!m_bFinished)
434 	{
435 		if(cbAlign > 0)
436 			iBlockLen-=iBlockLen%cbAlign;
437 		*piBufferSize = iBlockLen;
438 	}
439 	else
440 	{
441 		if(cbAlign && iBlockLen%cbAlign)
442 		{
443 			*piBufferSize = iBlockLen + cbAlign - iBlockLen%cbAlign;
444 		}
445 		else
446 		{
447 			*piBufferSize = iBlockLen;
448 		}
449 	}
450 
451 	if(iBlockLen) {
452 		*pblock = m_outFrameBuf + m_outReadOffset;
453 		m_outReadOffset+=iBlockLen;
454 	}
455 
456 	return iBlockLen;
457 }
458 
maybeSyncWord(IStream * pStream)459 HRESULT CEncoder::maybeSyncWord(IStream *pStream)
460 {
461 	HRESULT hr = S_OK;
462     unsigned char mp3_frame_header[4];
463 	ULONG nbytes;
464 	if(FAILED(hr = pStream->Read(mp3_frame_header, sizeof(mp3_frame_header), &nbytes)))
465 		return hr;
466 
467     if ( nbytes != sizeof(mp3_frame_header) ) {
468         return E_FAIL;
469     }
470     if ( mp3_frame_header[0] != 0xffu ) {
471         return S_FALSE; /* doesn't look like a sync word */
472     }
473     if ( (mp3_frame_header[1] & 0xE0u) != 0xE0u ) {
474 		return S_FALSE; /* doesn't look like a sync word */
475     }
476     return S_OK;
477 }
478 
skipId3v2(IStream * pStream,size_t lametag_frame_size)479 HRESULT CEncoder::skipId3v2(IStream *pStream, size_t lametag_frame_size)
480 {
481 	HRESULT hr = S_OK;
482     ULONG  nbytes;
483     size_t  id3v2TagSize = 0;
484     unsigned char id3v2Header[10];
485 	LARGE_INTEGER seekTo;
486 
487     /* seek to the beginning of the stream */
488 	seekTo.QuadPart = 0;
489 	if (FAILED(hr = pStream->Seek(seekTo,  STREAM_SEEK_SET, NULL))) {
490         return hr;  /* not seekable, abort */
491     }
492     /* read 10 bytes in case there's an ID3 version 2 header here */
493 	hr = pStream->Read(id3v2Header, sizeof(id3v2Header), &nbytes);
494     if (FAILED(hr))
495 		return hr;
496 	if(nbytes != sizeof(id3v2Header)) {
497         return E_FAIL;  /* not readable, maybe opened Write-Only */
498     }
499     /* does the stream begin with the ID3 version 2 file identifier? */
500     if (!strncmp((char *) id3v2Header, "ID3", 3)) {
501         /* the tag size (minus the 10-byte header) is encoded into four
502         * bytes where the most significant bit is clear in each byte
503         */
504         id3v2TagSize = (((id3v2Header[6] & 0x7f) << 21)
505             | ((id3v2Header[7] & 0x7f) << 14)
506             | ((id3v2Header[8] & 0x7f) << 7)
507             | (id3v2Header[9] & 0x7f))
508             + sizeof id3v2Header;
509     }
510     /* Seek to the beginning of the audio stream */
511 	seekTo.QuadPart = id3v2TagSize;
512 	if (FAILED(hr = pStream->Seek(seekTo, STREAM_SEEK_SET, NULL))) {
513         return hr;
514     }
515     if (S_OK != (hr = maybeSyncWord(pStream))) {
516 		return SUCCEEDED(hr)?E_FAIL:hr;
517     }
518 	seekTo.QuadPart = id3v2TagSize+lametag_frame_size;
519 	if (FAILED(hr = pStream->Seek(seekTo, STREAM_SEEK_SET, NULL))) {
520         return hr;
521     }
522     if (S_OK != (hr = maybeSyncWord(pStream))) {
523         return SUCCEEDED(hr)?E_FAIL:hr;
524     }
525     /* OK, it seems we found our LAME-Tag/Xing frame again */
526     /* Seek to the beginning of the audio stream */
527 	seekTo.QuadPart = id3v2TagSize;
528 	if (FAILED(hr = pStream->Seek(seekTo, STREAM_SEEK_SET, NULL))) {
529         return hr;
530     }
531     return S_OK;
532 }
533 
534 // Updates VBR tag
updateLameTagFrame(IStream * pStream)535 HRESULT CEncoder::updateLameTagFrame(IStream* pStream)
536 {
537 	HRESULT hr = S_OK;
538 	size_t n = lame_get_lametag_frame( pgf, 0, 0 ); /* ask for bufer size */
539 
540     if ( n > 0 )
541     {
542         unsigned char* buffer = 0;
543         ULONG m = n;
544 
545         if ( FAILED(hr = skipId3v2(pStream, n) ))
546         {
547             /*DispErr( "Error updating LAME-tag frame:\n\n"
548                      "can't locate old frame\n" );*/
549             return hr;
550         }
551 
552         buffer = (unsigned char*)malloc( n );
553 
554         if ( buffer == 0 )
555         {
556             /*DispErr( "Error updating LAME-tag frame:\n\n"
557                      "can't allocate frame buffer\n" );*/
558             return E_OUTOFMEMORY;
559         }
560 
561         /* Put it all to disk again */
562         n = lame_get_lametag_frame( pgf, buffer, n );
563         if ( n > 0 )
564         {
565 			hr = pStream->Write(buffer, n, &m);
566         }
567         free( buffer );
568 
569         if ( m != n )
570         {
571             /*DispErr( "Error updating LAME-tag frame:\n\n"
572                      "couldn't write frame into file\n" );*/
573 			return E_FAIL;
574         }
575     }
576     return hr;
577 }
578