1 /*******************************************************************************
2 *                         Goggles Music Manager                                *
3 ********************************************************************************
4 *           Copyright (C) 2016-2021 by Sander Jansen. All Rights Reserved      *
5 *                               ---                                            *
6 * This program is free software: you can redistribute it and/or modify         *
7 * it under the terms of the GNU General Public License as published by         *
8 * the Free Software Foundation, either version 3 of the License, or            *
9 * (at your option) any later version.                                          *
10 *                                                                              *
11 * This program is distributed in the hope that it will be useful,              *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of               *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                *
14 * GNU General Public License for more details.                                 *
15 *                                                                              *
16 * You should have received a copy of the GNU General Public License            *
17 * along with this program.  If not, see http://www.gnu.org/licenses.           *
18 ********************************************************************************/
19 #include "gmdefs.h"
20 #include "gmutils.h"
21 #include "ap.h"
22 #include "GMTrack.h"
23 #include "GMLyrics.h"
24 
25 #if FOXVERSION < FXVERSION(1,7,55)
26 
27 // From FOX-1.7.55
28 // Copyright (C) 2016 by Jeroen van der Zijp.
29 
// Option bits accepted by xml_decode(); combine with bitwise OR.
enum {
  CRLF = 1 << 0,      /// CRLF, LFCR, CR, LF map to LF
  REFS = 1 << 1,      /// Character references processed
  };
34 
35 
36 // Decode escaped special characters from XML stream
xml_decode(FXString & dst,const FXString & src,FXuint flags=CRLF|REFS)37 static FXbool xml_decode(FXString& dst,const FXString& src,FXuint flags=CRLF|REFS){
38   register FXival p,q;
39   register FXwchar wc;
40 
41   // Measure the resulting string first
42   p=q=0;
43   while(q<src.length()){
44     wc=src[q++];
45     if(wc=='\r' && (flags&CRLF)){               // CR, CRLF -> LF
46       if(src[q]=='\n'){ q++; }
47       p++;
48       continue;
49       }
50     if(wc=='\n' && (flags&CRLF)){               // LF, LFCR -> LF
51       if(src[q]=='\r'){ q++; }
52       p++;
53       continue;
54       }
55     if(wc=='&' && (flags&REFS)){
56       if(src[q]=='#'){
57         if(src[q+1]=='x'){                      // &#xXXXX;
58           q+=2;
59           if(!Ascii::isHexDigit(src[q])) return false;  // Expected at least one hex digit
60           wc=FXString::digit2Value[(FXuchar)src[q++]];
61           while(Ascii::isHexDigit(src[q])){
62             wc=wc*16+FXString::digit2Value[(FXuchar)src[q++]];
63             }
64           if(src[q++]!=';') return false;       // Expected semicolon
65           }
66         else{                                   // &#DDDD;
67           q+=1;
68           if(!Ascii::isDigit(src[q])) return false;     // Expected at least one digit
69           wc=src[q++]-'0';
70           while(Ascii::isDigit(src[q])){
71             wc=wc*10+(src[q++]-'0');
72             }
73           if(src[q++]!=';') return false;       // Expected semicolon
74           }
75         p+=wc2utf(wc);
76         continue;
77         }
78       if(src[q]=='q' && src[q+1]=='u' && src[q+2]=='o' && src[q+3]=='t' && src[q+4]==';'){      // &quot;
79         q+=5;
80         p++;
81         continue;
82         }
83       if(src[q]=='a' && src[q+1]=='p' && src[q+2]=='o' && src[q+3]=='s' && src[q+4]==';'){      // &apos;
84         q+=5;
85         p++;
86         continue;
87         }
88       if(src[q]=='a' && src[q+1]=='m' && src[q+2]=='p' && src[q+3]==';'){       // &amp;
89         q+=4;
90         p++;
91         continue;
92         }
93       if(src[q]=='l' && src[q+1]=='t' && src[q+2]==';'){        // &lt;
94         q+=3;
95         p++;
96         continue;
97         }
98       if(src[q]=='g' && src[q+1]=='t' && src[q+2]==';'){        // &gt;
99         q+=3;
100         p++;
101         continue;
102         }
103       return false;                             // Unknown reference
104       }
105     p++;
106     }
107 
108   // Now allocate space
109   dst.length(p);
110 
111   // Now produce the result string
112   p=q=0;
113   while(q<src.length()){
114     wc=src[q++];
115     if(wc=='\r' && (flags&CRLF)){               // CR, CRLF -> LF
116       if(src[q]=='\n'){ q++; }
117       dst[p++]='\n';
118       continue;
119       }
120     if(wc=='\n' && (flags&CRLF)){               // LF, LFCR -> LF
121       if(src[q]=='\r'){ q++; }
122       dst[p++]='\n';
123       continue;
124       }
125     if(wc=='&' && (flags&REFS)){
126       if(src[q]=='#'){
127         if(src[q+1]=='x'){                      // &#xXXXX;
128           q+=2;
129           FXASSERT(Ascii::isHexDigit(src[q]));  // Expected at least one hex digit
130           wc=FXString::digit2Value[(FXuchar)src[q++]];
131           while(Ascii::isHexDigit(src[q])){
132             wc=wc*16+FXString::digit2Value[(FXuchar)src[q++]];
133             }
134           FXASSERT(src[q]==';');                // Expected semicolon
135           q++;
136           }
137         else{                                   // &#DDDD;
138           q+=1;
139           FXASSERT(Ascii::isDigit(src[q]));     // Expected at least one digit
140           wc=src[q++]-'0';
141           while(Ascii::isDigit(src[q])){
142             wc=wc*10+(src[q++]-'0');
143             }
144           FXASSERT(src[q]==';');                // Expected semicolon
145           q++;
146           }
147         p+=wc2utf(&dst[p],wc);
148         continue;
149         }
150       if(src[q]=='q' && src[q+1]=='u' && src[q+2]=='o' && src[q+3]=='t' && src[q+4]==';'){      // &quot;
151         q+=5;
152         dst[p++]='\"';
153         continue;
154         }
155       if(src[q]=='a' && src[q+1]=='p' && src[q+2]=='o' && src[q+3]=='s' && src[q+4]==';'){      // &apos;
156         q+=5;
157         dst[p++]='\'';
158         continue;
159         }
160       if(src[q]=='a' && src[q+1]=='m' && src[q+2]=='p' && src[q+3]==';'){       // &amp;
161         q+=4;
162         dst[p++]='&';
163         continue;
164         }
165       if(src[q]=='l' && src[q+1]=='t' && src[q+2]==';'){        // &lt;
166         q+=3;
167         dst[p++]='<';
168         continue;
169         }
170       if(src[q]=='g' && src[q+1]=='t' && src[q+2]==';'){        // &gt;
171         q+=3;
172         dst[p++]='>';
173         continue;
174         }
175       }
176     dst[p++]=wc;
177     }
178   FXASSERT(p<=dst.length());
179   return true;
180   }
181 #endif
182 
183 
ap_xml_decode(FXString & dst,const FXString & src)184 FXbool ap_xml_decode(FXString & dst,const FXString & src) {
185 #if FOXVERSION < FXVERSION(1,7,55)
186   return xml_decode(dst,src,CRLF|REFS);
187 #else
188   return FXXML::decode(dst,src,FXXML::CRLF|FXXML::REFS);
189 #endif
190   }
191 
192 
193 
194 class LyricsSource {
195 public:
196   virtual FXbool fetch(GMTrack & track) = 0;
~LyricsSource()197   virtual ~LyricsSource() {};
198   };
199 
200 
201 
202 class LrcSource : public LyricsSource {
203 protected:
204   FXRex wordtags;
205 public:
LrcSource()206   LrcSource() {
207     wordtags.parse("<\\d\\d:\\d\\d.\\d\\d>",FXRex::Normal);
208     }
209 
fetch(GMTrack & track)210   virtual FXbool fetch(GMTrack & track) override {
211 
212     FXString filename = track.url;
213     if (!FXPath::isAbsolute(track.url)) {
214       filename = FXURL::fileFromURL(track.url);
215       if (filename.empty()) return false;
216       }
217     filename = FXPath::stripExtension(filename) + ".lrc";
218     if (!FXStat::exists(filename)) return false;
219     FXString data;
220     FXString lyrics;
221     if (!gm_buffer_file(filename,data)) return false;
222 
223 
224     // Parse each line
225     FXint start=0,end=0,next=0;
226     for (FXint i=0;i<data.length();i++) {
227       if (data[i]=='\n') {
228         end=i;
229         next=i+1;
230 
231         /// Skip white space
232         while(start<end && Ascii::isSpace(data[start])) start++;
233 
234         /// Skip white space
235         while(end>start && Ascii::isSpace(data[end])) end--;
236 
237         /// Skip
238         if (data[start]=='[') {
239           while(start<end && data[start]!=']') start++;
240           start+=1;
241           if ((end-start>2) && (data[start]=='D' || data[start]=='M' || data[start]=='F') && data[start+1]==':')
242             start+=2;
243           }
244 
245         /// New Line
246         if (!lyrics.empty()) lyrics+='\n';
247 
248         /// Parse the actual line.
249         if ((end-start)) {
250           lyrics+=data.mid(start,1+end-start);
251           }
252         start=next;
253         }
254       }
255 
256     // Remove any wordtags
257     FXint bb[2],ee[2],ff=0;
258     while(wordtags.search(lyrics,ff,lyrics.length()-1,FXRex::Normal,bb,ee,1)>=0) {
259       GM_DEBUG_PRINT("[lyrics] found <tags>: %s\n",lyrics.mid(bb[0],ee[0]-bb[0]).text());
260       lyrics.erase(bb[0],ee[0]-bb[0]);
261       ff=bb[0];
262       }
263 
264     // Finalize
265     lyrics.trim();
266     if (!lyrics.empty()) {
267       track.lyrics.adopt(lyrics);
268       }
269     return true;
270     }
271 };
272 
273 
274 
275 
276 
277 class HtmlSource : public LyricsSource {
278 protected:
279   FXRex linebreaks;
280   FXRex sup;
281   FXRex tags;
282 public:
283 
HtmlSource()284   HtmlSource() {
285     linebreaks.parse("(</?div[^>*]>|<\\s*br\\s*/?>)",FXRex::IgnoreCase|FXRex::Normal);
286     sup.parse("<sup\\s*>.*</sup\\s*>",FXRex::IgnoreCase|FXRex::Normal);
287     tags.parse("</?[^>]*/?>",FXRex::Normal);
288     }
289 
clearMarkup(FXString & src)290   void clearMarkup(FXString & src) {
291     FXint bb[2],ee[2],ff=0;
292 
293     // Turn <br> and <div> into newlines
294     ff=0;
295     while(linebreaks.search(src,ff,src.length()-1,FXRex::Normal,bb,ee,1)>=0) {
296       src.replace(bb[0],ee[0]-bb[0],"\n");
297       ff=bb[0]+1;
298       }
299 
300     // Remove <sup>...</sup>
301     ff=0;
302     while(sup.search(src,ff,src.length()-1,FXRex::Normal,bb,ee,1)>=0) {
303       GM_DEBUG_PRINT("[lyrics] found <sup>: %s\n",src.mid(bb[0],ee[0]-bb[0]).text());
304       src.erase(bb[0],ee[0]-bb[0]);
305       ff=bb[0];
306       }
307 
308     // Remove any tags
309     ff=0;
310     while(tags.search(src,ff,src.length()-1,FXRex::Normal,bb,ee,1)>=0) {
311       GM_DEBUG_PRINT("[lyrics] found <tags>: %s\n",src.mid(bb[0],ee[0]-bb[0]).text());
312       src.erase(bb[0],ee[0]-bb[0]);
313       ff=bb[0];
314       }
315     FXString result;
316     ap_xml_decode(result,src);
317     src = result.trim();
318     }
319   };
320 
321 
322 class LyricsWikiaSource : public HtmlSource {
323 protected:
324   FXRex lyricbox;
325 
326 public:
LyricsWikiaSource()327   LyricsWikiaSource() {
328     lyricbox.parse("<div class=\'lyricbox\'>(.*)<div class=\'lyricsbreak\'>",FXRex::IgnoreCase|FXRex::Capture);
329     }
330 
fetch(GMTrack & track)331   FXbool fetch(GMTrack & track) override {
332     HttpClient http;
333 
334     http.setAcceptEncoding(HttpClient::AcceptEncodingGZip);
335 
336     FXString artist = track.artist;
337     FXString title  = track.title;
338 
339     FXString url = FXString::value("http://lyrics.wikia.com/%s:%s",FXURL::encode(artist.substitute(' ','_')).text(),
340                                                                    FXURL::encode(title.substitute(' ','_')).text());
341     if (http.basic("GET",url)) {
342       FXString html = http.textBody();
343       FXint b[2],e[2];
344       if (lyricbox.search(html,0,html.length()-1,FXRex::Normal,b,e,2)>=0){
345         FXString content = html.mid(b[1],e[1]-b[1]);
346         clearMarkup(content);
347         track.lyrics = content;
348         return true;
349         }
350       }
351     return false;
352     }
353 
354   };
355 
356 
Lyrics()357 Lyrics::Lyrics() {
358   source[0] = new LrcSource;
359   source[1] = new LyricsWikiaSource;
360   }
361 
~Lyrics()362 Lyrics::~Lyrics() {
363   delete source[0];
364   delete source[1];
365   }
366 
fetch(GMTrack & track) const367 FXbool Lyrics::fetch(GMTrack & track) const {
368   for (FXint i=0;i<2;i++) {
369     if (source[i]->fetch(track)) {
370       GM_DEBUG_PRINT("Found lyrics for %s from src %d\n",track.url.text(),i);
371       return true;
372       }
373     }
374   return false;
375   }
376 
377 
378 
379 
380 
381