1 /*******************************************************************************
2 * Goggles Music Manager *
3 ********************************************************************************
4 * Copyright (C) 2016-2021 by Sander Jansen. All Rights Reserved *
5 * --- *
6 * This program is free software: you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation, either version 3 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program. If not, see http://www.gnu.org/licenses. *
18 ********************************************************************************/
19 #include "gmdefs.h"
20 #include "gmutils.h"
21 #include "ap.h"
22 #include "GMTrack.h"
23 #include "GMLyrics.h"
24
25 #if FOXVERSION < FXVERSION(1,7,55)
26
27 // From FOX-1.7.55
28 // Copyright (C) 2016 by Jeroen van der Zijp.
29
// Option bits accepted by xml_decode(); may be OR-ed together.
enum {
  CRLF = 1<<0,    /// CRLF, LFCR, CR, LF map to LF
  REFS = 1<<1     /// Character references processed
};
34
35
36 // Decode escaped special characters from XML stream
xml_decode(FXString & dst,const FXString & src,FXuint flags=CRLF|REFS)37 static FXbool xml_decode(FXString& dst,const FXString& src,FXuint flags=CRLF|REFS){
38 register FXival p,q;
39 register FXwchar wc;
40
41 // Measure the resulting string first
42 p=q=0;
43 while(q<src.length()){
44 wc=src[q++];
45 if(wc=='\r' && (flags&CRLF)){ // CR, CRLF -> LF
46 if(src[q]=='\n'){ q++; }
47 p++;
48 continue;
49 }
50 if(wc=='\n' && (flags&CRLF)){ // LF, LFCR -> LF
51 if(src[q]=='\r'){ q++; }
52 p++;
53 continue;
54 }
55 if(wc=='&' && (flags&REFS)){
56 if(src[q]=='#'){
57 if(src[q+1]=='x'){ // &#xXXXX;
58 q+=2;
59 if(!Ascii::isHexDigit(src[q])) return false; // Expected at least one hex digit
60 wc=FXString::digit2Value[(FXuchar)src[q++]];
61 while(Ascii::isHexDigit(src[q])){
62 wc=wc*16+FXString::digit2Value[(FXuchar)src[q++]];
63 }
64 if(src[q++]!=';') return false; // Expected semicolon
65 }
66 else{ // &#DDDD;
67 q+=1;
68 if(!Ascii::isDigit(src[q])) return false; // Expected at least one digit
69 wc=src[q++]-'0';
70 while(Ascii::isDigit(src[q])){
71 wc=wc*10+(src[q++]-'0');
72 }
73 if(src[q++]!=';') return false; // Expected semicolon
74 }
75 p+=wc2utf(wc);
76 continue;
77 }
78 if(src[q]=='q' && src[q+1]=='u' && src[q+2]=='o' && src[q+3]=='t' && src[q+4]==';'){ // "
79 q+=5;
80 p++;
81 continue;
82 }
83 if(src[q]=='a' && src[q+1]=='p' && src[q+2]=='o' && src[q+3]=='s' && src[q+4]==';'){ // '
84 q+=5;
85 p++;
86 continue;
87 }
88 if(src[q]=='a' && src[q+1]=='m' && src[q+2]=='p' && src[q+3]==';'){ // &
89 q+=4;
90 p++;
91 continue;
92 }
93 if(src[q]=='l' && src[q+1]=='t' && src[q+2]==';'){ // <
94 q+=3;
95 p++;
96 continue;
97 }
98 if(src[q]=='g' && src[q+1]=='t' && src[q+2]==';'){ // >
99 q+=3;
100 p++;
101 continue;
102 }
103 return false; // Unknown reference
104 }
105 p++;
106 }
107
108 // Now allocate space
109 dst.length(p);
110
111 // Now produce the result string
112 p=q=0;
113 while(q<src.length()){
114 wc=src[q++];
115 if(wc=='\r' && (flags&CRLF)){ // CR, CRLF -> LF
116 if(src[q]=='\n'){ q++; }
117 dst[p++]='\n';
118 continue;
119 }
120 if(wc=='\n' && (flags&CRLF)){ // LF, LFCR -> LF
121 if(src[q]=='\r'){ q++; }
122 dst[p++]='\n';
123 continue;
124 }
125 if(wc=='&' && (flags&REFS)){
126 if(src[q]=='#'){
127 if(src[q+1]=='x'){ // &#xXXXX;
128 q+=2;
129 FXASSERT(Ascii::isHexDigit(src[q])); // Expected at least one hex digit
130 wc=FXString::digit2Value[(FXuchar)src[q++]];
131 while(Ascii::isHexDigit(src[q])){
132 wc=wc*16+FXString::digit2Value[(FXuchar)src[q++]];
133 }
134 FXASSERT(src[q]==';'); // Expected semicolon
135 q++;
136 }
137 else{ // &#DDDD;
138 q+=1;
139 FXASSERT(Ascii::isDigit(src[q])); // Expected at least one digit
140 wc=src[q++]-'0';
141 while(Ascii::isDigit(src[q])){
142 wc=wc*10+(src[q++]-'0');
143 }
144 FXASSERT(src[q]==';'); // Expected semicolon
145 q++;
146 }
147 p+=wc2utf(&dst[p],wc);
148 continue;
149 }
150 if(src[q]=='q' && src[q+1]=='u' && src[q+2]=='o' && src[q+3]=='t' && src[q+4]==';'){ // "
151 q+=5;
152 dst[p++]='\"';
153 continue;
154 }
155 if(src[q]=='a' && src[q+1]=='p' && src[q+2]=='o' && src[q+3]=='s' && src[q+4]==';'){ // '
156 q+=5;
157 dst[p++]='\'';
158 continue;
159 }
160 if(src[q]=='a' && src[q+1]=='m' && src[q+2]=='p' && src[q+3]==';'){ // &
161 q+=4;
162 dst[p++]='&';
163 continue;
164 }
165 if(src[q]=='l' && src[q+1]=='t' && src[q+2]==';'){ // <
166 q+=3;
167 dst[p++]='<';
168 continue;
169 }
170 if(src[q]=='g' && src[q+1]=='t' && src[q+2]==';'){ // >
171 q+=3;
172 dst[p++]='>';
173 continue;
174 }
175 }
176 dst[p++]=wc;
177 }
178 FXASSERT(p<=dst.length());
179 return true;
180 }
181 #endif
182
183
ap_xml_decode(FXString & dst,const FXString & src)184 FXbool ap_xml_decode(FXString & dst,const FXString & src) {
185 #if FOXVERSION < FXVERSION(1,7,55)
186 return xml_decode(dst,src,CRLF|REFS);
187 #else
188 return FXXML::decode(dst,src,FXXML::CRLF|FXXML::REFS);
189 #endif
190 }
191
192
193
194 class LyricsSource {
195 public:
196 virtual FXbool fetch(GMTrack & track) = 0;
~LyricsSource()197 virtual ~LyricsSource() {};
198 };
199
200
201
202 class LrcSource : public LyricsSource {
203 protected:
204 FXRex wordtags;
205 public:
LrcSource()206 LrcSource() {
207 wordtags.parse("<\\d\\d:\\d\\d.\\d\\d>",FXRex::Normal);
208 }
209
fetch(GMTrack & track)210 virtual FXbool fetch(GMTrack & track) override {
211
212 FXString filename = track.url;
213 if (!FXPath::isAbsolute(track.url)) {
214 filename = FXURL::fileFromURL(track.url);
215 if (filename.empty()) return false;
216 }
217 filename = FXPath::stripExtension(filename) + ".lrc";
218 if (!FXStat::exists(filename)) return false;
219 FXString data;
220 FXString lyrics;
221 if (!gm_buffer_file(filename,data)) return false;
222
223
224 // Parse each line
225 FXint start=0,end=0,next=0;
226 for (FXint i=0;i<data.length();i++) {
227 if (data[i]=='\n') {
228 end=i;
229 next=i+1;
230
231 /// Skip white space
232 while(start<end && Ascii::isSpace(data[start])) start++;
233
234 /// Skip white space
235 while(end>start && Ascii::isSpace(data[end])) end--;
236
237 /// Skip
238 if (data[start]=='[') {
239 while(start<end && data[start]!=']') start++;
240 start+=1;
241 if ((end-start>2) && (data[start]=='D' || data[start]=='M' || data[start]=='F') && data[start+1]==':')
242 start+=2;
243 }
244
245 /// New Line
246 if (!lyrics.empty()) lyrics+='\n';
247
248 /// Parse the actual line.
249 if ((end-start)) {
250 lyrics+=data.mid(start,1+end-start);
251 }
252 start=next;
253 }
254 }
255
256 // Remove any wordtags
257 FXint bb[2],ee[2],ff=0;
258 while(wordtags.search(lyrics,ff,lyrics.length()-1,FXRex::Normal,bb,ee,1)>=0) {
259 GM_DEBUG_PRINT("[lyrics] found <tags>: %s\n",lyrics.mid(bb[0],ee[0]-bb[0]).text());
260 lyrics.erase(bb[0],ee[0]-bb[0]);
261 ff=bb[0];
262 }
263
264 // Finalize
265 lyrics.trim();
266 if (!lyrics.empty()) {
267 track.lyrics.adopt(lyrics);
268 }
269 return true;
270 }
271 };
272
273
274
275
276
277 class HtmlSource : public LyricsSource {
278 protected:
279 FXRex linebreaks;
280 FXRex sup;
281 FXRex tags;
282 public:
283
HtmlSource()284 HtmlSource() {
285 linebreaks.parse("(</?div[^>*]>|<\\s*br\\s*/?>)",FXRex::IgnoreCase|FXRex::Normal);
286 sup.parse("<sup\\s*>.*</sup\\s*>",FXRex::IgnoreCase|FXRex::Normal);
287 tags.parse("</?[^>]*/?>",FXRex::Normal);
288 }
289
clearMarkup(FXString & src)290 void clearMarkup(FXString & src) {
291 FXint bb[2],ee[2],ff=0;
292
293 // Turn <br> and <div> into newlines
294 ff=0;
295 while(linebreaks.search(src,ff,src.length()-1,FXRex::Normal,bb,ee,1)>=0) {
296 src.replace(bb[0],ee[0]-bb[0],"\n");
297 ff=bb[0]+1;
298 }
299
300 // Remove <sup>...</sup>
301 ff=0;
302 while(sup.search(src,ff,src.length()-1,FXRex::Normal,bb,ee,1)>=0) {
303 GM_DEBUG_PRINT("[lyrics] found <sup>: %s\n",src.mid(bb[0],ee[0]-bb[0]).text());
304 src.erase(bb[0],ee[0]-bb[0]);
305 ff=bb[0];
306 }
307
308 // Remove any tags
309 ff=0;
310 while(tags.search(src,ff,src.length()-1,FXRex::Normal,bb,ee,1)>=0) {
311 GM_DEBUG_PRINT("[lyrics] found <tags>: %s\n",src.mid(bb[0],ee[0]-bb[0]).text());
312 src.erase(bb[0],ee[0]-bb[0]);
313 ff=bb[0];
314 }
315 FXString result;
316 ap_xml_decode(result,src);
317 src = result.trim();
318 }
319 };
320
321
322 class LyricsWikiaSource : public HtmlSource {
323 protected:
324 FXRex lyricbox;
325
326 public:
LyricsWikiaSource()327 LyricsWikiaSource() {
328 lyricbox.parse("<div class=\'lyricbox\'>(.*)<div class=\'lyricsbreak\'>",FXRex::IgnoreCase|FXRex::Capture);
329 }
330
fetch(GMTrack & track)331 FXbool fetch(GMTrack & track) override {
332 HttpClient http;
333
334 http.setAcceptEncoding(HttpClient::AcceptEncodingGZip);
335
336 FXString artist = track.artist;
337 FXString title = track.title;
338
339 FXString url = FXString::value("http://lyrics.wikia.com/%s:%s",FXURL::encode(artist.substitute(' ','_')).text(),
340 FXURL::encode(title.substitute(' ','_')).text());
341 if (http.basic("GET",url)) {
342 FXString html = http.textBody();
343 FXint b[2],e[2];
344 if (lyricbox.search(html,0,html.length()-1,FXRex::Normal,b,e,2)>=0){
345 FXString content = html.mid(b[1],e[1]-b[1]);
346 clearMarkup(content);
347 track.lyrics = content;
348 return true;
349 }
350 }
351 return false;
352 }
353
354 };
355
356
Lyrics()357 Lyrics::Lyrics() {
358 source[0] = new LrcSource;
359 source[1] = new LyricsWikiaSource;
360 }
361
~Lyrics()362 Lyrics::~Lyrics() {
363 delete source[0];
364 delete source[1];
365 }
366
fetch(GMTrack & track) const367 FXbool Lyrics::fetch(GMTrack & track) const {
368 for (FXint i=0;i<2;i++) {
369 if (source[i]->fetch(track)) {
370 GM_DEBUG_PRINT("Found lyrics for %s from src %d\n",track.url.text(),i);
371 return true;
372 }
373 }
374 return false;
375 }
376
377
378
379
380
381