1 /*
2  * URL.cpp
3  *
4  * Copyright (C) 1999 Stephen F. White
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program (see the file "COPYING" for details); if
18  * not, write to the Free Software Foundation, Inc., 675 Mass Ave,
19  * Cambridge, MA 02139, USA.
20  */
21 
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <ctype.h>
25 #include <sys/stat.h>
26 #include <sys/types.h>
27 #include <errno.h>
28 #ifdef _WIN32
29 # include <direct.h>
30 #endif
31 #include "stdafx.h"
32 #ifdef AIX
33 # include "strings.h"
34 #endif
35 #include "URL.h"
36 #include "maxpath.h"
37 #include "DuneApp.h"
38 #include "swt.h"
39 
URL(const char * u)40 URL::URL(const char *u)
41 {
42     Parse(u);
43 }
44 
URL(void)45 URL::URL(void)
46 {
47     m_url = "";
48     m_scheme = "";
49     m_hostname = "";
50     m_port = 80;
51     m_path = "";
52 }
53 
URL(const char * base,const char * url)54 URL::URL(const char *base, const char *url)
55 {
56     if (strstr(url, "://")) {   /* URL is already fully qualified */
57         Parse(url);
58     } else if (url[0] == '/') { /* URL is absolute */
59         Parse(base);
60         m_path = url;
61         SimplifyPath();
62     } else if (strstr(url, ":/")) {  /* URL is MS-DOS style */
63         Parse(base);
64 #ifdef _WIN32
65         m_path="";
66 #else
67         m_path="/";
68 #endif
69         m_path+=url;
70         SimplifyPath();
71     } /* else if (strstr(url, "../")) { URL is relative
72         Parse(base);
73         m_path=url;
74     } */
75        else {
76         int len = strlen(base);
77         if((len != 0) && (base[len-1] != '/')) {
78             char *newBase = new char[len+1];
79             strcpy(newBase, base);
80             char *b = strrchr(newBase, '/');
81 #ifdef _WIN32
82             char *w = strrchr(newBase, '\\');
83 #endif
84             if (b) {
85                 *(b+1) = '\0';
86                 Parse(newBase);
87                 delete [] newBase;
88 #ifdef _WIN32
89             } else if (w) {
90                 *(w+1) = '\0';
91                 Parse(newBase);
92                 delete [] newBase;
93 #endif
94             } else
95                 Parse("./");
96         } else {
97             Parse(base);
98         }
99         m_path += url;
100         SimplifyPath();
101     }
102     Rebuild();
103 }
104 
105 bool
Parse(const char * u)106 URL::Parse(const char *u)
107 {
108     enum {
109         START, SCHEME, SLASH1, SLASH2, HOST, PORT1, PORT, PATH, TOPIC, ERR
110     } state = START;
111 
112     if (u == NULL) {
113         u = m_url;
114     } else {
115         m_url = "";
116         m_url += u;
117     }
118 
119     m_scheme = "";
120     m_hostname = "";
121     m_port = 80;
122     m_path = "";
123 
124     if (!u[0]) {
125         return false;
126     }
127 
128     bool dospath=false;
129     char driveLetter=(char) 0;
130 #ifdef _WIN32
131     if ((strlen(u) >= 2) && (u[1] == ':')) {
132         dospath=true;
133         driveLetter=u[0];
134         m_path+=u[0];
135         m_path+=u[1];
136         m_path+="\\";
137     }
138 #endif
139     while(*u && state != ERR) {
140         if ((*u == '|') && (u != m_url)) {
141             dospath=true;
142             driveLetter=*(u-1);
143         }
144         switch(state) {
145           case START:
146             if (*u != ' ') {
147                 m_scheme += *u;
148                 state = SCHEME;
149             }
150             break;
151           case SCHEME:  // getting scheme
152             if (isalnum(*u) || *u == '+' || *u == '-' || *u == '.') {
153                 m_scheme += *u;
154             } else if (*u == ':') {
155                 state = SLASH1;
156             } else if (*u == '/') {
157                 m_hostname = "";
158                 m_path = m_scheme;
159                 m_path += "/";
160                 m_scheme = "";
161                 state = PATH;
162             } else {
163                 m_scheme += *u;
164                 m_hostname = "";
165                 m_path = m_scheme;
166                 state = PATH;
167             }
168             break;
169           case SLASH1:  // getting first slash
170             if (*u == '/') {
171                 state = SLASH2;
172             } else if (isdigit(*u)) {  // another hack
173                 m_hostname = m_scheme;
174                 m_scheme = "";
175                 m_port = (short) (*u - '0');
176                 state = PORT;
177             } else if (isalpha(*u)) {  // yet another hack, for IExplorer
178                 m_path += *u;
179                 state = PATH;
180             }
181             break;
182           case SLASH2:  // getting second slash
183             if (*u == '/') state = HOST; else state = ERR;
184             break;
185           case HOST:  // getting hostname
186             if (isalnum(*u) || *u == '.' || *u == '-') {
187                 m_hostname += *u;
188             } else if (*u == ':') {
189                 state = PORT1;
190             } else if (*u == '/') {
191                 if (dospath) {
192                      dospath=false;
193                      m_path=driveLetter;
194                      m_path+=':';
195                 }
196                 m_path += *u;
197                 state = PATH;
198             } else {
199                 state = ERR;
200             }
201             break;
202           case PORT1:
203             if (isdigit(*u)) {
204                 m_port = (short) (*u - '0');
205                 state = PORT;
206             } else {
207                 state = ERR;
208             }
209             break;
210           case PORT: // waiting for port
211             if (isdigit(*u)) {
212                 m_port = m_port * 10 + (*u - '0');
213             } else if (*u == '/') {
214                 m_path += *u;
215                 state = PATH;
216             } else {
217                 state = ERR;
218             }
219             break;
220           case PATH:  // getting path
221             if (*u == '#') {
222                 state = TOPIC;
223             } else {
224                 m_path += *u;
225             }
226             break;
227           case TOPIC:
228             m_topic += *u;
229             break;
230           default:
231             break;
232         }
233         u++;
234     }
235 
236     if (state == SCHEME) {
237         m_hostname = m_scheme;
238         m_scheme = "";
239         if (m_path.length() == 0) {
240             m_path = "./";
241             m_path += m_hostname;
242             m_hostname = "";
243         }
244     }
245     if (m_scheme.length() == 0) {
246         if (!stringncmp(m_hostname, "ftp")) {
247             m_scheme = "ftp";
248         } else if (!stringncmp(m_hostname, "http")) {
249             m_scheme = "http";
250         } else {
251             m_scheme = "file";
252         }
253     }
254     if (m_path.length() == 0) {
255         m_path = "/";
256     }
257 
258     Rebuild();
259     return (state != ERR);
260 }
261 
getpwd(void)262 static char *getpwd(void)
263 {
264     char *pwd;
265     if ((pwd = getenv("PWD")) != NULL)
266         pwd = mystrdup(pwd);
267     else {
268         char buf[MY_MAX_PATH];
269         if (getcwd(buf, MY_MAX_PATH) == NULL)
270             while (write(2, "getcwd failed\n", 14) == -1);
271         pwd = mystrdup(buf);
272     }
273 
274     // handle a path of form '"D:something"'
275     if (pwd[0] == '"') {
276         char *newPwd = mystrdup(pwd + 1);
277         free(pwd);
278         pwd = newPwd;
279     }
280     if (pwd[strlen(pwd)-1] == '"')
281         pwd[strlen(pwd)-1] = (char) 0;
282 
283     return pwd;
284 }
285 
Rebuild(bool withTopic)286 void  URL::Rebuild(bool withTopic /* = true*/)
287 {
288     if (!strcasecmp(m_scheme, "news") ||
289         !strcasecmp(m_scheme, "mailto") ||
290         !strcasecmp(m_scheme, "nntp")) {
291         m_url = m_scheme;
292         m_url += ":";
293     } else {
294         m_url = (const char *) m_scheme;
295         if (m_scheme[0])
296            m_url += "://";
297         m_url += m_hostname;
298         if (!strcasecmp(m_scheme, "http") && m_port != 80) {
299             char buf[128];
300             mysnprintf(buf, 128, ":%d", m_port);
301             m_url += buf;
302         }
303     }
304     m_url += m_path;
305     if (withTopic && m_topic.length() > 0) {
306         m_url += '#';
307         m_url += m_topic;
308     }
309 }
310 
#ifdef _WIN32
// Write a display form of this URL to *out that is not wider than "pixels"
// when measured with GetTextExtentPoint() on hDC, if that is possible.
//
// Shortening happens in steps: first the hostname (and port) is replaced by
// "...", then leading path segments are dropped one at a time, finally
// ".../" + file name is used. The last form is returned even if it is still
// too wide. If measuring fails, the text built so far is kept.
//
// NOTE(review): the port is appended whenever it is not 80 or the scheme is
// not "http", which differs from the rule used in Rebuild() - confirm which
// one is intended.
void  URL::TruncateToFit(HDC hDC, int pixels, MyString *out)
{
    const char *newpath = m_path;
    // initialised so that a failing measurement never compares garbage
    SIZE size = { 0, 0 };

    *out = m_scheme;
    *out += "://";
    *out += m_hostname;
    if (m_port != 80 || m_scheme != "http") {
        char buf[128];
        mysnprintf(buf, 128, ":%d", m_port);
        *out += buf;
    }
    *out += newpath;
    if (GetTextExtentPoint(hDC, *out, out->length(), &size) &&
        (size.cx > pixels)) {
        *out = m_scheme;
        *out += "://...";
        *out += newpath;
    }
    while (GetTextExtentPoint(hDC, *out, out->length(), &size) &&
           (size.cx > pixels)) {
        // skip the current leading character and look for the next segment;
        // an empty path has no character to skip
        const char *slash = newpath[0] ? strchr(newpath + 1, '/') : NULL;
        if (slash == NULL)
            break;
        newpath = slash;
        *out = m_scheme;
        *out += "://...";
        *out += newpath;
    }
    if (GetTextExtentPoint(hDC, *out, out->length(), &size) &&
        (size.cx > pixels)) {
        // last ditch attempt to squeeze it smaller
        *out = ".../";
        *out += GetFileName();
    }
}
#endif
354 
GetFileName(void)355 MyString URL::GetFileName(void)
356 {
357     MyString filename = "";
358     const char *slash = strrchr(m_path, '/');
359 
360     if (!slash) {
361         slash = strrchr(m_path, '\\');
362     }
363     if (slash) {
364         filename += slash + 1;
365     } else {
366         filename += m_path;
367     }
368     return filename;
369 }
370 
371 char *
GetFileNameWithoutExtension(void)372 URL::GetFileNameWithoutExtension(void)
373 {
374     char *filename = NULL;
375     if (strlen(GetFileName()) == 0)
376         filename = strdup("Untitled");
377     else
378         filename = strdup(GetFileName());
379     char *dot = strchr(filename, '.');
380     if (dot != NULL)
381         dot[0] = 0;
382     return filename;
383 }
384 
RelativeTo(const char * parentURL)385 MyString URL::RelativeTo(const char *parentURL)
386 {
387 #ifdef m_WIN32
388     if (!sameDrive(m_url, parentURL))
389         return m_url;
390 #endif
391 
392     URL parent(parentURL);
393     MyString ret = "";
394     const char *c, *p;
395 
396     if (m_scheme == parent.m_scheme && !strcasecmp(m_hostname, parent.m_hostname)
397         && m_port == parent.m_port) {
398         for (p = parent.m_path, c = m_path; *p && *p == *c; p++, c++) {}
399         if (p == parent.m_path + 1) return m_path;
400         if (*p && *c == '/') c--;
401 #ifdef _WIN32
402         // test for paths starting with "Driveletter:"
403         if ((c == m_path) && (c[0]!=0) && (c[1]==':'))
404             return(m_url);
405 #endif
406         while ((p = strchr(p, '/')) != NULL) {
407             ret += "../";
408             p++;
409         }
410         while (c > (const char *) m_path && *c != '/') {
411             c--;
412         }
413         ret += c + 1;
414         return ret;
415     } else {
416         return m_url;
417     }
418 }
419 
ToPath() const420 MyString URL::ToPath() const
421 {
422     MyString path;
423 
424 #ifdef _WIN32
425     if (m_hostname.length() != 0) {
426         path = "\\\\";
427         path += m_hostname;
428         path += "\\";
429     } else {
430         path = "";
431     }
432     int i = 0;
433     for (const char *c = m_path; *c; c++) {
434         switch (*c) {
435           case '/':
436             if (i != 0)
437                 path += '\\';
438             break;
439           case '|':
440             path += ':';
441             break;
442           default:
443             path += *c;
444             break;
445         }
446         i++;
447     }
448     return path;
449 #else
450     return m_path;
451 #endif
452 }
453 
SimplifyPath()454 void URL::SimplifyPath()
455 {
456     const char *s1 = m_path;
457     char *buf = new char[strlen(s1) + 1];
458     char *s2 = buf;
459     char *topic;
460 
461     *s2 = '\0';
462     while (*s1) {
463 #ifdef _WIN32
464         // finding /D: means D:windowspath/something
465         if (s1[2] == ':') {
466             m_path = mystrdup(s1 + 1);
467             return;
468         }
469 #endif
470         if (!strncmp(s1, "/./", 3)) {
471             s1 += 2;
472         } else  {
473             do {
474                 *s2++ = *s1++;
475             } while (*s1 && *s1 != '/');
476         }
477         *s2 = '\0';
478     }
479 #ifdef _WIN32
480     // finding /D: means D:windowspath/something
481     bool hasDriveColon = false;
482     char *driveColon = strchr(buf, ':');
483     if (driveColon != NULL)
484         if (strlen(buf) > (strlen(driveColon) + 1))
485             if (*(driveColon - 2) == '/') {
486                 hasDriveColon = true;
487                 m_path = mystrdup(driveColon - 1);
488             }
489     if (!hasDriveColon)
490 #endif
491     topic = strstr(buf, "#");
492     if (topic == NULL)
493         m_path = mystrdup(buf);
494     else {
495         *topic = '\0';
496         m_path = mystrdup(buf);
497         m_topic = mystrdup(topic + 1);
498     }
499     delete [] buf;
500 }
501 
FromPath(const char * path)502 void URL::FromPath(const char *path)
503 {
504     MyString ret = "";
505     URL myurl(path);
506     if ((strcasecmp(myurl.getScheme(), "https") == 0) ||
507         (strcasecmp(myurl.getScheme(), "http") == 0) ||
508         (strcasecmp(myurl.getScheme(), "ftp") == 0)) {
509         const char *myPath = TheApp->getDownloadDirectory();
510         myPath = replaceHome(myPath);
511         ret += myPath;
512         free((void *)myPath);
513         ret += swGetPathSeperator();
514         ret += myurl.getHostname();
515         ret += swGetPathSeperator();
516         ret += '/';
517         ret += myurl.ToPath();
518         Parse(ret);
519         path = ret;
520      }
521 #ifdef _WIN32
522     char drive[_MAX_DRIVE];
523     char dir[_MAX_DIR];
524     char fname[_MAX_FNAME];
525     char ext[_MAX_EXT];
526 
527     _splitpath(path, drive, dir, fname, ext);
528     m_url = "";
529     m_scheme = "file";
530     m_hostname = "";
531     m_port = 80;
532     m_path = '/';
533     m_path += drive[0];
534     m_path += '|';
535     for (const char *p = dir; *p; p++) {
536         if (*p == '\\') {
537             m_path += '/';
538         } else {
539             m_path += *p;
540         }
541     }
542     m_path += fname;
543     m_path += ext;
544     Rebuild();
545 #else
546     m_scheme = "file";
547     m_hostname = "";
548     m_port = 80;
549     if (path[0] == '/') {
550         m_path = path;
551     } else {
552         m_path = "";
553         char *pwd = getpwd();
554         m_path += pwd;
555         free(pwd);
556         m_path += '/';
557         m_path += path;
558         SimplifyPath();
559     }
560     Rebuild();
561 #endif
562 }
563 
isJavascript(void)564 bool URL::isJavascript(void)
565 {
566    return ::isJavascript(m_url);
567 }
568 
isEcmascript(void)569 bool URL::isEcmascript(void)
570 {
571    return ::isEcmascript(m_url);
572 }
573 
isSortOfEcmascript(void)574 bool URL::isSortOfEcmascript(void)
575 {
576    return ::isSortOfEcmascript(m_url);
577 }
578 
notURN(void)579 bool URL::notURN(void)
580 {
581    return ::notURN(m_url);
582 }
583 
584 const MyString     &
GetDir()585 URL::GetDir()
586 {
587     static MyString ret;
588     int len;
589     for (len=m_path.length()-1;len>=0;len--)
590         if ((m_path[len]=='/') || (m_path[len]=='\\'))
591            break;
592     if (len<0)
593         ret=m_path;
594     else {
595         ret="";
596         for (int i=0;i<len;i++)
597            ret+=m_path[i];
598     }
599 
600     return ret;
601 }
602 
603 MyString
fixCurrentPath(const char * url)604 fixCurrentPath(const char* url)
605 {
606     // skip already existing "file:"
607     if (stringncmp(url,"file:")==0) {
608         // take care about file://Driverletter| constructs
609         if (!getDriveLetter(url)) {
610             if (stringncmp(url,"file://")==0)
611                 url += 7;
612             else
613                 url += 5;
614         }
615     }
616 
617     if (url[0] == '"')
618         url++;
619 
620     MyString* ret = new MyString(url);
621     if ((strcmp(url,".") == 0) ||
622         (strcmp(url,"./") == 0)) {
623 #ifdef _WIN32
624         *ret = "file://";
625 #endif
626         char *pwd = getpwd();
627         *ret += pwd;
628         free(pwd);
629         *ret += "/";
630     }
631     return *ret;
632 }
633 
634 MyString
rewriteURL(const char * url,const char * oldBase,const char * newBase)635 rewriteURL(const char *url, const char *oldBase, const char *newBase)
636 {
637     if ((!isSortOfEcmascript(url)) && notURN(url)) {
638         URL oldURL(fixCurrentPath(oldBase), fixCurrentPath(url));
639         MyString newDir = "";
640         if (URL::isCurrentDirectory(newBase)) {
641             char *pwd = getpwd();
642             newDir += pwd;
643             free(pwd);
644             newDir += "/";
645         } else
646             newDir += newBase;
647         MyString newURL(oldURL.RelativeTo(fixCurrentPath(newDir)));
648         return newURL;
649     } else {
650         MyString newURL(url);
651 #ifndef HAVE_DONT_REPLACE_VRMLSCRIPT
652         if (strncasecmp(url,"vrmlscript:",strlen("vrmlscript:"))==0){
653             newURL="java";
654             newURL+=(const char*)(url+4);
655         }
656 #endif
657         return newURL;
658     }
659 }
660 
// Return true if url, after leading white space, starts with "javascript:"
// or "vrmlscript:" (compared case-insensitively).
// A NULL pointer yields false.
bool
isJavascript(const char* url)
{
   if (url == NULL)
       return false;
   const char *string = url;
   // isspace() needs a value in the unsigned char range
   while (isspace((unsigned char) string[0]))
       string++;
   if ((strncasecmp(string,"javascript:",strlen("javascript:"))==0) ||
       (strncasecmp(string,"vrmlscript:",strlen("vrmlscript:"))==0))
       return true;
   return false;
}
672 
// Return true if url, after leading white space, starts with "ecmascript:"
// (compared case-insensitively). A NULL pointer yields false.
bool
isEcmascript(const char* url)
{
   if (url == NULL)
       return false;
   const char *string = url;
   // isspace() needs a value in the unsigned char range
   while (isspace((unsigned char) string[0]))
       string++;
   if (strncasecmp(string,"ecmascript:",strlen("ecmascript:"))==0)
       return true;
   return false;
}
683 
// Return true if url, after leading white space, starts with "x3domscript:"
// (compared case-insensitively). A NULL pointer yields false.
bool
isX3domscript(const char* url)
{
   if (url == NULL)
       return false;
   const char *string = url;
   // isspace() needs a value in the unsigned char range
   while (isspace((unsigned char) string[0]))
       string++;
   if (strncasecmp(string,"x3domscript:",strlen("x3domscript:"))==0)
       return true;
   return false;
}
694 
isSortOfEcmascript(const char * url)695 bool isSortOfEcmascript(const char* url)
696 {
697    return isEcmascript(url) || isJavascript(url);
698 }
699 
700 
// Return false if url starts with "urn:" (compared case-insensitively),
// true otherwise.
bool
notURN(const char* url)
{
   const bool startsWithUrn =
       (strncasecmp(url,"urn:",strlen("urn:"))==0);
   return !startsWithUrn;
}
708 
709 // check for file:///D| or file://D: (D is driverletter)
getDriveLetter(const char * url)710 char getDriveLetter(const char* url)
711 {
712     bool isDrive = true;
713     if (stringncmp(url, "file:///") != 0)
714         isDrive = false;
715     if (isDrive) {
716         if (strlen(url) < strlen("file:///D|"))
717             isDrive = false;
718         if (isDrive) {
719             if ((url[strlen("file:///D|")-1] != '|') &&
720                 (url[strlen("file:///D:")-1] != ':'))
721                     isDrive = false;
722                  else
723                     return url[strlen("file:///D:")-2];
724         }
725     }
726     if (!isDrive) {
727         isDrive = true;
728         if (stringncmp(url, "file://") != 0)
729             isDrive = false;
730         if (isDrive) {
731             if (strlen(url) < strlen("file://D:"))
732                 isDrive = false;
733             if (isDrive) {
734                 if ((url[strlen("file://D|")-1] != '|') &&
735                     (url[strlen("file://D:")-1] != ':'))
736                     isDrive = false;
737                 else
738                     return url[strlen("file://D:")-2];
739             }
740         }
741     }
742 
743     return 0;
744 }
745 
sameDrive(const char * url1,const char * url2)746 bool URL::sameDrive(const char* url1, const char* url2)
747 {
748     char letter1 = getDriveLetter(url1);
749     char letter2 = getDriveLetter(url2);
750     if ((!letter1) || (!letter2))
751         return false;
752     // are the driveletters identical ?
753     if (letter1 == letter2)
754         return true;
755     return false;
756 }
757 
758 bool
isCurrentDirectory(const char * path)759 URL::isCurrentDirectory(const char *path)
760 {
761     int numSlash = 0;
762     for (unsigned int i = 0; i < strlen(path); i++)
763         if (path[i] == '/')
764             numSlash++;
765     if (numSlash == 0)
766         return true;
767     else if (numSlash == 1)
768         if (stringncmp(path, "./") == 0)
769             return true;
770     return false;
771 }
772 
replaceHome(const char * path)773 char *replaceHome(const char *path)
774 {
775      MyString ret = "";
776      int start = 0;
777      if (path[0] == '~') {
778          start = 1;
779 #ifdef _WIN32
780          ret += getenv("HOMEDRIVE");
781          ret += getenv("HOMEPATH");
782 #else
783          ret += getenv("HOME");
784 #endif
785          ret += swGetPathSeperator();
786      }
787      ret += path + start;
788      return strdup(ret);
789 }
790 
mkdir_parents4file(const char * mypath)791 bool mkdir_parents4file(const char *mypath)
792 {
793      MyString ret = "";
794      const char *path = replaceHome(mypath);
795      for (unsigned int i = 0; i < strlen(path); i++)
796          if ((i != 0) && ((path[i] == '/') || (path[i] == '\\'))) {
797 #ifdef _WIN32
798              if (_mkdir(ret) == -1)
799 #else
800              if (mkdir(ret, 0755) == -1)
801 #endif
802                  if (errno != EEXIST) {
803                      TheApp->MessageBoxPerror(ret);
804                      return false;
805                  }
806              ret += swGetPathSeperator();
807          } else
808              ret += path[i];
809      free((void *)path);
810      return true;
811 }
812 
// Report an error for "string" by forwarding it to the application's
// MessageBoxPerror().
void myperror(const char *string)
{
    TheApp->MessageBoxPerror(string);
}
817 
// Copy source to target with a '/' put in front and every '\\' replaced by
// '/'.
//
// len is taken as the size of target in bytes: at most len - 1 characters
// plus the terminating 0 are written, longer input is truncated. With
// len == 1 only the terminator is stored, with len == 0 nothing is written.
void toPosixPath(char *target, char *source, unsigned int len)
{
    if (len == 0)
        return;

    unsigned int out = 0;
    if (len > 1)
        target[out++] = '/';
    // always keep one byte for the terminator
    for (unsigned int in = 0; (source[in] != '\0') && (out + 1 < len); in++) {
        if (source[in] == '\\')
            target[out++] = '/';
        else
            target[out++] = source[in];
    }
    target[out] = 0;
}
831