1 //C-  -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001  AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software.  Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C-  ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License.   This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55 
56 #ifndef _GURL_H_
57 #define _GURL_H_
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61 #if NEED_GNUG_PRAGMAS
62 # pragma interface
63 #endif
64 
65 
66 #include "GString.h"
67 #include "Arrays.h"
68 #include "GThreads.h"
69 
70 
71 #ifdef HAVE_NAMESPACES
72 namespace DJVU {
73 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
74 }
75 #endif
76 #endif
77 
78 /** @name GURL.h
79     Files #"GURL.h"# and #"GURL.cpp"# contain the implementation of the
80     \Ref{GURL} class used to store URLs in a system independent format.
81     @memo System independent URL representation.
82     @author Andrei Erofeev <eaf@geocities.com>
83 
84 // From: Leon Bottou, 1/31/2002
85 // This has been heavily changed by Lizardtech.
// They decided to use URLs for everything, including
// the most basic file access.  The URL class now is an unholy
// mixture of code for syntactically parsing the urls (which it was)
89 // and file status code (only for local file: urls).
90 
91 */
92 
93 //@{
94 
95 /** System independent URL representation.
96 
97     This class is used in the library to store URLs in a system independent
98     format. The idea to use a general class to hold URL arose after we
99     realized, that DjVu had to be able to access files both from the WEB
100     and from the local disk. While it is strange to talk about system
101     independence of HTTP URLs, file names formats obviously differ from
102     platform to platform. They may contain forward slashes, backward slashes,
    colons as separators, etc. There may be more than one URL corresponding
104     to the same file name. Compare #file:/dir/file.djvu# and
105     #file://localhost/dir/file.djvu#.
106 
107     To simplify a developer's life we have created this class, which contains
108     inside a canonical representation of URLs.
109 
110     File URLs are converted to internal format with the help of \Ref{GOS} class.
111 
112     All other URLs are modified to contain only forward slashes.
113 */
114 
115 class DJVUAPI GURL
116 {
117 public:
118   class Filename;
119   class UTF8;
120   class Native;
121 protected:
122       /** @name Constructors
123 	  Accept the string URL, check that it starts from #file:/#
124 	  or #http:/# and convert to internal system independent
125 	  representation.
126       */
127       //@{
128       ///
129    GURL(const char * url_string);
130       //@}
131 
132 public:
133    GURL(void);
134 
135    GURL(const GUTF8String & url_string);
136 
137    GURL(const GNativeString & url_string);
138 
139    GURL(const GUTF8String &xurl, const GURL &codebase);
140 
141    GURL(const GNativeString &xurl, const GURL &codebase);
142 
143       /// Copy constructor
144    GURL(const GURL & gurl);
145 
146       /// The destructor
~GURL(void)147    virtual ~GURL(void) {}
148 
149 private:
150       // The 'class_lock' should be locked whenever you're accessing
151       // url, or cgi_name_arr, or cgi_value_arr.
152    GCriticalSection	class_lock;
153 protected:
154    GUTF8String	url;
155    DArray<GUTF8String>	cgi_name_arr, cgi_value_arr;
156    bool validurl;
157 
158    void		init(const bool nothrow=false);
159    void		convert_slashes(void);
160    void		beautify_path(void);
161    static GUTF8String	beautify_path(GUTF8String url);
162 
163    static GUTF8String	protocol(const GUTF8String& url);
164    void		parse_cgi_args(void);
165    void		store_cgi_args(void);
166 public:
167    /// Test if the URL is valid. If invalid, reinitialize.
168    bool is_valid(void) const;     // const lies to the compiler because of dependency problems
169 
170       /// Extracts the {\em protocol} part from the URL and returns it
171    GUTF8String	protocol(void) const;
172 
173       /** Returns string after the first '\#' with decoded
174 	  escape sequences. */
175    GUTF8String	hash_argument(void) const;
176 
177       /** Inserts the #arg# after a separating hash into the URL.
178 	  The function encodes any illegal character in #arg# using
179 	  \Ref{GOS::encode_reserved}(). */
180    void		set_hash_argument(const GUTF8String &arg);
181 
182       /** Returns the total number of CGI arguments in the URL.
183 	  CGI arguments follow '#?#' sign and are separated by '#&#' signs */
184    int		cgi_arguments(void) const;
185 
186       /** Returns the total number of DjVu-related CGI arguments (arguments
187 	  following #DJVUOPTS# in the URL). */
188    int		djvu_cgi_arguments(void) const;
189 
190       /** Returns that part of CGI argument number #num#, which is
191 	  before the equal sign. */
192    GUTF8String	cgi_name(int num) const;
193 
194       /** Returns that part of DjVu-related CGI argument number #num#,
195 	  which is before the equal sign. */
196    GUTF8String	djvu_cgi_name(int num) const;
197 
198       /** Returns that part of CGI argument number #num#, which is
199 	  after the equal sign. */
200    GUTF8String	cgi_value(int num) const;
201 
202       /** Returns that part of DjVu-related CGI argument number #num#,
203 	  which is after the equal sign. */
204    GUTF8String	djvu_cgi_value(int num) const;
205 
206       /** Returns array of all known CGI names (part of CGI argument before
207 	  the equal sign) */
208    DArray<GUTF8String>cgi_names(void) const;
209 
210       /** Returns array of names of DjVu-related CGI arguments (arguments
211 	  following #DJVUOPTS# option. */
212    DArray<GUTF8String>djvu_cgi_names(void) const;
213 
214       /** Returns array of all known CGI names (part of CGI argument before
215 	  the equal sign) */
216    DArray<GUTF8String>cgi_values(void) const;
217 
218       /** Returns array of values of DjVu-related CGI arguments (arguments
219 	  following #DJVUOPTS# option. */
220    DArray<GUTF8String>djvu_cgi_values(void) const;
221 
222       /// Erases everything after the first '\#' or '?'
223    void		clear_all_arguments(void);
224 
225       /// Erases everything after the first '\#'
226    void		clear_hash_argument(void);
227 
228       /// Erases DjVu CGI arguments (following "#DJVUOPTS#")
229    void		clear_djvu_cgi_arguments(void);
230 
231       /// Erases all CGI arguments (following the first '?')
232    void		clear_cgi_arguments(void);
233 
234       /** Appends the specified CGI argument. Will insert "#DJVUOPTS#" if
235 	  necessary */
236    void		add_djvu_cgi_argument(const GUTF8String &name, const char * value=0);
237 
238       /** Returns the URL corresponding to the directory containing
239 	  the document with this URL. The function basically takes the
240 	  URL and clears everything after the last slash. */
241    GURL		base(void) const;
242 
243       /// Returns the aboslute URL without the host part.
244    GUTF8String pathname(void) const;
245 
246       /** Returns the name part of this URL.
247 	  For example, if the URL is #http://www.lizardtech.com/file%201.djvu# then
248           this function will return #file%201.djvu#. \Ref{fname}() will
249           return #file 1.djvu# at the same time. */
250    GUTF8String	name(void) const;
251 
252       /** Returns the name part of this URL with escape sequences expanded.
253 	  For example, if the URL is #http://www.lizardtech.com/file%201.djvu# then
254           this function will return #file 1.djvu#. \Ref{name}() will
255           return #file%201.djvu# at the same time. */
256    GUTF8String	fname(void) const;
257 
258       /// Returns the extention part of name of document in this URL.
259    GUTF8String	extension(void) const;
260 
261       /// Checks if this is an empty URL
262    bool		is_empty(void) const;
263 
264       /// Checks if the URL is local (starts from #file:/#) or not
265    bool		is_local_file_url(void) const;
266 
267       /** @name Concatenation operators
268 	  Concatenate the GURL with the passed {\em name}. If the {\em name}
269 	  is absolute (has non empty protocol prefix), we just return
270 	  #GURL(name)#. Otherwise the #name# is appended to the GURL after a
271 	  separating slash.
272       */
273       //@{
274       ///
275 //   GURL		operator+(const GUTF8String &name) const;
276       //@}
277 
278       /// Returns TRUE if #gurl1# and #gurl2# are the same
279    bool	operator==(const GURL & gurl2) const;
280 
281       /// Returns TRUE if #gurl1# and #gurl2# are different
282    bool	operator!=(const GURL & gurl2) const;
283 
284       /// Assignment operator
285    GURL &	operator=(const GURL & url);
286 
287       /// Returns Internal URL representation
288    operator	const char*(void) const { return url; };
289 
290   /** Returns a string representing the URL.  This function normally
291       returns a standard file URL as described in RFC 1738.
292       Some versions of MSIE do not support this standard syntax.
293       A brain damaged MSIE compatible syntax is generated
294       when the optional argument #useragent# contains string #"MSIE"# or
295       #"Microsoft"#. */
296    GUTF8String get_string(const GUTF8String &useragent) const;
297 
298    GUTF8String get_string(const bool nothrow=false) const;
299 
300       /// Escape special characters
301    static GUTF8String encode_reserved(const GUTF8String &gs);
302 
303    /** Decodes reserved characters from the URL.
304       See also: \Ref{encode_reserved}(). */
305    static GUTF8String decode_reserved(const GUTF8String &url);
306 
307   /// Test if this url is an existing file, directory, or device.
308   bool is_local_path(void) const;
309 
310   /// Test if this url is an existing file.
311   bool is_file(void) const;
312 
313   /// Test if this url is an existing directory.
314   bool is_dir(void) const;
315 
316   /// Follows symbolic links.
317   GURL follow_symlinks(void) const;
318 
319   /// Creates the specified directory.
320   int mkdir(void) const;
321 
322   /** Deletes file or directory.
323       Directories are not deleted unless the directory is empty.
324       Returns a negative number if an error occurs. */
325   int deletefile(void) const;
326 
327   /** Recursively erases contents of directory. The directory
328       itself will not be removed. */
329   int cleardir(const int timeout=0) const;
330 
331   /// Rename a file or directory.
332   int renameto(const GURL &newurl) const;
333 
334   /// List the contents of a directory.
335   GList<GURL> listdir(void) const;
336 
337   /** Returns a filename for a URL. Argument #url# must be a legal file URL.
338       This function applies heuristic rules to convert the URL into a valid
339       file name. It is guaranteed that this function can properly parse all
340       URLs generated by #filename_to_url#. The heuristics also work better when
341       the file actually exists.  The empty string is returned when this
342       function cannot parse the URL or when the URL is not a file URL.
343         URL formats are as described in RFC 1738 plus the following alternative
344       formats for files on the local host:
345 
346                 file://<letter>:/<path>
347                 file://<letter>|/<path>
348                 file:/<path>
349 
350       which are accepted because various browsers recognize them.*/
351    GUTF8String UTF8Filename(void) const;
352    /// Same but returns a native string.
353    GNativeString NativeFilename(void) const;
354 
355       /** Hashing function.
356 	  @return hash suitable for usage in \Ref{GMap} */
357    friend unsigned int	hash(const GURL & gurl);
358 
359   /** Returns fully qualified file names.  This functions constructs the fully
360       qualified name of file or directory #filename#. When provided, the
361       optional argument #fromdirname# is used as the current directory when
362       interpreting relative specifications in #filename#.  Function
363       #expand_name# is very useful for logically concatenating file names.  It
364       knows which separators should be used for each operating system and it
365       knows which syntactical rules apply. */
366   static GUTF8String expand_name(const GUTF8String &filename, const char *fromdirname=0);
367 };
368 
369 class DJVUAPI GURL::UTF8 : public GURL
370 {
371 public:
372   UTF8(const GUTF8String &xurl);
373   UTF8(const GUTF8String &xurl, const GURL &codebase);
374 };
375 
376 class DJVUAPI GURL::Native : public GURL
377 {
378 public:
379   Native(const GNativeString &xurl);
380   Native(const GNativeString &xurl, const GURL &codebase);
381 };
382 
383 class DJVUAPI GURL::Filename : public GURL
384 {
385 public:
386   Filename(const GUTF8String &filename);
387   Filename(const GNativeString &filename);
388   class UTF8;
389   class Native;
390 };
391 
392 class DJVUAPI GURL::Filename::UTF8 : public GURL::Filename
393 {
394 public:
395   UTF8(const GUTF8String &filename);
396 };
397 
398 class DJVUAPI GURL::Filename::Native : public GURL::Filename
399 {
400 public:
401   Native(const GNativeString &filename);
402 };
403 
404 
405 inline bool
406 GURL::operator!=(const GURL & gurl2) const
407 {
408   return !(*this == gurl2);
409 }
410 
411 inline GUTF8String
protocol(void)412 GURL::protocol(void) const
413 {
414    return protocol(get_string());
415 }
416 
417 inline bool
is_empty(void)418 GURL::is_empty(void) const
419 {
420    return !url.length()||!get_string().length();
421 }
422 
423 // Test if the URL is valid.
424 // If invalid, reinitialize and return the result.
425 inline bool
is_valid(void)426 GURL::is_valid(void) const
427 {
428   if(!validurl)
429     const_cast<GURL *>(this)->init(true);
430   return validurl;
431 }
432 
433 
434 
435 //@}
436 
437 
438 #ifdef HAVE_NAMESPACES
439 }
440 # ifndef NOT_USING_DJVU_NAMESPACE
441 using namespace DJVU;
442 # endif
443 #endif
444 #endif
445