1 //C- -*- C++ -*-
2 //C- -------------------------------------------------------------------
3 //C- DjVuLibre-3.5
4 //C- Copyright (c) 2002 Leon Bottou and Yann Le Cun.
5 //C- Copyright (c) 2001 AT&T
6 //C-
7 //C- This software is subject to, and may be distributed under, the
8 //C- GNU General Public License, either Version 2 of the license,
9 //C- or (at your option) any later version. The license should have
10 //C- accompanied the software or you may obtain a copy of the license
11 //C- from the Free Software Foundation at http://www.fsf.org .
12 //C-
13 //C- This program is distributed in the hope that it will be useful,
14 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 //C- GNU General Public License for more details.
17 //C-
18 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library from
19 //C- Lizardtech Software. Lizardtech Software has authorized us to
20 //C- replace the original DjVu(r) Reference Library notice by the following
21 //C- text (see doc/lizard2002.djvu and doc/lizardtech2007.djvu):
22 //C-
23 //C- ------------------------------------------------------------------
24 //C- | DjVu (r) Reference Library (v. 3.5)
25 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
26 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
27 //C- | 6,058,214 and patents pending.
28 //C- |
29 //C- | This software is subject to, and may be distributed under, the
30 //C- | GNU General Public License, either Version 2 of the license,
31 //C- | or (at your option) any later version. The license should have
32 //C- | accompanied the software or you may obtain a copy of the license
33 //C- | from the Free Software Foundation at http://www.fsf.org .
34 //C- |
35 //C- | The computer code originally released by LizardTech under this
36 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
37 //C- | ORIGINAL CODE." Subject to any third party intellectual property
38 //C- | claims, LizardTech grants recipient a worldwide, royalty-free,
39 //C- | non-exclusive license to make, use, sell, or otherwise dispose of
40 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the
41 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU
42 //C- | General Public License. This grant only confers the right to
43 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to
44 //C- | the extent such infringement is reasonably necessary to enable
45 //C- | recipient to make, have made, practice, sell, or otherwise dispose
46 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to
47 //C- | any greater extent that may be necessary to utilize further
48 //C- | modifications or combinations.
49 //C- |
50 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
51 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
52 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
53 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
54 //C- +------------------------------------------------------------------
55
56 #ifndef _GURL_H_
57 #define _GURL_H_
58 #ifdef HAVE_CONFIG_H
59 #include "config.h"
60 #endif
61 #if NEED_GNUG_PRAGMAS
62 # pragma interface
63 #endif
64
65
66 #include "GString.h"
67 #include "Arrays.h"
68 #include "GThreads.h"
69
70
71 #ifdef HAVE_NAMESPACES
72 namespace DJVU {
73 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
74 }
75 #endif
76 #endif
77
/** @name GURL.h
    Files #"GURL.h"# and #"GURL.cpp"# contain the implementation of the
    \Ref{GURL} class used to store URLs in a system independent format.
    @memo System independent URL representation.
    @author Andrei Erofeev <eaf@geocities.com>

    // From: Leon Bottou, 1/31/2002
    // This has been heavily changed by Lizardtech.
    // They decided to use URLs for everything, including
    // the most basic file access. The URL class is now an unholy
    // mixture of code for syntactically parsing the URLs (which it was)
    // and file status code (only for local file: URLs).

*/
92
93 //@{
94
/** System independent URL representation.

    This class is used in the library to store URLs in a system independent
    format. The idea to use a general class to hold URLs arose after we
    realized that DjVu had to be able to access files both from the WEB
    and from the local disk. While it is strange to talk about system
    independence of HTTP URLs, file name formats obviously differ from
    platform to platform. They may contain forward slashes, backward slashes,
    colons as separators, etc. There may be more than one URL corresponding
    to the same file name. Compare #file:/dir/file.djvu# and
    #file://localhost/dir/file.djvu#.

    To simplify a developer's life we have created this class, which contains
    inside a canonical representation of URLs.

    File URLs are converted to internal format with the help of \Ref{GOS} class.

    All other URLs are modified to contain only forward slashes.
*/
114
115 class DJVUAPI GURL
116 {
117 public:
118 class Filename;
119 class UTF8;
120 class Native;
121 protected:
122 /** @name Constructors
123 Accept the string URL, check that it starts from #file:/#
124 or #http:/# and convert to internal system independent
125 representation.
126 */
127 //@{
128 ///
129 GURL(const char * url_string);
130 //@}
131
132 public:
133 GURL(void);
134
135 GURL(const GUTF8String & url_string);
136
137 GURL(const GNativeString & url_string);
138
139 GURL(const GUTF8String &xurl, const GURL &codebase);
140
141 GURL(const GNativeString &xurl, const GURL &codebase);
142
143 /// Copy constructor
144 GURL(const GURL & gurl);
145
146 /// The destructor
~GURL(void)147 virtual ~GURL(void) {}
148
149 private:
150 // The 'class_lock' should be locked whenever you're accessing
151 // url, or cgi_name_arr, or cgi_value_arr.
152 GCriticalSection class_lock;
153 protected:
154 GUTF8String url;
155 DArray<GUTF8String> cgi_name_arr, cgi_value_arr;
156 bool validurl;
157
158 void init(const bool nothrow=false);
159 void convert_slashes(void);
160 void beautify_path(void);
161 static GUTF8String beautify_path(GUTF8String url);
162
163 static GUTF8String protocol(const GUTF8String& url);
164 void parse_cgi_args(void);
165 void store_cgi_args(void);
166 public:
167 /// Test if the URL is valid. If invalid, reinitialize.
168 bool is_valid(void) const; // const lies to the compiler because of dependency problems
169
170 /// Extracts the {\em protocol} part from the URL and returns it
171 GUTF8String protocol(void) const;
172
173 /** Returns string after the first '\#' with decoded
174 escape sequences. */
175 GUTF8String hash_argument(void) const;
176
177 /** Inserts the #arg# after a separating hash into the URL.
178 The function encodes any illegal character in #arg# using
179 \Ref{GOS::encode_reserved}(). */
180 void set_hash_argument(const GUTF8String &arg);
181
182 /** Returns the total number of CGI arguments in the URL.
183 CGI arguments follow '#?#' sign and are separated by '#&#' signs */
184 int cgi_arguments(void) const;
185
186 /** Returns the total number of DjVu-related CGI arguments (arguments
187 following #DJVUOPTS# in the URL). */
188 int djvu_cgi_arguments(void) const;
189
190 /** Returns that part of CGI argument number #num#, which is
191 before the equal sign. */
192 GUTF8String cgi_name(int num) const;
193
194 /** Returns that part of DjVu-related CGI argument number #num#,
195 which is before the equal sign. */
196 GUTF8String djvu_cgi_name(int num) const;
197
198 /** Returns that part of CGI argument number #num#, which is
199 after the equal sign. */
200 GUTF8String cgi_value(int num) const;
201
202 /** Returns that part of DjVu-related CGI argument number #num#,
203 which is after the equal sign. */
204 GUTF8String djvu_cgi_value(int num) const;
205
206 /** Returns array of all known CGI names (part of CGI argument before
207 the equal sign) */
208 DArray<GUTF8String>cgi_names(void) const;
209
210 /** Returns array of names of DjVu-related CGI arguments (arguments
211 following #DJVUOPTS# option. */
212 DArray<GUTF8String>djvu_cgi_names(void) const;
213
214 /** Returns array of all known CGI names (part of CGI argument before
215 the equal sign) */
216 DArray<GUTF8String>cgi_values(void) const;
217
218 /** Returns array of values of DjVu-related CGI arguments (arguments
219 following #DJVUOPTS# option. */
220 DArray<GUTF8String>djvu_cgi_values(void) const;
221
222 /// Erases everything after the first '\#' or '?'
223 void clear_all_arguments(void);
224
225 /// Erases everything after the first '\#'
226 void clear_hash_argument(void);
227
228 /// Erases DjVu CGI arguments (following "#DJVUOPTS#")
229 void clear_djvu_cgi_arguments(void);
230
231 /// Erases all CGI arguments (following the first '?')
232 void clear_cgi_arguments(void);
233
234 /** Appends the specified CGI argument. Will insert "#DJVUOPTS#" if
235 necessary */
236 void add_djvu_cgi_argument(const GUTF8String &name, const char * value=0);
237
238 /** Returns the URL corresponding to the directory containing
239 the document with this URL. The function basically takes the
240 URL and clears everything after the last slash. */
241 GURL base(void) const;
242
243 /// Returns the aboslute URL without the host part.
244 GUTF8String pathname(void) const;
245
246 /** Returns the name part of this URL.
247 For example, if the URL is #http://www.lizardtech.com/file%201.djvu# then
248 this function will return #file%201.djvu#. \Ref{fname}() will
249 return #file 1.djvu# at the same time. */
250 GUTF8String name(void) const;
251
252 /** Returns the name part of this URL with escape sequences expanded.
253 For example, if the URL is #http://www.lizardtech.com/file%201.djvu# then
254 this function will return #file 1.djvu#. \Ref{name}() will
255 return #file%201.djvu# at the same time. */
256 GUTF8String fname(void) const;
257
258 /// Returns the extention part of name of document in this URL.
259 GUTF8String extension(void) const;
260
261 /// Checks if this is an empty URL
262 bool is_empty(void) const;
263
264 /// Checks if the URL is local (starts from #file:/#) or not
265 bool is_local_file_url(void) const;
266
267 /** @name Concatenation operators
268 Concatenate the GURL with the passed {\em name}. If the {\em name}
269 is absolute (has non empty protocol prefix), we just return
270 #GURL(name)#. Otherwise the #name# is appended to the GURL after a
271 separating slash.
272 */
273 //@{
274 ///
275 // GURL operator+(const GUTF8String &name) const;
276 //@}
277
278 /// Returns TRUE if #gurl1# and #gurl2# are the same
279 bool operator==(const GURL & gurl2) const;
280
281 /// Returns TRUE if #gurl1# and #gurl2# are different
282 bool operator!=(const GURL & gurl2) const;
283
284 /// Assignment operator
285 GURL & operator=(const GURL & url);
286
287 /// Returns Internal URL representation
288 operator const char*(void) const { return url; };
289
290 /** Returns a string representing the URL. This function normally
291 returns a standard file URL as described in RFC 1738.
292 Some versions of MSIE do not support this standard syntax.
293 A brain damaged MSIE compatible syntax is generated
294 when the optional argument #useragent# contains string #"MSIE"# or
295 #"Microsoft"#. */
296 GUTF8String get_string(const GUTF8String &useragent) const;
297
298 GUTF8String get_string(const bool nothrow=false) const;
299
300 /// Escape special characters
301 static GUTF8String encode_reserved(const GUTF8String &gs);
302
303 /** Decodes reserved characters from the URL.
304 See also: \Ref{encode_reserved}(). */
305 static GUTF8String decode_reserved(const GUTF8String &url);
306
307 /// Test if this url is an existing file, directory, or device.
308 bool is_local_path(void) const;
309
310 /// Test if this url is an existing file.
311 bool is_file(void) const;
312
313 /// Test if this url is an existing directory.
314 bool is_dir(void) const;
315
316 /// Follows symbolic links.
317 GURL follow_symlinks(void) const;
318
319 /// Creates the specified directory.
320 int mkdir(void) const;
321
322 /** Deletes file or directory.
323 Directories are not deleted unless the directory is empty.
324 Returns a negative number if an error occurs. */
325 int deletefile(void) const;
326
327 /** Recursively erases contents of directory. The directory
328 itself will not be removed. */
329 int cleardir(const int timeout=0) const;
330
331 /// Rename a file or directory.
332 int renameto(const GURL &newurl) const;
333
334 /// List the contents of a directory.
335 GList<GURL> listdir(void) const;
336
337 /** Returns a filename for a URL. Argument #url# must be a legal file URL.
338 This function applies heuristic rules to convert the URL into a valid
339 file name. It is guaranteed that this function can properly parse all
340 URLs generated by #filename_to_url#. The heuristics also work better when
341 the file actually exists. The empty string is returned when this
342 function cannot parse the URL or when the URL is not a file URL.
343 URL formats are as described in RFC 1738 plus the following alternative
344 formats for files on the local host:
345
346 file://<letter>:/<path>
347 file://<letter>|/<path>
348 file:/<path>
349
350 which are accepted because various browsers recognize them.*/
351 GUTF8String UTF8Filename(void) const;
352 /// Same but returns a native string.
353 GNativeString NativeFilename(void) const;
354
355 /** Hashing function.
356 @return hash suitable for usage in \Ref{GMap} */
357 friend unsigned int hash(const GURL & gurl);
358
359 /** Returns fully qualified file names. This functions constructs the fully
360 qualified name of file or directory #filename#. When provided, the
361 optional argument #fromdirname# is used as the current directory when
362 interpreting relative specifications in #filename#. Function
363 #expand_name# is very useful for logically concatenating file names. It
364 knows which separators should be used for each operating system and it
365 knows which syntactical rules apply. */
366 static GUTF8String expand_name(const GUTF8String &filename, const char *fromdirname=0);
367 };
368
369 class DJVUAPI GURL::UTF8 : public GURL
370 {
371 public:
372 UTF8(const GUTF8String &xurl);
373 UTF8(const GUTF8String &xurl, const GURL &codebase);
374 };
375
376 class DJVUAPI GURL::Native : public GURL
377 {
378 public:
379 Native(const GNativeString &xurl);
380 Native(const GNativeString &xurl, const GURL &codebase);
381 };
382
383 class DJVUAPI GURL::Filename : public GURL
384 {
385 public:
386 Filename(const GUTF8String &filename);
387 Filename(const GNativeString &filename);
388 class UTF8;
389 class Native;
390 };
391
392 class DJVUAPI GURL::Filename::UTF8 : public GURL::Filename
393 {
394 public:
395 UTF8(const GUTF8String &filename);
396 };
397
398 class DJVUAPI GURL::Filename::Native : public GURL::Filename
399 {
400 public:
401 Native(const GNativeString &filename);
402 };
403
404
405 inline bool
406 GURL::operator!=(const GURL & gurl2) const
407 {
408 return !(*this == gurl2);
409 }
410
411 inline GUTF8String
protocol(void)412 GURL::protocol(void) const
413 {
414 return protocol(get_string());
415 }
416
417 inline bool
is_empty(void)418 GURL::is_empty(void) const
419 {
420 return !url.length()||!get_string().length();
421 }
422
423 // Test if the URL is valid.
424 // If invalid, reinitialize and return the result.
425 inline bool
is_valid(void)426 GURL::is_valid(void) const
427 {
428 if(!validurl)
429 const_cast<GURL *>(this)->init(true);
430 return validurl;
431 }
432
433
434
435 //@}
436
437
438 #ifdef HAVE_NAMESPACES
439 }
440 # ifndef NOT_USING_DJVU_NAMESPACE
441 using namespace DJVU;
442 # endif
443 #endif
444 #endif
445