/*
 * url.h
 *
 * Universal Resource Locator (for HTTP/HTML) class.
 *
 * Portable Windows Library
 *
 * Copyright (c) 1993-2002 Equivalence Pty. Ltd.
 *
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.0 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is Portable Windows Library.
 *
 * The Initial Developer of the Original Code is Equivalence Pty. Ltd.
 *
 * Contributor(s): ______________________________________.
 *
 * $Revision: 27132 $
 * $Author: rjongbloed $
 * $Date: 2012-03-06 17:29:05 -0600 (Tue, 06 Mar 2012) $
 */

31 #ifndef PTLIB_PURL_H
32 #define PTLIB_PURL_H
33 
34 #ifdef P_USE_PRAGMA
35 #pragma interface
36 #endif
37 
38 
39 #if P_URL
40 
41 #include <ptlib/pfactory.h>
42 
43 
44 //////////////////////////////////////////////////////////////////////////////
45 // PURL
46 
47 class PURLLegacyScheme;
48 
49 /**
50  This class describes a Universal Resource Locator.
51  This is the desciption of a resource location as used by the World Wide
52  Web and the <code>PHTTPSocket</code> class.
53  */
54 class PURL : public PObject
55 {
56   PCLASSINFO(PURL, PObject)
57   public:
58     /**Construct a new URL object from the URL string. */
59     PURL();
60     /**Construct a new URL object from the URL string. */
61     PURL(
62       const char * cstr,    ///< C string representation of the URL.
63       const char * defaultScheme = "http" ///< Default scheme for URL
64     );
65     /**Construct a new URL object from the URL string. */
66     PURL(
67       const PString & str,  ///< String representation of the URL.
68       const char * defaultScheme = "http" ///< Default scheme for URL
69     );
70     /**Construct a new URL object from the file path. */
71     PURL(
72       const PFilePath & path   ///< File path to turn into a "file:" URL.
73     );
74 
75     PURL(const PURL & other);
76     PURL & operator=(const PURL & other);
77 
78   /**@name Overrides from class PObject */
79   //@{
80     /**Compare the two URLs and return their relative rank.
81 
82      @return
83        <code>LessThan</code>, <code>EqualTo</code> or <code>GreaterThan</code>
84        according to the relative rank of the objects.
85      */
86     virtual Comparison Compare(
87       const PObject & obj   ///< Object to compare against.
88     ) const;
89 
90     /**This function yields a hash value required by the <code>PDictionary</code>
91        class. A descendent class that is required to be the key of a dictionary
92        should override this function. The precise values returned is dependent
93        on the semantics of the class. For example, the <code>PString</code> class
94        overrides it to provide a hash function for distinguishing text strings.
95 
96        The default behaviour is to return the value zero.
97 
98        @return
99        hash function value for class instance.
100      */
101     virtual PINDEX HashFunction() const;
102 
103     /**Output the contents of the URL to the stream as a string.
104      */
105     virtual void PrintOn(
106       ostream &strm   ///< Stream to print the object into.
107     ) const;
108 
109     /**Input the contents of the URL from the stream. The input is a URL in
110        string form.
111      */
112     virtual void ReadFrom(
113       istream &strm   ///< Stream to read the objects contents from.
114     );
115   //@}
116 
117   /**@name New functions for class. */
118   //@{
119     /**Parse the URL string into the fields in the object instance. */
120     inline PBoolean Parse(
121       const char * cstr,   ///< URL as a string to parse.
122       const char * defaultScheme = NULL ///< Default scheme for URL
123     ) { return InternalParse(cstr, defaultScheme); }
124     /**Parse the URL string into the fields in the object instance. */
125     inline PBoolean Parse(
126       const PString & str, ///< URL as a string to parse.
127       const char * defaultScheme = NULL ///< Default scheme for URL
128     ) { return InternalParse((const char *)str, defaultScheme); }
129 
130     /**Print/String output representation formats. */
131     enum UrlFormat {
132       /// Translate to a string as a full URL
133       FullURL,
134       /// Translate to a string as only path
135       PathOnly,
136       /// Translate to a string with no scheme or host
137       URIOnly,
138       /// Translate to a string with scheme and host/port
139       HostPortOnly
140     };
141 
142     /**Convert the URL object into its string representation. The parameter
143        indicates whether a full or partial representation os to be produced.
144 
145        @return
146        String representation of the URL.
147      */
148     PString AsString(
149       UrlFormat fmt = FullURL   ///< The type of string to be returned.
150     ) const;
PString()151     operator PString() const { return AsString(); }
152 
153     /**Get the "file:" URL as a file path.
154        If the URL is not a "file:" URL then returns an empty string.
155       */
156     PFilePath AsFilePath() const;
157 
158     /// Type for translation of strings to URL format,
159     enum TranslationType {
160       /// Translate a username/password field for a URL.
161       LoginTranslation,
162       /// Translate the path field for a URL.
163       PathTranslation,
164       /// Translate the query variable field for a URL.
165       QueryTranslation,
166       /// Translate the parameter variables field for a URL.
167       ParameterTranslation,
168       /// Translate the quoted parameter variables field for a URL.
169       QuotedParameterTranslation
170     };
171 
172     /**Translate a string from general form to one that can be included into
173        a URL. All reserved characters for the particular field type are
174        escaped.
175 
176        @return
177        String for the URL ready translation.
178      */
179     static PString TranslateString(
180       const PString & str,    ///< String to be translated.
181       TranslationType type    ///< Type of translation.
182     );
183 
184     /**Untranslate a string from a form that was included into a URL into a
185        normal string. All reserved characters for the particular field type
186        are unescaped.
187 
188        @return
189        String from the URL untranslated.
190      */
191     static PString UntranslateString(
192       const PString & str,    ///< String to be translated.
193       TranslationType type    ///< Type of translation.
194     );
195 
196     /** Split a string to a dictionary of names and values. */
197     static void SplitVars(
198       const PString & str,    ///< String to split into variables.
199       PStringToString & vars, ///< Dictionary of variable names and values.
200       char sep1 = ';',        ///< Separater between pairs
201       char sep2 = '=',        ///< Separater between key and value
202       TranslationType type = ParameterTranslation ///< Type of translation.
203     );
204 
205     /** Split a string in &= form to a dictionary of names and values. */
SplitQueryVars(const PString & queryStr,PStringToString & queryVars)206     static void SplitQueryVars(
207       const PString & queryStr,   ///< String to split into variables.
208       PStringToString & queryVars ///< Dictionary of variable names and values.
209     ) { SplitVars(queryStr, queryVars, '&', '=', QueryTranslation); }
210 
211     /** Construct string from a dictionary using separators.
212       */
213     static void OutputVars(
214       ostream & strm,               ///< Stream to output dictionary to
215       const PStringToString & vars, ///< Dictionary of variable names and values.
216       char sep0 = ';',              ///< First separater before all ('\0' means none)
217       char sep1 = ';',              ///< Separater between pairs
218       char sep2 = '=',              ///< Separater between key and value
219       TranslationType type = ParameterTranslation ///< Type of translation.
220     );
221 
222 
223     /// Get the scheme field of the URL.
GetScheme()224     const PCaselessString & GetScheme() const { return scheme; }
225 
226     /// Set the scheme field of the URL
227     void SetScheme(const PString & scheme);
228 
229     /// Get the username field of the URL.
GetUserName()230     const PString & GetUserName() const { return username; }
231 
232     /// Set the username field of the URL.
233     void SetUserName(const PString & username);
234 
235     /// Get the password field of the URL.
GetPassword()236     const PString & GetPassword() const { return password; }
237 
238     /// Set the password field of the URL.
239     void SetPassword(const PString & password);
240 
241     /// Get the hostname field of the URL.
GetHostName()242     const PCaselessString & GetHostName() const { return hostname; }
243 
244     /// Set the hostname field of the URL.
245     void SetHostName(const PString & hostname);
246 
247     /// Get the port field of the URL.
GetPort()248     WORD GetPort() const { return port; }
249 
250     /// Set the port field in the URL.
251     void SetPort(WORD newPort);
252 
253     /// Get if explicit port is specified.
GetPortSupplied()254     PBoolean GetPortSupplied() const { return portSupplied; }
255 
256     /// Get if path is relative or absolute
GetRelativePath()257     PBoolean GetRelativePath() const { return relativePath; }
258 
259     /// Get the path field of the URL as a string.
260     PString GetPathStr() const;
261 
262     /// Set the path field of the URL as a string.
263     void SetPathStr(const PString & pathStr);
264 
265     /// Get the path field of the URL as a string array.
GetPath()266     const PStringArray & GetPath() const { return path; }
267 
268     /// Set the path field of the URL as a string array.
269     void SetPath(const PStringArray & path);
270 
271     /// Append segment to the path field of the URL.
272     void AppendPath(const PString & segment);
273 
274     /// Get the parameter (;) field of the URL.
275     PString GetParameters() const;
276 
277     /// Set the parameter (;) field of the URL.
278     void SetParameters(const PString & parameters);
279 
280     /// Get the parameter (;) field(s) of the URL as a string dictionary.
281     /// Note the values have already been translated using UntranslateString
GetParamVars()282     const PStringOptions & GetParamVars() const { return paramVars; }
283 
284     /// Set the parameter (;) field(s) of the URL as a string dictionary.
285     /// Note the values will be translated using TranslateString
286     void SetParamVars(const PStringToString & paramVars);
287 
288     /// Set the parameter (;) field of the URL as a string dictionary.
289     /// Note the values will be translated using TranslateString
290     void SetParamVar(
291       const PString & key,          ///< Key to add/delete
292       const PString & data,         ///< Vlaue to add at key, if empty string may be removed
293       bool emptyDataDeletes = true  ///< If true, and data empty string, key is removed
294     );
295 
296     /// Get the fragment (\#) field of the URL.
GetFragment()297     const PString & GetFragment() const { return fragment; }
298 
299     /// Get the Query (?) field of the URL as a string.
300     PString GetQuery() const;
301 
302     /// Set the Query (?) field of the URL as a string.
303     /// Note the values will be translated using UntranslateString
304     void SetQuery(const PString & query);
305 
306     /// Get the Query (?) field of the URL as a string dictionary.
307     /// Note the values have already been translated using UntranslateString
GetQueryVars()308     const PStringOptions & GetQueryVars() const { return queryVars; }
309 
310     /// Set the Query (?) field(s) of the URL as a string dictionary.
311     /// Note the values will be translated using TranslateString
312     void SetQueryVars(const PStringToString & queryVars);
313 
314     /// Set the Query (?) field of the URL as a string dictionary.
315     /// Note the values will be translated using TranslateString
316     void SetQueryVar(const PString & key, const PString & data);
317 
318     /// Get the contents of URL, data left after all elemetns are parsed out
GetContents()319     const PString & GetContents() const { return m_contents; }
320 
321     /// Set the contents of URL, data left after all elemetns are parsed out
322     void SetContents(const PString & str);
323 
324     /// Return true if the URL is an empty string.
IsEmpty()325     PBoolean IsEmpty() const { return urlString.IsEmpty(); }
326 
327 
328     /**Get the resource the URL is pointing at.
329        The data returned is obtained according to the scheme and the factory
330        PURLLoaderFactory.
331       */
332     bool LoadResource(
333       PString & data,  ///< Resource data as a string
334       const PString & requiredContentType = PString::Empty() ///< Expected content type where applicable
335     ) const;
336     bool LoadResource(
337       PBYTEArray & data,  ///< Resource data as a binary blob
338       const PString & requiredContentType = PString::Empty() ///< Expected content type where applicable
339     ) const;
340 
341     /**Open the URL in a browser.
342 
343        @return
344        The browser was successfully opened. This does not mean the URL exists and was
345        displayed.
346      */
OpenBrowser()347     bool OpenBrowser() const { return OpenBrowser(AsString()); }
348     static bool OpenBrowser(
349       const PString & url   ///< URL to open
350     );
351   //@}
352 
353     PBoolean LegacyParse(const PString & url, const PURLLegacyScheme * schemeInfo);
354     PString LegacyAsString(PURL::UrlFormat fmt, const PURLLegacyScheme * schemeInfo) const;
355 
356   protected:
357     void CopyContents(const PURL & other);
358     virtual PBoolean InternalParse(
359       const char * cstr,         ///< URL as a string to parse.
360       const char * defaultScheme ///< Default scheme for URL
361     );
362     void Recalculate();
363     PString urlString;
364 
365     PCaselessString scheme;
366     PString username;
367     PString password;
368     PCaselessString hostname;
369     WORD port;
370     PBoolean portSupplied;          /// port was supplied in string input
371     PBoolean relativePath;
372     PStringArray path;
373     PStringOptions paramVars;
374     PString fragment;
375     PStringOptions queryVars;
376     PString m_contents;  // Anything left after parsing other elements
377 };


//////////////////////////////////////////////////////////////////////////////
// PURLScheme

383 class PURLScheme : public PObject
384 {
385   PCLASSINFO(PURLScheme, PObject);
386   public:
387     virtual PString GetName() const = 0;
388     virtual PBoolean Parse(const PString & url, PURL & purl) const = 0;
389     virtual PString AsString(PURL::UrlFormat fmt, const PURL & purl) const = 0;
390 };
391 
392 typedef PFactory<PURLScheme> PURLSchemeFactory;


//////////////////////////////////////////////////////////////////////////////
// PURLLegacyScheme

398 class PURLLegacyScheme : public PURLScheme
399 {
400   public:
401     PURLLegacyScheme(
402       const char * s,
403       bool user    = false,
404       bool pass    = false,
405       bool host    = false,
406       bool def     = false,
407       bool defhost = false,
408       bool query   = false,
409       bool params  = false,
410       bool frags   = false,
411       bool path    = false,
412       bool rel     = false,
413       WORD port    = 0
414     )
scheme(s)415       : scheme(s)
416       , hasUsername           (user)
417       , hasPassword           (pass)
418       , hasHostPort           (host)
419       , defaultToUserIfNoAt   (def)
420       , defaultHostToLocal    (defhost)
421       , hasQuery              (query)
422       , hasParameters         (params)
423       , hasFragments          (frags)
424       , hasPath               (path)
425       , relativeImpliesScheme (rel)
426       , defaultPort           (port)
427     { }
428 
Parse(const PString & url,PURL & purl)429     PBoolean Parse(const PString & url, PURL & purl) const
430     { return purl.LegacyParse(url, this); }
431 
AsString(PURL::UrlFormat fmt,const PURL & purl)432     PString AsString(PURL::UrlFormat fmt, const PURL & purl) const
433     { return purl.LegacyAsString(fmt, this); }
434 
GetName()435     PString GetName() const
436     { return scheme; }
437 
438     PString scheme;
439     bool hasUsername;
440     bool hasPassword;
441     bool hasHostPort;
442     bool defaultToUserIfNoAt;
443     bool defaultHostToLocal;
444     bool hasQuery;
445     bool hasParameters;
446     bool hasFragments;
447     bool hasPath;
448     bool relativeImpliesScheme;
449     WORD defaultPort;
450 };
451 
/**Declare and register a PURLLegacyScheme for the named scheme.
   The expansion defines a class called PURLLegacyScheme_ followed by the
   scheme name, whose default constructor forwards the name (as a string)
   and the remaining macro arguments, in order, to the PURLLegacyScheme
   constructor. It then defines a static PURLSchemeFactory::Worker for that
   class, named as the scheme name followed by Factory and keyed by the
   scheme name string.

   The expansion already ends with a semicolon, so no semicolon is required
   after the macro invocation.

   NOTE(review): the second Worker constructor argument (true) is presumably
   the factory's singleton flag -- confirm against ptlib/pfactory.h.
 */
#define PURL_LEGACY_SCHEME(schemeName, user, pass, host, def, defhost, query, params, frags, path, rel, port) \
  class PURLLegacyScheme_##schemeName : public PURLLegacyScheme \
  { \
    public: \
      PURLLegacyScheme_##schemeName() \
        : PURLLegacyScheme(#schemeName, user, pass, host, def, defhost, query, params, frags, path, rel, port) \
        { } \
  }; \
  static PURLSchemeFactory::Worker<PURLLegacyScheme_##schemeName> schemeName##Factory(#schemeName, true);



//////////////////////////////////////////////////////////////////////////////
// PURLLoader

467 class PURLLoader : public PObject
468 {
469   PCLASSINFO(PURLLoader, PObject);
470   public:
471     virtual bool Load(const PURL & url, PString & str, const PString & requiredContentType) = 0;
472     virtual bool Load(const PURL & url, PBYTEArray & data, const PString & requiredContentType) = 0;
473 };
474 
475 typedef PFactory<PURLLoader> PURLLoaderFactory;
476 
477 
478 #endif // P_URL
479 
480 #endif // PTLIB_PURL_H
481 
482 
483 // End Of File ///////////////////////////////////////////////////////////////
484