1 /*
2  * Copyright 2006-2010 The FLWOR Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 #ifndef ZORBA_URI_RESOLVER_H
18 #define ZORBA_URI_RESOLVER_H
19 
20 
/**
 * This file contains the implementation of the Zorba URI resolver.
 * This comprises the URLResolver and URIMapper interfaces, and the
 * Resource class hierarchy.
 *
 * Zorba has two built-in URLResolver implementations and two built-in
 * URIMapper implementations; those are implemented in
 * default_url_resolvers.h/cpp and default_uri_mappers.h/cpp.
 */
30 
31 #include <memory>
32 #include <istream>
33 #include <zorbatypes/zstring.h>
34 #include <util/auto_vector.h>
35 #include <store/api/shared_types.h>
36 #include <zorba/streams.h>
37 #include <zorba/locale.h>
38 
39 namespace zorba {
40 
41 // Forward declaration
42 class static_context;
43 
44 namespace internal {
45 
46 /**
47  * @brief The class representing the result of URL resolution.
48  *
49  * This class is the final output of the URI resolution process. All
50  * URL resolvers return results using subclasses of this class.
51  */
52 class Resource
53 {
54 public:
55   /**
56    * @brief Return the URL used to load this Resource.
57    */
getUrl()58   zstring const& getUrl() const { return theUrl; }
59 
60   virtual ~Resource() = 0;
61 
62 protected:
63 
64   Resource();
65 
66 private:
67 
68   /**
69    * Used by static_context to populate the URL.
70    */
setUrl(zstring const & aUrl)71   void setUrl(zstring const &aUrl) { theUrl = aUrl; }
72   friend class zorba::static_context;
73 
74   zstring theUrl;
75 };
76 
77 /**
78  * @ brief Concrete Resource subclass representing access to an entity
79  * via a stream.
80  */
81 class StreamResource : public Resource
82 {
83 public:
84 
85   /**
86    * @brief Public constructor from istream.
87    *
88    * The Resource object will take memory ownership of the istream. Zorba will
89    * pass it to aStreamReleaser when it is no longer needed, so that the
90    * original client may delete it.
91    * @param aStream An istream whence to read the string's content.
92    * @param aStreamReleaser A function pointer which is invoked once
93    *        the StreamResource is destroyed. Normally this function will delete
94    *        the std::istream object passed to it.
95    * @param aStreamUrl Normally this StreamResource represents the
96    * contents of the URL which was used to resolve it. However, there
97    * are certain unusual circumstances where a URLResolver may wish to
98    * return a stream over some other URL than the one passed to it. In
99    * that case, the URLResolver may pass the true URL here.
100    * @param aIsStreamSeekable determines whether the stream passed as first
101    * argument is arbitrarily seekable without throwing errors.
102    */
103   StreamResource(std::istream* aStream,
104                  StreamReleaser aStreamReleaser,
105                  zstring aStreamUrl = "",
106                  bool aIsStreamSeekable = false);
107 
108   virtual ~StreamResource();
109 
110   /**
111    * @brief Retrieve the istream associated with this Resource.
112    */
113   std::istream* getStream();
114 
115   /**
116    * @brief Retrieve the StreamReleaser.
117    */
118   StreamReleaser getStreamReleaser();
119 
120   /**
121    * @brief Assign the StreamReleaser. Generally you should only use this
122    * to set the StreamReleaser to "nullptr" if you are taking memory
123    * ownership of the istream.
124    */
125   void setStreamReleaser(StreamReleaser aStreamReleaser);
126 
127   /**
128    * @brief Retrieve the URL that the stream was actually loaded from.
129    * By default this will be the same as getUrl().
130    */
131   zstring getStreamUrl();
132 
133   /**
134    * @brief Returns true if the stream returned by getStream is seekable,
135    * false otherwise.
136    */
isStreamSeekable()137   bool isStreamSeekable() const { return theIsStreamSeekable; }
138 
139 private:
140 
141   std::istream* theStream;
142   StreamReleaser theStreamReleaser;
143   zstring theStreamUrl;
144   bool theIsStreamSeekable;
145 };
146 
147 /**
148  */
149 class CollectionResource : public Resource
150 {
151   public:
152   /**
153    */
154   CollectionResource(const store::Collection_t& aCollection);
155 
156   /**
157    */
158   store::Collection_t
159   getCollection();
160 
161   private:
162   store::Collection_t theCollection;
163 };
164 
/**
 * @brief Data which may be of use to URIMappers and URLResolvers when
 * mapping/resolving a URI.
 *
 * The base class only records the kind of entity for which the URI is
 * being resolved - for instance, a schema URI or a module URI.
 * Subclasses provide additional data for specific kinds of entities.
 */
class EntityData
{
public:
  /**
   * @brief The kinds of entities that may be represented by URIs, and
   * hence may be looked up via the URI resolution mechanism.
   *
   * The full-text kinds are only present when ZORBA_NO_FULL_TEXT is not
   * defined, so the numeric values of the later enumerators depend on
   * that macro.
   */
  enum Kind
  {
    SCHEMA,
    MODULE,
#ifndef ZORBA_NO_FULL_TEXT
    THESAURUS,
    STOP_WORDS,
#endif /* ZORBA_NO_FULL_TEXT */
    COLLECTION,
    DOCUMENT,
    SOME_CONTENT
  };

  /**
   * @brief Construct entity data for the given kind of entity.
   */
  EntityData(Kind aKind);

  /**
   * @brief Return the Kind of Entity for which this URI is being resolved.
   */
  virtual Kind getKind() const;

  virtual ~EntityData();

private:
  const Kind theKind;
};
205 
206 /**
207  * @brief Interface for URL resolving.
208  *
209  * Subclass this to provide a URL resolver to the method
210  * StaticContext::addURLResolver().
211  */
212 class URLResolver
213 {
214   public:
215 
216   virtual ~URLResolver();
217 
218   /**
219    * @brief Transforms an input URL into a Resource.
220    *
221    * The "aEntityData" parameter informs the URLResolver what kind of
222    * entity is being referenced by the URL. URLResolvers may choose to
223    * make use of this information to alter their behaviour.
224    * URLResolvers must ensure that they return a concrete subclass of
225    * Resource which is compatible with the entity kind being resolved.
226    *
227    * Implementers of this method should do nothing if they do not know
228    * how to resolve the URL.  They should create and return a Resource
229    * if they were successfully able to resolve the URL.
230    *
231    * Implementers may throw any exception if they believe that they
232    * are canonical for the URL and yet had some error arise attempting
233    * to resolve it.  Note that because there may be several possible
234    * URLs attempted, Zorba will catch any exceptions thrown and
235    * continue until all all URLs have failed. Zorba will not re-throw
236    * any of these exceptions directly. However, if the exception
237    * thrown extends std::exception, Zorba will make efforts to ensure
238    * that its error message is included in the exception which is
239    * ultimately thrown. For any other thrown objects, only the fact
240    * that an exception occurred will be remembered; the exception
241    * object itself will be discarded.
242    *
243    * In any case, if they create a Resource, Zorba will take memory
244    * ownership of the Resource and delete it when it is no longer
245    * needed.
246    */
247   virtual Resource* resolveURL(zstring const& aUrl,
248                                EntityData const* aEntityData) = 0;
249 };
250 
251 /**
252  * @brief Interface for URI mapping.
253  */
254 class URIMapper
255 {
256   public:
257 
258   virtual ~URIMapper();
259 
260   /**
261    * @brief Transform an input URI into a set of output URIs.
262    *
263    * The "aEntityKind" parameter informs the URIMapper what kind of
264    * entity is being referenced by URI. URIMappers may choose to make
265    * use of this information to alter their behaviour.
266    *
267    * Implementers of this method should provide output URIs by adding
268    * them to the oUris output parameter, using the push_back()
269    * method. They should not otherwise view or manipulate this vector.
270    *
271    * If a URIMapper does not wish to provide any output URIs for the
272    * given input URI, they should simply do nothing and return.
273    */
274   virtual void mapURI(zstring const& aUri,
275     EntityData const* aEntityData, static_context const& aSctx,
276     std::vector<zstring>& oUris)
277     = 0;
278 
279   /**
280    * @brief enum defining legal return values for mapperKind().
281    */
282   enum Kind {
283     COMPONENT,
284     CANDIDATE
285   };
286 
287   /**
288    * @brief Declare whether this is a "component" or "candidate" URI
289    * mapper.
290    *
291    * Zorba supports two different kinds of URI mapping. The first,
292    * "component URI mapping", is to allow mapping from an input URI to
293    * a set of URIs which, taken together, comprise the entire entity
294    * to be resolved. This is currently only supported for module
295    * import, where it can be used to load a module which is physically
296    * stored in multiple library module files.
297    *
298    * "Candidate URI mapping" is to allow mapping from an input URI to
299    * a set or URIs which are *potential* identifiers of the entity
300    * being resolved. Each of these URIs will be treated to any
301    * subsequent URI mappers, and then treated as URLs and passed in
302    * turn to all registered URLResolvers. This type of URI mapping is
303    * supported for all uses of URIs in Zorba. It can be used for
304    * example to redirect http: URIs to locally-cached file: URLs, or
305    * to provide several alternative locations for a given resource.
306    *
307    * If you do not override this method, the default is "candidate".
308    */
mapperKind()309   virtual Kind mapperKind() { return CANDIDATE; }
310 
311   /**
312    * @brief Constant indicating that Zorba should deny access to the
313    * given URI.
314    *
315    * If any kind of URIMapper returns this value at any point in the
316    * vector of URIs, Zorba will cause the resolution of this URI to be
317    * denied with an error.  This can be used, for example, to suppress
318    * importing particular modules by URI.
319    */
320   static const zstring DENY_ACCESS;
321 };
322 
} /* namespace zorba::internal */
324 
325 } /* namespace zorba */
326 
327 #endif
328 
329 /*
330  * Local variables:
331  * mode: c++
332  * End:
333  */
334 /* vim:set et sw=2 ts=2: */
335