1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /*
19  * $Id: AnyURIDatatypeValidator.cpp 676796 2008-07-15 05:04:13Z dbertoni $
20  */
21 
22 // ---------------------------------------------------------------------------
23 //  Includes
24 // ---------------------------------------------------------------------------
25 #include <stdio.h>
26 #include <xercesc/util/OutOfMemoryException.hpp>
27 #include <xercesc/util/XMLUTF8Transcoder.hpp>
28 #include <xercesc/framework/XMLBuffer.hpp>
29 #include <xercesc/validators/datatype/AnyURIDatatypeValidator.hpp>
30 #include <xercesc/validators/datatype/InvalidDatatypeFacetException.hpp>
31 #include <xercesc/validators/datatype/InvalidDatatypeValueException.hpp>
32 
33 XERCES_CPP_NAMESPACE_BEGIN
34 
35 // ---------------------------------------------------------------------------
36 //  Constructors and Destructor
37 // ---------------------------------------------------------------------------
AnyURIDatatypeValidator(MemoryManager * const manager)38 AnyURIDatatypeValidator::AnyURIDatatypeValidator(MemoryManager* const manager)
39 :AbstractStringValidator(0, 0, 0, DatatypeValidator::AnyURI, manager)
40 {}
41 
~AnyURIDatatypeValidator()42 AnyURIDatatypeValidator::~AnyURIDatatypeValidator()
43 {
44 }
45 
AnyURIDatatypeValidator(DatatypeValidator * const baseValidator,RefHashTableOf<KVStringPair> * const facets,RefArrayVectorOf<XMLCh> * const enums,const int finalSet,MemoryManager * const manager)46 AnyURIDatatypeValidator::AnyURIDatatypeValidator(
47                           DatatypeValidator*            const baseValidator
48                         , RefHashTableOf<KVStringPair>* const facets
49                         , RefArrayVectorOf<XMLCh>*      const enums
50                         , const int                           finalSet
51                         , MemoryManager* const manager)
52 :AbstractStringValidator(baseValidator, facets, finalSet, DatatypeValidator::AnyURI, manager)
53 {
54     init(enums, manager);
55 }
56 
newInstance(RefHashTableOf<KVStringPair> * const facets,RefArrayVectorOf<XMLCh> * const enums,const int finalSet,MemoryManager * const manager)57 DatatypeValidator* AnyURIDatatypeValidator::newInstance(
58                                       RefHashTableOf<KVStringPair>* const facets
59                                     , RefArrayVectorOf<XMLCh>*           const enums
60                                     , const int                           finalSet
61                                     , MemoryManager* const manager)
62 {
63     return (DatatypeValidator*) new (manager) AnyURIDatatypeValidator(this, facets, enums, finalSet, manager);
64 }
65 
66 // ---------------------------------------------------------------------------
67 //  Utilities
68 // ---------------------------------------------------------------------------
69 
checkValueSpace(const XMLCh * const content,MemoryManager * const manager)70 void AnyURIDatatypeValidator::checkValueSpace(const XMLCh* const content
71                                               , MemoryManager* const manager)
72 {
73     bool validURI = true;
74 
75     // check 3.2.17.c0 must: URI (rfc 2396/2723)
76     try
77     {
78         // Support for relative URLs
79         // According to Java 1.1: URLs may also be specified with a
80         // String and the URL object that it is related to.
81         //
82         XMLSize_t len = XMLString::stringLen(content);
83         if (len)
84         {
85             // Encode special characters using XLink 5.4 algorithm
86 			XMLBuffer encoded((len*3)+1, manager);
87             encode(content, len, encoded, manager);
88             validURI = XMLUri::isValidURI(true, encoded.getRawBuffer(), true);
89         }
90     }
91     catch(const OutOfMemoryException&)
92     {
93         throw;
94     }
95     catch (...)
96     {
97         ThrowXMLwithMemMgr1(InvalidDatatypeValueException
98                 , XMLExcepts::VALUE_URI_Malformed
99                 , content
100                 , manager);
101     }
102 
103     if (!validURI) {
104         ThrowXMLwithMemMgr1(InvalidDatatypeValueException
105                     , XMLExcepts::VALUE_URI_Malformed
106                     , content
107                     , manager);
108     }
109 }
110 
111 /***
112  * To encode special characters in anyURI, by using %HH to represent
113  * special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
114  * and non-ASCII characters (whose value >= 128).
115  ***/
encode(const XMLCh * const content,const XMLSize_t len,XMLBuffer & encoded,MemoryManager * const manager)116 void AnyURIDatatypeValidator::encode(const XMLCh* const content, const XMLSize_t len, XMLBuffer& encoded, MemoryManager* const manager)
117 {
118     static const bool needEscapeMap[] = {
119         true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , /* 0x00 to 0x0F need escape */
120         true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , /* 0x10 to 0x1F need escape */
121         true , false, true , false, false, false, false, false, false, false, false, false, false, false, false, false, /* 0x20:' ', 0x22:'"' */
122         false, false, false, false, false, false, false, false, false, false, false, false, true , false, true , false, /* 0x3C:'<', 0x3E:'>' */
123         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
124         false, false, false, false, false, false, false, false, false, false, false, false, true , false, true , false, /* 0x5C:'\\', 0x5E:'^' */
125         true , false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, /* 0x60:'`' */
126         false, false, false, false, false, false, false, false, false, false, false, true , true , true , true , true   /* 0x7B:'{', 0x7C:'|', 0x7D:'}', 0x7E:'~', 0x7F:DEL */
127     };
128 
129     // For each character in content
130     XMLSize_t i;
131     for (i = 0; i < len; i++)
132     {
133         int ch = (int)content[i];
134         // If it's not an ASCII character, break here, and use UTF-8 encoding
135         if (ch >= 128)
136             break;
137 
138         if (needEscapeMap[ch])
139         {
140             char tempStr[3] = "\0";
141             sprintf(tempStr, "%02X", ch);
142             encoded.append('%');
143             encoded.append((XMLCh)tempStr[0]);
144             encoded.append((XMLCh)tempStr[1]);
145         }
146         else
147         {
148             encoded.append((XMLCh)ch);
149         }
150     }
151 
152     // we saw some non-ascii character
153     if (i < len) {
154         // get UTF-8 bytes for the remaining sub-string
155         const XMLCh* remContent = (XMLCh*)&content[i];
156         const XMLSize_t remContentLen = len - i;
157         XMLByte* UTF8Byte = (XMLByte*)manager->allocate((remContentLen*4+1) * sizeof(XMLByte));
158         XMLSize_t charsEaten;
159 
160         XMLUTF8Transcoder transcoder(XMLUni::fgUTF8EncodingString, remContentLen*4+1, manager);
161         XMLSize_t utf8Len = transcoder.transcodeTo(remContent, remContentLen, UTF8Byte, remContentLen*4, charsEaten, XMLTranscoder::UnRep_RepChar);
162         assert(charsEaten == remContentLen);
163 
164         XMLSize_t j;
165         for (j = 0; j < utf8Len; j++) {
166             XMLByte b = UTF8Byte[j];
167             if (b >= 128 || needEscapeMap[b])
168             {
169                 char tempStr[3] = "\0";
170                 sprintf(tempStr, "%02X", b);
171                 encoded.append('%');
172                 encoded.append((XMLCh)tempStr[0]);
173                 encoded.append((XMLCh)tempStr[1]);
174             }
175             else
176             {
177                 encoded.append((XMLCh)b);
178             }
179         }
180         manager->deallocate(UTF8Byte);
181     }
182 }
183 
184 /***
185  * Support for Serialization/De-serialization
186  ***/
187 
IMPL_XSERIALIZABLE_TOCREATE(AnyURIDatatypeValidator)188 IMPL_XSERIALIZABLE_TOCREATE(AnyURIDatatypeValidator)
189 
190 void AnyURIDatatypeValidator::serialize(XSerializeEngine& serEng)
191 {
192     AbstractStringValidator::serialize(serEng);
193 }
194 
195 XERCES_CPP_NAMESPACE_END
196 
197 /**
198   * End of file AnyURIDatatypeValidator.cpp
199   */
200