1 /*
2  * reserved comment block
3  * DO NOT REMOVE OR ALTER!
4  */
5 /*
6  * Licensed to the Apache Software Foundation (ASF) under one or more
7  * contributor license agreements.  See the NOTICE file distributed with
8  * this work for additional information regarding copyright ownership.
9  * The ASF licenses this file to You under the Apache License, Version 2.0
10  * (the "License"); you may not use this file except in compliance with
11  * the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  */
21 
22 package com.sun.org.apache.xerces.internal.impl.dv.xs;
23 
24 import com.sun.org.apache.xerces.internal.impl.dv.InvalidDatatypeValueException;
25 import com.sun.org.apache.xerces.internal.util.URI;
26 import com.sun.org.apache.xerces.internal.impl.dv.ValidationContext;
27 
28 /**
 * Represents the schema type "anyURI".
30  *
31  * @xerces.internal
32  *
33  * @author Neeraj Bajaj, Sun Microsystems, inc.
34  * @author Sandy Gao, IBM
35  *
36  */
37 public class AnyURIDV extends TypeValidator {
38 
39     private static final URI BASE_URI;
40     static {
41         URI uri = null;
42         try {
43             uri = new URI("abc://def.ghi.jkl");
44         } catch (URI.MalformedURIException ex) {
45         }
46         BASE_URI = uri;
47     }
48 
getAllowedFacets()49     public short getAllowedFacets(){
50         return (XSSimpleTypeDecl.FACET_LENGTH | XSSimpleTypeDecl.FACET_MINLENGTH | XSSimpleTypeDecl.FACET_MAXLENGTH | XSSimpleTypeDecl.FACET_PATTERN | XSSimpleTypeDecl.FACET_ENUMERATION | XSSimpleTypeDecl.FACET_WHITESPACE );
51     }
52 
53     // before we return string we have to make sure it is correct URI as per spec.
54     // for some types (string and derived), they just return the string itself
getActualValue(String content, ValidationContext context)55     public Object getActualValue(String content, ValidationContext context) throws InvalidDatatypeValueException {
56         // check 3.2.17.c0 must: URI (rfc 2396/2723)
57         try {
58             if( content.length() != 0 ) {
59                 // encode special characters using XLink 5.4 algorithm
60                 final String encoded = encode(content);
61                 // Support for relative URLs
62                 // According to Java 1.1: URLs may also be specified with a
63                 // String and the URL object that it is related to.
64                 new URI(BASE_URI, encoded );
65             }
66         } catch (URI.MalformedURIException ex) {
67             throw new InvalidDatatypeValueException("cvc-datatype-valid.1.2.1", new Object[]{content, "anyURI"});
68         }
69 
70         // REVISIT: do we need to return the new URI object?
71         return content;
72     }
73 
74     // which ASCII characters need to be escaped
75     private static boolean gNeedEscaping[] = new boolean[128];
76     // the first hex character if a character needs to be escaped
77     private static char gAfterEscaping1[] = new char[128];
78     // the second hex character if a character needs to be escaped
79     private static char gAfterEscaping2[] = new char[128];
80     private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
81                                      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
82     // initialize the above 3 arrays
83     static {
84         for (int i = 0; i <= 0x1f; i++) {
85             gNeedEscaping[i] = true;
86             gAfterEscaping1[i] = gHexChs[i >> 4];
87             gAfterEscaping2[i] = gHexChs[i & 0xf];
88         }
89         gNeedEscaping[0x7f] = true;
90         gAfterEscaping1[0x7f] = '7';
91         gAfterEscaping2[0x7f] = 'F';
92         char[] escChs = {' ', '<', '>', '"', '{', '}',
93                          '|', '\\', '^', '~', '`'};
94         int len = escChs.length;
95         char ch;
96         for (int i = 0; i < len; i++) {
97             ch = escChs[i];
98             gNeedEscaping[ch] = true;
99             gAfterEscaping1[ch] = gHexChs[ch >> 4];
100             gAfterEscaping2[ch] = gHexChs[ch & 0xf];
101         }
102     }
103 
104     // To encode special characters in anyURI, by using %HH to represent
105     // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
106     // and non-ASCII characters (whose value >= 128).
encode(String anyURI)107     private static String encode(String anyURI){
108         int len = anyURI.length(), ch;
109         StringBuffer buffer = new StringBuffer(len*3);
110 
111         // for each character in the anyURI
112         int i = 0;
113         for (; i < len; i++) {
114             ch = anyURI.charAt(i);
115             // if it's not an ASCII character, break here, and use UTF-8 encoding
116             if (ch >= 128)
117                 break;
118             if (gNeedEscaping[ch]) {
119                 buffer.append('%');
120                 buffer.append(gAfterEscaping1[ch]);
121                 buffer.append(gAfterEscaping2[ch]);
122             }
123             else {
124                 buffer.append((char)ch);
125             }
126         }
127 
128         // we saw some non-ascii character
129         if (i < len) {
130             // get UTF-8 bytes for the remaining sub-string
131             byte[] bytes = null;
132             byte b;
133             try {
134                 bytes = anyURI.substring(i).getBytes("UTF-8");
135             } catch (java.io.UnsupportedEncodingException e) {
136                 // should never happen
137                 return anyURI;
138             }
139             len = bytes.length;
140 
141             // for each byte
142             for (i = 0; i < len; i++) {
143                 b = bytes[i];
144                 // for non-ascii character: make it positive, then escape
145                 if (b < 0) {
146                     ch = b + 256;
147                     buffer.append('%');
148                     buffer.append(gHexChs[ch >> 4]);
149                     buffer.append(gHexChs[ch & 0xf]);
150                 }
151                 else if (gNeedEscaping[b]) {
152                     buffer.append('%');
153                     buffer.append(gAfterEscaping1[b]);
154                     buffer.append(gAfterEscaping2[b]);
155                 }
156                 else {
157                     buffer.append((char)b);
158                 }
159             }
160         }
161 
162         // If encoding happened, create a new string;
163         // otherwise, return the orginal one.
164         if (buffer.length() != len) {
165             return buffer.toString();
166         }
167         else {
168             return anyURI;
169         }
170     }
171 
172 } // class AnyURIDV
173