1 /*
2  * reserved comment block
3  * DO NOT REMOVE OR ALTER!
4  */
5 /*
6  * Copyright 2001-2004 The Apache Software Foundation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xerces.internal.impl.xs.models;
22 
23 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode;
24 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols;
25 import com.sun.org.apache.xerces.internal.impl.xs.XSComplexTypeDecl;
26 import com.sun.org.apache.xerces.internal.impl.xs.XSDeclarationPool;
27 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl;
28 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl;
29 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl;
30 
31 /**
32  * This class constructs content models for a given grammar.
33  *
34  * @xerces.internal
35  *
36  * @author Elena Litani, IBM
37  * @author Sandy Gao, IBM
38  *
39  * @version $Id: CMBuilder.java,v 1.11 2010/08/06 23:49:43 joehw Exp $
40  */
41 public class CMBuilder {
42 
43     // REVISIT: should update the decl pool to cache XSCM objects too
44     private XSDeclarationPool fDeclPool = null;
45 
46     // It never changes, so a static member is good enough
47     private static XSEmptyCM fEmptyCM = new XSEmptyCM();
48 
49     // needed for DFA construction
50     private int fLeafCount;
51     // needed for UPA
52     private int fParticleCount;
53     //Factory to create Bin, Uni, Leaf nodes
54     private CMNodeFactory fNodeFactory ;
55 
CMBuilder(CMNodeFactory nodeFactory)56     public CMBuilder(CMNodeFactory nodeFactory) {
57         fDeclPool = null;
58         fNodeFactory = nodeFactory ;
59     }
60 
setDeclPool(XSDeclarationPool declPool)61     public void setDeclPool(XSDeclarationPool declPool) {
62         fDeclPool = declPool;
63     }
64 
65     /**
66      * Get content model for the a given type
67      *
68      * @param typeDecl  get content model for which complex type
69      * @return          a content model validator
70      */
getContentModel(XSComplexTypeDecl typeDecl)71     public XSCMValidator getContentModel(XSComplexTypeDecl typeDecl) {
72 
73         // for complex type with empty or simple content,
74         // there is no content model validator
75         short contentType = typeDecl.getContentType();
76         if (contentType == XSComplexTypeDecl.CONTENTTYPE_SIMPLE ||
77             contentType == XSComplexTypeDecl.CONTENTTYPE_EMPTY) {
78             return null;
79         }
80 
81         XSParticleDecl particle = (XSParticleDecl)typeDecl.getParticle();
82 
83         // if the content is element only or mixed, but no particle
84         // is defined, return the empty content model
85         if (particle == null)
86             return fEmptyCM;
87 
88         // if the content model contains "all" model group,
89         // we create an "all" content model, otherwise a DFA content model
90         XSCMValidator cmValidator = null;
91         if (particle.fType == XSParticleDecl.PARTICLE_MODELGROUP &&
92             ((XSModelGroupImpl)particle.fValue).fCompositor == XSModelGroupImpl.MODELGROUP_ALL) {
93             cmValidator = createAllCM(particle);
94         }
95         else {
96             cmValidator = createDFACM(particle);
97         }
98 
99         //now we are throught building content model and have passed sucessfully of the nodecount check
100         //if set by the application
101         fNodeFactory.resetNodeCount() ;
102 
103         // if the validator returned is null, it means there is nothing in
104         // the content model, so we return the empty content model.
105         if (cmValidator == null)
106             cmValidator = fEmptyCM;
107 
108         return cmValidator;
109     }
110 
createAllCM(XSParticleDecl particle)111     XSCMValidator createAllCM(XSParticleDecl particle) {
112         if (particle.fMaxOccurs == 0)
113             return null;
114 
115         // get the model group, and add all children of it to the content model
116         XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
117         // create an all content model. the parameter indicates whether
118         // the <all> itself is optional
119         XSAllCM allContent = new XSAllCM(particle.fMinOccurs == 0, group.fParticleCount);
120         for (int i = 0; i < group.fParticleCount; i++) {
121             // add the element decl to the all content model
122             allContent.addElement((XSElementDecl)group.fParticles[i].fValue,
123             group.fParticles[i].fMinOccurs == 0);
124         }
125         return allContent;
126     }
127 
createDFACM(XSParticleDecl particle)128     XSCMValidator createDFACM(XSParticleDecl particle) {
129         fLeafCount = 0;
130         fParticleCount = 0;
131         // convert particle tree to CM tree
132         CMNode node = useRepeatingLeafNodes(particle) ? buildCompactSyntaxTree(particle) : buildSyntaxTree(particle, true);
133         if (node == null)
134             return null;
135         // build DFA content model from the CM tree
136         return new XSDFACM(node, fLeafCount);
137     }
138 
139     // 1. convert particle tree to CM tree:
140     // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+
141     //                                  a{n, m} -> a, a, ..., a?, a?, ...
142     // 3. convert model groups (a, b, c, ...) or (a | b | c | ...) to
143     //    binary tree: (((a,b),c),...) or (((a|b)|c)|...)
144     // 4. make sure each leaf node (XSCMLeaf) has a distinct position
buildSyntaxTree(XSParticleDecl particle, boolean optimize)145     private CMNode buildSyntaxTree(XSParticleDecl particle, boolean optimize) {
146 
147         int maxOccurs = particle.fMaxOccurs;
148         int minOccurs = particle.fMinOccurs;
149         short type = particle.fType;
150         CMNode nodeRet = null;
151 
152         if ((type == XSParticleDecl.PARTICLE_WILDCARD) ||
153             (type == XSParticleDecl.PARTICLE_ELEMENT)) {
154             // (task 1) element and wildcard particles should be converted to
155             // leaf nodes
156             // REVISIT: Make a clone of the leaf particle, so that if there
157             // are two references to the same group, we have two different
158             // leaf particles for the same element or wildcard decl.
159             // This is useful for checking UPA.
160             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
161             // (task 2) expand occurrence values
162             nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, optimize);
163         }
164         else if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
165             // (task 1,3) convert model groups to binary trees
166             XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
167             CMNode temp = null;
168             // when the model group is a choice of more than one particles, but
169             // only one of the particle is not empty, (for example
170             // <choice>
171             //   <sequence/>
172             //   <element name="e"/>
173             // </choice>
174             // ) we can't not return that one particle ("e"). instead, we should
175             // treat such particle as optional ("e?").
176             // the following boolean variable is true when there are at least
177             // 2 non-empty children.
178             boolean twoChildren = false;
179             for (int i = 0; i < group.fParticleCount; i++) {
180                 // first convert each child to a CM tree
181                 temp = buildSyntaxTree(group.fParticles[i],
182                         optimize &&
183                         minOccurs == 1 && maxOccurs == 1 &&
184                         (group.fCompositor == XSModelGroupImpl.MODELGROUP_SEQUENCE ||
185                          group.fParticleCount == 1));
186                 // then combine them using binary operation
187                 if (temp != null) {
188                     if (nodeRet == null) {
189                         nodeRet = temp;
190                     }
191                     else {
192                         nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp);
193                         // record the fact that there are at least 2 children
194                         twoChildren = true;
195                     }
196                 }
197             }
198             // (task 2) expand occurrence values
199             if (nodeRet != null) {
200                 // when the group is "choice", there is only one non-empty
201                 // child, and the group had more than one children, we need
202                 // to create a zero-or-one (optional) node for the non-empty
203                 // particle.
204                 if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE &&
205                     !twoChildren && group.fParticleCount > 1) {
206                     nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
207                 }
208                 nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, false);
209             }
210         }
211 
212         return nodeRet;
213     }
214 
215     // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+
216     //                                  a{n, m} -> a, a, ..., a?, a?, ...
217     // 4. make sure each leaf node (XSCMLeaf) has a distinct position
expandContentModel(CMNode node, int minOccurs, int maxOccurs, boolean optimize)218     private CMNode expandContentModel(CMNode node,
219                                       int minOccurs, int maxOccurs, boolean optimize) {
220 
221         CMNode nodeRet = null;
222 
223         if (minOccurs==1 && maxOccurs==1) {
224             nodeRet = node;
225         }
226         else if (minOccurs==0 && maxOccurs==1) {
227             //zero or one
228             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node);
229         }
230         else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
231             //zero or more
232             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, node);
233         }
234         else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
235             //one or more
236             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
237         }
238         else if (optimize && node.type() == XSParticleDecl.PARTICLE_ELEMENT ||
239                  node.type() == XSParticleDecl.PARTICLE_WILDCARD) {
240             // Only for elements and wildcards, subsume e{n,m} and e{n,unbounded} to e*
241             // or e+ and, once the DFA reaches a final state, check if the actual number
242             // of elements is between minOccurs and maxOccurs. This new algorithm runs
243             // in constant space.
244 
245             // TODO: What is the impact of this optimization on the PSVI?
246             nodeRet = fNodeFactory.getCMUniOpNode(
247                     minOccurs == 0 ? XSParticleDecl.PARTICLE_ZERO_OR_MORE
248                         : XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
249             nodeRet.setUserData(new int[] { minOccurs, maxOccurs });
250         }
251         else if (maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
252             // => a,a,..,a+
253             // create a+ node first, then put minOccurs-1 a's in front of it
254             // for the first time "node" is used, we don't need to make a copy
255             // and for other references to node, we make copies
256             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
257             // (task 4) we need to call copyNode here, so that we append
258             // an entire new copy of the node (a subtree). this is to ensure
259             // all leaf nodes have distinct position
260             // we know that minOccurs > 1
261             nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
262                                                   multiNodes(node, minOccurs-1, true), nodeRet);
263         }
264         else {
265             // {n,m} => a,a,a,...(a),(a),...
266             // first n a's, then m-n a?'s.
267             // copyNode is called, for the same reason as above
268             if (minOccurs > 0) {
269                 nodeRet = multiNodes(node, minOccurs, false);
270             }
271             if (maxOccurs > minOccurs) {
272                 node = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node);
273                 if (nodeRet == null) {
274                     nodeRet = multiNodes(node, maxOccurs-minOccurs, false);
275                 }
276                 else {
277                     nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
278                                                           nodeRet, multiNodes(node, maxOccurs-minOccurs, true));
279                 }
280             }
281         }
282 
283         return nodeRet;
284     }
285 
multiNodes(CMNode node, int num, boolean copyFirst)286     private CMNode multiNodes(CMNode node, int num, boolean copyFirst) {
287         if (num == 0) {
288             return null;
289         }
290         if (num == 1) {
291             return copyFirst ? copyNode(node) : node;
292         }
293         int num1 = num/2;
294         return fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
295                                            multiNodes(node, num1, copyFirst),
296                                            multiNodes(node, num-num1, true));
297     }
298 
299     // 4. make sure each leaf node (XSCMLeaf) has a distinct position
copyNode(CMNode node)300     private CMNode copyNode(CMNode node) {
301         int type = node.type();
302         // for choice or sequence, copy the two subtrees, and combine them
303         if (type == XSModelGroupImpl.MODELGROUP_CHOICE ||
304             type == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
305             XSCMBinOp bin = (XSCMBinOp)node;
306             node = fNodeFactory.getCMBinOpNode(type, copyNode(bin.getLeft()),
307                                  copyNode(bin.getRight()));
308         }
309         // for ?+*, copy the subtree, and put it in a new ?+* node
310         else if (type == XSParticleDecl.PARTICLE_ZERO_OR_MORE ||
311                  type == XSParticleDecl.PARTICLE_ONE_OR_MORE ||
312                  type == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
313             XSCMUniOp uni = (XSCMUniOp)node;
314             node = fNodeFactory.getCMUniOpNode(type, copyNode(uni.getChild()));
315         }
316         // for element/wildcard (leaf), make a new leaf node,
317         // with a distinct position
318         else if (type == XSParticleDecl.PARTICLE_ELEMENT ||
319                  type == XSParticleDecl.PARTICLE_WILDCARD) {
320             XSCMLeaf leaf = (XSCMLeaf)node;
321             node = fNodeFactory.getCMLeafNode(leaf.type(), leaf.getLeaf(), leaf.getParticleId(), fLeafCount++);
322         }
323 
324         return node;
325     }
326 
327     // A special version of buildSyntaxTree() which builds a compact syntax tree
328     // containing compound leaf nodes which carry occurence information. This method
329     // for building the syntax tree is chosen over buildSyntaxTree() when
330     // useRepeatingLeafNodes() returns true.
buildCompactSyntaxTree(XSParticleDecl particle)331     private CMNode buildCompactSyntaxTree(XSParticleDecl particle) {
332         int maxOccurs = particle.fMaxOccurs;
333         int minOccurs = particle.fMinOccurs;
334         short type = particle.fType;
335         CMNode nodeRet = null;
336 
337         if ((type == XSParticleDecl.PARTICLE_WILDCARD) ||
338             (type == XSParticleDecl.PARTICLE_ELEMENT)) {
339             return buildCompactSyntaxTree2(particle, minOccurs, maxOccurs);
340         }
341         else if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
342             XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
343             if (group.fParticleCount == 1 && (minOccurs != 1 || maxOccurs != 1)) {
344                 return buildCompactSyntaxTree2(group.fParticles[0], minOccurs, maxOccurs);
345             }
346             else {
347                 CMNode temp = null;
348 
349                 // when the model group is a choice of more than one particles, but
350                 // only one of the particle is not empty, (for example
351                 // <choice>
352                 //   <sequence/>
353                 //   <element name="e"/>
354                 // </choice>
355                 // ) we can't not return that one particle ("e"). instead, we should
356                 // treat such particle as optional ("e?").
357                 // the following int variable keeps track of the number of non-empty children
358                 int count = 0;
359                 for (int i = 0; i < group.fParticleCount; i++) {
360                     // first convert each child to a CM tree
361                     temp = buildCompactSyntaxTree(group.fParticles[i]);
362                     // then combine them using binary operation
363                     if (temp != null) {
364                         ++count;
365                         if (nodeRet == null) {
366                             nodeRet = temp;
367                         }
368                         else {
369                             nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp);
370                         }
371                     }
372                 }
373                 if (nodeRet != null) {
374                     // when the group is "choice" and the group has one or more empty children,
375                     // we need to create a zero-or-one (optional) node for the non-empty particles.
376                     if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE && count < group.fParticleCount) {
377                         nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
378                     }
379                 }
380             }
381         }
382         return nodeRet;
383     }
384 
buildCompactSyntaxTree2(XSParticleDecl particle, int minOccurs, int maxOccurs)385     private CMNode buildCompactSyntaxTree2(XSParticleDecl particle, int minOccurs, int maxOccurs) {
386         // Convert element and wildcard particles to leaf nodes. Wrap repeating particles in a CMUniOpNode.
387         CMNode nodeRet = null;
388         if (minOccurs == 1 && maxOccurs == 1) {
389             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
390         }
391         else if (minOccurs == 0 && maxOccurs == 1) {
392             // zero or one
393             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
394             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
395         }
396         else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
397             // zero or more
398             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
399             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet);
400         }
401         else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
402             // one or more
403             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
404             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet);
405         }
406         else {
407             // {n,m}: Instead of expanding this out, create a compound leaf node which carries the
408             // occurence information and wrap it in the appropriate CMUniOpNode.
409             nodeRet = fNodeFactory.getCMRepeatingLeafNode(particle.fType, particle.fValue, minOccurs, maxOccurs, fParticleCount++, fLeafCount++);
410             if (minOccurs == 0) {
411                 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet);
412             }
413             else {
414                 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet);
415             }
416         }
417         return nodeRet;
418     }
419 
420     // This method checks if this particle can be transformed into a compact syntax
421     // tree containing compound leaf nodes which carry occurence information. Currently
422     // it returns true if each model group has minOccurs/maxOccurs == 1 or
423     // contains only one element/wildcard particle with minOccurs/maxOccurs == 1.
useRepeatingLeafNodes(XSParticleDecl particle)424     private boolean useRepeatingLeafNodes(XSParticleDecl particle) {
425         int maxOccurs = particle.fMaxOccurs;
426         int minOccurs = particle.fMinOccurs;
427         short type = particle.fType;
428 
429         if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
430             XSModelGroupImpl group = (XSModelGroupImpl) particle.fValue;
431             if (minOccurs != 1 || maxOccurs != 1) {
432                 if (group.fParticleCount == 1) {
433                     XSParticleDecl particle2 = (XSParticleDecl) group.fParticles[0];
434                     short type2 = particle2.fType;
435                     return ((type2 == XSParticleDecl.PARTICLE_ELEMENT ||
436                             type2 == XSParticleDecl.PARTICLE_WILDCARD) &&
437                             particle2.fMinOccurs == 1 &&
438                             particle2.fMaxOccurs == 1);
439                 }
440                 return (group.fParticleCount == 0);
441             }
442             for (int i = 0; i < group.fParticleCount; ++i) {
443                 if (!useRepeatingLeafNodes(group.fParticles[i])) {
444                     return false;
445                 }
446             }
447         }
448         return true;
449     }
450 }
451