1 //
2 // RelaxngInference.cs
3 //
4 // Author:
5 //	Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // (C) 2005 Novell Inc.
8 //
9 
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining
12 // a copy of this software and associated documentation files (the
13 // "Software"), to deal in the Software without restriction, including
14 // without limitation the rights to use, copy, modify, merge, publish,
15 // distribute, sublicense, and/or sell copies of the Software, and to
16 // permit persons to whom the Software is furnished to do so, subject to
17 // the following conditions:
18 //
19 // The above copyright notice and this permission notice shall be
20 // included in all copies or substantial portions of the Software.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 
31 using System;
32 using System.Collections;
33 using System.Xml;
34 using System.Xml.Schema;
35 using Commons.Xml.Relaxng;
36 
37 using QName = System.Xml.XmlQualifiedName;
38 
39 
40 namespace Commons.Xml.Relaxng.Inference
41 {
42 	public class RelaxngInference
43 	{
44 		public enum InferenceOption {
45 			Restricted,
46 			Relaxed,
47 		}
48 
49 		InferenceOption occurrence = InferenceOption.Restricted;
50 		InferenceOption typeInference = InferenceOption.Restricted;
51 
RelaxngInference()52 		public RelaxngInference ()
53 		{
54 		}
55 
56 		public InferenceOption Occurrence {
57 			get { return occurrence; }
58 			set { occurrence = value; }
59 		}
60 
61 		public InferenceOption TypeInference {
62 			get { return typeInference; }
63 			set { typeInference = value; }
64 		}
65 
InferSchema(XmlReader xmlReader)66 		public RelaxngGrammar InferSchema (XmlReader xmlReader)
67 		{
68 			return InferSchema (xmlReader, new RelaxngGrammar ());
69 		}
70 
InferSchema(XmlReader xmlReader, RelaxngGrammar grammar)71 		public RelaxngGrammar InferSchema (XmlReader xmlReader,
72 			RelaxngGrammar grammar)
73 		{
74 			return RngInference.Process (xmlReader, grammar,
75 				occurrence == InferenceOption.Relaxed,
76 				typeInference == InferenceOption.Relaxed);
77 		}
78 	}
79 
80 	class RngInference
81 	{
Process(XmlReader xmlReader, RelaxngGrammar grammar, bool laxOccurence, bool laxTypeInference)82 		public static RelaxngGrammar Process (XmlReader xmlReader,
83 			RelaxngGrammar grammar,
84 			bool laxOccurence,
85 			bool laxTypeInference)
86 		{
87 			RngInference impl = new RngInference (xmlReader,
88 				grammar, laxOccurence, laxTypeInference);
89 			impl.Run ();
90 			return impl.grammar;
91 		}
92 
93 		public const string NamespaceXml =
94 			"http://www.w3.org/XML/1998/namespace";
95 
96 		public const string NamespaceXmlns =
97 			"http://www.w3.org/2000/xmlns/";
98 
99 		public const string NamespaceXmlSchemaDatatypes =
100 			"http://www.w3.org/2001/XMLSchema-datatypes";
101 
102 		public const string XdtNamespace =
103 			"http://www.w3.org/2003/11/xpath-datatypes";
104 
105 		public const string NamespaceXmlSchema =
106 			System.Xml.Schema.XmlSchema.Namespace;
107 
108 		static readonly QName QNameString = new QName (
109 			"string", NamespaceXmlSchema);
110 
111 		static readonly QName QNameBoolean = new QName (
112 			"boolean", NamespaceXmlSchema);
113 
114 		static readonly QName QNameAnyType = new QName (
115 			"anyType", NamespaceXmlSchema);
116 
117 		static readonly QName QNameByte = new QName (
118 			"byte", NamespaceXmlSchema);
119 
120 		static readonly QName QNameUByte = new QName (
121 			"unsignedByte", NamespaceXmlSchema);
122 
123 		static readonly QName QNameShort = new QName (
124 			"short", NamespaceXmlSchema);
125 
126 		static readonly QName QNameUShort = new QName (
127 			"unsignedShort", NamespaceXmlSchema);
128 
129 		static readonly QName QNameInt = new QName (
130 			"int", NamespaceXmlSchema);
131 
132 		static readonly QName QNameUInt = new QName (
133 			"unsignedInt", NamespaceXmlSchema);
134 
135 		static readonly QName QNameLong = new QName (
136 			"long", NamespaceXmlSchema);
137 
138 		static readonly QName QNameULong = new QName (
139 			"unsignedLong", NamespaceXmlSchema);
140 
141 		static readonly QName QNameDecimal = new QName (
142 			"decimal", NamespaceXmlSchema);
143 
144 		static readonly QName QNameUDecimal = new QName (
145 			"unsignedDecimal", NamespaceXmlSchema);
146 
147 		static readonly QName QNameDouble = new QName (
148 			"double", NamespaceXmlSchema);
149 
150 		static readonly QName QNameFloat = new QName (
151 			"float", NamespaceXmlSchema);
152 
153 		static readonly QName QNameDateTime = new QName (
154 			"dateTime", NamespaceXmlSchema);
155 
156 		static readonly QName QNameDuration = new QName (
157 			"duration", NamespaceXmlSchema);
158 
159 		XmlReader source;
160 		RelaxngGrammar grammar;
161 		bool laxOccurence;
162 		bool laxTypeInference;
163 
164 		Hashtable elements = new Hashtable ();
165 		Hashtable attributes = new Hashtable ();
166 		XmlNamespaceManager nsmgr;
167 
RngInference(XmlReader xmlReader, RelaxngGrammar grammar, bool laxOccurence, bool laxTypeInference)168 		private RngInference (XmlReader xmlReader,
169 			RelaxngGrammar grammar,
170 			bool laxOccurence,
171 			bool laxTypeInference)
172 		{
173 			this.source = xmlReader;
174 			this.grammar = grammar;
175 			this.laxOccurence = laxOccurence;
176 			this.laxTypeInference = laxTypeInference;
177 			nsmgr = new XmlNamespaceManager (source.NameTable);
178 
179 			foreach (RelaxngDefine def in grammar.Defines) {
180 				if (def.Patterns.Count != 1)
181 					continue;
182 				RelaxngElement e = def.Patterns [0] as RelaxngElement;
183 				RelaxngAttribute a = def.Patterns [0] as RelaxngAttribute;
184 				if (e == null && a == null)
185 					continue;
186 				RelaxngName rn = e != null ?
187 					e.NameClass as RelaxngName :
188 					a.NameClass as RelaxngName;
189 				if (rn == null)
190 					continue;
191 				QName qname = new QName (rn.LocalName,
192 					rn.Namespace);
193 				if (e != null)
194 					elements.Add (qname, def);
195 				else
196 					attributes.Add (qname, def);
197 			}
198 		}
199 
Run()200 		private void Run ()
201 		{
202 			// move to top-level element
203 			source.MoveToContent ();
204 			int depth = source.Depth;
205 			if (source.NodeType != XmlNodeType.Element)
206 				throw new ArgumentException ("Argument XmlReader content is expected to be an element.");
207 
208 			QName qname = new QName (source.LocalName,
209 				source.NamespaceURI);
210 			RelaxngDefine el = GetGlobalElement (qname);
211 			if (el == null) {
212 				el = CreateGlobalElement (qname);
213 				InferElement (el, true);
214 			}
215 			else
216 				InferElement (el, false);
217 			RelaxngStart start = new RelaxngStart ();
218 			start.Combine = "choice";
219 			RelaxngRef topRef = new RelaxngRef ();
220 			topRef.Name = el.Name;
221 			start.Pattern = topRef;
222 			grammar.Starts.Add (start);
223 		}
224 
InferElement(RelaxngRef r, bool isNew)225 		private void InferElement (RelaxngRef r, bool isNew)
226 		{
227 			RelaxngDefine body = GetDefine (r.Name);
228 			InferElement (body, isNew);
229 		}
230 
InferElement(RelaxngDefine el, bool isNew)231 		private void InferElement (RelaxngDefine el, bool isNew)
232 		{
233 			RelaxngElement ct = (RelaxngElement) el.Patterns [0];
234 
235 			// Attributes
236 			if (source.MoveToFirstAttribute ()) {
237 				InferAttributes (ct, isNew);
238 				source.MoveToElement ();
239 			}
240 
241 			// Content
242 			if (source.IsEmptyElement) {
243 				InferAsEmptyElement (ct, isNew);
244 				source.Read ();
245 				source.MoveToContent ();
246 			}
247 			else {
248 				InferContent (ct, isNew);
249 				source.ReadEndElement ();
250 			}
251 			if (GetElementContent (ct) == null)
252 				el.Patterns.Add (new RelaxngEmpty ());
253 		}
254 
255 		#region Attribute Inference
256 
257 		// get attribute definition table.
CollectAttrTable(RelaxngInterleave attList)258 		private Hashtable CollectAttrTable (RelaxngInterleave attList)
259 		{
260 			Hashtable table = new Hashtable ();
261 			if (attList == null)
262 				return table;
263 			foreach (RelaxngPattern p in attList.Patterns) {
264 				RelaxngAttribute a = p as RelaxngAttribute;
265 				if (a == null)
266 					a = (RelaxngAttribute)
267 						((RelaxngOptional) p)
268 						.Patterns [0];
269 				RelaxngName rn = a.NameClass as RelaxngName;
270 				table.Add (new QName (
271 					rn.LocalName, rn.Namespace),
272 					a);
273 			}
274 			return table;
275 		}
276 
InferAttributes(RelaxngElement ct, bool isNew)277 		private void InferAttributes (RelaxngElement ct, bool isNew)
278 		{
279 			RelaxngInterleave attList = null;
280 			Hashtable table = null;
281 
282 			do {
283 				if (source.NamespaceURI == NamespaceXmlns)
284 					continue;
285 
286 				if (table == null) {
287 					attList = GetAttributes (ct);
288 					table = CollectAttrTable (attList);
289 				}
290 				QName attrName = new QName (
291 					source.LocalName, source.NamespaceURI);
292 				RelaxngPattern attr = table [attrName]
293 					as RelaxngPattern;
294 				if (attr == null) {
295 					if (attList == null) {
296 						attList = new RelaxngInterleave ();
297 						ct.Patterns.Insert (0, attList);
298 					}
299 					attList.Patterns.Add (
300 						InferNewAttribute (
301 						attrName, isNew));
302 				} else {
303 					table.Remove (attrName);
304 					if (attrName.Namespace.Length > 0) {
305 						RelaxngDefine ga = GetGlobalAttribute (attrName);
306 						InferMergedAttribute (
307 							ga.Patterns [0]);
308 					}
309 					else
310 						InferMergedAttribute (attr);
311 				}
312 			} while (source.MoveToNextAttribute ());
313 
314 			// mark all attr definitions that did not appear
315 			// as optional.
316 			if (table != null) {
317 				foreach (RelaxngPattern attr in table.Values) {
318 					if (attr is RelaxngOptional)
319 						continue;
320 					attList.Patterns.Remove (attr);
321 					RelaxngOptional opt = new RelaxngOptional ();
322 					opt.Patterns.Add (attr);
323 					attList.Patterns.Add (opt);
324 				}
325 			}
326 		}
327 
328 		// It returns RelaxngAttribute for local attribute, and
329 		// RelaxngRef for global attribute.
InferNewAttribute( QName attrName, bool isNewTypeDefinition)330 		private RelaxngPattern InferNewAttribute (
331 			QName attrName, bool isNewTypeDefinition)
332 		{
333 			RelaxngPattern p = null;
334 			bool mergedRequired = false;
335 			if (attrName.Namespace.Length > 0) {
336 				// global attribute; might be already defined.
337 				// (Actually RELAX NG has no concept of "global
338 				// attributes" but it is still useful to
339 				// represent attributes in global scope.
340 				RelaxngDefine attr = GetGlobalAttribute (
341 					attrName);
342 				if (attr == null) {
343 					attr = CreateGlobalAttribute (attrName);
344 					attr.Patterns.Add (CreateSimplePattern (
345 						InferSimpleType (source.Value)));
346 				} else {
347 					RelaxngAttribute a = attr.Patterns [0] as RelaxngAttribute;
348 					if (a != null)
349 						mergedRequired = true;
350 					else {
351 						RelaxngOptional opt =
352 							(RelaxngOptional) attr.Patterns [0];
353 						a = (RelaxngAttribute) opt.Patterns [0];
354 					}
355 					InferMergedAttribute (a);
356 				}
357 				RelaxngRef r = new RelaxngRef ();
358 				r.Name = attr.Name;
359 				p = r;
360 			} else {
361 				// local attribute
362 				RelaxngAttribute a = new RelaxngAttribute ();
363 				a.NameClass = new RelaxngName (
364 					attrName.Name, attrName.Namespace);
365 				a.Pattern = CreateSimplePattern (
366 					InferSimpleType (source.Value));
367 				p = a;
368 			}
369 			// optional
370 			if (laxOccurence ||
371 				(!isNewTypeDefinition && !mergedRequired)) {
372 				RelaxngOptional opt = new RelaxngOptional ();
373 				opt.Patterns.Add (p);
374 				p = opt;
375 			}
376 
377 			return p;
378 		}
379 
380 		// validate string value agains attr and
381 		// if invalid, then relax the type.
InferMergedAttribute(RelaxngPattern ap)382 		private void InferMergedAttribute (RelaxngPattern ap)
383 		{
384 			switch (ap.PatternType) {
385 			case RelaxngPatternType.Ref:
386 				string refName = ((RelaxngRef) ap).Name;
387 				RelaxngDefine def = GetDefine (refName);
388 				InferMergedAttribute (def.Patterns [0]);
389 				return;
390 			case RelaxngPatternType.Optional:
391 				InferMergedAttribute (
392 					((RelaxngOptional) ap).Patterns [0]);
393 				return;
394 			}
395 
396 			RelaxngAttribute attr = (RelaxngAttribute) ap;
397 
398 			RelaxngPattern p = attr.Pattern;
399 			if (p is RelaxngText)
400 				return; // We could do nothing anymore.
401 			if (p is RelaxngEmpty) {
402 				if (source.Value.Length == 0)
403 					return; // We can keep empty.
404 				// We still could infer a choice of empty and
405 				// data, but it's being too complicated. So
406 				// here we just set text.
407 				attr.Pattern = new RelaxngText ();
408 				return;
409 			}
410 			RelaxngData data = p as RelaxngData;
411 			if (data == null)
412 				throw Error (p, "This inference implementation only allows text, empty and data for an attribute.");
413 			attr.Pattern = CreateSimplePattern (
414 				InferMergedType (source.Value,
415 				new QName (data.Type, data.DatatypeLibrary)));
416 		}
417 
InferMergedType(string value, QName typeName)418 		private QName InferMergedType (string value, QName typeName)
419 		{
420 			// examine value against specified type and
421 			// if unacceptable, then return a relaxed type.
422 
423 			XmlSchemaSimpleType st = XmlSchemaType.GetBuiltInSimpleType (
424 				typeName);
425 			if (st == null) // non-primitive type => see above.
426 				return QNameString;
427 			do {
428 				try {
429 					st.Datatype.ParseValue (value,
430 						source.NameTable,
431 						source as IXmlNamespaceResolver);
432 					return typeName;
433 				} catch {
434 					st = st.BaseXmlSchemaType as XmlSchemaSimpleType;
435 					typeName = st != null ? st.QualifiedName : QName.Empty;
436 				}
437 			} while (typeName != QName.Empty);
438 			return QNameString;
439 		}
440 
GetAttributes(RelaxngElement el)441 		private RelaxngInterleave GetAttributes (RelaxngElement el)
442 		{
443 			return el.Patterns.Count > 0 ?
444 				el.Patterns [0] as RelaxngInterleave : null;
445 		}
446 
447 		#endregion
448 
449 		#region Element Type
450 
GetElementContent(RelaxngElement el)451 		private RelaxngPattern GetElementContent (RelaxngElement el)
452 		{
453 			if (el.Patterns.Count == 0)
454 				return null;
455 			RelaxngPattern p = el.Patterns [0];
456 			if (p is RelaxngInterleave)
457 				return el.Patterns.Count == 2 ?
458 					el.Patterns [1] : null;
459 			else
460 				return p;
461 		}
462 
InferAsEmptyElement(RelaxngElement ct, bool isNew)463 		private void InferAsEmptyElement (RelaxngElement ct, bool isNew)
464 		{
465 			RelaxngPattern content = GetElementContent (ct);
466 			if (content == null) {
467 				ct.Patterns.Add (new RelaxngEmpty ());
468 				return;
469 			}
470 
471 			RelaxngGroup g = content as RelaxngGroup;
472 			if (g == null)
473 				return;
474 			RelaxngOptional opt = new RelaxngOptional ();
475 			opt.Patterns.Add (g);
476 			ct.Patterns.Remove (content);
477 			ct.Patterns.Add (opt);
478 		}
479 
InferContent(RelaxngElement ct, bool isNew)480 		private void InferContent (RelaxngElement ct, bool isNew)
481 		{
482 			source.Read ();
483 			source.MoveToContent ();
484 			switch (source.NodeType) {
485 			case XmlNodeType.EndElement:
486 				InferAsEmptyElement (ct, isNew);
487 				break;
488 			case XmlNodeType.Element:
489 				InferComplexContent (ct, isNew);
490 				break;
491 			case XmlNodeType.Text:
492 			case XmlNodeType.CDATA:
493 			case XmlNodeType.SignificantWhitespace:
494 				InferTextContent (ct, isNew);
495 				source.MoveToContent ();
496 				if (source.NodeType == XmlNodeType.Element)
497 					goto case XmlNodeType.Element;
498 				break;
499 			case XmlNodeType.Whitespace:
500 				InferContent (ct, isNew); // skip and retry
501 				break;
502 			}
503 		}
504 
InferComplexContent(RelaxngElement ct, bool isNew)505 		private void InferComplexContent (RelaxngElement ct, bool isNew)
506 		{
507 			bool makeMixed = false;
508 			RelaxngPattern content = GetElementContent (ct);
509 			if (content != null) {
510 				switch (content.PatternType) {
511 				case RelaxngPatternType.Text:
512 				case RelaxngPatternType.Data:
513 					makeMixed = true;
514 					ct.Patterns.Remove (content);
515 					ct.Patterns.Add (new RelaxngGroup ());
516 					break;
517 				}
518 			}
519 			else
520 				ct.Patterns.Add (new RelaxngGroup ());
521 			InferComplexContentCore (ct, isNew);
522 			if (makeMixed)
523 				MarkAsMixed (ct);
524 		}
525 
InferComplexContentCore(RelaxngElement ct, bool isNew)526 		private void InferComplexContentCore (RelaxngElement ct,
527 			bool isNew)
528 		{
529 			int position = 0;
530 			bool consumed = false;
531 
532 			do {
533 				switch (source.NodeType) {
534 				case XmlNodeType.Element:
535 					RelaxngPattern p =
536 						GetElementContent (ct);
537 					RelaxngGroup g = null;
538 					if (p == null)
539 						g = new RelaxngGroup ();
540 					switch (p.PatternType) {
541 					case RelaxngPatternType.OneOrMore:
542 					case RelaxngPatternType.ZeroOrMore:
543 						ProcessLax ((RelaxngSingleContentPattern) p);
544 						break;
545 					case RelaxngPatternType.Optional:
546 						g = (RelaxngGroup)
547 							((RelaxngOptional) p)
548 							.Patterns [0];
549 						goto default;
550 					case RelaxngPatternType.Group:
551 						g = (RelaxngGroup) p;
552 						goto default;
553 					case RelaxngPatternType.Text:
554 					case RelaxngPatternType.Data:
555 						g = new RelaxngGroup ();
556 						g.Patterns.Add (new RelaxngMixed ());
557 						goto default;
558 					default:
559 						if (g == null)
560 							throw Error (p, "Unexpected pattern: " + p.PatternType);
561 						ProcessSequence (ct, g,
562 							ref position,
563 							ref consumed,
564 							isNew);
565 						break;
566 					}
567 					source.MoveToContent ();
568 					break;
569 				case XmlNodeType.Text:
570 				case XmlNodeType.CDATA:
571 				case XmlNodeType.SignificantWhitespace:
572 					MarkAsMixed (ct);
573 					source.ReadString ();
574 					source.MoveToContent ();
575 					break;
576 				case XmlNodeType.EndElement:
577 					return; // finished
578 				case XmlNodeType.None:
579 					throw new NotImplementedException ("Internal Error: Should not happen.");
580 				}
581 			} while (true);
582 		}
583 
InferTextContent(RelaxngElement ct, bool isNew)584 		private void InferTextContent (RelaxngElement ct, bool isNew)
585 		{
586 			string value = source.ReadString ();
587 			RelaxngPattern p = GetElementContent (ct);
588 			if (p == null) {
589 				ct.Patterns.Add (CreateSimplePattern (
590 					InferSimpleType (value)));
591 				return;
592 			}
593 			RelaxngPatternList pl = null;
594 			switch (p.PatternType) {
595 			case RelaxngPatternType.Text:
596 			case RelaxngPatternType.Data:
597 				return; // no way to narrow it to data.
598 			case RelaxngPatternType.Empty:
599 				ct.Patterns.Remove (p);
600 				ct.Patterns.Add (new RelaxngText ());
601 				return;
602 			case RelaxngPatternType.Group:
603 				pl = ((RelaxngBinaryContentPattern) p).Patterns;
604 				break;
605 			case RelaxngPatternType.Optional:
606 			case RelaxngPatternType.ZeroOrMore:
607 			case RelaxngPatternType.OneOrMore:
608 				pl = ((RelaxngSingleContentPattern) p).Patterns;
609 				break;
610 			default:
611 				throw Error (p, "Unexpected pattern");
612 			}
613 			if (pl.Count > 0 && pl [0] is RelaxngMixed)
614 				return;
615 			RelaxngMixed m = new RelaxngMixed ();
616 			while (pl.Count > 0) {
617 				RelaxngPattern child = pl [0];
618 				m.Patterns.Add (child);
619 				pl.Remove (child);
620 			}
621 			pl.Add (m);
622 		}
623 
624 		// Change pattern as to allow text content.
MarkAsMixed(RelaxngElement ct)625 		private void MarkAsMixed (RelaxngElement ct)
626 		{
627 			RelaxngPattern p = GetElementContent (ct);
628 			// empty
629 			if (p == null || p is RelaxngEmpty) {
630 				if (p != null)
631 					ct.Patterns.Remove (p);
632 				ct.Patterns.Add (new RelaxngText ());
633 				return;
634 			}
635 			// text
636 			switch (p.PatternType) {
637 			case RelaxngPatternType.Text:
638 			case RelaxngPatternType.Data:
639 			case RelaxngPatternType.Mixed:
640 				return;
641 			case RelaxngPatternType.Choice:
642 			case RelaxngPatternType.Group:
643 				RelaxngBinaryContentPattern b =
644 					(RelaxngBinaryContentPattern) p;
645 				if (b != null) {
646 					RelaxngMixed m = b.Patterns [0]
647 						as RelaxngMixed;
648 					if (m == null) {
649 						m = new RelaxngMixed ();
650 						while (b.Patterns.Count > 0) {
651 							RelaxngPattern child =
652 								b.Patterns [0];
653 							m.Patterns.Add (child);
654 							b.Patterns.Remove (child);
655 						}
656 						b.Patterns.Add (m);
657 					}
658 				}
659 				break;
660 			default:
661 				throw Error (p, "Not allowed pattern.");
662 			}
663 		}
664 
665 		#endregion
666 
667 		#region Particles
668 
ProcessLax(RelaxngSingleContentPattern scp)669 		private void ProcessLax (RelaxngSingleContentPattern scp)
670 		{
671 			RelaxngChoice c = (RelaxngChoice) scp.Patterns [0];
672 			foreach (RelaxngPattern p in c.Patterns) {
673 				RelaxngRef el = p as RelaxngRef;
674 				if (el == null) {
675 					RelaxngOneOrMore oom =
676 						(RelaxngOneOrMore) p;
677 					el = (RelaxngRef) oom.Patterns [0];
678 				}
679 				if (el == null)
680 					throw Error (c, String.Format ("Target pattern contains unacceptable child pattern {0}. Only ref is allowed here."));
681 				if (ElementMatches (el)) {
682 					InferElement (el, false);
683 					return;
684 				}
685 			}
686 			// append a new element particle to lax term.
687 			QName qname = new QName (
688 				source.LocalName, source.NamespaceURI);
689 			RelaxngDefine def = GetGlobalElement (qname);
690 			if (def == null) {
691 				def = CreateGlobalElement (qname); // used to be CreateElement().
692 				InferElement (def, true);
693 			}
694 			else
695 				InferElement (def, false);
696 			RelaxngRef nel = new RelaxngRef ();
697 			nel.Name = def.Name;
698 			c.Patterns.Add (nel);
699 		}
700 
ElementMatches(RelaxngRef el)701 		private bool ElementMatches (RelaxngRef el)
702 		{
703 			RelaxngDefine def = elements [new QName (
704 				source.LocalName, source.NamespaceURI)]
705 				as RelaxngDefine;
706 			return def != null && def.Name == el.Name;
707 		}
708 
ProcessSequence(RelaxngElement ct, RelaxngGroup s, ref int position, ref bool consumed, bool isNew)709 		private void ProcessSequence (RelaxngElement ct, RelaxngGroup s,
710 			ref int position, ref bool consumed,
711 			bool isNew)
712 		{
713 			RelaxngMixed m = s.Patterns.Count > 0 ? s.Patterns [0] as RelaxngMixed : null;
714 			RelaxngPatternList pl = m != null ?
715 				m.Patterns : s.Patterns;
716 			for (int i = 0; i < position; i++) {
717 				RelaxngPattern p = pl [i];
718 				RelaxngRef iel = p as RelaxngRef;
719 				if (iel == null) {
720 					RelaxngOneOrMore oom =
721 						p as RelaxngOneOrMore;
722 					iel = (RelaxngRef) oom.Patterns [0];
723 				}
724 				if (ElementMatches (iel)) {
725 					// Sequence element type violation
726 					// might happen (might not, but we
727 					// cannot backtrack here). So switch
728 					// to sequence of choice* here.
729 					ProcessLax (ToSequenceOfChoice (ct, s));
730 					return;
731 				}
732 			}
733 
734 			if (pl.Count <= position) {
735 				QName name = new QName (source.LocalName,
736 					source.NamespaceURI);
737 				RelaxngDefine nel = GetGlobalElement (name);
738 				if (nel != null)
739 					InferElement (nel, false);
740 				else {
741 					nel = CreateGlobalElement (name); // used to be CreateElement().
742 					InferElement (nel, true);
743 				}
744 				RelaxngRef re = new RelaxngRef ();
745 				re.Name = nel.Name;
746 				pl.Add (re);
747 				consumed = true;
748 				return;
749 			}
750 			RelaxngPattern c = pl [position];
751 			RelaxngRef el = c as RelaxngRef;
752 			if (el == null) {
753 				RelaxngOneOrMore oom = c as RelaxngOneOrMore;
754 				el = (RelaxngRef) oom.Patterns [0];
755 			}
756 			if (el == null)
757 				throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Patterns [position]));
758 			bool matches = ElementMatches (el);
759 			if (matches) {
760 				if (consumed && c is RelaxngRef) {
761 					RelaxngOneOrMore oom = new RelaxngOneOrMore ();
762 					oom.Patterns.Add (el);
763 					pl [position] = oom;
764 				}
765 				InferElement (el, false);
766 				source.MoveToContent ();
767 				switch (source.NodeType) {
768 				case XmlNodeType.None:
769 					if (source.NodeType ==
770 						XmlNodeType.Element)
771 						goto case XmlNodeType.Element;
772 					else if (source.NodeType ==
773 						XmlNodeType.EndElement)
774 						goto case XmlNodeType.EndElement;
775 					break;
776 				case XmlNodeType.Element:
777 					ProcessSequence (ct, s, ref position,
778 						ref consumed, isNew);
779 					break;
780 				case XmlNodeType.Text:
781 				case XmlNodeType.CDATA:
782 				case XmlNodeType.SignificantWhitespace:
783 					MarkAsMixed (ct);
784 					source.ReadString ();
785 					goto case XmlNodeType.None;
786 				case XmlNodeType.Whitespace:
787 					source.ReadString ();
788 					goto case XmlNodeType.None;
789 				case XmlNodeType.EndElement:
790 					return;
791 				default:
792 					source.Read ();
793 					break;
794 				}
795 			}
796 			else {
797 				if (consumed) {
798 					position++;
799 					consumed = false;
800 					ProcessSequence (ct, s,
801 						ref position, ref consumed,
802 						isNew);
803 				}
804 				else
805 					ProcessLax (ToSequenceOfChoice (ct, s));
806 			}
807 		}
808 
809 		// Note that it does not return the changed sequence.
ToSequenceOfChoice( RelaxngElement ct, RelaxngGroup s)810 		private RelaxngSingleContentPattern ToSequenceOfChoice (
811 			RelaxngElement ct, RelaxngGroup s)
812 		{
813 			RelaxngSingleContentPattern scp =
814 				laxOccurence ?
815 				(RelaxngSingleContentPattern)
816 				new RelaxngZeroOrMore () :
817 				new RelaxngOneOrMore ();
818 			RelaxngChoice c = new RelaxngChoice ();
819 			foreach (RelaxngPattern p in s.Patterns)
820 				c.Patterns.Add (p);
821 			scp.Patterns.Add (c);
822 			ct.Patterns.Clear ();
823 			ct.Patterns.Add (scp);
824 			return scp;
825 		}
826 
827 		#endregion
828 
829 		#region String Value
830 
CreateSimplePattern(QName typeName)831 		private RelaxngPattern CreateSimplePattern (QName typeName)
832 		{
833 			if (typeName == QNameString)
834 				return new RelaxngText ();
835 
836 			RelaxngData data = new RelaxngData ();
837 			data.Type = typeName.Name;
838 			data.DatatypeLibrary =
839 				typeName.Namespace == NamespaceXmlSchema ?
840 				NamespaceXmlSchemaDatatypes :
841 				typeName.Namespace;
842 			return data;
843 		}
844 
845 		// primitive type inference.
846 		// When running lax type inference, it just returns xs:string.
InferSimpleType(string value)847 		private QName InferSimpleType (string value)
848 		{
849 			if (laxTypeInference)
850 				return QNameString;
851 
852 			switch (value) {
853 			case "true":
854 			case "false":
855 				return QNameBoolean;
856 			}
857 			try {
858 				long dec = XmlConvert.ToInt64 (value);
859 				if (byte.MinValue <= dec && dec <= byte.MaxValue)
860 					return QNameUByte;
861 				if (sbyte.MinValue <= dec && dec <= sbyte.MaxValue)
862 					return QNameByte;
863 				if (ushort.MinValue <= dec && dec <= ushort.MaxValue)
864 					return QNameUShort;
865 				if (short.MinValue <= dec && dec <= short.MaxValue)
866 					return QNameShort;
867 				if (uint.MinValue <= dec && dec <= uint.MaxValue)
868 					return QNameUInt;
869 				if (int.MinValue <= dec && dec <= int.MaxValue)
870 					return QNameInt;
871 				return QNameLong;
872 			} catch (Exception) {
873 			}
874 			try {
875 				XmlConvert.ToUInt64 (value);
876 				return QNameULong;
877 			} catch (Exception) {
878 			}
879 			try {
880 				XmlConvert.ToDecimal (value);
881 				return QNameDecimal;
882 			} catch (Exception) {
883 			}
884 			try {
885 				double dbl = XmlConvert.ToDouble (value);
886 				if (float.MinValue <= dbl &&
887 					dbl <= float.MaxValue)
888 					return QNameFloat;
889 				else
890 					return QNameDouble;
891 			} catch (Exception) {
892 			}
893 			try {
894 				// FIXME: also try DateTimeSerializationMode
895 				// and gYearMonth
896 				XmlConvert.ToDateTime (value);
897 				return QNameDateTime;
898 			} catch (Exception) {
899 			}
900 			try {
901 				XmlConvert.ToTimeSpan (value);
902 				return QNameDuration;
903 			} catch (Exception) {
904 			}
905 
906 			// xs:string
907 			return QNameString;
908 		}
909 
910 		#endregion
911 
912 		#region Utilities
913 
GetDefine(string name)914 		private RelaxngDefine GetDefine (string name)
915 		{
916 			foreach (RelaxngDefine def in grammar.Defines) {
917 				if (def.Name == name)
918 					return def;
919 			}
920 			return null;
921 		}
922 
GetGlobalElement(QName name)923 		private RelaxngDefine GetGlobalElement (QName name)
924 		{
925 			return elements [name] as RelaxngDefine;
926 		}
927 
GetGlobalAttribute(QName name)928 		private RelaxngDefine GetGlobalAttribute (QName name)
929 		{
930 			return attributes [name] as RelaxngDefine;
931 		}
932 
CreateUniqueName(string baseName)933 		private string CreateUniqueName (string baseName)
934 		{
935 			string name = baseName;
936 			bool retry;
937 			do {
938 				retry = false;
939 				foreach (RelaxngDefine d in grammar.Defines) {
940 					if (d.Name == name) {
941 						name += "_";
942 						retry = true;
943 						break;
944 					}
945 				}
946 			} while (retry);
947 			return name;
948 		}
949 
950 		// Already relaxed.
CreateGlobalElement(QName name)951 		private RelaxngDefine CreateGlobalElement (QName name)
952 		{
953 			RelaxngDefine def = new RelaxngDefine ();
954 			def.Name = CreateUniqueName (name.Name);
955 			RelaxngElement el = new RelaxngElement ();
956 			el.NameClass = new RelaxngName (name.Name,
957 				name.Namespace);
958 			def.Patterns.Add (el);
959 			elements.Add (name, def);
960 			grammar.Defines.Add (def);
961 			return def;
962 		}
963 
CreateGlobalAttribute(QName name)964 		private RelaxngDefine CreateGlobalAttribute (QName name)
965 		{
966 			RelaxngDefine def = new RelaxngDefine ();
967 			def.Name = CreateUniqueName (name.Name + "-attr");
968 			RelaxngAttribute attr = new RelaxngAttribute ();
969 			attr.NameClass = new RelaxngName (
970 				name.Name, name.Namespace);
971 			def.Patterns.Add (attr);
972 			attributes.Add (name, def);
973 			grammar.Defines.Add (def);
974 			return def;
975 		}
976 
977 		// FIXME: should create another type of RelaxngException.
Error( RelaxngElementBase sourceObj, string message)978 		private RelaxngException Error (
979 			RelaxngElementBase sourceObj,
980 			string message)
981 		{
982 			// This override is mainly for schema component error.
983 			return Error (sourceObj, false, message);
984 		}
985 
Error( RelaxngElementBase sourceObj, bool useReader, string message)986 		private RelaxngException Error (
987 			RelaxngElementBase sourceObj,
988 			bool useReader,
989 			string message)
990 		{
991 			string msg = String.Concat (
992 				message,
993 				sourceObj != null ?
994 					String.Format (". Related schema component is {0} ({1}) line {2}, column {3}",
995 						sourceObj.BaseUri,
996 						sourceObj.GetType ().Name,
997 						sourceObj.LineNumber,
998 						sourceObj.LinePosition) :
999 					String.Empty,
1000 				useReader ?
1001 					String.Format (". {0}", source.BaseURI) :
1002 					String.Empty);
1003 
1004 			IXmlLineInfo li = source as IXmlLineInfo;
1005 			if (useReader && li != null && li.HasLineInfo ())
1006 				msg += String.Format (" line {0} column {1}",
1007 					li.LineNumber, li.LinePosition);
1008 
1009 			return new RelaxngException (msg);
1010 		}
1011 
1012 		#endregion
1013 	}
1014 }
1015