1 // 2 // RelaxngInference.cs 3 // 4 // Author: 5 // Atsushi Enomoto <atsushi@ximian.com> 6 // 7 // (C) 2005 Novell Inc. 8 // 9 10 // 11 // Permission is hereby granted, free of charge, to any person obtaining 12 // a copy of this software and associated documentation files (the 13 // "Software"), to deal in the Software without restriction, including 14 // without limitation the rights to use, copy, modify, merge, publish, 15 // distribute, sublicense, and/or sell copies of the Software, and to 16 // permit persons to whom the Software is furnished to do so, subject to 17 // the following conditions: 18 // 19 // The above copyright notice and this permission notice shall be 20 // included in all copies or substantial portions of the Software. 21 // 22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 25 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 26 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 27 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
//

using System;
using System.Collections;
using System.Xml;
using System.Xml.Schema;
using Commons.Xml.Relaxng;

using QName = System.Xml.XmlQualifiedName;


namespace Commons.Xml.Relaxng.Inference
{
	// Public entry point: infers a RELAX NG grammar from an XML instance.
	// The two options select whether occurrence and simple-type inference
	// run in restricted or relaxed mode.
	public class RelaxngInference
	{
		public enum InferenceOption {
			Restricted,
			Relaxed,
		}

		InferenceOption occurrence = InferenceOption.Restricted;
		InferenceOption typeInference = InferenceOption.Restricted;

		public RelaxngInference ()
		{
		}

		// Relaxed makes inferred attributes optional and uses zeroOrMore
		// (instead of oneOrMore) when a sequence is turned into a choice.
		public InferenceOption Occurrence {
			get { return occurrence; }
			set { occurrence = value; }
		}

		// Relaxed makes every inferred simple value xs:string (text).
		public InferenceOption TypeInference {
			get { return typeInference; }
			set { typeInference = value; }
		}

		// Infers a fresh grammar from the content of xmlReader.
		public RelaxngGrammar InferSchema (XmlReader xmlReader)
		{
			return InferSchema (xmlReader, new RelaxngGrammar ());
		}

		// Infers from xmlReader, merging the result into the given grammar,
		// and returns that grammar.
		// Throws ArgumentNullException when either argument is null.
		public RelaxngGrammar InferSchema (XmlReader xmlReader,
			RelaxngGrammar grammar)
		{
			if (xmlReader == null)
				throw new ArgumentNullException ("xmlReader");
			if (grammar == null)
				throw new ArgumentNullException ("grammar");

			return RngInference.Process (xmlReader, grammar,
				occurrence == InferenceOption.Relaxed,
				typeInference == InferenceOption.Relaxed);
		}
	}

	// Inference implementation. One instance handles one Process() call.
	class RngInference
	{
		// Runs the inference over xmlReader and returns the (mutated) grammar.
		public static RelaxngGrammar Process (XmlReader xmlReader,
			RelaxngGrammar grammar,
			bool laxOccurence,
			bool laxTypeInference)
		{
			RngInference impl = new RngInference (xmlReader,
				grammar, laxOccurence, laxTypeInference);
			impl.Run ();
			return impl.grammar;
		}

		public const string NamespaceXml =
			"http://www.w3.org/XML/1998/namespace";

		public const string NamespaceXmlns =
			"http://www.w3.org/2000/xmlns/";

		public const string NamespaceXmlSchemaDatatypes =
			"http://www.w3.org/2001/XMLSchema-datatypes";

		public const string XdtNamespace =
			"http://www.w3.org/2003/11/xpath-datatypes";

		public const string NamespaceXmlSchema =
			System.Xml.Schema.XmlSchema.Namespace;

		static readonly QName QNameString = new QName (
			"string", NamespaceXmlSchema);

		static readonly QName QNameBoolean = new QName (
			"boolean", NamespaceXmlSchema);

		static readonly QName QNameAnyType = new QName (
			"anyType", NamespaceXmlSchema);

		static readonly QName QNameByte = new QName (
			"byte", NamespaceXmlSchema);

		static readonly QName QNameUByte = new QName (
			"unsignedByte", NamespaceXmlSchema);

		static readonly QName QNameShort = new QName (
			"short", NamespaceXmlSchema);

		static readonly QName QNameUShort = new QName (
			"unsignedShort", NamespaceXmlSchema);

		static readonly QName QNameInt = new QName (
			"int", NamespaceXmlSchema);

		static readonly QName QNameUInt = new QName (
			"unsignedInt", NamespaceXmlSchema);

		static readonly QName QNameLong = new QName (
			"long", NamespaceXmlSchema);

		static readonly QName QNameULong = new QName (
			"unsignedLong", NamespaceXmlSchema);

		static readonly QName QNameDecimal = new QName (
			"decimal", NamespaceXmlSchema);

		static readonly QName QNameUDecimal = new QName (
			"unsignedDecimal", NamespaceXmlSchema);

		static readonly QName QNameDouble = new QName (
			"double", NamespaceXmlSchema);

		static readonly QName QNameFloat = new QName (
			"float", NamespaceXmlSchema);

		static readonly QName QNameDateTime = new QName (
			"dateTime", NamespaceXmlSchema);

		static readonly QName QNameDuration = new QName (
			"duration", NamespaceXmlSchema);

		XmlReader source;
		RelaxngGrammar grammar;
		bool laxOccurence;
		bool laxTypeInference;

		// QName -> RelaxngDefine for "global" elements and attributes.
		Hashtable elements = new Hashtable ();
		Hashtable attributes = new Hashtable ();
		XmlNamespaceManager nsmgr;

		// Stores the arguments and indexes every define of the incoming
		// grammar that consists of exactly one named element or attribute.
		private RngInference (XmlReader xmlReader,
			RelaxngGrammar grammar,
			bool laxOccurence,
			bool laxTypeInference)
		{
			this.source = xmlReader;
			this.grammar = grammar;
			this.laxOccurence = laxOccurence;
			this.laxTypeInference = laxTypeInference;
			nsmgr = new XmlNamespaceManager (source.NameTable);

			foreach (RelaxngDefine def in grammar.Defines) {
				if (def.Patterns.Count != 1)
					continue;
				RelaxngElement e = def.Patterns [0] as RelaxngElement;
				RelaxngAttribute a = def.Patterns [0] as RelaxngAttribute;
				if (e == null && a == null)
					continue;
				RelaxngName rn = e != null ?
					e.NameClass as RelaxngName :
					a.NameClass as RelaxngName;
				if (rn == null)
					continue;
				QName qname = new QName (rn.LocalName,
					rn.Namespace);
				// The first define wins; Hashtable.Add() would throw
				// on a grammar that defines the same name twice.
				Hashtable target = e != null ? elements : attributes;
				if (!target.Contains (qname))
					target.Add (qname, def);
			}
		}

		// Infers the document element and adds a <start combine="choice">
		// that references its define.
		private void Run ()
		{
			// move to top-level element
			source.MoveToContent ();
			if (source.NodeType != XmlNodeType.Element)
				throw new ArgumentException ("Argument XmlReader content is expected to be an element.");

			QName qname = new QName (source.LocalName,
				source.NamespaceURI);
			RelaxngDefine el = GetGlobalElement (qname);
			if (el == null) {
				el = CreateGlobalElement (qname);
				InferElement (el, true);
			}
			else
				InferElement (el, false);
			RelaxngStart start = new RelaxngStart ();
			start.Combine = "choice";
			RelaxngRef topRef = new RelaxngRef ();
			topRef.Name = el.Name;
			start.Pattern = topRef;
			grammar.Starts.Add (start);
		}

		// Resolves the ref to its define and infers the current element
		// against it.
		private void InferElement (RelaxngRef r, bool isNew)
		{
			RelaxngDefine body = GetDefine (r.Name);
			if (body == null)
				throw Error (r, "Referenced definition was not found: " + r.Name);
			InferElement (body, isNew);
		}

		// Infers the element the reader is positioned on against the
		// element pattern held by 'el'. isNew tells whether the define
		// was just created for this element.
		private void InferElement (RelaxngDefine el, bool isNew)
		{
			RelaxngElement ct = (RelaxngElement) el.Patterns [0];

			// Attributes. Always run, so that known attributes
			// missing from this instance become optional even when
			// the instance carries no attributes at all.
			InferAttributes (ct, isNew);

			// Content
			if (source.IsEmptyElement) {
				InferAsEmptyElement (ct, isNew);
				source.Read ();
				source.MoveToContent ();
			}
			else {
				InferContent (ct, isNew);
				source.ReadEndElement ();
			}
			// The fallback content belongs to the element pattern,
			// not to the define that wraps it.
			if (GetElementContent (ct) == null)
				ct.Patterns.Add (new RelaxngEmpty ());
		}

		#region Attribute Inference

		// Unwraps <optional> and <ref> until an attribute pattern is
		// reached. Returns null when the pattern does not lead to one.
		private RelaxngAttribute ResolveAttribute (RelaxngPattern p)
		{
			// depth is bounded so that a self-referencing define
			// cannot make this loop forever.
			for (int depth = 0; p != null && depth < 16; depth++) {
				RelaxngAttribute a = p as RelaxngAttribute;
				if (a != null)
					return a;
				RelaxngOptional opt = p as RelaxngOptional;
				if (opt != null) {
					p = opt.Patterns.Count > 0 ?
						opt.Patterns [0] : null;
					continue;
				}
				RelaxngRef r = p as RelaxngRef;
				if (r == null)
					return null;
				RelaxngDefine def = GetDefine (r.Name);
				p = def != null && def.Patterns.Count > 0 ?
					def.Patterns [0] : null;
			}
			return null;
		}

		// get attribute definition table: attribute QName -> the pattern
		// that sits directly in attList (attribute, ref or optional).
		private Hashtable CollectAttrTable (RelaxngInterleave attList)
		{
			Hashtable table = new Hashtable ();
			if (attList == null)
				return table;
			foreach (RelaxngPattern p in attList.Patterns) {
				RelaxngAttribute a = ResolveAttribute (p);
				RelaxngName rn = a != null ?
					a.NameClass as RelaxngName : null;
				if (rn == null)
					continue; // not a named attribute
				table [new QName (rn.LocalName, rn.Namespace)] = p;
			}
			return table;
		}

		// Merges the attributes of the current element into ct's attribute
		// list. Expects the reader on the element node and leaves it there.
		private void InferAttributes (RelaxngElement ct, bool isNew)
		{
			RelaxngInterleave attList = GetAttributes (ct);
			Hashtable table = CollectAttrTable (attList);

			if (source.MoveToFirstAttribute ()) {
				do {
					// namespace declarations are not attributes
					if (source.NamespaceURI == NamespaceXmlns)
						continue;

					QName attrName = new QName (
						source.LocalName, source.NamespaceURI);
					RelaxngPattern attr = table [attrName]
						as RelaxngPattern;
					if (attr == null) {
						if (attList == null) {
							attList = new RelaxngInterleave ();
							ct.Patterns.Insert (0, attList);
						}
						attList.Patterns.Add (
							InferNewAttribute (
							attrName, isNew));
					} else {
						table.Remove (attrName);
						RelaxngDefine ga =
							attrName.Namespace.Length > 0 ?
							GetGlobalAttribute (attrName) : null;
						if (ga != null && ga.Patterns.Count > 0)
							InferMergedAttribute (
								ga.Patterns [0]);
						else
							InferMergedAttribute (attr);
					}
				} while (source.MoveToNextAttribute ());
				source.MoveToElement ();
			}

			// mark all attr definitions that did not appear
			// as optional. What remains in the table was not seen.
			foreach (RelaxngPattern missing in table.Values) {
				if (missing is RelaxngOptional)
					continue;
				attList.Patterns.Remove (missing);
				RelaxngOptional opt = new RelaxngOptional ();
				opt.Patterns.Add (missing);
				attList.Patterns.Add (opt);
			}
		}

		// It returns RelaxngAttribute for local attribute, and
		// RelaxngRef for global attribute. Either may come wrapped in
		// RelaxngOptional. The reader must be on the attribute node.
		private RelaxngPattern InferNewAttribute (
			QName attrName, bool isNewTypeDefinition)
		{
			RelaxngPattern p = null;
			bool mergedRequired = false;
			if (attrName.Namespace.Length > 0) {
				// global attribute; might be already defined.
				// (Actually RELAX NG has no concept of "global
				// attributes" but it is still useful to
				// represent attributes in global scope.
				RelaxngDefine attr = GetGlobalAttribute (
					attrName);
				if (attr == null) {
					attr = CreateGlobalAttribute (attrName);
					// The value pattern is the content of
					// the attribute, not a sibling of it
					// inside the define.
					RelaxngAttribute created =
						(RelaxngAttribute) attr.Patterns [0];
					created.Pattern = CreateSimplePattern (
						InferSimpleType (source.Value));
				} else {
					RelaxngAttribute a = attr.Patterns [0] as RelaxngAttribute;
					if (a != null)
						mergedRequired = true;
					else {
						RelaxngOptional opt =
							(RelaxngOptional) attr.Patterns [0];
						a = (RelaxngAttribute) opt.Patterns [0];
					}
					InferMergedAttribute (a);
				}
				RelaxngRef r = new RelaxngRef ();
				r.Name = attr.Name;
				p = r;
			} else {
				// local attribute
				RelaxngAttribute a = new RelaxngAttribute ();
				a.NameClass = new RelaxngName (
					attrName.Name, attrName.Namespace);
				a.Pattern = CreateSimplePattern (
					InferSimpleType (source.Value));
				p = a;
			}
			// optional
			if (laxOccurence ||
				(!isNewTypeDefinition && !mergedRequired)) {
				RelaxngOptional opt = new RelaxngOptional ();
				opt.Patterns.Add (p);
				p = opt;
			}

			return p;
		}

		// validate string value against attr and
		// if invalid, then relax the type.
		// 'ap' may be an attribute, or a ref/optional leading to one.
		private void InferMergedAttribute (RelaxngPattern ap)
		{
			switch (ap.PatternType) {
			case RelaxngPatternType.Ref:
				string refName = ((RelaxngRef) ap).Name;
				RelaxngDefine def = GetDefine (refName);
				if (def == null || def.Patterns.Count == 0)
					throw Error (ap, "Referenced attribute definition was not found: " + refName);
				InferMergedAttribute (def.Patterns [0]);
				return;
			case RelaxngPatternType.Optional:
				InferMergedAttribute (
					((RelaxngOptional) ap).Patterns [0]);
				return;
			}

			RelaxngAttribute attr = (RelaxngAttribute) ap;

			RelaxngPattern p = attr.Pattern;
			if (p is RelaxngText)
				return; // We could do nothing anymore.
			if (p is RelaxngEmpty) {
				if (source.Value.Length == 0)
					return; // We can keep empty.
				// We still could infer a choice of empty and
				// data, but it's being too complicated. So
				// here we just set text.
				attr.Pattern = new RelaxngText ();
				return;
			}
			RelaxngData data = p as RelaxngData;
			if (data == null)
				throw Error (p, "This inference implementation only allows text, empty and data for an attribute.");
			// NOTE(review): patterns written by CreateSimplePattern
			// carry the XMLSchema-datatypes library URI, not the XML
			// Schema namespace. Confirm that InferMergedType can
			// resolve such a name; if not, this always relaxes to text.
			attr.Pattern = CreateSimplePattern (
				InferMergedType (source.Value,
				new QName (data.Type, data.DatatypeLibrary)));
		}

		// examine value against specified type and
		// if unacceptable, then return a relaxed type: the nearest base
		// type that parses the value, or xs:string.
		private QName InferMergedType (string value, QName typeName)
		{
			XmlSchemaSimpleType st = XmlSchemaType.GetBuiltInSimpleType (
				typeName);
			if (st == null) // not a built-in simple type
				return QNameString;
			do {
				try {
					st.Datatype.ParseValue (value,
						source.NameTable,
						source as IXmlNamespaceResolver);
					return typeName;
				} catch (Exception) {
					// value does not fit; retry with the base type
					st = st.BaseXmlSchemaType as XmlSchemaSimpleType;
					typeName = st != null ? st.QualifiedName : QName.Empty;
				}
			} while (typeName != QName.Empty);
			return QNameString;
		}

		// The attribute list is the interleave in the first slot of the
		// element, if there is one.
		private RelaxngInterleave GetAttributes (RelaxngElement el)
		{
			return el.Patterns.Count > 0 ?
				el.Patterns [0] as RelaxngInterleave : null;
		}

		#endregion

		#region Element Type

		// Returns the content pattern of the element: the second pattern
		// when the first one is the attribute interleave, otherwise the
		// first. null when there is none.
		private RelaxngPattern GetElementContent (RelaxngElement el)
		{
			if (el.Patterns.Count == 0)
				return null;
			RelaxngPattern p = el.Patterns [0];
			if (p is RelaxngInterleave)
				return el.Patterns.Count == 2 ?
					el.Patterns [1] : null;
			else
				return p;
		}

		// The current element has no content: record <empty/> when nothing
		// is known yet, or make an existing group optional.
		private void InferAsEmptyElement (RelaxngElement ct, bool isNew)
		{
			RelaxngPattern content = GetElementContent (ct);
			if (content == null) {
				ct.Patterns.Add (new RelaxngEmpty ());
				return;
			}

			RelaxngGroup g = content as RelaxngGroup;
			if (g == null)
				return;
			RelaxngOptional opt = new RelaxngOptional ();
			opt.Patterns.Add (g);
			ct.Patterns.Remove (content);
			ct.Patterns.Add (opt);
		}

		// Reads into the element and dispatches on the first content node.
		private void InferContent (RelaxngElement ct, bool isNew)
		{
			source.Read ();
			source.MoveToContent ();
			switch (source.NodeType) {
			case XmlNodeType.EndElement:
				InferAsEmptyElement (ct, isNew);
				break;
			case XmlNodeType.Element:
				InferComplexContent (ct, isNew);
				break;
			case XmlNodeType.Text:
			case XmlNodeType.CDATA:
			case XmlNodeType.SignificantWhitespace:
				InferTextContent (ct, isNew);
				source.MoveToContent ();
				if (source.NodeType == XmlNodeType.Element)
					goto case XmlNodeType.Element;
				break;
			case XmlNodeType.Whitespace:
				InferContent (ct, isNew); // skip and retry
				break;
			}
		}

		// Prepares the content slot for child elements, infers them, and
		// re-applies mixed content when text had been recorded before.
		private void InferComplexContent (RelaxngElement ct, bool isNew)
		{
			bool makeMixed = false;
			RelaxngPattern content = GetElementContent (ct);
			if (content != null) {
				switch (content.PatternType) {
				case RelaxngPatternType.Text:
				case RelaxngPatternType.Data:
					makeMixed = true;
					ct.Patterns.Remove (content);
					ct.Patterns.Add (new RelaxngGroup ());
					break;
				case RelaxngPatternType.Empty:
					// Seen empty before, so the children
					// are optional. This mirrors what
					// InferAsEmptyElement() does for the
					// opposite order of appearance.
					ct.Patterns.Remove (content);
					RelaxngOptional opt = new RelaxngOptional ();
					opt.Patterns.Add (new RelaxngGroup ());
					ct.Patterns.Add (opt);
					break;
				}
			}
			else
				ct.Patterns.Add (new RelaxngGroup ());
			InferComplexContentCore (ct, isNew);
			if (makeMixed)
				MarkAsMixed (ct);
		}

		// Consumes child nodes until the end tag of the current element.
		private void InferComplexContentCore (RelaxngElement ct,
			bool isNew)
		{
			int position = 0;
			bool consumed = false;

			do {
				switch (source.NodeType) {
				case XmlNodeType.Element:
					RelaxngPattern p =
						GetElementContent (ct);
					RelaxngGroup g = null;
					if (p == null) {
						// attach a group so that
						// inferred children are kept
						g = new RelaxngGroup ();
						ct.Patterns.Add (g);
						p = g;
					}
					switch (p.PatternType) {
					case RelaxngPatternType.OneOrMore:
					case RelaxngPatternType.ZeroOrMore:
						ProcessLax ((RelaxngSingleContentPattern) p);
						break;
					case RelaxngPatternType.Optional:
						g = (RelaxngGroup)
							((RelaxngOptional) p)
							.Patterns [0];
						goto default;
					case RelaxngPatternType.Group:
						g = (RelaxngGroup) p;
						goto default;
					case RelaxngPatternType.Text:
					case RelaxngPatternType.Data:
						g = new RelaxngGroup ();
						g.Patterns.Add (new RelaxngMixed ());
						goto default;
					default:
						if (g == null)
							throw Error (p, "Unexpected pattern: " + p.PatternType);
						ProcessSequence (ct, g,
							ref position,
							ref consumed,
							isNew);
						break;
					}
					source.MoveToContent ();
					break;
				case XmlNodeType.Text:
				case XmlNodeType.CDATA:
				case XmlNodeType.SignificantWhitespace:
					MarkAsMixed (ct);
					source.ReadString ();
					source.MoveToContent ();
					break;
				case XmlNodeType.EndElement:
					return; // finished
				case XmlNodeType.None:
					throw new NotImplementedException ("Internal Error: Should not happen.");
				default:
					// Skip anything else; without this the
					// loop would never advance.
					source.Read ();
					source.MoveToContent ();
					break;
				}
			} while (true);
		}

		// Reads the text at the reader position and records it: as a simple
		// pattern when the element has no content yet, otherwise by turning
		// the existing content into mixed content.
		private void InferTextContent (RelaxngElement ct, bool isNew)
		{
			string value = source.ReadString ();
			RelaxngPattern p = GetElementContent (ct);
			if (p == null) {
				ct.Patterns.Add (CreateSimplePattern (
					InferSimpleType (value)));
				return;
			}
			RelaxngPatternList pl = null;
			switch (p.PatternType) {
			case RelaxngPatternType.Text:
			case RelaxngPatternType.Data:
				return; // no way to narrow it to data.
			case RelaxngPatternType.Empty:
				ct.Patterns.Remove (p);
				ct.Patterns.Add (new RelaxngText ());
				return;
			case RelaxngPatternType.Group:
				pl = ((RelaxngBinaryContentPattern) p).Patterns;
				break;
			case RelaxngPatternType.Optional:
			case RelaxngPatternType.ZeroOrMore:
			case RelaxngPatternType.OneOrMore:
				pl = ((RelaxngSingleContentPattern) p).Patterns;
				break;
			default:
				throw Error (p, "Unexpected pattern");
			}
			if (pl.Count > 0 && pl [0] is RelaxngMixed)
				return; // already mixed
			// move every child under a new <mixed>
			RelaxngMixed m = new RelaxngMixed ();
			while (pl.Count > 0) {
				RelaxngPattern child = pl [0];
				m.Patterns.Add (child);
				pl.Remove (child);
			}
			pl.Add (m);
		}

		// Change pattern as to allow text content.
MarkAsMixed(RelaxngElement ct)625 private void MarkAsMixed (RelaxngElement ct) 626 { 627 RelaxngPattern p = GetElementContent (ct); 628 // empty 629 if (p == null || p is RelaxngEmpty) { 630 if (p != null) 631 ct.Patterns.Remove (p); 632 ct.Patterns.Add (new RelaxngText ()); 633 return; 634 } 635 // text 636 switch (p.PatternType) { 637 case RelaxngPatternType.Text: 638 case RelaxngPatternType.Data: 639 case RelaxngPatternType.Mixed: 640 return; 641 case RelaxngPatternType.Choice: 642 case RelaxngPatternType.Group: 643 RelaxngBinaryContentPattern b = 644 (RelaxngBinaryContentPattern) p; 645 if (b != null) { 646 RelaxngMixed m = b.Patterns [0] 647 as RelaxngMixed; 648 if (m == null) { 649 m = new RelaxngMixed (); 650 while (b.Patterns.Count > 0) { 651 RelaxngPattern child = 652 b.Patterns [0]; 653 m.Patterns.Add (child); 654 b.Patterns.Remove (child); 655 } 656 b.Patterns.Add (m); 657 } 658 } 659 break; 660 default: 661 throw Error (p, "Not allowed pattern."); 662 } 663 } 664 665 #endregion 666 667 #region Particles 668 ProcessLax(RelaxngSingleContentPattern scp)669 private void ProcessLax (RelaxngSingleContentPattern scp) 670 { 671 RelaxngChoice c = (RelaxngChoice) scp.Patterns [0]; 672 foreach (RelaxngPattern p in c.Patterns) { 673 RelaxngRef el = p as RelaxngRef; 674 if (el == null) { 675 RelaxngOneOrMore oom = 676 (RelaxngOneOrMore) p; 677 el = (RelaxngRef) oom.Patterns [0]; 678 } 679 if (el == null) 680 throw Error (c, String.Format ("Target pattern contains unacceptable child pattern {0}. Only ref is allowed here.")); 681 if (ElementMatches (el)) { 682 InferElement (el, false); 683 return; 684 } 685 } 686 // append a new element particle to lax term. 687 QName qname = new QName ( 688 source.LocalName, source.NamespaceURI); 689 RelaxngDefine def = GetGlobalElement (qname); 690 if (def == null) { 691 def = CreateGlobalElement (qname); // used to be CreateElement(). 
692 InferElement (def, true); 693 } 694 else 695 InferElement (def, false); 696 RelaxngRef nel = new RelaxngRef (); 697 nel.Name = def.Name; 698 c.Patterns.Add (nel); 699 } 700 ElementMatches(RelaxngRef el)701 private bool ElementMatches (RelaxngRef el) 702 { 703 RelaxngDefine def = elements [new QName ( 704 source.LocalName, source.NamespaceURI)] 705 as RelaxngDefine; 706 return def != null && def.Name == el.Name; 707 } 708 ProcessSequence(RelaxngElement ct, RelaxngGroup s, ref int position, ref bool consumed, bool isNew)709 private void ProcessSequence (RelaxngElement ct, RelaxngGroup s, 710 ref int position, ref bool consumed, 711 bool isNew) 712 { 713 RelaxngMixed m = s.Patterns.Count > 0 ? s.Patterns [0] as RelaxngMixed : null; 714 RelaxngPatternList pl = m != null ? 715 m.Patterns : s.Patterns; 716 for (int i = 0; i < position; i++) { 717 RelaxngPattern p = pl [i]; 718 RelaxngRef iel = p as RelaxngRef; 719 if (iel == null) { 720 RelaxngOneOrMore oom = 721 p as RelaxngOneOrMore; 722 iel = (RelaxngRef) oom.Patterns [0]; 723 } 724 if (ElementMatches (iel)) { 725 // Sequence element type violation 726 // might happen (might not, but we 727 // cannot backtrack here). So switch 728 // to sequence of choice* here. 729 ProcessLax (ToSequenceOfChoice (ct, s)); 730 return; 731 } 732 } 733 734 if (pl.Count <= position) { 735 QName name = new QName (source.LocalName, 736 source.NamespaceURI); 737 RelaxngDefine nel = GetGlobalElement (name); 738 if (nel != null) 739 InferElement (nel, false); 740 else { 741 nel = CreateGlobalElement (name); // used to be CreateElement(). 
742 InferElement (nel, true); 743 } 744 RelaxngRef re = new RelaxngRef (); 745 re.Name = nel.Name; 746 pl.Add (re); 747 consumed = true; 748 return; 749 } 750 RelaxngPattern c = pl [position]; 751 RelaxngRef el = c as RelaxngRef; 752 if (el == null) { 753 RelaxngOneOrMore oom = c as RelaxngOneOrMore; 754 el = (RelaxngRef) oom.Patterns [0]; 755 } 756 if (el == null) 757 throw Error (s, String.Format ("Target complex type content sequence has an unacceptable type of particle {0}", s.Patterns [position])); 758 bool matches = ElementMatches (el); 759 if (matches) { 760 if (consumed && c is RelaxngRef) { 761 RelaxngOneOrMore oom = new RelaxngOneOrMore (); 762 oom.Patterns.Add (el); 763 pl [position] = oom; 764 } 765 InferElement (el, false); 766 source.MoveToContent (); 767 switch (source.NodeType) { 768 case XmlNodeType.None: 769 if (source.NodeType == 770 XmlNodeType.Element) 771 goto case XmlNodeType.Element; 772 else if (source.NodeType == 773 XmlNodeType.EndElement) 774 goto case XmlNodeType.EndElement; 775 break; 776 case XmlNodeType.Element: 777 ProcessSequence (ct, s, ref position, 778 ref consumed, isNew); 779 break; 780 case XmlNodeType.Text: 781 case XmlNodeType.CDATA: 782 case XmlNodeType.SignificantWhitespace: 783 MarkAsMixed (ct); 784 source.ReadString (); 785 goto case XmlNodeType.None; 786 case XmlNodeType.Whitespace: 787 source.ReadString (); 788 goto case XmlNodeType.None; 789 case XmlNodeType.EndElement: 790 return; 791 default: 792 source.Read (); 793 break; 794 } 795 } 796 else { 797 if (consumed) { 798 position++; 799 consumed = false; 800 ProcessSequence (ct, s, 801 ref position, ref consumed, 802 isNew); 803 } 804 else 805 ProcessLax (ToSequenceOfChoice (ct, s)); 806 } 807 } 808 809 // Note that it does not return the changed sequence. 
ToSequenceOfChoice( RelaxngElement ct, RelaxngGroup s)810 private RelaxngSingleContentPattern ToSequenceOfChoice ( 811 RelaxngElement ct, RelaxngGroup s) 812 { 813 RelaxngSingleContentPattern scp = 814 laxOccurence ? 815 (RelaxngSingleContentPattern) 816 new RelaxngZeroOrMore () : 817 new RelaxngOneOrMore (); 818 RelaxngChoice c = new RelaxngChoice (); 819 foreach (RelaxngPattern p in s.Patterns) 820 c.Patterns.Add (p); 821 scp.Patterns.Add (c); 822 ct.Patterns.Clear (); 823 ct.Patterns.Add (scp); 824 return scp; 825 } 826 827 #endregion 828 829 #region String Value 830 CreateSimplePattern(QName typeName)831 private RelaxngPattern CreateSimplePattern (QName typeName) 832 { 833 if (typeName == QNameString) 834 return new RelaxngText (); 835 836 RelaxngData data = new RelaxngData (); 837 data.Type = typeName.Name; 838 data.DatatypeLibrary = 839 typeName.Namespace == NamespaceXmlSchema ? 840 NamespaceXmlSchemaDatatypes : 841 typeName.Namespace; 842 return data; 843 } 844 845 // primitive type inference. 846 // When running lax type inference, it just returns xs:string. 
InferSimpleType(string value)847 private QName InferSimpleType (string value) 848 { 849 if (laxTypeInference) 850 return QNameString; 851 852 switch (value) { 853 case "true": 854 case "false": 855 return QNameBoolean; 856 } 857 try { 858 long dec = XmlConvert.ToInt64 (value); 859 if (byte.MinValue <= dec && dec <= byte.MaxValue) 860 return QNameUByte; 861 if (sbyte.MinValue <= dec && dec <= sbyte.MaxValue) 862 return QNameByte; 863 if (ushort.MinValue <= dec && dec <= ushort.MaxValue) 864 return QNameUShort; 865 if (short.MinValue <= dec && dec <= short.MaxValue) 866 return QNameShort; 867 if (uint.MinValue <= dec && dec <= uint.MaxValue) 868 return QNameUInt; 869 if (int.MinValue <= dec && dec <= int.MaxValue) 870 return QNameInt; 871 return QNameLong; 872 } catch (Exception) { 873 } 874 try { 875 XmlConvert.ToUInt64 (value); 876 return QNameULong; 877 } catch (Exception) { 878 } 879 try { 880 XmlConvert.ToDecimal (value); 881 return QNameDecimal; 882 } catch (Exception) { 883 } 884 try { 885 double dbl = XmlConvert.ToDouble (value); 886 if (float.MinValue <= dbl && 887 dbl <= float.MaxValue) 888 return QNameFloat; 889 else 890 return QNameDouble; 891 } catch (Exception) { 892 } 893 try { 894 // FIXME: also try DateTimeSerializationMode 895 // and gYearMonth 896 XmlConvert.ToDateTime (value); 897 return QNameDateTime; 898 } catch (Exception) { 899 } 900 try { 901 XmlConvert.ToTimeSpan (value); 902 return QNameDuration; 903 } catch (Exception) { 904 } 905 906 // xs:string 907 return QNameString; 908 } 909 910 #endregion 911 912 #region Utilities 913 GetDefine(string name)914 private RelaxngDefine GetDefine (string name) 915 { 916 foreach (RelaxngDefine def in grammar.Defines) { 917 if (def.Name == name) 918 return def; 919 } 920 return null; 921 } 922 GetGlobalElement(QName name)923 private RelaxngDefine GetGlobalElement (QName name) 924 { 925 return elements [name] as RelaxngDefine; 926 } 927 GetGlobalAttribute(QName name)928 private RelaxngDefine 
GetGlobalAttribute (QName name) 929 { 930 return attributes [name] as RelaxngDefine; 931 } 932 CreateUniqueName(string baseName)933 private string CreateUniqueName (string baseName) 934 { 935 string name = baseName; 936 bool retry; 937 do { 938 retry = false; 939 foreach (RelaxngDefine d in grammar.Defines) { 940 if (d.Name == name) { 941 name += "_"; 942 retry = true; 943 break; 944 } 945 } 946 } while (retry); 947 return name; 948 } 949 950 // Already relaxed. CreateGlobalElement(QName name)951 private RelaxngDefine CreateGlobalElement (QName name) 952 { 953 RelaxngDefine def = new RelaxngDefine (); 954 def.Name = CreateUniqueName (name.Name); 955 RelaxngElement el = new RelaxngElement (); 956 el.NameClass = new RelaxngName (name.Name, 957 name.Namespace); 958 def.Patterns.Add (el); 959 elements.Add (name, def); 960 grammar.Defines.Add (def); 961 return def; 962 } 963 CreateGlobalAttribute(QName name)964 private RelaxngDefine CreateGlobalAttribute (QName name) 965 { 966 RelaxngDefine def = new RelaxngDefine (); 967 def.Name = CreateUniqueName (name.Name + "-attr"); 968 RelaxngAttribute attr = new RelaxngAttribute (); 969 attr.NameClass = new RelaxngName ( 970 name.Name, name.Namespace); 971 def.Patterns.Add (attr); 972 attributes.Add (name, def); 973 grammar.Defines.Add (def); 974 return def; 975 } 976 977 // FIXME: should create another type of RelaxngException. Error( RelaxngElementBase sourceObj, string message)978 private RelaxngException Error ( 979 RelaxngElementBase sourceObj, 980 string message) 981 { 982 // This override is mainly for schema component error. 983 return Error (sourceObj, false, message); 984 } 985 Error( RelaxngElementBase sourceObj, bool useReader, string message)986 private RelaxngException Error ( 987 RelaxngElementBase sourceObj, 988 bool useReader, 989 string message) 990 { 991 string msg = String.Concat ( 992 message, 993 sourceObj != null ? 994 String.Format (". 
Related schema component is {0} ({1}) line {2}, column {3}", 995 sourceObj.BaseUri, 996 sourceObj.GetType ().Name, 997 sourceObj.LineNumber, 998 sourceObj.LinePosition) : 999 String.Empty, 1000 useReader ? 1001 String.Format (". {0}", source.BaseURI) : 1002 String.Empty); 1003 1004 IXmlLineInfo li = source as IXmlLineInfo; 1005 if (useReader && li != null && li.HasLineInfo ()) 1006 msg += String.Format (" line {0} column {1}", 1007 li.LineNumber, li.LinePosition); 1008 1009 return new RelaxngException (msg); 1010 } 1011 1012 #endregion 1013 } 1014 } 1015