#
# spyne - Copyright (C) Spyne contributors.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301
#

# To see the list of xml schema builtins recognized by this parser, run defn.py
# in this package.

# This module is EXPERIMENTAL. Only a subset of Xml schema standard is
# implemented.
#

import logging
logger = logging.getLogger(__name__)

import os

from copy import copy
from pprint import pformat
from itertools import chain
from collections import defaultdict
from os.path import dirname, abspath, join

from lxml import etree

from spyne.util import memoize
from spyne.util.odict import odict

from spyne.model import Null, XmlData, XmlAttribute, Array, ComplexModelBase, \
    ComplexModelMeta
from spyne.model.complex import XmlModifier

from spyne.protocol.xml import XmlDocument

from spyne.interface.xml_schema.defn import TYPE_MAP
from spyne.interface.xml_schema.defn import SchemaBase
from spyne.interface.xml_schema.defn import XmlSchema10

from spyne.util.color import R, G, B, MAG, YEL

# Shared lxml parser; comments are stripped from every parsed schema document.
PARSER = etree.XMLParser(remove_comments=True)

# Protocol instance used to deserialize schema documents and default values.
_prot = XmlDocument()


class _Schema(object):
    """Per-namespace container for what the parser has resolved so far."""

    def __init__(self):
        # type name -> resolved spyne type
        self.types = {}
        # element name -> schema element definition
        self.elements = {}
        # namespaces imported by this schema
        self.imports = set()


# FIXME: Needs to emit delayed assignment of recursive structures instead of
# lousy ellipses.
def Thier_repr(with_ns=False):
    """Template for ``hier_repr``, a ``repr`` variant that shows spyne
    ``ComplexModel``s in a hierarchical format.

    :param with_ns: either bool or a callable that returns the class name
        as string. ``False`` shows the bare type name, ``True`` shows
        ``{namespace}name``; a callable is invoked as
        ``with_ns(namespace, type_name)``.
    :returns: the ``hier_repr`` function built for the given naming policy.
    """

    if with_ns is False:
        def get_class_name(c):
            return c.get_type_name()

    elif with_ns is True or with_ns == 1:
        def get_class_name(c):
            return "{%s}%s" % (c.get_namespace(), c.get_type_name())

    else:
        def get_class_name(c):
            return with_ns(c.get_namespace(), c.get_type_name())

    def hier_repr(inst, i0=0, I=' ', tags=None):
        """Return a hierarchical representation of ``inst``.

        :param inst: the object to represent. Objects whose class has no
            ``_type_info`` are passed to the builtin ``repr``.
        :param i0: current indentation level.
        :param I: the string repeated once per indentation level.
        :param tags: set of ``id()`` values of the instances already
            rendered; an instance seen again is shown by class name only.
        """
        if tags is None:
            tags = set()

        cls = inst.__class__
        if not hasattr(cls, '_type_info'):
            return repr(inst)

        clsid = "%s" % (get_class_name(cls))
        if id(inst) in tags:
            # Already rendered higher up: emit the name only so that
            # recursive structures terminate.
            return clsid

        tags.add(id(inst))

        i1 = i0 + 1
        i2 = i1 + 1

        retval = [clsid, '(']

        xtba = cls.Attributes._xml_tag_body_as
        if xtba is not None:
            # Only the first entry is consulted.
            xtba_key, _ = next(iter(xtba))
            if xtba_key is not None:
                value = getattr(inst, xtba_key, None)
                retval.append("%s,\n" % hier_repr(value, i1, I, tags))
            else:
                retval.append('\n')
        else:
            retval.append('\n')

        for k, v in inst.get_flat_type_info(cls).items():
            value = getattr(inst, k, None)
            if (issubclass(v, Array) or v.Attributes.max_occurs > 1) and \
                    value is not None:
                retval.append("%s%s=[\n" % (I * i1, k))
                for subval in value:
                    retval.append("%s%s,\n" % (I * i2,
                                               hier_repr(subval, i2, I, tags)))
                retval.append('%s],\n' % (I * i1))

            elif issubclass(v, XmlData):
                pass

            else:
                retval.append("%s%s=%s,\n" % (I * i1, k,
                                              hier_repr(value, i1, I, tags)))

        retval.append('%s)' % (I * i0))
        return ''.join(retval)

    return hier_repr

SchemaBase.__repr__ = Thier_repr()

hier_repr = Thier_repr()
hier_repr_ns = Thier_repr(with_ns=True)


class XmlSchemaParser(object):
    """Parses Xml Schema documents into spyne types.

    Results are accumulated in ``self.retval``, a dict that maps target
    namespaces to ``_Schema`` instances.
    """

    def __init__(self, files, base_dir=None, repr_=Thier_repr(with_ns=False),
                 skip_errors=False):
        """
        :param files: mapping of namespace to schema file name; looked up
            when an ``<import>`` is processed.
        :param base_dir: directory against which ``<include>`` locations are
            resolved. Defaults to the current working directory.
        :param repr_: function installed as ``__repr__`` on generated
            classes, or ``None`` to leave it alone.
        :param skip_errors: when true, unresolved definitions left at the
            end of parsing do not raise.
        """
        self.retval = {}
        self.indent = 0
        self.files = files
        self.base_dir = base_dir
        self.repr = repr_
        if self.base_dir is None:
            self.base_dir = os.getcwd()
        self.parent = None
        self.children = None
        self.nsmap = None
        self.schema = None
        self.prefmap = None

        self.tns = None
        self.pending_elements = None
        self.pending_types = None
        self.skip_errors = skip_errors

        self.pending_simple_types = defaultdict(set)

    def clone(self, indent=0, base_dir=None):
        """Return a shallow copy of this parser registered as a child.

        The copy shares every mutable attribute (``retval``, ``nsmap``,
        ``schema``, ...) with ``self`` until it rebinds them. Children are
        always registered on the top-most parser.

        :param indent: added to the logging indentation of the copy.
        :param base_dir: if given, replaces the copy's base directory.
        """
        retval = copy(self)

        if retval.parent is None:
            retval.parent = self
            if self.children is None:
                self.children = [retval]
            else:
                self.children.append(retval)

        else:
            retval.parent.children.append(retval)

        retval.indent = self.indent + indent
        if base_dir is not None:
            retval.base_dir = base_dir

        return retval

    def debug0(self, s, *args, **kwargs):
        """Log ``s`` at debug level, at the current indentation."""
        logger.debug("%s%s" % (" " * self.indent, s), *args, **kwargs)

    def debug1(self, s, *args, **kwargs):
        """Log ``s`` at debug level, one level deeper than ``debug0``."""
        logger.debug("%s%s" % (" " * (self.indent + 1), s), *args, **kwargs)

    def debug2(self, s, *args, **kwargs):
        """Log ``s`` at debug level, two levels deeper than ``debug0``."""
        logger.debug("%s%s" % (" " * (self.indent + 2), s), *args, **kwargs)

    def parse_schema_file(self, file_name):
        """Read ``file_name`` and hand its root element to ``parse_schema``."""
        # Close the file deterministically instead of relying on the GC.
        with open(file_name, 'rb') as f:
            data = f.read()

        elt = etree.fromstring(data, parser=PARSER)
        return self.parse_schema(elt)

    def process_includes(self, include):
        """Merge the schema referenced by an ``<include>`` into
        ``self.schema``.

        The location is resolved against ``self.base_dir``. Includes found
        inside the included document are processed recursively, each one
        relative to the directory of the document that contains it.
        """
        file_name = include.schema_location
        if file_name is None:
            return

        self.debug1("including %s %s", self.base_dir, file_name)

        file_name = abspath(join(self.base_dir, file_name))
        with open(file_name, 'rb') as f:
            data = f.read()

        elt = etree.fromstring(data, parser=PARSER)
        self.nsmap.update(elt.nsmap)
        self.prefmap = dict([(v, k) for k, v in self.nsmap.items()])

        sub_schema = _prot.from_element(None, XmlSchema10, elt)
        if sub_schema.includes:
            base_dir = dirname(file_name)
            for inc in sub_schema.includes:
                child_ctx = self.clone(base_dir=base_dir)
                # Recurse on the child context: it carries the included
                # file's directory, which is what nested relative locations
                # must be resolved against. The child shares ``schema`` and
                # ``nsmap`` with us, so its results land in the same place.
                child_ctx.process_includes(inc)
                self.nsmap.update(child_ctx.nsmap)
                self.prefmap = dict([(v, k) for k, v in self.nsmap.items()])

        for attr in ('imports', 'simple_types', 'complex_types', 'elements'):
            sub = getattr(sub_schema, attr)
            if sub is None:
                sub = []

            own = getattr(self.schema, attr)
            if own is None:
                own = []

            own.extend(sub)

            setattr(self.schema, attr, own)

    def process_simple_type_list(self, s, name=None):
        """Build an ``Array`` type from a ``<simpleType>`` with a ``<list>``.

        Returns ``None`` when the definition has no item type, or when the
        item type is not known yet; in the latter case the definition is
        queued in ``self.pending_simple_types``.
        """
        item_type = s.list.item_type
        if item_type is None:
            self.debug1("skipping simple type: %s because its list itemType "
                        "could not be found", name)
            return

        base = self.get_type(item_type)
        if base is None:
            self.pending_simple_types[self.get_name(item_type)].add((s, name))
            self.debug1("pending simple type list: %s "
                        "because of unseen base %s", name, item_type)

            return

        self.debug1("adding simple type list: %s", name)
        retval = Array(base, serialize_as='sd-list')  # FIXME: to be implemented
        retval.__type_name__ = name
        retval.__namespace__ = self.tns

        assert retval.get_type_name() is not retval.Empty
        return retval

    def process_simple_type_restriction(self, s, name=None):
        """Build a customized type from a ``<simpleType>`` with a
        ``<restriction>``.

        The enumeration, maxLength, minLength and pattern facets are mapped
        to the ``values``, ``max_len``, ``min_len`` and ``pattern``
        customization arguments. Returns ``None`` when the definition has no
        base, or when the base is not known yet; in the latter case the
        definition is queued in ``self.pending_simple_types``.
        """
        base_name = s.restriction.base
        if base_name is None:
            self.debug1("skipping simple type: %s because its restriction base "
                        "could not be found", name)
            return

        base = self.get_type(base_name)
        if base is None:
            self.pending_simple_types[self.get_name(base_name)].add((s, name))
            self.debug1("pending simple type: %s because of unseen base %s",
                        name, base_name)

            return

        self.debug1("adding simple type: %s", name)

        kwargs = {}
        restriction = s.restriction
        if restriction.enumeration:
            kwargs['values'] = [e.value for e in restriction.enumeration]

        if restriction.max_length and restriction.max_length.value:
            kwargs['max_len'] = int(restriction.max_length.value)

        if restriction.min_length and restriction.min_length.value:
            kwargs['min_len'] = int(restriction.min_length.value)

        if restriction.pattern and restriction.pattern.value:
            kwargs['pattern'] = restriction.pattern.value

        retval = base.customize(**kwargs)
        retval.__type_name__ = name
        retval.__namespace__ = self.tns
        if retval.__orig__ is None:
            retval.__orig__ = base

        if retval.__extends__ is None:
            retval.__extends__ = base

        assert retval.get_type_name() is not retval.Empty
        return retval

    def process_simple_type_union(self, s, name=None):
        """``<union>`` is not implemented: log it and return ``None``."""
        self.debug1("skipping simple type: %s because <union> is not "
                    "implemented", name)

    def process_simple_type(self, s, name=None):
        """Returns the simple Spyne type from `<simpleType>` tag.

        On success the type is registered under ``s.name`` in the current
        namespace, and the simple types that were waiting for it are
        processed in turn. Returns ``None`` when the type could not be
        built.
        """
        retval = None

        if name is None:
            name = s.name

        if s.list is not None:
            retval = self.process_simple_type_list(s, name)

        elif s.union is not None:
            retval = self.process_simple_type_union(s, name)

        elif s.restriction is not None:
            retval = self.process_simple_type_restriction(s, name)

        if retval is None:
            self.debug1("skipping simple type: %s", name)
            return

        self.retval[self.tns].types[s.name] = retval

        key = self.get_name(name)
        dependents = self.pending_simple_types[key]
        # Iterate over a copy: resolved entries are removed from the set
        # below.
        for dep, dep_name in set(dependents):
            st = self.process_simple_type(dep, dep_name)
            if st is not None:
                self.retval[self.tns].types[dep.name] = st

                self.debug2("added back simple type: %s", dep.name)
                dependents.remove((dep, dep_name))

        if len(dependents) == 0:
            del self.pending_simple_types[key]

        return retval

    def process_schema_element(self, e):
        """Register a top-level ``<element>``, or queue it in
        ``self.pending_elements`` when its type is not resolvable yet.
        Unnamed elements are ignored.
        """
        if e.name is None:
            return

        self.debug1("adding element: %s", e.name)

        t = self.get_type(e.type)
        if t:
            self.pending_elements.pop(e.name, None)
            self.retval[self.tns].elements[e.name] = e

        else:
            self.pending_elements[e.name] = e

    def process_attribute(self, a):
        """Turn an ``<attribute>`` definition into a ``(name, type)`` pair.

        :raises ValueError: when the referenced or declared type cannot be
            found, or when both ``type`` and ``simple_type`` are given.
        :raises Exception: when the attribute has neither ``ref``, ``type``
            nor ``simple_type``.
        """
        if a.ref is not None:
            t = self.get_type(a.ref)
            if t is None:
                # Fail like the other branches instead of crashing with an
                # AttributeError on None below.
                raise ValueError(a, 'ref %r not found' % a.ref)

            return t.type.get_type_name(), t

        if a.type is not None and a.simple_type is not None:
            raise ValueError(a, "Both type and simple_type are defined.")

        elif a.type is not None:
            t = self.get_type(a.type)

            if t is None:
                raise ValueError(a, 'type %r not found' % a.type)

        elif a.simple_type is not None:
            t = self.process_simple_type(a.simple_type, a.name)

            if t is None:
                raise ValueError(a, 'simple type %r not found' % a.simple_type)

        else:
            raise Exception("dunno attr")

        kwargs = {}
        if a.default is not None:
            kwargs['default'] = _prot.from_unicode(t, a.default)

        if len(kwargs) > 0:
            t = t.customize(**kwargs)
            self.debug2("t = t.customize(**%r)", kwargs)
        return a.name, XmlAttribute(t)

    def process_complex_type(self, c):
        """Create the spyne class for a ``<complexType>``, or extend the one
        already registered under the same name.

        Members whose types cannot be resolved yet are recorded in
        ``self.pending_types`` so that the complex type gets processed again
        by ``process_pending``.

        :returns: the new or updated class.
        """
        def process_type(tn, name, wrapper=None, element=None, attribute=None):
            # Resolve ``tn`` and append ``(name, type)`` to ``ti``, or mark
            # the member as pending when the type is unknown. At most one
            # of ``element`` / ``attribute`` is passed by the callers below.
            if wrapper is not None:
                assert issubclass(wrapper, XmlModifier), wrapper

            t = self.get_type(tn)
            key = (c.name, name)
            if t is None:
                self.pending_types[key] = c
                self.debug2("not found: %r(%s)", key, tn)
                return

            if key in self.pending_types:
                del self.pending_types[key]

            assert name is not None, (key, element)

            kwargs = {}
            if element is not None:
                if element.min_occurs != "0":  # spyne default
                    kwargs['min_occurs'] = int(element.min_occurs)

                if element.max_occurs == "unbounded":
                    kwargs['max_occurs'] = element.max_occurs
                elif element.max_occurs != "1":
                    kwargs['max_occurs'] = int(element.max_occurs)

                if element.nillable != True:  # spyne default
                    kwargs['nillable'] = element.nillable

                if element.default is not None:
                    kwargs['default'] = _prot.from_unicode(t, element.default)

            if attribute is not None and attribute.default is not None:
                kwargs['default'] = _prot.from_unicode(t, attribute.default)

            if kwargs:
                t = t.customize(**kwargs)

            ti.append((name, t if wrapper is None else wrapper(t)))
            self.debug2(" found: %r(%s), c: %r", key, tn, kwargs)

        def process_element(e):
            if e.ref is not None:
                tn = e.ref
                name = e.ref.split(":", 1)[-1]

            elif e.name is not None:
                tn = e.type
                name = e.name

                if tn is None:
                    # According to http://www.w3.org/TR/2004/REC-xmlschema-1-20041028/structures.html#element-element
                    # this means this element is now considered to be a
                    # http://www.w3.org/TR/2004/REC-xmlschema-1-20041028/structures.html#ur-type-itself
                    self.debug2(" skipped: %s ur-type", e.name)
                    return

            else:
                raise Exception("dunno")

            process_type(tn, name, element=e)

        ti = []
        base = ComplexModelBase
        if c.name in self.retval[self.tns].types:
            self.debug1("modifying existing %r", c.name)
        else:
            self.debug1("adding complex type: %s", c.name)

        if c.sequence is not None:
            if c.sequence.elements is not None:
                for e in c.sequence.elements:
                    process_element(e)

            if c.sequence.choices is not None:
                for ch in c.sequence.choices:
                    if ch.elements is not None:
                        for e in ch.elements:
                            process_element(e)

        if c.choice is not None:
            if c.choice.elements is not None:
                for e in c.choice.elements:
                    process_element(e)

        if c.attributes is not None:
            for a in c.attributes:
                if a.name is None:
                    continue
                if a.type is None:
                    continue

                process_type(a.type, a.name, XmlAttribute, attribute=a)

        if c.simple_content is not None:
            sc = c.simple_content
            ext = sc.extension
            restr = sc.restriction

            if ext is not None:
                base_name = ext.base
                b = self.get_type(ext.base)

                if ext.attributes is not None:
                    for a in ext.attributes:
                        ti.append(self.process_attribute(a))

            elif restr is not None:
                base_name = restr.base
                b = self.get_type(restr.base)

                if restr.attributes is not None:
                    for a in restr.attributes:
                        ti.append(self.process_attribute(a))

            else:
                raise Exception("Invalid simpleContent tag: %r" % (sc,))

            # ``b`` is None when the base is not resolved yet; in that case
            # process_type() records the member as pending instead of
            # letting issubclass() fail on a non-class.
            if b is not None and issubclass(b, ComplexModelBase):
                base = b
            else:
                process_type(base_name, "_data", XmlData)

        if c.name in self.retval[self.tns].types:
            r = self.retval[self.tns].types[c.name]
            r._type_info.update(ti)

        else:
            cls_dict = odict({
                '__type_name__': c.name,
                '__namespace__': self.tns,
                '_type_info': ti,
            })
            if self.repr is not None:
                cls_dict['__repr__'] = self.repr

            r = ComplexModelMeta(str(c.name), (base,), cls_dict)
            self.retval[self.tns].types[c.name] = r

        return r

    def get_name(self, tn):
        """Split a type name into a ``(namespace, local name)`` pair.

        Accepts ``{namespace}name``, ``prefix:name`` (the prefix is looked
        up in ``self.nsmap``) and bare names, which go to the default
        namespace of ``self.nsmap`` when there is one and to ``self.tns``
        otherwise.
        """
        if tn.startswith("{"):
            ns, qn = tn[1:].split('}', 1)

        elif ":" in tn:
            ns, qn = tn.split(":", 1)
            ns = self.nsmap[ns]

        elif None in self.nsmap:
            ns, qn = self.nsmap[None], tn

        else:
            ns, qn = self.tns, tn

        return ns, qn

    def get_type(self, tn):
        """Resolve a type name to a spyne type.

        Types parsed so far are searched first, then the type of an element
        with the same name, and finally ``TYPE_MAP``.

        :returns: ``Null`` when ``tn`` is ``None``; the resolved type
            otherwise, or ``None`` when it is not known.
        """
        if tn is None:
            return Null

        ns, qn = self.get_name(tn)

        ti = self.retval.get(ns)
        if ti is not None:
            t = ti.types.get(qn)
            if t:
                return t

            e = ti.elements.get(qn)
            if e:
                if e.type and ":" in e.type:
                    return self.get_type(e.type)

                retval = self.get_type("{%s}%s" % (ns, e.type))
                if retval is None and None in self.nsmap:
                    retval = self.get_type("{%s}%s" %
                                           (self.nsmap[None], e.type))
                return retval

        return TYPE_MAP.get("{%s}%s" % (ns, qn))

    def process_pending(self):
        """Retry the complex types and elements that could not be resolved
        on a previous pass.
        """
        # Both loops walk a snapshot: processing an entry adds to or deletes
        # from the pending dicts, and mutating a dict while iterating over
        # it raises RuntimeError.
        self.debug0("6 %s processing pending complex_types", B(self.tns))
        for c in list(self.pending_types.values()):
            self.process_complex_type(c)

        self.debug0("7 %s processing pending elements", YEL(self.tns))
        for e in list(self.pending_elements.values()):
            self.process_schema_element(e)

    def print_pending(self, fail=False):
        """Log whatever is still unresolved in this context.

        :param fail: when true and something is pending, configure logging
            at debug level and raise after the report is written.
        :raises Exception: when ``fail`` is true and something is pending.
        """
        ptt_pending = any(
                       len(v) > 0 for v in self.pending_simple_types.values())
        if not (self.pending_elements or self.pending_types or ptt_pending):
            return

        if fail:
            logging.basicConfig(level=logging.DEBUG)
        self.debug0("%" * 50)
        self.debug0(self.tns)
        self.debug0("")

        self.debug0("elements")
        self.debug0(pformat(self.pending_elements))
        self.debug0("")

        self.debug0("simple types")
        self.debug0(pformat(self.pending_simple_types))
        self.debug0("%" * 50)

        self.debug0("complex types")
        self.debug0(pformat(self.pending_types))
        self.debug0("%" * 50)

        if fail:
            raise Exception("there are still unresolved elements")

    def parse_schema(self, elt):
        """Parse the ``<schema>`` element ``elt``.

        Includes, imports, simple types, attributes, complex types and
        elements are processed in that order. On the top-most parser, the
        pending definitions of every context are retried at the end.

        :returns: ``self.retval``, or ``None`` when the target namespace of
            ``elt`` was already parsed.
        :raises Exception: when unresolved definitions remain on the
            top-most parser and ``skip_errors`` is false.
        """
        self.nsmap = dict(elt.nsmap.items())
        self.prefmap = dict([(v, k) for k, v in self.nsmap.items()])
        self.schema = schema = _prot.from_element(self, XmlSchema10, elt)

        self.pending_types = {}
        self.pending_elements = {}

        self.tns = tns = schema.target_namespace
        if self.tns is None:
            self.tns = tns = '__no_ns__'
        if tns in self.retval:
            return
        self.retval[tns] = _Schema()

        self.debug0("1 %s processing includes", MAG(tns))
        if schema.includes:
            for include in schema.includes:
                self.process_includes(include)

        # Index the definitions by name, keeping document order.
        if schema.elements:
            schema.elements = odict([(e.name, e) for e in schema.elements])
        if schema.complex_types:
            schema.complex_types = odict([(c.name, c)
                                          for c in schema.complex_types])
        if schema.simple_types:
            schema.simple_types = odict([(s.name, s)
                                         for s in schema.simple_types])
        if schema.attributes:
            schema.attributes = odict([(a.name, a) for a in schema.attributes])

        self.debug0("2 %s processing imports", R(tns))
        if schema.imports:
            for imp in schema.imports:
                if imp.namespace not in self.retval:
                    self.debug1("%s importing %s", tns, imp.namespace)
                    fname = self.files[imp.namespace]
                    self.clone(2, dirname(fname)).parse_schema_file(fname)
                    self.retval[tns].imports.add(imp.namespace)

        self.debug0("3 %s processing simple_types", G(tns))
        if schema.simple_types:
            for s in schema.simple_types.values():
                self.process_simple_type(s)

            # no simple types should have been left behind.
            assert sum((len(v) for v in self.pending_simple_types.values())) == 0, \
                self.pending_simple_types.values()

        self.debug0("4 %s processing attributes", G(tns))
        if schema.attributes:
            for s in schema.attributes.values():
                n, t = self.process_attribute(s)
                self.retval[self.tns].types[n] = t

        self.debug0("5 %s processing complex_types", B(tns))
        if schema.complex_types:
            for c in schema.complex_types.values():
                self.process_complex_type(c)

        self.debug0("6 %s processing elements", YEL(tns))
        if schema.elements:
            for e in schema.elements.values():
                self.process_schema_element(e)

        self.process_pending()

        if self.parent is None:  # for the top-most schema
            if self.children is not None:  # if it uses <include> or <import>
                # This is needed for schemas with circular imports
                for c in chain([self], self.children):
                    c.print_pending()
                self.debug0('')

                # FIXME: should put this in a while loop that loops until no
                # changes occur
                for c in chain([self], self.children):
                    c.process_pending()
                for c in chain([self], self.children):
                    c.process_pending()
                self.debug0('')

                for c in chain([self], self.children):
                    c.print_pending(fail=(not self.skip_errors))

        return self.retval