1
2#
3# spyne - Copyright (C) Spyne contributors.
4#
5# This library is free software; you can redistribute it and/or
6# modify it under the terms of the GNU Lesser General Public
7# License as published by the Free Software Foundation; either
8# version 2.1 of the License, or (at your option) any later version.
9#
10# This library is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13# Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU Lesser General Public
16# License along with this library; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301
18#
19
20# To see the list of xml schema builtins recognized by this parser, run defn.py
21# in this package.
22
# This module is EXPERIMENTAL. Only a subset of the XML Schema standard is
# implemented.
25#
26
27import logging
28logger = logging.getLogger(__name__)
29
30import os
31
32from copy import copy
33from pprint import pformat
34from itertools import chain
35from collections import defaultdict
36from os.path import dirname, abspath, join
37
38from lxml import etree
39
40from spyne.util import memoize
41from spyne.util.odict import odict
42
43from spyne.model import Null, XmlData, XmlAttribute, Array, ComplexModelBase, \
44    ComplexModelMeta
45from spyne.model.complex import XmlModifier
46
47from spyne.protocol.xml import XmlDocument
48
49from spyne.interface.xml_schema.defn import TYPE_MAP
50from spyne.interface.xml_schema.defn import SchemaBase
51from spyne.interface.xml_schema.defn import XmlSchema10
52
53from spyne.util.color import R, G, B, MAG, YEL
54
# Shared lxml parser used for every schema document read by this module;
# XML comments are dropped while parsing.
PARSER = etree.XMLParser(remove_comments=True)

# Module-level XmlDocument instance used to deserialize schema elements
# (``from_element``) and default-value strings (``from_unicode``).
_prot = XmlDocument()
58
59
60class _Schema(object):
61    def __init__(self):
62        self.types = {}
63        self.elements = {}
64        self.imports = set()
65
66
67# FIXME: Needs to emit delayed assignment of recursive structures instead of
68# lousy ellipses.
def Thier_repr(with_ns=False):
    """Build a ``hier_repr`` function: a ``repr`` variant that renders spyne
    ``ComplexModel`` instances as an indented hierarchy.

    :param with_ns: ``False`` labels objects with their bare type name,
        ``True`` labels them as ``{namespace}type_name``, and any other value
        is called as ``with_ns(namespace, type_name)`` and must return the
        label as a string.
    :returns: the ``hier_repr(inst, i0=0, I='  ', tags=None)`` function.
    """

    def _bare_name(c):
        return c.get_type_name()

    def _qualified_name(c):
        return "{%s}%s" % (c.get_namespace(), c.get_type_name())

    def _custom_name(c):
        return with_ns(c.get_namespace(), c.get_type_name())

    # Pick the labelling strategy once, when the template is instantiated.
    if with_ns is False:
        get_class_name = _bare_name
    elif with_ns is True or with_ns == 1:
        get_class_name = _qualified_name
    else:
        get_class_name = _custom_name

    def hier_repr(inst, i0=0, I='  ', tags=None):
        # ``tags`` collects the id() of every instance already rendered so
        # that recursive structures print only their label the second time.
        seen = set() if tags is None else tags

        cls = inst.__class__
        if not hasattr(cls, '_type_info'):
            # Not a spyne model class: defer to the regular repr.
            return repr(inst)

        label = "%s" % (get_class_name(cls))
        inst_id = id(inst)
        if inst_id in seen:
            return label
        seen.add(inst_id)

        member_depth = i0 + 1
        item_depth = member_depth + 1

        parts = [label, '(']

        # When the class declares an xml tag body member, its value is
        # rendered first, on the same line as the opening parenthesis.
        body_as = cls.Attributes._xml_tag_body_as
        body_key = None
        if body_as is not None:
            body_key, _body_type = next(iter(body_as))

        if body_key is None:
            parts.append('\n')
        else:
            body_value = getattr(inst, body_key, None)
            parts.append("%s,\n" % hier_repr(body_value, member_depth, I, seen))

        for k, v in inst.get_flat_type_info(cls).items():
            value = getattr(inst, k, None)
            multi_valued = issubclass(v, Array) or v.Attributes.max_occurs > 1

            if multi_valued and value is not None:
                # Sequences are rendered one item per line between brackets.
                parts.append("%s%s=[\n" % (I * member_depth, k))
                parts.extend(
                    "%s%s,\n" % (I * item_depth,
                                 hier_repr(item, item_depth, I, seen))
                    for item in value)
                parts.append('%s],\n' % (I * member_depth))
                continue

            if issubclass(v, XmlData):
                # XmlData members are left out of the member listing.
                continue

            parts.append("%s%s=%s,\n" % (I * member_depth, k,
                                     hier_repr(value, member_depth, I, seen)))

        parts.append('%s)' % (I * i0))
        return ''.join(parts)

    return hier_repr
141
# Install the hierarchical repr (bare type names) on SchemaBase.
SchemaBase.__repr__ = Thier_repr()

# Ready-made formatters: ``hier_repr`` labels objects with the bare type name,
# ``hier_repr_ns`` labels them as ``{namespace}type_name``.
hier_repr = Thier_repr()
hier_repr_ns = Thier_repr(with_ns=True)
146
147
148class XmlSchemaParser(object):
149    def __init__(self, files, base_dir=None, repr_=Thier_repr(with_ns=False),
150                                                             skip_errors=False):
151        self.retval = {}
152        self.indent = 0
153        self.files = files
154        self.base_dir = base_dir
155        self.repr = repr_
156        if self.base_dir is None:
157            self.base_dir = os.getcwd()
158        self.parent = None
159        self.children = None
160        self.nsmap = None
161        self.schema = None
162        self.prefmap = None
163
164        self.tns = None
165        self.pending_elements = None
166        self.pending_types = None
167        self.skip_errors = skip_errors
168
169        self.pending_simple_types = defaultdict(set)
170
171    def clone(self, indent=0, base_dir=None):
172        retval = copy(self)
173
174        if retval.parent is None:
175            retval.parent = self
176            if self.children is None:
177                self.children = [retval]
178            else:
179                self.children.append(retval)
180
181        else:
182            retval.parent.children.append(retval)
183
184        retval.indent = self.indent + indent
185        if base_dir is not None:
186            retval.base_dir = base_dir
187
188        return retval
189
190    def debug0(self, s, *args, **kwargs):
191        logger.debug("%s%s" % ("  " *  self.indent, s), *args, **kwargs)
192
193    def debug1(self, s, *args, **kwargs):
194        logger.debug("%s%s" % ("  " * (self.indent + 1), s), *args, **kwargs)
195
196    def debug2(self, s, *args, **kwargs):
197        logger.debug("%s%s" % ("  " * (self.indent + 2), s), *args, **kwargs)
198
199    def parse_schema_file(self, file_name):
200        elt = etree.fromstring(open(file_name, 'rb').read(), parser=PARSER)
201        return self.parse_schema(elt)
202
203    def process_includes(self, include):
204        file_name = include.schema_location
205        if file_name is None:
206            return
207
208        self.debug1("including %s %s", self.base_dir, file_name)
209
210        file_name = abspath(join(self.base_dir, file_name))
211        data = open(file_name, 'rb').read()
212        elt = etree.fromstring(data, parser=PARSER)
213        self.nsmap.update(elt.nsmap)
214        self.prefmap = dict([(v, k) for k, v in self.nsmap.items()])
215
216        sub_schema = _prot.from_element(None, XmlSchema10, elt)
217        if sub_schema.includes:
218            for inc in sub_schema.includes:
219                base_dir = dirname(file_name)
220                child_ctx = self.clone(base_dir=base_dir)
221                self.process_includes(inc)
222                self.nsmap.update(child_ctx.nsmap)
223                self.prefmap = dict([(v, k) for k, v in self.nsmap.items()])
224
225        for attr in ('imports', 'simple_types', 'complex_types', 'elements'):
226            sub = getattr(sub_schema, attr)
227            if sub is None:
228                sub = []
229
230            own = getattr(self.schema, attr)
231            if own is None:
232                own = []
233
234            own.extend(sub)
235
236            setattr(self.schema, attr, own)
237
238    def process_simple_type_list(self, s, name=None):
239        item_type = s.list.item_type
240        if item_type is None:
241            self.debug1("skipping simple type: %s because its list itemType "
242                        "could not be found", name)
243            return
244
245        base = self.get_type(item_type)
246        if base is None:
247            self.pending_simple_types[self.get_name(item_type)].add((s, name))
248            self.debug1("pending  simple type list: %s "
249                                   "because of unseen base %s", name, item_type)
250
251            return
252
253        self.debug1("adding   simple type list: %s", name)
254        retval = Array(base, serialize_as='sd-list')  # FIXME: to be implemented
255        retval.__type_name__ = name
256        retval.__namespace__ = self.tns
257
258        assert not retval.get_type_name() is retval.Empty
259        return retval
260
261    def process_simple_type_restriction(self, s, name=None):
262        base_name = s.restriction.base
263        if base_name is None:
264            self.debug1("skipping simple type: %s because its restriction base "
265                        "could not be found", name)
266            return
267
268        base = self.get_type(base_name)
269        if base is None:
270            self.pending_simple_types[self.get_name(base_name)].add((s, name))
271            self.debug1("pending  simple type: %s because of unseen base %s",
272                                                                name, base_name)
273
274            return
275
276        self.debug1("adding   simple type: %s", name)
277
278        kwargs = {}
279        restriction = s.restriction
280        if restriction.enumeration:
281            kwargs['values'] = [e.value for e in restriction.enumeration]
282
283        if restriction.max_length:
284            if restriction.max_length.value:
285                kwargs['max_len'] = int(restriction.max_length.value)
286
287        if restriction.min_length:
288            if restriction.min_length.value:
289                kwargs['min_len'] = int(restriction.min_length.value)
290
291        if restriction.pattern:
292            if restriction.pattern.value:
293                kwargs['pattern'] = restriction.pattern.value
294
295        retval = base.customize(**kwargs)
296        retval.__type_name__ = name
297        retval.__namespace__ = self.tns
298        if retval.__orig__ is None:
299            retval.__orig__ = base
300
301        if retval.__extends__ is None:
302            retval.__extends__ = base
303
304        assert not retval.get_type_name() is retval.Empty
305        return retval
306
307    def process_simple_type_union(self, s, name=None):
308        self.debug1("skipping simple type: %s because <union> is not "
309                    "implemented", name)
310
311    def process_simple_type(self, s, name=None):
312        """Returns the simple Spyne type from `<simpleType>` tag."""
313        retval = None
314
315        if name is None:
316            name = s.name
317
318        if s.list is not None:
319            retval = self.process_simple_type_list(s, name)
320
321        elif s.union is not None:
322            retval = self.process_simple_type_union(s, name)
323
324        elif s.restriction is not None:
325            retval = self.process_simple_type_restriction(s, name)
326
327        if retval is None:
328            self.debug1("skipping simple type: %s", name)
329            return
330
331        self.retval[self.tns].types[s.name] = retval
332
333        key = self.get_name(name)
334        dependents = self.pending_simple_types[key]
335        for s, name in set(dependents):
336            st = self.process_simple_type(s, name)
337            if st is not None:
338                self.retval[self.tns].types[s.name] = st
339
340                self.debug2("added back simple type: %s", s.name)
341                dependents.remove((s, name))
342
343        if len(dependents) == 0:
344            del self.pending_simple_types[key]
345
346        return retval
347
348    def process_schema_element(self, e):
349        if e.name is None:
350            return
351
352        self.debug1("adding element: %s", e.name)
353
354        t = self.get_type(e.type)
355        if t:
356            if e.name in self.pending_elements:
357                del self.pending_elements[e.name]
358
359            self.retval[self.tns].elements[e.name] = e
360
361        else:
362            self.pending_elements[e.name] = e
363
364    def process_attribute(self, a):
365        if a.ref is not None:
366            t = self.get_type(a.ref)
367            return t.type.get_type_name(), t
368
369        if a.type is not None and a.simple_type is not None:
370            raise ValueError(a, "Both type and simple_type are defined.")
371
372        elif a.type is not None:
373            t = self.get_type(a.type)
374
375            if t is None:
376                raise ValueError(a, 'type %r not found' % a.type)
377
378        elif a.simple_type is not None:
379            t = self.process_simple_type(a.simple_type, a.name)
380
381            if t is None:
382                raise ValueError(a, 'simple type %r not found' % a.simple_type)
383
384        else:
385            raise Exception("dunno attr")
386
387        kwargs = {}
388        if a.default is not None:
389            kwargs['default'] = _prot.from_unicode(t, a.default)
390
391        if len(kwargs) > 0:
392            t = t.customize(**kwargs)
393            self.debug2("t = t.customize(**%r)" % kwargs)
394        return a.name, XmlAttribute(t)
395
396    def process_complex_type(self, c):
397        def process_type(tn, name, wrapper=None, element=None, attribute=None):
398            if wrapper is None:
399                wrapper = lambda x: x
400            else:
401                assert issubclass(wrapper, XmlModifier), wrapper
402
403            t = self.get_type(tn)
404            key = (c.name, name)
405            if t is None:
406                self.pending_types[key] = c
407                self.debug2("not found: %r(%s)", key, tn)
408                return
409
410            if key in self.pending_types:
411                del self.pending_types[key]
412
413            assert name is not None, (key, e)
414
415            kwargs = {}
416            if element is not None:
417                if e.min_occurs != "0":  # spyne default
418                    kwargs['min_occurs'] = int(e.min_occurs)
419
420                if e.max_occurs == "unbounded":
421                    kwargs['max_occurs'] = e.max_occurs
422                elif e.max_occurs != "1":
423                    kwargs['max_occurs'] = int(e.max_occurs)
424
425                if e.nillable != True:  # spyne default
426                    kwargs['nillable'] = e.nillable
427
428                if e.default is not None:
429                    kwargs['default'] = _prot.from_unicode(t, e.default)
430
431                if len(kwargs) > 0:
432                    t = t.customize(**kwargs)
433
434            if attribute is not None:
435                if attribute.default is not None:
436                    kwargs['default'] = _prot.from_unicode(t, a.default)
437
438                if len(kwargs) > 0:
439                    t = t.customize(**kwargs)
440
441            ti.append( (name, wrapper(t)) )
442            self.debug2("    found: %r(%s), c: %r", key, tn, kwargs)
443
444        def process_element(e):
445            if e.ref is not None:
446                tn = e.ref
447                name = e.ref.split(":", 1)[-1]
448
449            elif e.name is not None:
450                tn = e.type
451                name = e.name
452
453                if tn is None:
454                    # According to http://www.w3.org/TR/2004/REC-xmlschema-1-20041028/structures.html#element-element
455                    # this means this element is now considered to be a
456                    # http://www.w3.org/TR/2004/REC-xmlschema-1-20041028/structures.html#ur-type-itself
457                    self.debug2("  skipped: %s ur-type", e.name)
458                    return
459
460            else:
461                raise Exception("dunno")
462
463            process_type(tn, name, element=e)
464
465        ti = []
466        base = ComplexModelBase
467        if c.name in self.retval[self.tns].types:
468            self.debug1("modifying existing %r", c.name)
469        else:
470            self.debug1("adding complex type: %s", c.name)
471
472        if c.sequence is not None:
473            if c.sequence.elements is not None:
474                for e in c.sequence.elements:
475                    process_element(e)
476
477            if c.sequence.choices is not None:
478                for ch in c.sequence.choices:
479                    if ch.elements is not None:
480                        for e in ch.elements:
481                            process_element(e)
482
483        if c.choice is not None:
484            if c.choice.elements is not None:
485                for e in c.choice.elements:
486                    process_element(e)
487
488        if c.attributes is not None:
489            for a in c.attributes:
490                if a.name is None:
491                    continue
492                if a.type is None:
493                    continue
494
495                process_type(a.type, a.name, XmlAttribute, attribute=a)
496
497        if c.simple_content is not None:
498            sc = c.simple_content
499            ext = sc.extension
500            restr = sc.restriction
501
502            if ext is not None:
503                base_name = ext.base
504                b = self.get_type(ext.base)
505
506                if ext.attributes is not None:
507                    for a in ext.attributes:
508                        ti.append(self.process_attribute(a))
509
510            elif restr is not None:
511                base_name = restr.base
512                b = self.get_type(restr.base)
513
514                if restr.attributes is not None:
515                    for a in restr.attributes:
516                        ti.append(self.process_attribute(a))
517
518            else:
519                raise Exception("Invalid simpleContent tag: %r", sc)
520
521            if issubclass(b, ComplexModelBase):
522                base = b
523            else:
524                process_type(base_name, "_data", XmlData)
525
526        if c.name in self.retval[self.tns].types:
527            r = self.retval[self.tns].types[c.name]
528            r._type_info.update(ti)
529
530        else:
531            cls_dict = odict({
532                '__type_name__': c.name,
533                '__namespace__': self.tns,
534                '_type_info': ti,
535            })
536            if self.repr is not None:
537                cls_dict['__repr__'] = self.repr
538
539            r = ComplexModelMeta(str(c.name), (base,), cls_dict)
540            self.retval[self.tns].types[c.name] = r
541
542        return r
543
544    def get_name(self, tn):
545        if tn.startswith("{"):
546            ns, qn = tn[1:].split('}', 1)
547
548        elif ":" in tn:
549            ns, qn = tn.split(":", 1)
550            ns = self.nsmap[ns]
551
552        else:
553            if None in self.nsmap:
554                ns, qn = self.nsmap[None], tn
555            else:
556                ns, qn = self.tns, tn
557
558        return ns, qn
559
560    def get_type(self, tn):
561        if tn is None:
562            return Null
563
564        ns, qn = self.get_name(tn)
565
566        ti = self.retval.get(ns)
567        if ti is not None:
568            t = ti.types.get(qn)
569            if t:
570                return t
571
572            e = ti.elements.get(qn)
573            if e:
574                if e.type and ":" in e.type:
575                    return self.get_type(e.type)
576                else:
577                    retval = self.get_type("{%s}%s" % (ns, e.type))
578                    if retval is None and None in self.nsmap:
579                        retval = self.get_type("{%s}%s" %
580                                                     (self.nsmap[None], e.type))
581                    return retval
582
583        return TYPE_MAP.get("{%s}%s" % (ns, qn))
584
585    def process_pending(self):
586        # process pending
587        self.debug0("6 %s processing pending complex_types", B(self.tns))
588        for (c_name, e_name), _v in list(self.pending_types.items()):
589            self.process_complex_type(_v)
590
591        self.debug0("7 %s processing pending elements", YEL(self.tns))
592        for _k, _v in self.pending_elements.items():
593            self.process_schema_element(_v)
594
595    def print_pending(self, fail=False):
596        ptt_pending = sum((len(v) for v in self.pending_simple_types.values())) > 0
597        if len(self.pending_elements) > 0 or len(self.pending_types) > 0 or \
598                                                                    ptt_pending:
599            if fail:
600                logging.basicConfig(level=logging.DEBUG)
601            self.debug0("%" * 50)
602            self.debug0(self.tns)
603            self.debug0("")
604
605            self.debug0("elements")
606            self.debug0(pformat(self.pending_elements))
607            self.debug0("")
608
609            self.debug0("simple types")
610            self.debug0(pformat(self.pending_simple_types))
611            self.debug0("%" * 50)
612
613            self.debug0("complex types")
614            self.debug0(pformat(self.pending_types))
615            self.debug0("%" * 50)
616
617            if fail:
618                raise Exception("there are still unresolved elements")
619
620    def parse_schema(self, elt):
621        self.nsmap = dict(elt.nsmap.items())
622        self.prefmap = dict([(v, k) for k, v in self.nsmap.items()])
623        self.schema = schema = _prot.from_element(self, XmlSchema10, elt)
624
625        self.pending_types = {}
626        self.pending_elements = {}
627
628        self.tns = tns = schema.target_namespace
629        if self.tns is None:
630            self.tns = tns = '__no_ns__'
631        if tns in self.retval:
632            return
633        self.retval[tns] = _Schema()
634
635        self.debug0("1 %s processing includes", MAG(tns))
636        if schema.includes:
637            for include in schema.includes:
638                self.process_includes(include)
639
640        if schema.elements:
641            schema.elements = odict([(e.name, e) for e in schema.elements])
642        if schema.complex_types:
643            schema.complex_types = odict([(c.name, c)
644                                                 for c in schema.complex_types])
645        if schema.simple_types:
646            schema.simple_types = odict([(s.name, s)
647                                                 for s in schema.simple_types])
648        if schema.attributes:
649            schema.attributes = odict([(a.name, a) for a in schema.attributes])
650
651        self.debug0("2 %s processing imports", R(tns))
652        if schema.imports:
653            for imp in schema.imports:
654                if not imp.namespace in self.retval:
655                    self.debug1("%s importing %s", tns, imp.namespace)
656                    fname = self.files[imp.namespace]
657                    self.clone(2, dirname(fname)).parse_schema_file(fname)
658                    self.retval[tns].imports.add(imp.namespace)
659
660        self.debug0("3 %s processing simple_types", G(tns))
661        if schema.simple_types:
662            for s in schema.simple_types.values():
663                self.process_simple_type(s)
664
665            # no simple types should have been left behind.
666            assert sum((len(v) for v in self.pending_simple_types.values())) == 0, \
667                                              self.pending_simple_types.values()
668
669        self.debug0("4 %s processing attributes", G(tns))
670        if schema.attributes:
671            for s in schema.attributes.values():
672                n, t = self.process_attribute(s)
673                self.retval[self.tns].types[n] = t
674
675        self.debug0("5 %s processing complex_types", B(tns))
676        if schema.complex_types:
677            for c in schema.complex_types.values():
678                self.process_complex_type(c)
679
680        self.debug0("6 %s processing elements", YEL(tns))
681        if schema.elements:
682            for e in schema.elements.values():
683                self.process_schema_element(e)
684
685        self.process_pending()
686
687        if self.parent is None:  # for the top-most schema
688            if self.children is not None:  # if it uses <include> or <import>
689                # This is needed for schemas with circular imports
690                for c in chain([self], self.children):
691                    c.print_pending()
692                self.debug0('')
693
694                # FIXME: should put this in a while loop that loops until no
695                # changes occur
696                for c in chain([self], self.children):
697                    c.process_pending()
698                for c in chain([self], self.children):
699                    c.process_pending()
700                self.debug0('')
701
702                for c in chain([self], self.children):
703                    c.print_pending(fail=(not self.skip_errors))
704
705        return self.retval
706