1# Copyright 2009 by Michiel de Hoon. All rights reserved.
2# This code is part of the Biopython distribution and governed by its
3# license. Please see the LICENSE file that should have been included
4# as part of this package.
5
6"""Code for calling and parsing ScanProsite from ExPASy."""
7
# urllib helpers used to build and send the ScanProsite query; they are
# implementation details of this module and are not intended for reuse.
9from urllib.request import urlopen
10from urllib.parse import urlencode
11
12from xml.sax import handler
13from xml.sax.expatreader import ExpatParser
14
15
class Record(list):
    """Container for the search results returned by ScanProsite.

    A Record is a plain list holding the search results returned by
    ScanProsite, extended with four data members: n_match, n_seq,
    capped, and warning. All four are None on a newly created Record.
    """

    def __init__(self):
        """Create an empty record with every data member set to None."""
        super().__init__()
        self.n_match = self.n_seq = self.capped = self.warning = None
30
31
def scan(seq="", mirror="https://www.expasy.org", output="xml", **keywords):
    """Run a ScanProsite search and return a handle to the raw results.

    Arguments:
     - seq:      The query sequence, or UniProtKB (Swiss-Prot,
                 TrEMBL) accession
     - mirror:   The ScanProsite mirror to be used
                 (default: https://www.expasy.org).
     - output:   Format of the search results
                 (default: xml)

    Any additional keyword argument is sent to the server as a further
    search parameter, except those whose value is None, which are left
    out of the request. The available parameters are described in the
    documentation for programmatic access to ScanProsite at
    https://www.expasy.org/tools/scanprosite/ScanPrositeREST.html

    The return value is the handle opened on the request URL. Results
    in the XML format can be turned into a Python object with the
    Bio.ExPASy.ScanProsite.read function.

    """
    query = {"seq": seq, "output": output}
    # Optional parameters explicitly set to None are not transmitted.
    query.update(
        (name, setting) for name, setting in keywords.items() if setting is not None
    )
    return urlopen(f"{mirror}/cgi-bin/prosite/PSScan.cgi?{urlencode(query)}")
61
62
def read(handle):
    """Parse ScanProsite search results from a handle into a Python object.

    The handle is run through this module's SAX Parser, with a
    ContentHandler collecting the results; the record built by that
    content handler is returned.
    """
    collector = ContentHandler()
    xml_parser = Parser()
    xml_parser.setContentHandler(collector)
    xml_parser.parse(handle)
    return collector.record
71
72
73# The classes below are considered private
74
75
class Parser(ExpatParser):
    """Process the result from a ScanProsite search (PRIVATE).

    This is an Expat SAX parser that additionally checks that the very
    first chunk of data it receives starts with an XML declaration, and
    raises a ValueError carrying that chunk if it does not.
    """

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # True until the first chunk of data has passed the check in feed().
        self.firsttime = True

    def feed(self, data, isFinal=0):
        """Raise an Error if plain text is received in the data.

        This is to show the Error messages returned by ScanProsite.

        Arguments:
         - data:    The next chunk of the document, as bytes or as str
                    (the latter when parsing from a text-mode handle).
         - isFinal: Passed through unchanged to ExpatParser.feed.

        Raises ValueError, with the offending chunk as its argument, if the
        first chunk fed to the parser does not start with "<?xml".
        """
        # Error messages returned by the ScanProsite server are formatted
        # as plain text instead of an XML document. To catch such error
        # messages, we override the feed method of the Expat parser.
        # The error message is (hopefully) contained in the data that was just
        # fed to the parser.
        if self.firsttime:
            # Compare against a marker of the same type as the data instead
            # of decoding: str has no decode() method, and decoding a 5-byte
            # slice can fail if it cuts a multi-byte character in half.
            xml_declaration = "<?xml" if isinstance(data, str) else b"<?xml"
            if data[:5] != xml_declaration:
                raise ValueError(data)
        self.firsttime = False
        return super().feed(data, isFinal)
99
100
class ContentHandler(handler.ContentHandler):
    """Process and fill in the records, results of the search (PRIVATE).

    The handler keeps a stack of the currently open element names. A
    top-level <matchset> element creates the record; every <match>
    directly inside it appends a new dictionary to the record; and every
    element directly inside a <match> is stored in that dictionary under
    the element's name.
    """

    # Child elements of <match> whose text is converted to int.
    integers = ("start", "stop")
    # Child elements of <match> known to hold text. Any other element is
    # stored as text as well, so this tuple is informational only.
    strings = (
        "sequence_ac",
        "sequence_id",
        "sequence_db",
        "signature_ac",
        "level",
        "level_tag",
    )

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # Names of the elements that are currently open, outermost first.
        self.element = []
        # Text collected so far for the most recently opened element.
        self.content = ""

    def startElement(self, name, attrs):
        """Define the beginning of a record and stores the search record."""
        self.element.append(name)
        self.content = ""
        if self.element == ["matchset"]:
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            self.record.append({})

    def endElement(self, name):
        """Define the end of the search record."""
        # Pop outside of the assert statement: assert statements are removed
        # when Python runs with -O, and the stack must still be unwound then.
        closed = self.element.pop()
        assert name == closed
        if self.element == ["matchset", "match"]:
            match = self.record[-1]
            if name in self.integers:
                match[name] = int(self.content)
            else:
                # Known string fields and unknown fields alike are kept as text
                match[name] = self.content

    def characters(self, content):
        """Store the record content."""
        # The text of one element may be delivered in several pieces.
        self.content += content
146