# Copyright 2009 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""Code for calling and parsing ScanProsite from ExPASy."""

# Standard-library helpers used to build the query URL and to fetch it.
from urllib.request import urlopen
from urllib.parse import urlencode

from xml.sax import handler
from xml.sax.expatreader import ExpatParser


class Record(list):
    """Represents search results returned by ScanProsite.

    This record is a list containing the search results returned by
    ScanProsite; each item is a dictionary describing one match. The record
    also contains the data members n_match, n_seq, capped, and warning.
    The parser in this module fills in n_match and n_seq from the attributes
    of the ``matchset`` element; all four members start out as None.
    """

    def __init__(self):
        """Initialize an empty record with all data members set to None."""
        super().__init__()
        self.n_match = None
        self.n_seq = None
        self.capped = None
        self.warning = None


def scan(seq="", mirror="https://www.expasy.org", output="xml", **keywords):
    """Execute a ScanProsite search.

    Arguments:
     - mirror:   The ScanProsite mirror to be used
                 (default: https://www.expasy.org).
     - seq:      The query sequence, or UniProtKB (Swiss-Prot,
                 TrEMBL) accession
     - output:   Format of the search results
                 (default: xml)

    Further search parameters can be passed as keywords; see the
    documentation for programmatic access to ScanProsite at
    https://www.expasy.org/tools/scanprosite/ScanPrositeREST.html
    for a description of such parameters. Keywords whose value is None
    are left out of the request.

    This function returns a handle to the search results returned by
    ScanProsite. Search results in the XML format can be parsed into a
    Python object, by using the Bio.ExPASy.ScanProsite.read function.

    """
    parameters = {"seq": seq, "output": output}
    # Only forward the optional parameters that were actually given a value.
    parameters.update(
        (key, value) for key, value in keywords.items() if value is not None
    )
    query = urlencode(parameters)
    url = f"{mirror}/cgi-bin/prosite/PSScan.cgi?{query}"
    return urlopen(url)


def read(handle):
    """Parse search results returned by ScanProsite into a Python object.

    Arguments:
     - handle:   File-like object providing the XML search results, such as
                 the handle returned by the scan function.

    Returns the Record built while parsing the ``matchset`` element.

    Raises ValueError if the data does not start with an XML declaration;
    the exception argument is the data that was received, which is
    (hopefully) the plain-text error message sent by the server.
    """
    content_handler = ContentHandler()
    saxparser = Parser()
    saxparser.setContentHandler(content_handler)
    saxparser.parse(handle)
    return content_handler.record


# The classes below are considered private


class Parser(ExpatParser):
    """Process the result from a ScanProsite search (PRIVATE)."""

    # The first five characters of every XML document we are willing to parse.
    _xml_prefix = "<?xml"

    def __init__(self):
        """Initialize the class."""
        ExpatParser.__init__(self)
        # True until the first chunk of data has been inspected by feed().
        self.firsttime = True

    def feed(self, data, isFinal=0):
        """Raise an Error if plain text is received in the data.

        This is to show the Error messages returned by ScanProsite.

        Arguments:
         - data:     The next chunk of the document; bytes when parsing a
                     binary handle, str when parsing a text handle.
         - isFinal:  Passed on unchanged to ExpatParser.feed.

        Raises ValueError, with the received data as its argument, if the
        first chunk does not start with ``<?xml``.
        """
        # Error messages returned by the ScanProsite server are formatted
        # as plain text instead of an XML document. To catch such error
        # messages, we override the feed method of the Expat parser.
        # The error message is (hopefully) contained in the data that was just
        # fed to the parser.
        if self.firsttime:
            prefix = data[:5]
            # Compare without decoding: slicing a byte string after five
            # bytes may cut a multi-byte character in half, and decoding
            # that would replace the server's message by a decoding error.
            if isinstance(prefix, str):
                expected = self._xml_prefix
            else:
                prefix = bytes(prefix)
                expected = self._xml_prefix.encode("ascii")
            if prefix != expected:
                raise ValueError(data)
            self.firsttime = False
        return ExpatParser.feed(self, data, isFinal)


class ContentHandler(handler.ContentHandler):
    """Process and fill in the records, results of the search (PRIVATE)."""

    # Fields of a match that are converted to int; everything else is kept
    # as a string.
    integers = ("start", "stop")
    # Known string fields. Kept for reference; unknown fields are stored as
    # strings too, so the parser does not need to consult this tuple.
    strings = (
        "sequence_ac",
        "sequence_id",
        "sequence_db",
        "signature_ac",
        "level",
        "level_tag",
    )

    def __init__(self):
        """Initialize the class."""
        super().__init__()
        # Stack with the names of the elements that are currently open.
        self.element = []
        # Character data collected since the most recent start tag.
        self.content = ""

    def startElement(self, name, attrs):
        """Define the beginning of a record and stores the search record."""
        self.element.append(name)
        self.content = ""
        if self.element == ["matchset"]:
            # Root element: create the record and store the summary counts.
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            # New match; its fields are filled in by endElement.
            self.record.append({})

    def endElement(self, name):
        """Define the end of the search record."""
        # The pop must not live inside an assert statement: asserts are
        # removed when Python runs with -O, which would leave the element
        # stack growing forever and no match field would ever be stored.
        closed = self.element.pop()
        if closed != name:
            raise ValueError(
                "Unexpected end tag %r while element %r is open" % (name, closed)
            )
        if self.element == ["matchset", "match"]:
            # We just closed a direct child of <match>: store it as a field
            # of the most recent match.
            match = self.record[-1]
            if name in ContentHandler.integers:
                match[name] = int(self.content)
            else:
                # Known string fields and unknown fields alike are stored
                # as strings.
                match[name] = self.content

    def characters(self, content):
        """Store the record content."""
        self.content += content