# -*- coding: utf-8 -*-
# =================================================================
#
# Authors: Tom Kralidis <tomkralidis@gmail.com>
#
# Copyright (c) 2015 Tom Kralidis
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# =================================================================

import logging
from pycsw.core import util
from pycsw.core.etree import etree


LOGGER = logging.getLogger(__name__)


class OAIPMH(object):
    """OAI-PMH wrapper class

    Translates OAI-PMH requests into CSW key-value parameters
    (``request``) and wraps the resulting CSW response document in an
    OAI-PMH envelope (``response``).
    """

    def __init__(self, context, config):
        """Set up OAI-PMH constants and register namespaces.

        :param context: object exposing a ``namespaces`` dict; it is
                        updated in place with the OAI-PMH and ``gco``
                        namespaces
        :param config: configuration object; ``response`` reads the
                       ``metadata:main`` section through ``config.get``
        """
        LOGGER.debug('Initializing OAI-PMH constants')
        self.oaipmh_version = '2.0'

        self.namespaces = {
            'oai': 'http://www.openarchives.org/OAI/2.0/',
            'oai_dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/',
            'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
        }
        # verb -> parameters accepted for that verb (lowercase keys)
        self.request_model = {
            'Identify': [],
            'ListSets': ['resumptiontoken'],
            'ListMetadataFormats': ['identifier'],
            'GetRecord': ['identifier', 'metadataprefix'],
            'ListRecords': ['from', 'until', 'set', 'resumptiontoken', 'metadataprefix'],
            'ListIdentifiers': ['from', 'until', 'set', 'resumptiontoken', 'metadataprefix'],
        }
        # metadataPrefix -> namespace/schema plus the XPaths used to build
        # each record header.  The XPaths are relative ('.//') so that they
        # are evaluated against one record at a time; an absolute '//' path
        # is always resolved from the document root and would return the
        # first record's values for every record.
        self.metadata_formats = {
            'iso19139': {
                'namespace': 'http://www.isotc211.org/2005/gmd',
                'schema': 'http://www.isotc211.org/2005/gmd/gmd.xsd',
                'identifier': './/gmd:fileIdentifier/gco:CharacterString',
                'dateStamp': './/gmd:dateStamp/gco:DateTime|.//gmd:dateStamp/gco:Date',
                'setSpec': './/gmd:hierarchyLevel/gmd:MD_ScopeCode'
            },
            'csw-record': {
                'namespace': 'http://www.opengis.net/cat/csw/2.0.2',
                'schema': 'http://schemas.opengis.net/csw/2.0.2/record.xsd',
                'identifier': './/dc:identifier',
                'dateStamp': './/dct:modified',
                'setSpec': './/dc:type'
            },
            'fgdc-std': {
                'namespace': 'http://www.opengis.net/cat/csw/csdgm',
                'schema': 'http://www.fgdc.gov/metadata/fgdc-std-001-1998.xsd',
                'identifier': './/idinfo/datasetid',
                'dateStamp': './/metainfo/metd',
                'setSpec': './/dataset'
            },
            'oai_dc': {
                'namespace': '%soai_dc/' % self.namespaces['oai'],
                'schema': 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
                'identifier': './/dc:identifier',
                'dateStamp': './/dct:modified',
                'setSpec': './/dc:type'
            },
            'dif': {
                'namespace': 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/',
                'schema': 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif.xsd',
                'identifier': './/dif:Entry_ID',
                'dateStamp': './/dif:Last_DIF_Revision_Date',
                'setSpec': './/dataset'
            },
            'gm03': {
                'namespace': 'http://www.interlis.ch/INTERLIS2.3',
                'schema': 'http://www.geocat.ch/internet/geocat/en/home/documentation/gm03.parsys.50316.downloadList.86742.DownloadFile.tmp/gm0321.zip',
                'identifier': './/gm03:DATASECTION//gm03:fileIdentifer',
                'dateStamp': './/gm03:DATASECTION//gm03:dateStamp',
                'setSpec': './/dataset'
            }
        }
        # setSpec -> (setName, record type value that belongs to the set)
        self.metadata_sets = {
            'datasets': ('Datasets', 'dataset'),
            'interactiveResources': ('Interactive Resources', 'service')
        }
        self.error_codes = {
            'badArgument': 'InvalidParameterValue',
            'badVerb': 'OperationNotSupported',
            'idDoesNotExist': None,
            'noRecordsMatch': None,
        }

        # default used until request() sees an explicit metadataPrefix, so
        # that _transform_element never hits a missing attribute
        self.metadata_prefix = 'csw-record'

        self.context = context
        self.context.namespaces.update(self.namespaces)
        self.context.namespaces.update({'gco': 'http://www.isotc211.org/2005/gco'})
        self.config = config

    def request(self, kvp):
        """Translate OAI-PMH request parameters into CSW parameters.

        Also records the requested metadataPrefix on the instance
        (``self.metadata_prefix``) for later use when building the
        response.

        :param kvp: dict of OAI-PMH request parameters; parameter names
                    other than ``verb`` values are looked up in lowercase
                    (``metadataprefix``, ``resumptiontoken``, ...)
        :returns: dict of CSW key-value parameters
        """
        kvpout = {'service': 'CSW', 'version': '2.0.2', 'mode': 'oaipmh'}
        LOGGER.debug('Incoming kvp: %s', kvp)

        if 'verb' in kvp:
            verb = kvp['verb']

            if 'metadataprefix' in kvp:
                self.metadata_prefix = kvp['metadataprefix']
                # falls back to the raw prefix when it is not a known format
                kvpout['outputschema'] = self._get_metadata_prefix(self.metadata_prefix)
            else:
                self.metadata_prefix = 'csw-record'
            LOGGER.debug('metadataPrefix: %s', self.metadata_prefix)

            if verb in ('ListRecords', 'ListIdentifiers', 'GetRecord'):
                kvpout['request'] = 'GetRecords'
                kvpout['resulttype'] = 'results'
                kvpout['typenames'] = 'csw:Record'
                kvpout['elementsetname'] = 'full'

            if verb in ('Identify', 'ListMetadataFormats', 'ListSets'):
                kvpout['request'] = 'GetCapabilities'
            elif verb == 'GetRecord':
                kvpout['request'] = 'GetRecordById'
                if 'identifier' in kvp:
                    kvpout['id'] = kvp['identifier']
                if self.metadata_prefix == 'oai_dc':  # just use default DC
                    kvpout.pop('outputschema', None)
            elif verb in ('ListRecords', 'ListIdentifiers'):
                if 'resumptiontoken' in kvp:
                    kvpout['startposition'] = kvp['resumptiontoken']
                if self.metadata_prefix in ('dc', 'oai_dc'):  # just use default DC
                    kvpout.pop('outputschema', None)

                LOGGER.debug('Scanning temporal parameters')
                # NOTE(review): from/until are interpolated into the CQL
                # text as received; confirm they are validated upstream.
                clauses = []
                if 'from' in kvp:
                    clauses.append('dc:date >= %s' % kvp['from'])
                if 'until' in kvp:
                    clauses.append('dc:date <= %s' % kvp['until'])
                if clauses:
                    kvpout['constraintlanguage'] = 'CQL_TEXT'
                    kvpout['constraint'] = ' and '.join(clauses)

        LOGGER.debug('Resulting parameters: %s', kvpout)
        return kvpout

    def response(self, response, kvp, repository, server_url):
        """Wrap a CSW response in an OAI-PMH response document.

        :param response: root element of the CSW response
        :param kvp: dict of the OAI-PMH request parameters; it is modified
                    in place (``mode``, ``config`` and ``verb`` are removed)
        :param repository: object whose ``query_insert('min')`` supplies
                           the earliest datestamp for ``Identify``
        :param server_url: service URL used to build the request/base URL
        :returns: the ``oai:OAI-PMH`` root element; on invalid input it
                  contains a single ``oai:error`` child
        """
        # internal parameters must not be echoed as oai:request attributes
        kvp.pop('mode', None)
        kvp.pop('config', None)
        url = '%smode=oaipmh' % util.bind_url(server_url)

        node = etree.Element(self._qname('oai:OAI-PMH'), nsmap=self.namespaces)
        node.set(self._qname('xsi:schemaLocation'),
                 '%s http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd' % self.namespaces['oai'])
        LOGGER.debug(etree.tostring(node))

        etree.SubElement(node, self._qname('oai:responseDate')).text = util.get_today_and_now()
        etree.SubElement(node, self._qname('oai:request'), attrib=kvp).text = url

        if 'verb' not in kvp:
            return self._add_error(node, 'Missing \'verb\' parameter')

        if kvp['verb'] not in self.request_model:
            return self._add_error(node, 'Unknown verb \'%s\'' % kvp['verb'])

        if etree.QName(response).localname == 'ExceptionReport':
            exception_text = response.xpath('//ows:ExceptionText|//ows20:ExceptionText',
                                            namespaces=self.context.namespaces)[0].text
            return self._add_error(node, exception_text)

        verb = kvp.pop('verb')
        record_verbs = ('GetRecord', 'ListIdentifiers', 'ListRecords')

        if verb in record_verbs:
            if 'metadataprefix' not in kvp:
                return self._add_error(node, 'Missing metadataPrefix parameter')
            if kvp['metadataprefix'] not in self.metadata_formats:
                return self._add_error(node, 'Invalid metadataPrefix parameter')

        for key in kvp:
            if key not in self.request_model[verb]:
                return self._add_error(node, 'Illegal parameter \'%s\'' % key)

        verbnode = etree.SubElement(node, self._qname('oai:%s' % verb))

        if verb == 'Identify':
            identify_values = (
                ('oai:repositoryName', self.config.get('metadata:main', 'identification_title')),
                ('oai:baseURL', url),
                ('oai:protocolVersion', '2.0'),
                ('oai:adminEmail', self.config.get('metadata:main', 'contact_email')),
                ('oai:earliestDatestamp', repository.query_insert('min')),
                ('oai:deletedRecord', 'no'),
                ('oai:granularity', 'YYYY-MM-DDThh:mm:ssZ'),
            )
            for elname, text in identify_values:
                etree.SubElement(verbnode, self._qname(elname)).text = text

        elif verb == 'ListSets':
            for set_spec, set_info in sorted(self.metadata_sets.items()):
                setnode = etree.SubElement(verbnode, self._qname('oai:set'))
                etree.SubElement(setnode, self._qname('oai:setSpec')).text = set_spec
                etree.SubElement(setnode, self._qname('oai:setName')).text = set_info[0]

        elif verb == 'ListMetadataFormats':
            for prefix, fmt in sorted(self.metadata_formats.items()):
                mdfnode = etree.SubElement(verbnode, self._qname('oai:metadataFormat'))
                etree.SubElement(mdfnode, self._qname('oai:metadataPrefix')).text = prefix
                etree.SubElement(mdfnode, self._qname('oai:schema')).text = fmt['schema']
                etree.SubElement(mdfnode, self._qname('oai:metadataNamespace')).text = fmt['namespace']

        elif verb in record_verbs:
            if verb == 'GetRecord':  # GetRecordById: records are the root's children
                records = list(response)
            else:  # GetRecords: records are the children of the second child
                records = list(response[1])

            for child in records:
                recnode = etree.SubElement(verbnode, self._qname('oai:record'))
                header = etree.SubElement(recnode, self._qname('oai:header'))
                # build the header from this record only; this must happen
                # before the record is moved into oai:metadata below
                self._transform_element(header, child, 'oai:identifier')
                self._transform_element(header, child, 'oai:dateStamp')
                self._transform_element(header, child, 'oai:setSpec')
                if verb in ('GetRecord', 'ListRecords'):
                    metadata = etree.SubElement(recnode, self._qname('oai:metadata'))
                    if kvp.get('metadataprefix') == 'oai_dc':
                        child.tag = self._qname('oai_dc:dc')
                    # append() moves the record out of the CSW response
                    metadata.append(child)

            if verb != 'GetRecord':
                complete_list_size = response.xpath('//@numberOfRecordsMatched')[0]
                next_record = response.xpath('//@nextRecord')[0]
                cursor = str(int(complete_list_size) - int(next_record) - 1)

                etree.SubElement(verbnode, self._qname('oai:resumptionToken'),
                                 completeListSize=complete_list_size,
                                 cursor=cursor).text = next_record
        return node

    def _qname(self, name):
        """Expand a prefixed name using the OAI-PMH namespaces"""
        return util.nspath_eval(name, self.namespaces)

    def _add_error(self, node, text):
        """Append an oai:error (code badArgument) to node and return node"""
        etree.SubElement(node, self._qname('oai:error'), code='badArgument').text = text
        return node

    def _get_metadata_prefix(self, prefix):
        """Convenience function to return metadataPrefix as CSW outputschema

        :param prefix: OAI-PMH metadataPrefix value
        :returns: the namespace registered for the prefix, or the prefix
                  itself when it is not a known metadata format
        """
        try:
            return self.metadata_formats[prefix]['namespace']
        except KeyError:
            return prefix

    def _transform_element(self, parent, element, elname):
        """tests for existence of a given xpath, writes out text if exists

        An ``elname`` child is always appended to ``parent``; its text is
        set only when a value is found.  For ``oai:setSpec`` the extracted
        record type is mapped to the matching key of ``metadata_sets``
        (left empty when no set matches).

        :param parent: element receiving the new child
        :param element: element the configured XPath is evaluated against
        :param elname: prefixed name of the child (e.g. ``oai:identifier``);
                       its local part selects the XPath of the current
                       metadata format
        """
        xpath = self.metadata_formats[self.metadata_prefix][elname.split(':')[1]]

        if xpath.startswith(('.//', '//')):
            matches = element.xpath(xpath, namespaces=self.context.namespaces)
            value = matches[0].text if matches else None
        else:  # bare string literal
            value = xpath

        el = etree.SubElement(parent, util.nspath_eval(elname, self.context.namespaces))

        if value and elname == 'oai:setSpec':
            # translate the record type (e.g. 'dataset') into its setSpec
            record_type = value
            value = None
            for set_spec, set_info in self.metadata_sets.items():
                if set_info[1] == record_type:
                    value = set_spec
                    break

        if value:
            el.text = value