1# -*- coding: utf-8 -*-
2# =================================================================
3#
4# Authors: Tom Kralidis <tomkralidis@gmail.com>
5#
6# Copyright (c) 2015 Tom Kralidis
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29# =================================================================
30
31import logging
32from pycsw.core import util
33from pycsw.core.etree import etree
34
35
36LOGGER = logging.getLogger(__name__)
37
class OAIPMH(object):
    """OAI-PMH wrapper class

    ``request`` translates OAI-PMH key-value parameters into CSW 2.0.2
    key-value parameters; ``response`` wraps the resulting CSW XML response
    in an ``oai:OAI-PMH`` envelope.
    """

    def __init__(self, context, config):
        """Set up OAI-PMH constants and register namespaces on the context.

        :param context: object exposing a mutable ``namespaces`` dict; the
                        OAI-PMH and ``gco`` prefixes are merged into it
        :param config: configuration object; ``response`` reads the
                       ``metadata:main`` section through ``config.get``
        """
        LOGGER.debug('Initializing OAI-PMH constants')
        self.oaipmh_version = '2.0'

        self.namespaces = {
            'oai': 'http://www.openarchives.org/OAI/2.0/',
            'oai_dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/',
            'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
        }
        # verb -> arguments accepted for that verb (lower-cased names)
        self.request_model = {
            'Identify': [],
            'ListSets': ['resumptiontoken'],
            'ListMetadataFormats': ['identifier'],
            'GetRecord': ['identifier', 'metadataprefix'],
            'ListRecords': ['from', 'until', 'set', 'resumptiontoken', 'metadataprefix'],
            'ListIdentifiers': ['from', 'until', 'set', 'resumptiontoken', 'metadataprefix'],
        }
        # metadataPrefix -> namespace/schema plus the XPaths used to fill the
        # OAI-PMH record header.  The XPaths are relative ('.//') so they are
        # evaluated against one record at a time; an absolute '//' path would
        # always match from the document root and return the first record's
        # value for every record.
        self.metadata_formats = {
            'iso19139': {
                'namespace': 'http://www.isotc211.org/2005/gmd',
                'schema': 'http://www.isotc211.org/2005/gmd/gmd.xsd',
                'identifier': './/gmd:fileIdentifier/gco:CharacterString',
                'dateStamp': './/gmd:dateStamp/gco:DateTime|.//gmd:dateStamp/gco:Date',
                'setSpec': './/gmd:hierarchyLevel/gmd:MD_ScopeCode'
            },
            'csw-record': {
                'namespace': 'http://www.opengis.net/cat/csw/2.0.2',
                'schema': 'http://schemas.opengis.net/csw/2.0.2/record.xsd',
                'identifier': './/dc:identifier',
                'dateStamp': './/dct:modified',
                'setSpec': './/dc:type'
            },
            'fgdc-std': {
                'namespace': 'http://www.opengis.net/cat/csw/csdgm',
                'schema': 'http://www.fgdc.gov/metadata/fgdc-std-001-1998.xsd',
                'identifier': './/idinfo/datasetid',
                'dateStamp': './/metainfo/metd',
                'setSpec': './/dataset'
            },
            'oai_dc': {
                'namespace': '%soai_dc/' % self.namespaces['oai'],
                'schema': 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
                'identifier': './/dc:identifier',
                'dateStamp': './/dct:modified',
                'setSpec': './/dc:type'
            },
            'dif': {
                'namespace': 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/',
                'schema': 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/dif.xsd',
                'identifier': './/dif:Entry_ID',
                'dateStamp': './/dif:Last_DIF_Revision_Date',
                'setSpec': './/dataset'
            },
            'gm03': {
                'namespace': 'http://www.interlis.ch/INTERLIS2.3',
                'schema': 'http://www.geocat.ch/internet/geocat/en/home/documentation/gm03.parsys.50316.downloadList.86742.DownloadFile.tmp/gm0321.zip',
                # NOTE(review): 'fileIdentifer' looks like a misspelling of
                # 'fileIdentifier' -- confirm against the GM03 schema
                'identifier': './/gm03:DATASECTION//gm03:fileIdentifer',
                'dateStamp': './/gm03:DATASECTION//gm03:dateStamp',
                'setSpec': './/dataset'
            }
        }
        # setSpec -> (setName, resource type value found in the record)
        self.metadata_sets = {
            'datasets': ('Datasets', 'dataset'),
            'interactiveResources': ('Interactive Resources', 'service')
        }
        self.error_codes = {
            'badArgument': 'InvalidParameterValue',
            'badVerb': 'OperationNotSupported',
            'idDoesNotExist': None,
            'noRecordsMatch': None,
        }

        self.context = context
        self.context.namespaces.update(self.namespaces)
        self.context.namespaces.update({'gco': 'http://www.isotc211.org/2005/gco'})
        self.config = config

    def request(self, kvp):
        """Translate OAI-PMH request parameters into CSW parameters.

        Also records the requested metadataPrefix on ``self.metadata_prefix``
        (``'csw-record'`` when none is given) for later use when building
        the response.

        :param kvp: dict of incoming OAI-PMH parameters (lower-cased keys)
        :returns: dict of CSW key-value parameters
        """
        kvpout = {'service': 'CSW', 'version': '2.0.2', 'mode': 'oaipmh'}
        LOGGER.debug('Incoming kvp: %s', kvp)

        if 'verb' not in kvp:
            LOGGER.debug('Resulting parameters: %s', kvpout)
            return kvpout

        verb = kvp['verb']

        if 'metadataprefix' in kvp:
            self.metadata_prefix = kvp['metadataprefix']
            # unknown prefixes are passed through unchanged
            kvpout['outputschema'] = self._get_metadata_prefix(self.metadata_prefix)
        else:
            self.metadata_prefix = 'csw-record'
        LOGGER.debug('metadataPrefix: %s', self.metadata_prefix)

        if verb in ['ListRecords', 'ListIdentifiers', 'GetRecord']:
            kvpout['request'] = 'GetRecords'
            kvpout['resulttype'] = 'results'
            kvpout['typenames'] = 'csw:Record'
            kvpout['elementsetname'] = 'full'

        if verb in ['Identify', 'ListMetadataFormats', 'ListSets']:
            kvpout['request'] = 'GetCapabilities'
        elif verb == 'GetRecord':
            kvpout['request'] = 'GetRecordById'
            if 'identifier' in kvp:
                kvpout['id'] = kvp['identifier']
            if self.metadata_prefix == 'oai_dc':  # just use default DC
                kvpout.pop('outputschema', None)
        elif verb in ['ListRecords', 'ListIdentifiers']:
            if 'resumptiontoken' in kvp:
                kvpout['startposition'] = kvp['resumptiontoken']
            if self.metadata_prefix in ['dc', 'oai_dc']:  # just use default DC
                kvpout.pop('outputschema', None)

            LOGGER.debug('Scanning temporal parameters')
            clauses = []
            if 'from' in kvp:
                clauses.append('dc:date >= %s' % kvp['from'])
            if 'until' in kvp:
                clauses.append('dc:date <= %s' % kvp['until'])
            if clauses:
                kvpout['constraintlanguage'] = 'CQL_TEXT'
                kvpout['constraint'] = ' and '.join(clauses)

        LOGGER.debug('Resulting parameters: %s', kvpout)
        return kvpout

    def response(self, response, kvp, repository, server_url):
        """Wrap a CSW response in an OAI-PMH response document.

        :param response: root element of the CSW XML response
        :param kvp: dict of the incoming OAI-PMH parameters; ``mode``,
                    ``config`` and ``verb`` are removed from it
        :param repository: repository object; ``query_insert('min')`` is
                           used for ``earliestDatestamp``
        :param server_url: service URL used to build the request/base URL
        :returns: ``oai:OAI-PMH`` root element
        """
        # internal routing parameters: remove them so they are neither echoed
        # as attributes of oai:request nor reported as illegal arguments
        kvp.pop('mode', None)
        kvp.pop('config', None)
        url = '%smode=oaipmh' % util.bind_url(server_url)

        node = etree.Element(util.nspath_eval('oai:OAI-PMH', self.namespaces), nsmap=self.namespaces)
        node.set(util.nspath_eval('xsi:schemaLocation', self.namespaces),
                 '%s http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd' % self.namespaces['oai'])
        LOGGER.debug(etree.tostring(node))

        etree.SubElement(node, util.nspath_eval('oai:responseDate', self.namespaces)).text = util.get_today_and_now()
        # the remaining request parameters (verb included) become attributes
        etree.SubElement(node, util.nspath_eval('oai:request', self.namespaces), attrib=kvp).text = url

        # a missing or illegal verb is the OAI-PMH 'badVerb' condition
        if 'verb' not in kvp:
            return self._error(node, 'badVerb', 'Missing \'verb\' parameter')

        if kvp['verb'] not in self.request_model:
            return self._error(node, 'badVerb', 'Unknown verb \'%s\'' % kvp['verb'])

        if etree.QName(response).localname == 'ExceptionReport':
            exception_texts = response.xpath('//ows:ExceptionText|//ows20:ExceptionText',
                                             namespaces=self.context.namespaces)
            # an exception report without text yields an empty error element
            message = exception_texts[0].text if exception_texts else None
            return self._error(node, 'badArgument', message)

        verb = kvp.pop('verb')

        if verb in ['GetRecord', 'ListIdentifiers', 'ListRecords']:
            if 'metadataprefix' not in kvp:
                return self._error(node, 'badArgument', 'Missing metadataPrefix parameter')
            if kvp['metadataprefix'] not in self.metadata_formats:
                return self._error(node, 'badArgument', 'Invalid metadataPrefix parameter')

        for key in kvp:
            if key not in self.request_model[verb]:
                return self._error(node, 'badArgument', 'Illegal parameter \'%s\'' % key)

        verbnode = etree.SubElement(node, util.nspath_eval('oai:%s' % verb, self.namespaces))

        if verb == 'Identify':
            etree.SubElement(verbnode, util.nspath_eval('oai:repositoryName', self.namespaces)).text = self.config.get('metadata:main', 'identification_title')
            etree.SubElement(verbnode, util.nspath_eval('oai:baseURL', self.namespaces)).text = url
            etree.SubElement(verbnode, util.nspath_eval('oai:protocolVersion', self.namespaces)).text = self.oaipmh_version
            etree.SubElement(verbnode, util.nspath_eval('oai:adminEmail', self.namespaces)).text = self.config.get('metadata:main', 'contact_email')
            etree.SubElement(verbnode, util.nspath_eval('oai:earliestDatestamp', self.namespaces)).text = repository.query_insert('min')
            etree.SubElement(verbnode, util.nspath_eval('oai:deletedRecord', self.namespaces)).text = 'no'
            etree.SubElement(verbnode, util.nspath_eval('oai:granularity', self.namespaces)).text = 'YYYY-MM-DDThh:mm:ssZ'

        elif verb == 'ListSets':
            for key, value in sorted(self.metadata_sets.items()):
                setnode = etree.SubElement(verbnode, util.nspath_eval('oai:set', self.namespaces))
                etree.SubElement(setnode, util.nspath_eval('oai:setSpec', self.namespaces)).text = key
                etree.SubElement(setnode, util.nspath_eval('oai:setName', self.namespaces)).text = value[0]

        elif verb == 'ListMetadataFormats':
            for key, value in sorted(self.metadata_formats.items()):
                mdfnode = etree.SubElement(verbnode, util.nspath_eval('oai:metadataFormat', self.namespaces))
                etree.SubElement(mdfnode, util.nspath_eval('oai:metadataPrefix', self.namespaces)).text = key
                etree.SubElement(mdfnode, util.nspath_eval('oai:schema', self.namespaces)).text = value['schema']
                etree.SubElement(mdfnode, util.nspath_eval('oai:metadataNamespace', self.namespaces)).text = value['namespace']

        elif verb in ['GetRecord', 'ListIdentifiers', 'ListRecords']:
            # snapshot the children: records are moved into the output tree
            # below, which would disturb live iteration
            if verb == 'GetRecord':  # GetRecordById
                records = list(response)
            else:  # GetRecords
                records = list(response[1])

            for child in records:
                recnode = etree.SubElement(verbnode, util.nspath_eval('oai:record', self.namespaces))
                header = etree.SubElement(recnode, util.nspath_eval('oai:header', self.namespaces))
                # header values come from this record, not the whole response
                # NOTE(review): the OAI-PMH schema appears to name the header
                # element 'datestamp' (lower case) -- confirm before renaming
                self._transform_element(header, child, 'oai:identifier')
                self._transform_element(header, child, 'oai:dateStamp')
                self._transform_element(header, child, 'oai:setSpec')
                if verb in ['GetRecord', 'ListRecords']:
                    metadata = etree.SubElement(recnode, util.nspath_eval('oai:metadata', self.namespaces))
                    if kvp.get('metadataprefix') == 'oai_dc':
                        child.tag = util.nspath_eval('oai_dc:dc', self.namespaces)
                    metadata.append(child)

            if verb != 'GetRecord':
                complete_list_size = response.xpath('//@numberOfRecordsMatched')[0]
                next_record = response.xpath('//@nextRecord')[0]
                # NOTE(review): OAI-PMH describes cursor as the number of
                # items returned so far; this formula looks different --
                # confirm the intended semantics
                cursor = str(int(complete_list_size) - int(next_record) - 1)

                etree.SubElement(verbnode, util.nspath_eval('oai:resumptionToken', self.namespaces),
                                 completeListSize=complete_list_size, cursor=cursor).text = next_record
        return node

    def _error(self, node, code, message):
        """Append an ``oai:error`` element to *node* and return *node*."""
        etree.SubElement(node, util.nspath_eval('oai:error', self.namespaces), code=code).text = message
        return node

    def _get_metadata_prefix(self, prefix):
        """Convenience function to return metadataPrefix as CSW outputschema

        Returns *prefix* unchanged when it is not a known metadata format.
        """
        try:
            return self.metadata_formats[prefix]['namespace']
        except KeyError:
            return prefix

    def _lookup_set_spec(self, resource_type):
        """Return the setSpec whose resource type equals *resource_type*, or None."""
        for set_spec, set_info in self.metadata_sets.items():
            if set_info[1] == resource_type:
                return set_spec
        return None

    def _transform_element(self, parent, element, elname):
        """tests for existence of a given xpath, writes out text if exists

        Always appends an *elname* child to *parent*; its text is left unset
        when no value is found (or, for ``oai:setSpec``, when the value does
        not map to a known set).

        :param parent: element receiving the new child
        :param element: record element the XPath is evaluated against
        :param elname: prefixed element name, e.g. ``'oai:identifier'``; the
                       local part selects the XPath from the active format
        """
        xpath = self.metadata_formats[self.metadata_prefix][elname.split(':')[1]]
        if xpath.startswith(('.//', '//')):
            matches = element.xpath(xpath, namespaces=self.context.namespaces)
            value = matches[0].text if matches else None
        else:  # bare string literal
            value = xpath

        el = etree.SubElement(parent, util.nspath_eval(elname, self.context.namespaces))
        if value:
            if elname == 'oai:setSpec':
                # map the record's resource type (e.g. 'dataset') to its setSpec
                value = self._lookup_set_spec(value)
            el.text = value