1# -*- coding: utf-8 -*-
2from __future__ import unicode_literals
3
4from internetarchive.api import get_item
5from internetarchive.utils import norm_filepath, InvalidIdentifierException
6from tests.conftest import (PROTOCOL, IaRequestsMock, load_file,
7                            NASA_METADATA_PATH, load_test_data_file)
8
9try:
10    import ujson as json
11except ImportError:
12    import json
13import types
14import re
15import os
16from copy import deepcopy
17
18import pytest
19import responses
20from requests.exceptions import HTTPError, ConnectionError
21
22from internetarchive import get_session
23import internetarchive.files
24
# Base S3 endpoint used to build expected upload URLs.
S3_URL = r'{0}//s3.us.archive.org/'.format(PROTOCOL)
# Matches any archive.org download URL, regardless of item/file path.
DOWNLOAD_URL_RE = re.compile(r'{0}//archive.org/download/.*'.format(PROTOCOL))
# Matches any s3.us.archive.org URL.
S3_URL_RE = re.compile(r'.*s3.us.archive.org/.*')

# Headers the upload tests expect internetarchive to send with S3 PUTs.
# NOTE(review): 'content-length' and 'x-archive-size-hint' assume the
# nasa.json test fixture is exactly 7557 bytes -- confirm if it changes.
EXPECTED_S3_HEADERS = {
    'content-length': '7557',
    'x-archive-queue-derive': '1',
    'x-archive-meta00-scanner': 'uri(Internet%20Archive%20Python%20library',
    'x-archive-size-hint': '7557',
    'x-archive-auto-make-bucket': '1',
    'authorization': 'LOW a:b',
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate',
    'connection': 'close',
}
40
41
def test_get_item(nasa_metadata, nasa_item, session):
    """Sanity-check every attribute the 'nasa' fixture item exposes."""
    assert nasa_item.item_metadata == nasa_metadata
    assert nasa_item.exists is True
    assert nasa_item.tasks is None
    # Simple scalar attributes, checked table-style.
    for attr, expected in [
            ('identifier', 'nasa'),
            ('created', 1427273784),
            ('d1', 'ia902606.us.archive.org'),
            ('d2', 'ia802606.us.archive.org'),
            ('dir', '/7/items/nasa'),
            ('files_count', 6),
            ('item_size', 114030),
            ('server', 'ia902606.us.archive.org'),
            ('uniq', 2131998567),
            ('updated', 1427273788),
    ]:
        assert getattr(nasa_item, attr) == expected
    # Container attributes only need the right type/size.
    for attr, expected_type in [('metadata', dict),
                                ('files', list),
                                ('reviews', list)]:
        assert isinstance(getattr(nasa_item, attr), expected_type)
    assert len(nasa_item.collection) == 1
60
61
def test_get_file(nasa_item):
    """get_file() returns a File object for the requested file name."""
    # isinstance is the idiomatic type check; also avoids binding the
    # name ``file`` (a builtin under Python 2, which this file supports).
    meta_file = nasa_item.get_file('nasa_meta.xml')
    assert isinstance(meta_file, internetarchive.files.File)
    assert meta_file.name == 'nasa_meta.xml'
66
67
def test_get_files(nasa_item):
    """get_files() lazily yields every file belonging to the item."""
    file_gen = nasa_item.get_files()
    assert isinstance(file_gen, types.GeneratorType)

    expected_names = {'NASAarchiveLogo.jpg',
                      'globe_west_540.jpg',
                      'nasa_reviews.xml',
                      'nasa_meta.xml',
                      'nasa_archive.torrent',
                      'nasa_files.xml'}
    assert {f.name for f in file_gen} == expected_names
80
81
def test_get_files_by_name(nasa_item):
    """get_files() accepts a single file name or a list of names."""
    single = nasa_item.get_files('globe_west_540.jpg')
    assert {f.name for f in single} == {'globe_west_540.jpg'}

    several = nasa_item.get_files(['globe_west_540.jpg', 'nasa_meta.xml'])
    assert {f.name for f in several} == {'globe_west_540.jpg',
                                         'nasa_meta.xml'}
89
90
def test_get_files_by_formats(nasa_item):
    """formats= accepts a single format name or a list of formats."""
    torrents = {f.name for f in
                nasa_item.get_files(formats='Archive BitTorrent')}
    assert torrents == {'nasa_archive.torrent'}

    mixed = {f.name for f in
             nasa_item.get_files(formats=['Archive BitTorrent', 'JPEG'])}
    assert mixed == {'nasa_archive.torrent', 'globe_west_540.jpg'}
100
101
def test_get_files_by_glob(nasa_item):
    """glob_pattern= accepts one |-joined pattern or a list of patterns."""
    expected = {'NASAarchiveLogo.jpg',
                'globe_west_540.jpg',
                'nasa_archive.torrent'}

    by_string = {f.name for f in
                 nasa_item.get_files(glob_pattern='*jpg|*torrent')}
    assert by_string == expected

    by_list = {f.name for f in
               nasa_item.get_files(glob_pattern=['*jpg', '*torrent'])}
    assert by_list == expected
115
116
def test_get_files_with_multiple_filters(nasa_item):
    """Combined filters are additive: files matching either are yielded."""
    names = {f.name for f in nasa_item.get_files(formats='JPEG',
                                                 glob_pattern='*xml')}
    assert names == {'globe_west_540.jpg',
                     'nasa_reviews.xml',
                     'nasa_meta.xml',
                     'nasa_files.xml'}
125
126
def test_get_files_no_matches(nasa_item):
    """An unknown format yields no files at all."""
    assert not list(nasa_item.get_files(formats='none'))
129
130
def test_download(tmpdir, nasa_item):
    """A download writes the file; a re-download overwrites it by default."""
    tmpdir.chdir()
    with IaRequestsMock() as mocks:
        mocks.add(responses.GET, DOWNLOAD_URL_RE, body='test content')
        nasa_item.download(files='nasa_meta.xml')
        # Exactly one entry (the 'nasa' item directory) was created.
        assert len(tmpdir.listdir()) == 1
    with IaRequestsMock() as mocks:
        mocks.add(responses.GET, DOWNLOAD_URL_RE, body='new test content')
        nasa_item.download(files='nasa_meta.xml')
        with open('nasa/nasa_meta.xml', 'r') as fp:
            assert fp.read() == 'new test content'
142
143
def test_download_io_error(tmpdir, nasa_item):
    """Once the mock responses are cleared, downloading raises ConnectionError."""
    tmpdir.chdir()
    with IaRequestsMock() as mocks:
        mocks.add(responses.GET, DOWNLOAD_URL_RE, body='test content')
        nasa_item.download(files='nasa_meta.xml')
        # Drop all registered responses so the next request has no match.
        mocks.reset()
        with pytest.raises(ConnectionError):
            nasa_item.download(files='nasa_meta.xml')
152
153
def test_download_ignore_errors(tmpdir, nasa_item):
    """ignore_errors=True suppresses download failures instead of raising."""
    # Fix: chdir into the pytest tmpdir like every sibling download test,
    # so the file is not written into the repository's working directory
    # (previously the ``tmpdir`` fixture was accepted but never used).
    tmpdir.chdir()
    with IaRequestsMock() as rsps:
        rsps.add(responses.GET, DOWNLOAD_URL_RE, body='test content')
        nasa_item.download(files='nasa_meta.xml')
        nasa_item.download(files='nasa_meta.xml', ignore_errors=True)
159
160
def test_download_ignore_existing(tmpdir, nasa_item):
    """ignore_existing=True leaves an already-downloaded file untouched."""
    tmpdir.chdir()
    with IaRequestsMock(assert_all_requests_are_fired=False) as mocks:
        mocks.add(responses.GET, DOWNLOAD_URL_RE, body='test content')
        nasa_item.download(files='nasa_meta.xml', ignore_existing=True)

        mocks.add(responses.GET, DOWNLOAD_URL_RE, body='new test content')
        nasa_item.download(files='nasa_meta.xml', ignore_existing=True)
        # Still the first body: the second download was skipped.
        with open('nasa/nasa_meta.xml', 'r') as fp:
            assert fp.read() == 'test content'
174
175
def test_download_clobber(tmpdir, nasa_item):
    """Re-downloading without flags clobbers the existing file."""
    tmpdir.chdir()
    with IaRequestsMock() as rsps:
        rsps.add(responses.GET, DOWNLOAD_URL_RE, body='test content')
        nasa_item.download(files='nasa_meta.xml')

        rsps.reset()
        rsps.add(responses.GET, DOWNLOAD_URL_RE, body='new test content')
        nasa_item.download(files='nasa_meta.xml')
        # Bug fix: the comparison result was previously discarded; without
        # the assert the test never actually verified the clobber.
        assert load_file('nasa/nasa_meta.xml') == 'new test content'
186
187
def test_download_checksum(tmpdir, caplog):
    """checksum=True overwrites mismatched files and skips matching ones."""
    tmpdir.chdir()

    with IaRequestsMock() as mocks:
        mocks.add_metadata_mock('nasa')
        mocks.add(responses.GET, DOWNLOAD_URL_RE, body='test content')
        mocks.add(responses.GET, DOWNLOAD_URL_RE, body='overwrite based on md5')

        item = get_item('nasa')
        # First download writes the file; second sees an md5 mismatch and
        # overwrites it.
        item.download(files='nasa_meta.xml')
        item.download(files='nasa_meta.xml', checksum=True)
        assert load_file('nasa/nasa_meta.xml') == 'overwrite based on md5'

        # A matching checksum must not trigger a re-download.
        mocks.reset()
        mocks.add(responses.GET, DOWNLOAD_URL_RE,
                  body=load_test_data_file('nasa_meta.xml'))
        item.download(files='nasa_meta.xml', checksum=True)
        item.download(files='nasa_meta.xml', checksum=True)

        assert 'skipping nasa' in caplog.text
        assert 'nasa_meta.xml, file already exists based on checksum.' in caplog.text
212
213
def test_download_destdir(tmpdir, nasa_item):
    """destdir= places the item directory under the given destination."""
    tmpdir.chdir()
    with IaRequestsMock() as mocks:
        mocks.add(responses.GET, DOWNLOAD_URL_RE, body='new destdir')
        dest = os.path.join(str(tmpdir), 'new destdir')
        nasa_item.download(files='nasa_meta.xml', destdir=dest)
        assert 'nasa' in os.listdir(dest)
        downloaded = os.path.join(dest, 'nasa/nasa_meta.xml')
        with open(downloaded, 'r') as fp:
            assert fp.read() == 'new destdir'
223
224
def test_download_no_directory(tmpdir, nasa_item):
    """no_directory=True writes the file directly into the cwd."""
    tmpdir.chdir()
    with IaRequestsMock() as rsps:
        # Consistency fix: reuse the shared module-level DOWNLOAD_URL_RE
        # instead of recompiling an identical regex locally.
        rsps.add(responses.GET, DOWNLOAD_URL_RE, body='no dest dir')
        nasa_item.download(files='nasa_meta.xml', no_directory=True)
        with open(os.path.join(str(tmpdir), 'nasa_meta.xml'), 'r') as fh:
            assert fh.read() == 'no dest dir'
233
234
def test_download_dry_run(tmpdir, capsys, nasa_item):
    """dry_run=True only prints the files that would be downloaded."""
    tmpdir.chdir()
    with IaRequestsMock(assert_all_requests_are_fired=False) as mocks:
        mocks.add(responses.GET, DOWNLOAD_URL_RE,
                  body='no dest dir',
                  adding_headers={'content-length': '100'})
        nasa_item.download(formats='Metadata', dry_run=True)

    out, err = capsys.readouterr()
    # Each non-empty stdout line is a URL; compare only the basenames.
    listed = {line.split('/')[-1] for line in out.split('\n') if line}
    assert listed == {'nasa_reviews.xml', 'nasa_meta.xml', 'nasa_files.xml'}
247
248
def test_download_verbose(tmpdir, capsys, nasa_item):
    """verbose=True reports each downloaded file on stdout."""
    tmpdir.chdir()
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.GET, DOWNLOAD_URL_RE,
                 body='no dest dir',
                 adding_headers={'content-length': '100'})
        nasa_item.download(files='nasa_meta.xml', verbose=True)
        out, err = capsys.readouterr()
        # Fix: removed a leftover debugging ``print(repr(out))``.
        assert 'downloaded nasa/nasa_meta.xml to' in out
        assert 'nasa_meta.xml' in out
260
261
def test_download_dark_item(tmpdir, capsys, nasa_metadata, session):
    """Downloading from a dark (withdrawn) item is skipped with a notice."""
    tmpdir.chdir()
    with IaRequestsMock(assert_all_requests_are_fired=False) as mocks:
        # Turn the nasa fixture metadata into a dark item.
        nasa_metadata['metadata']['identifier'] = 'dark-item'
        nasa_metadata['is_dark'] = True
        mocks.add(responses.GET,
                  '{0}//archive.org/metadata/dark-item'.format(PROTOCOL),
                  body=json.dumps(nasa_metadata),
                  content_type='application/json')
        dark_item = session.get_item('dark-item')
        mocks.add(responses.GET, DOWNLOAD_URL_RE,
                  body='no dest dir',
                  status=403,
                  adding_headers={'content-length': '100'})
        dark_item.download(files='nasa_meta.xml', verbose=True)
        out, err = capsys.readouterr()
        assert 'skipping dark-item, item is dark' in out
279
280
def test_upload(nasa_item):
    """A basic upload sends the expected S3 headers to the item bucket."""
    with IaRequestsMock(assert_all_requests_are_fired=False) as mocks:
        mocks.add(responses.PUT, S3_URL_RE,
                  adding_headers=EXPECTED_S3_HEADERS)
        expected_url = '{0}//s3.us.archive.org/nasa/nasa.json'.format(PROTOCOL)
        for resp in nasa_item.upload(NASA_METADATA_PATH,
                                     access_key='a',
                                     secret_key='b'):
            sent = {k.lower(): str(v)
                    for k, v in resp.request.headers.items()}
            # Only the first four %20-separated tokens of the scanner header
            # are stable (the tail carries the library version).
            scanner = resp.headers['x-archive-meta00-scanner']
            sent['x-archive-meta00-scanner'] = '%20'.join(
                scanner.split('%20')[:4])
            assert 'user-agent' in sent
            del sent['user-agent']
            assert sent == EXPECTED_S3_HEADERS
            assert resp.request.url == expected_url
298
299
def test_upload_validate_identifier(session):
    """validate_identifier=True rejects invalid identifiers before upload."""
    item = session.get_item('føø')
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.PUT, S3_URL_RE,
                 adding_headers=EXPECTED_S3_HEADERS)
        # Idiom fix: pytest.raises replaces the try/assert-False/except
        # pattern and fails cleanly when no exception is raised.
        with pytest.raises(InvalidIdentifierException):
            item.upload(NASA_METADATA_PATH,
                        access_key='a',
                        secret_key='b',
                        validate_identifier=True)

    valid_item = session.get_item('foo')
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.PUT, S3_URL_RE,
                 adding_headers=EXPECTED_S3_HEADERS)
        # A valid identifier must upload without raising (the redundant
        # trailing ``assert True`` was removed).
        valid_item.upload(NASA_METADATA_PATH,
                          access_key='a',
                          secret_key='b',
                          validate_identifier=True)
323
324
def test_upload_secure_session():
    """Sessions configured with secure=True upload over https."""
    config = {'s3': {'access': 'foo', 'secret': 'bar'},
              'general': {'secure': True}}
    with IaRequestsMock(assert_all_requests_are_fired=False) as meta_rsps:
        meta_rsps.add_metadata_mock('nasa')
        secure_session = get_session(config=config)
        item = secure_session.get_item('nasa')
        # Separate mock for the PUT so it does not shadow the outer one.
        with IaRequestsMock(assert_all_requests_are_fired=False) as put_rsps:
            put_rsps.add(responses.PUT, S3_URL_RE)
            results = item.upload(NASA_METADATA_PATH)
            assert results[0].url == 'https://s3.us.archive.org/nasa/nasa.json'
335
336
def test_upload_metadata(nasa_item):
    """Metadata kwargs become x-archive-meta* headers on the S3 PUT.

    Covers plain values, list values (spread over indexed meta00/meta01
    headers), and non-ASCII values sent uri()-encoded as UTF-8
    percent-escapes.
    """
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        _expected_headers = deepcopy(EXPECTED_S3_HEADERS)
        # The scanner header is injected by the library itself; it is
        # removed from both the expectation and the request before comparing.
        del _expected_headers['x-archive-meta00-scanner']
        _expected_headers['x-archive-meta00-foo'] = 'bar'
        # List metadata values fan out to indexed headers: meta00, meta01.
        _expected_headers['x-archive-meta00-subject'] = 'first'
        _expected_headers['x-archive-meta01-subject'] = 'second'
        # baz and baz2 are the same Russian phrase -- baz2 spelled with \u
        # escapes -- so both must encode to identical uri() header values.
        _expected_headers['x-archive-meta00-baz'] = (
            'uri(%D0%9F%D0%BE%D1%87%D0%B5%D0%BC'
            '%D1%83%20%D0%B1%D1%8B%20%D0%B8%20%'
            'D0%BD%D0%B5%D1%82...)')
        _expected_headers['x-archive-meta00-baz2'] = (
            'uri(%D0%9F%D0%BE%D1%87%D0%B5%D0%BC'
            '%D1%83%20%D0%B1%D1%8B%20%D0%B8%20%'
            'D0%BD%D0%B5%D1%82...)')
        rsps.add(responses.PUT, S3_URL_RE,
                 adding_headers=_expected_headers)
        md = dict(
            foo='bar',
            subject=['first', 'second'],
            baz='Почему бы и нет...',
            baz2=(u'\u041f\u043e\u0447\u0435\u043c\u0443 \u0431\u044b \u0438 '
                  u'\u043d\u0435\u0442...'),
        )
        _responses = nasa_item.upload(NASA_METADATA_PATH,
                                      metadata=md,
                                      access_key='a',
                                      secret_key='b')
        for resp in _responses:
            request = resp.request
            del request.headers['x-archive-meta00-scanner']
            headers = dict((k.lower(), str(v)) for k, v in request.headers.items())
            assert 'user-agent' in headers
            del headers['user-agent']
            assert headers == _expected_headers
372
373
def test_upload_503(capsys, nasa_item):
    """A 503 SlowDown response raises and warns that S3 is overloaded."""
    body = ("<?xml version='1.0' encoding='UTF-8'?>"
            '<Error><Code>SlowDown</Code><Message>Please reduce your request rate.'
            '</Message><Resource>simulated error caused by x-(amz|archive)-simulate-error'
            ', try x-archive-simulate-error:help</Resource><RequestId>d36ec445-8d4a-4a64-'
            'a110-f67af6ee2c2a</RequestId></Error>')
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        _expected_headers = deepcopy(EXPECTED_S3_HEADERS)
        rsps.add(responses.GET, S3_URL_RE,
                 body='{"over_limit": "1"}')
        rsps.add(responses.PUT, S3_URL_RE,
                 body=body,
                 adding_headers=_expected_headers,
                 status=503)
        # Bug fix: the old try/except form silently passed when no exception
        # was raised; pytest.raises makes a missing error a test failure.
        with pytest.raises(Exception) as exc_info:
            nasa_item.upload(NASA_METADATA_PATH,
                             access_key='a',
                             secret_key='b',
                             retries=1,
                             retries_sleep=.1,
                             verbose=True)
        assert 'Please reduce your request rate' in str(exc_info.value)
        out, err = capsys.readouterr()
        assert 'warning: s3 is overloaded' in err
399
400
def test_upload_file_keys(nasa_item):
    """A dict of files maps local paths onto custom remote key names."""
    with IaRequestsMock(assert_all_requests_are_fired=False) as mocks:
        mocks.add(responses.PUT, S3_URL_RE, adding_headers=EXPECTED_S3_HEADERS)
        key_map = {'new_key.txt': NASA_METADATA_PATH,
                   '222': NASA_METADATA_PATH}
        expected_urls = {
            '{0}//s3.us.archive.org/nasa/new_key.txt'.format(PROTOCOL),
            '{0}//s3.us.archive.org/nasa/222'.format(PROTOCOL),
        }
        for resp in nasa_item.upload(key_map, access_key='a', secret_key='b'):
            assert resp.request.url in expected_urls
412
413
def test_upload_dir(tmpdir, nasa_item):
    """Directory uploads key files with or without the directory prefix,
    depending on whether the given path ends with a slash."""
    with IaRequestsMock(assert_all_requests_are_fired=False) as mocks:
        mocks.add(responses.PUT, S3_URL_RE,
                  adding_headers=EXPECTED_S3_HEADERS)

        tmpdir.mkdir('dir_test')
        test_dir = os.path.join(str(tmpdir), 'dir_test')
        for fname, contents in (('foo.txt', 'hi'), ('foo2.txt', 'hi 2')):
            with open(os.path.join(test_dir, fname), 'w') as fp:
                fp.write(contents)

        # Trailing slash: the directory path is NOT part of the key.
        flat_eps = [
            '{0}nasa/foo.txt'.format(S3_URL),
            '{0}nasa/foo2.txt'.format(S3_URL),
        ]
        for resp in nasa_item.upload(test_dir + '/',
                                     access_key='a',
                                     secret_key='b'):
            assert resp.request.url in flat_eps

        # No trailing slash: the full normalized dir path is in the key.
        norm_tmp = norm_filepath(str(tmpdir))
        nested_eps = [
            '{0}nasa{1}/dir_test/{2}'.format(S3_URL, norm_tmp, 'foo.txt'),
            '{0}nasa{1}/dir_test/{2}'.format(S3_URL, norm_tmp, 'foo2.txt'),
        ]
        for resp in nasa_item.upload(test_dir,
                                     access_key='a',
                                     secret_key='b'):
            assert resp.request.url in nested_eps
447
448
def test_upload_queue_derive(nasa_item):
    """Uploads send 'x-archive-queue-derive: 1' by default."""
    with IaRequestsMock(assert_all_requests_are_fired=False) as mocks:
        expected = deepcopy(EXPECTED_S3_HEADERS)
        expected['x-archive-queue-derive'] = '1'
        # The scanner header varies with the library version; ignore it.
        del expected['x-archive-meta00-scanner']
        mocks.add(responses.PUT, S3_URL_RE, adding_headers=expected)
        for resp in nasa_item.upload(NASA_METADATA_PATH,
                                     access_key='a',
                                     secret_key='b'):
            sent = {k.lower(): str(v)
                    for k, v in resp.request.headers.items()}
            del sent['x-archive-meta00-scanner']
            assert 'user-agent' in sent
            del sent['user-agent']
            assert sent == expected
462
463
def test_upload_delete(tmpdir, nasa_item):
    """delete=True removes the local file only after a verified upload.

    Phase 1: the server answers 400 BadDigest, so the upload raises and the
    local file must survive. Phase 2: the upload succeeds and the local
    file must be deleted.
    """
    # Simulated S3 BadDigest (md5 mismatch) error body.
    body = ("<?xml version='1.0' encoding='UTF-8'?>"
            '<Error><Code>BadDigest</Code><Message>The Content-MD5 you specified did not '
            'match what we received.</Message><Resource>content-md5 submitted with PUT: '
            'foo != received data md5: 70871f9fce8dd23853d6e42417356b05also not equal to '
            'base64 version: cIcfn86N0jhT1uQkFzVrBQ==</Resource><RequestId>ec03fe7c-e123-'
            '4133-a207-3141d4d74096</RequestId></Error>')

    _expected_headers = deepcopy(EXPECTED_S3_HEADERS)
    del _expected_headers['x-archive-meta00-scanner']
    tmpdir.chdir()
    test_file = os.path.join(str(tmpdir), 'test.txt')
    with open(test_file, 'w') as fh:
        fh.write('test delete')

    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        # Non-matching md5, should not delete.
        rsps.add(responses.PUT, S3_URL_RE,
                 body=body,
                 adding_headers=_expected_headers,
                 status=400)
        with pytest.raises(HTTPError):
            nasa_item.upload(test_file,
                             access_key='a',
                             secret_key='b',
                             delete=True,
                             queue_derive=True)

        # The failed upload must leave the local file in place.
        assert len(tmpdir.listdir()) == 1

    _expected_headers = deepcopy(EXPECTED_S3_HEADERS)
    test_file = os.path.join(str(tmpdir), 'test.txt')
    with open(test_file, 'w') as fh:
        fh.write('test delete')

    # Matching md5, should delete.
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.PUT, S3_URL_RE,
                 adding_headers=_expected_headers)
        resp = nasa_item.upload(test_file,
                                access_key='a',
                                secret_key='b',
                                delete=True,
                                queue_derive=True)
        for r in resp:
            headers = dict((k.lower(), str(v)) for k, v in r.headers.items())
            del headers['content-type']
            assert headers == _expected_headers
            # The successful upload deleted the local file.
            assert len(tmpdir.listdir()) == 0
513
514
def test_upload_checksum(tmpdir, nasa_item):
    """checksum=True skips uploading files the item already has by md5.

    First uploads a file not listed in the item metadata (no skip), then
    injects a matching md5 into the item's file list and verifies the
    second upload is skipped (response has no status_code).
    """
    with IaRequestsMock() as rsps:
        rsps.add_metadata_mock('nasa')
        nasa_item = get_item('nasa')

        _expected_headers = deepcopy(EXPECTED_S3_HEADERS)
        del _expected_headers['x-archive-meta00-scanner']
        # md5 of the b'test delete' payload written below.
        _expected_headers['content-md5'] = '6f1834f5c70c0eabf93dea675ccf90c4'

        test_file = os.path.join(str(tmpdir), 'checksum_test.txt')
        with open(test_file, 'wb') as fh:
            fh.write(b'test delete')

        # No skip.
        rsps.add(responses.PUT, S3_URL_RE, adding_headers=_expected_headers)
        resp = nasa_item.upload(test_file,
                                access_key='a',
                                secret_key='b',
                                checksum=True)
        for r in resp:
            headers = dict((k.lower(), str(v)) for k, v in r.headers.items())
            del headers['content-type']
            assert headers == _expected_headers
            assert r.status_code == 200

        # Skip.
        # NOTE(review): this md5 is treated as matching by the library's
        # checksum logic -- confirm it corresponds to the local test file.
        nasa_item.item_metadata['files'].append(
            dict(name='checksum_test.txt',
                 md5='33213e7683c1e6d15b2a658f3c567717'))
        resp = nasa_item.upload(test_file,
                                access_key='a',
                                secret_key='b',
                                checksum=True)
        for r in resp:
            headers = dict((k.lower(), str(v)) for k, v in r.headers.items())
            # A skipped upload produces a dummy response with no status.
            assert r.status_code is None
551
552
def test_modify_metadata(nasa_item, nasa_metadata):
    """modify_metadata() builds correct JSON-patch POST requests.

    Uses debug=True to capture the prepared request (no HTTP is sent for
    most cases) and inspects its form data: 'priority', '-target', and the
    '-patch' JSON-patch document. The final case performs a real (mocked)
    POST and checks the item re-initializes with the new metadata.
    """
    with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
        rsps.add(responses.POST, '{0}//archive.org/metadata/nasa'.format(PROTOCOL))

        # Test simple add.
        md = {'foo': 'bar'}
        p = nasa_item.modify_metadata(md, debug=True)
        _patch = json.dumps([
            {"add": "/foo", "value": "bar"},
        ])
        expected_data = {
            'priority': -5,
            '-target': 'metadata',
            '-patch': _patch,
        }
        assert set(p.data.keys()) == set(expected_data.keys())
        assert p.data['priority'] == expected_data['priority']
        assert p.data['-target'] == expected_data['-target']
        assert all(v in p.data['-patch'] for v in ['/foo', 'bar'])
        # Test no changes.
        md = {'title': 'NASA Images'}
        p = nasa_item.modify_metadata(md, debug=True)
        expected_data = {'priority': -5, '-target': 'metadata', '-patch': '[]'}
        assert p.data == expected_data

        # REMOVE_TAG is the sentinel value that deletes a metadata field.
        md = {'title': 'REMOVE_TAG'}
        p = nasa_item.modify_metadata(md, debug=True)
        expected_data = {
            'priority': -5,
            '-target': 'metadata',
            '-patch': json.dumps([{"remove": "/title"}])
        }
        assert set(p.data.keys()) == set(expected_data.keys())
        assert p.data['priority'] == expected_data['priority']
        assert p.data['-target'] == expected_data['-target']
        assert '/title' in str(p.data['-patch'])
        assert 'remove' in str(p.data['-patch'])

        # Test add array.
        md = {'subject': ['one', 'two', 'last']}
        p = nasa_item.modify_metadata(md, debug=True, priority=-1)
        expected_data = {
            'priority': -1,
            '-target': 'metadata',
            '-patch': json.dumps([{"add": "/subject", "value": ["one", "two", "last"]}])
        }
        assert set(p.data.keys()) == set(expected_data.keys())
        assert p.data['priority'] == expected_data['priority']
        assert p.data['-target'] == expected_data['-target']
        assert '["one", "two", "last"]' in str(p.data['-patch'])

        # Test indexed mod: 'subject[2]' targets element 2 of the array.
        nasa_item.item_metadata['metadata']['subject'] = ['first', 'middle', 'last']
        md = {'subject[2]': 'new first'}
        p = nasa_item.modify_metadata(md, debug=True)
        expected_data = {
            'priority': -5,
            '-target': 'metadata',
            '-patch': json.dumps([{"value": "new first", "replace": "/subject/2"}])
        }

        # Avoid comparing the json strings, because they are not in a canonical form
        assert set(p.data.keys()) == set(expected_data.keys())
        assert all(p.data[k] == expected_data[k] for k in ['priority', '-target'])
        assert '/subject/2' in p.data['-patch']

        # Test priority.
        md = {'title': 'NASA Images'}
        p = nasa_item.modify_metadata(md, priority=3, debug=True)
        expected_data = {'priority': 3, '-target': 'metadata', '-patch': '[]'}
        assert p.data == expected_data

        # Test auth: access/secret keys are sent in the request body.
        md = {'title': 'NASA Images'}
        p = nasa_item.modify_metadata(md,
                                      access_key='a',
                                      secret_key='b',
                                      debug=True)
        assert 'access=a' in p.body
        assert 'secret=b' in p.body

        # Test change.
        md = {'title': 'new title'}
        nasa_metadata['metadata']['title'] = 'new title'
        _item_metadata = json.dumps(nasa_metadata)
        rsps.add(responses.GET, '{0}//archive.org/metadata/nasa'.format(PROTOCOL),
                 body=_item_metadata)
        nasa_item.modify_metadata(md,
                                  access_key='a',
                                  secret_key='b')
        # Test that item re-initializes
        assert nasa_item.metadata['title'] == 'new title'
645