1# -*- coding: utf-8 -*- 2from __future__ import unicode_literals 3 4from internetarchive.api import get_item 5from internetarchive.utils import norm_filepath, InvalidIdentifierException 6from tests.conftest import (PROTOCOL, IaRequestsMock, load_file, 7 NASA_METADATA_PATH, load_test_data_file) 8 9try: 10 import ujson as json 11except ImportError: 12 import json 13import types 14import re 15import os 16from copy import deepcopy 17 18import pytest 19import responses 20from requests.exceptions import HTTPError, ConnectionError 21 22from internetarchive import get_session 23import internetarchive.files 24 25S3_URL = r'{0}//s3.us.archive.org/'.format(PROTOCOL) 26DOWNLOAD_URL_RE = re.compile(r'{0}//archive.org/download/.*'.format(PROTOCOL)) 27S3_URL_RE = re.compile(r'.*s3.us.archive.org/.*') 28 29EXPECTED_S3_HEADERS = { 30 'content-length': '7557', 31 'x-archive-queue-derive': '1', 32 'x-archive-meta00-scanner': 'uri(Internet%20Archive%20Python%20library', 33 'x-archive-size-hint': '7557', 34 'x-archive-auto-make-bucket': '1', 35 'authorization': 'LOW a:b', 36 'accept': '*/*', 37 'accept-encoding': 'gzip, deflate', 38 'connection': 'close', 39} 40 41 42def test_get_item(nasa_metadata, nasa_item, session): 43 assert nasa_item.item_metadata == nasa_metadata 44 assert nasa_item.identifier == 'nasa' 45 assert nasa_item.exists is True 46 assert isinstance(nasa_item.metadata, dict) 47 assert isinstance(nasa_item.files, list) 48 assert isinstance(nasa_item.reviews, list) 49 assert nasa_item.created == 1427273784 50 assert nasa_item.d1 == 'ia902606.us.archive.org' 51 assert nasa_item.d2 == 'ia802606.us.archive.org' 52 assert nasa_item.dir == '/7/items/nasa' 53 assert nasa_item.files_count == 6 54 assert nasa_item.item_size == 114030 55 assert nasa_item.server == 'ia902606.us.archive.org' 56 assert nasa_item.uniq == 2131998567 57 assert nasa_item.updated == 1427273788 58 assert nasa_item.tasks is None 59 assert len(nasa_item.collection) == 1 60 61 62def test_get_file(nasa_item): 63 file = nasa_item.get_file('nasa_meta.xml') 64 assert type(file) == internetarchive.files.File 65 assert file.name == 'nasa_meta.xml' 66 67 68def test_get_files(nasa_item): 69 files = nasa_item.get_files() 70 assert isinstance(files, types.GeneratorType) 71 72 expected_files = set(['NASAarchiveLogo.jpg', 73 'globe_west_540.jpg', 74 'nasa_reviews.xml', 75 'nasa_meta.xml', 76 'nasa_archive.torrent', 77 'nasa_files.xml', ]) 78 files = set(x.name for x in files) 79 assert files == expected_files 80 81 82def test_get_files_by_name(nasa_item): 83 files = nasa_item.get_files('globe_west_540.jpg') 84 assert set(f.name for f in files) == set(['globe_west_540.jpg']) 85 86 files = nasa_item.get_files(['globe_west_540.jpg', 'nasa_meta.xml']) 87 assert set(f.name 88 for f in files) == set(['globe_west_540.jpg', 'nasa_meta.xml']) 89 90 91def test_get_files_by_formats(nasa_item): 92 files = set(f.name for f in nasa_item.get_files(formats='Archive BitTorrent')) 93 expected_files = set(['nasa_archive.torrent']) 94 assert files == expected_files 95 96 files = set( 97 f.name for f in nasa_item.get_files(formats=['Archive BitTorrent', 'JPEG'])) 98 expected_files = set(['nasa_archive.torrent', 'globe_west_540.jpg', ]) 99 assert files == expected_files 100 101 102def test_get_files_by_glob(nasa_item): 103 files = set(f.name for f in nasa_item.get_files(glob_pattern='*jpg|*torrent')) 104 expected_files = set(['NASAarchiveLogo.jpg', 105 'globe_west_540.jpg', 106 'nasa_archive.torrent', ]) 107 assert files == expected_files 108 109 files = set(f.name 110 for f in nasa_item.get_files(glob_pattern=['*jpg', '*torrent'])) 111 expected_files = set(['NASAarchiveLogo.jpg', 112 'globe_west_540.jpg', 113 'nasa_archive.torrent', ]) 114 assert files == expected_files 115 116 117def test_get_files_with_multiple_filters(nasa_item): 118 files = set(f.name for f in nasa_item.get_files(formats='JPEG', 119 glob_pattern='*xml')) 120 expected_files = set(['globe_west_540.jpg', 121 'nasa_reviews.xml', 122 'nasa_meta.xml', 123 'nasa_files.xml', ]) 124 assert files == expected_files 125 126 127def test_get_files_no_matches(nasa_item): 128 assert list(nasa_item.get_files(formats='none')) == [] 129 130 131def test_download(tmpdir, nasa_item): 132 tmpdir.chdir() 133 with IaRequestsMock() as rsps: 134 rsps.add(responses.GET, DOWNLOAD_URL_RE, body='test content') 135 nasa_item.download(files='nasa_meta.xml') 136 assert len(tmpdir.listdir()) == 1 137 with IaRequestsMock() as rsps: 138 rsps.add(responses.GET, DOWNLOAD_URL_RE, body='new test content') 139 nasa_item.download(files='nasa_meta.xml') 140 with open('nasa/nasa_meta.xml', 'r') as fh: 141 assert fh.read() == 'new test content' 142 143 144def test_download_io_error(tmpdir, nasa_item): 145 tmpdir.chdir() 146 with IaRequestsMock() as rsps: 147 rsps.add(responses.GET, DOWNLOAD_URL_RE, body='test content') 148 nasa_item.download(files='nasa_meta.xml') 149 rsps.reset() 150 with pytest.raises(ConnectionError): 151 nasa_item.download(files='nasa_meta.xml') 152 153 154def test_download_ignore_errors(tmpdir, nasa_item): 155 with IaRequestsMock() as rsps: 156 rsps.add(responses.GET, DOWNLOAD_URL_RE, body='test content') 157 nasa_item.download(files='nasa_meta.xml') 158 nasa_item.download(files='nasa_meta.xml', ignore_errors=True) 159 160 161def test_download_ignore_existing(tmpdir, nasa_item): 162 tmpdir.chdir() 163 with IaRequestsMock( 164 assert_all_requests_are_fired=False) as rsps: 165 rsps.add(responses.GET, DOWNLOAD_URL_RE, 166 body='test content') 167 nasa_item.download(files='nasa_meta.xml', ignore_existing=True) 168 169 rsps.add(responses.GET, DOWNLOAD_URL_RE, 170 body='new test content') 171 nasa_item.download(files='nasa_meta.xml', ignore_existing=True) 172 with open('nasa/nasa_meta.xml', 'r') as fh: 173 assert fh.read() == 'test content' 174 175 176def test_download_clobber(tmpdir, nasa_item): 177 tmpdir.chdir() 178 with IaRequestsMock() as rsps: 179 rsps.add(responses.GET, DOWNLOAD_URL_RE, body='test content') 180 nasa_item.download(files='nasa_meta.xml') 181 182 rsps.reset() 183 rsps.add(responses.GET, DOWNLOAD_URL_RE, body='new test content') 184 nasa_item.download(files='nasa_meta.xml') 185 load_file('nasa/nasa_meta.xml') == 'new test content' 186 187 188def test_download_checksum(tmpdir, caplog): 189 tmpdir.chdir() 190 191 # test overwrite based on checksum. 192 with IaRequestsMock() as rsps: 193 rsps.add_metadata_mock('nasa') 194 rsps.add(responses.GET, DOWNLOAD_URL_RE, body='test content') 195 rsps.add(responses.GET, DOWNLOAD_URL_RE, body='overwrite based on md5') 196 197 nasa_item = get_item('nasa') 198 nasa_item.download(files='nasa_meta.xml') 199 nasa_item.download(files='nasa_meta.xml', checksum=True) 200 201 assert load_file('nasa/nasa_meta.xml') == 'overwrite based on md5' 202 203 # test no overwrite based on checksum. 204 rsps.reset() 205 rsps.add(responses.GET, DOWNLOAD_URL_RE, 206 body=load_test_data_file('nasa_meta.xml')) 207 nasa_item.download(files='nasa_meta.xml', checksum=True) 208 nasa_item.download(files='nasa_meta.xml', checksum=True) 209 210 assert 'skipping nasa' in caplog.text 211 assert 'nasa_meta.xml, file already exists based on checksum.' in caplog.text 212 213 214def test_download_destdir(tmpdir, nasa_item): 215 tmpdir.chdir() 216 with IaRequestsMock() as rsps: 217 rsps.add(responses.GET, DOWNLOAD_URL_RE, body='new destdir') 218 dest = os.path.join(str(tmpdir), 'new destdir') 219 nasa_item.download(files='nasa_meta.xml', destdir=dest) 220 assert 'nasa' in os.listdir(dest) 221 with open(os.path.join(dest, 'nasa/nasa_meta.xml'), 'r') as fh: 222 assert fh.read() == 'new destdir' 223 224 225def test_download_no_directory(tmpdir, nasa_item): 226 url_re = re.compile(r'{0}//archive.org/download/.*'.format(PROTOCOL)) 227 tmpdir.chdir() 228 with IaRequestsMock() as rsps: 229 rsps.add(responses.GET, url_re, body='no dest dir') 230 nasa_item.download(files='nasa_meta.xml', no_directory=True) 231 with open(os.path.join(str(tmpdir), 'nasa_meta.xml'), 'r') as fh: 232 assert fh.read() == 'no dest dir' 233 234 235def test_download_dry_run(tmpdir, capsys, nasa_item): 236 tmpdir.chdir() 237 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 238 rsps.add(responses.GET, DOWNLOAD_URL_RE, 239 body='no dest dir', 240 adding_headers={'content-length': '100'}) 241 nasa_item.download(formats='Metadata', dry_run=True) 242 243 expected = set(['nasa_reviews.xml', 'nasa_meta.xml', 'nasa_files.xml']) 244 out, err = capsys.readouterr() 245 246 assert set([x.split('/')[-1] for x in out.split('\n') if x]) == expected 247 248 249def test_download_verbose(tmpdir, capsys, nasa_item): 250 tmpdir.chdir() 251 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 252 rsps.add(responses.GET, DOWNLOAD_URL_RE, 253 body='no dest dir', 254 adding_headers={'content-length': '100'}) 255 nasa_item.download(files='nasa_meta.xml', verbose=True) 256 out, err = capsys.readouterr() 257 print(repr(out)) 258 assert 'downloaded nasa/nasa_meta.xml to' in out 259 assert 'nasa_meta.xml' in out 260 261 262def test_download_dark_item(tmpdir, capsys, nasa_metadata, session): 263 tmpdir.chdir() 264 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 265 nasa_metadata['metadata']['identifier'] = 'dark-item' 266 nasa_metadata['is_dark'] = True 267 _item_metadata = json.dumps(nasa_metadata) 268 rsps.add(responses.GET, '{0}//archive.org/metadata/dark-item'.format(PROTOCOL), 269 body=_item_metadata, 270 content_type='application/json') 271 _item = session.get_item('dark-item') 272 rsps.add(responses.GET, DOWNLOAD_URL_RE, 273 body='no dest dir', 274 status=403, 275 adding_headers={'content-length': '100'}) 276 _item.download(files='nasa_meta.xml', verbose=True) 277 out, err = capsys.readouterr() 278 assert 'skipping dark-item, item is dark' in out 279 280 281def test_upload(nasa_item): 282 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 283 rsps.add(responses.PUT, S3_URL_RE, 284 adding_headers=EXPECTED_S3_HEADERS) 285 _responses = nasa_item.upload(NASA_METADATA_PATH, 286 access_key='a', 287 secret_key='b') 288 for resp in _responses: 289 request = resp.request 290 headers = dict((k.lower(), str(v)) for k, v in request.headers.items()) 291 scanner_header = '%20'.join( 292 resp.headers['x-archive-meta00-scanner'].split('%20')[:4]) 293 headers['x-archive-meta00-scanner'] = scanner_header 294 assert 'user-agent' in headers 295 del headers['user-agent'] 296 assert headers == EXPECTED_S3_HEADERS 297 assert request.url == '{0}//s3.us.archive.org/nasa/nasa.json'.format(PROTOCOL) 298 299 300def test_upload_validate_identifier(session): 301 item = session.get_item('føø') 302 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 303 rsps.add(responses.PUT, S3_URL_RE, 304 adding_headers=EXPECTED_S3_HEADERS) 305 try: 306 item.upload(NASA_METADATA_PATH, 307 access_key='a', 308 secret_key='b', 309 validate_identifier=True) 310 assert False 311 except Exception as exc: 312 assert isinstance(exc, InvalidIdentifierException) 313 314 valid_item = session.get_item('foo') 315 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 316 rsps.add(responses.PUT, S3_URL_RE, 317 adding_headers=EXPECTED_S3_HEADERS) 318 valid_item.upload(NASA_METADATA_PATH, 319 access_key='a', 320 secret_key='b', 321 validate_identifier=True) 322 assert True 323 324 325def test_upload_secure_session(): 326 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 327 c = {'s3': {'access': 'foo', 'secret': 'bar'}, 'general': {'secure': True}} 328 s = get_session(config=c) 329 rsps.add_metadata_mock('nasa') 330 item = s.get_item('nasa') 331 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 332 rsps.add(responses.PUT, S3_URL_RE) 333 r = item.upload(NASA_METADATA_PATH) 334 assert r[0].url == 'https://s3.us.archive.org/nasa/nasa.json' 335 336 337def test_upload_metadata(nasa_item): 338 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 339 _expected_headers = deepcopy(EXPECTED_S3_HEADERS) 340 del _expected_headers['x-archive-meta00-scanner'] 341 _expected_headers['x-archive-meta00-foo'] = 'bar' 342 _expected_headers['x-archive-meta00-subject'] = 'first' 343 _expected_headers['x-archive-meta01-subject'] = 'second' 344 _expected_headers['x-archive-meta00-baz'] = ( 345 'uri(%D0%9F%D0%BE%D1%87%D0%B5%D0%BC' 346 '%D1%83%20%D0%B1%D1%8B%20%D0%B8%20%' 347 'D0%BD%D0%B5%D1%82...)') 348 _expected_headers['x-archive-meta00-baz2'] = ( 349 'uri(%D0%9F%D0%BE%D1%87%D0%B5%D0%BC' 350 '%D1%83%20%D0%B1%D1%8B%20%D0%B8%20%' 351 'D0%BD%D0%B5%D1%82...)') 352 rsps.add(responses.PUT, S3_URL_RE, 353 adding_headers=_expected_headers) 354 md = dict( 355 foo='bar', 356 subject=['first', 'second'], 357 baz='Почему бы и нет...', 358 baz2=(u'\u041f\u043e\u0447\u0435\u043c\u0443 \u0431\u044b \u0438 ' 359 u'\u043d\u0435\u0442...'), 360 ) 361 _responses = nasa_item.upload(NASA_METADATA_PATH, 362 metadata=md, 363 access_key='a', 364 secret_key='b') 365 for resp in _responses: 366 request = resp.request 367 del request.headers['x-archive-meta00-scanner'] 368 headers = dict((k.lower(), str(v)) for k, v in request.headers.items()) 369 assert 'user-agent' in headers 370 del headers['user-agent'] 371 assert headers == _expected_headers 372 373 374def test_upload_503(capsys, nasa_item): 375 body = ("<?xml version='1.0' encoding='UTF-8'?>" 376 '<Error><Code>SlowDown</Code><Message>Please reduce your request rate.' 377 '</Message><Resource>simulated error caused by x-(amz|archive)-simulate-error' 378 ', try x-archive-simulate-error:help</Resource><RequestId>d36ec445-8d4a-4a64-' 379 'a110-f67af6ee2c2a</RequestId></Error>') 380 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 381 _expected_headers = deepcopy(EXPECTED_S3_HEADERS) 382 rsps.add(responses.GET, S3_URL_RE, 383 body='{"over_limit": "1"}') 384 rsps.add(responses.PUT, S3_URL_RE, 385 body=body, 386 adding_headers=_expected_headers, 387 status=503) 388 try: 389 nasa_item.upload(NASA_METADATA_PATH, 390 access_key='a', 391 secret_key='b', 392 retries=1, 393 retries_sleep=.1, 394 verbose=True) 395 except Exception as exc: 396 assert 'Please reduce your request rate' in str(exc) 397 out, err = capsys.readouterr() 398 assert 'warning: s3 is overloaded' in err 399 400 401def test_upload_file_keys(nasa_item): 402 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 403 rsps.add(responses.PUT, S3_URL_RE, adding_headers=EXPECTED_S3_HEADERS) 404 files = {'new_key.txt': NASA_METADATA_PATH, '222': NASA_METADATA_PATH} 405 _responses = nasa_item.upload(files, access_key='a', secret_key='b') 406 expected_urls = [ 407 '{0}//s3.us.archive.org/nasa/new_key.txt'.format(PROTOCOL), 408 '{0}//s3.us.archive.org/nasa/222'.format(PROTOCOL) 409 ] 410 for resp in _responses: 411 assert resp.request.url in expected_urls 412 413 414def test_upload_dir(tmpdir, nasa_item): 415 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 416 rsps.add(responses.PUT, S3_URL_RE, 417 adding_headers=EXPECTED_S3_HEADERS) 418 419 tmpdir.mkdir('dir_test') 420 with open(os.path.join(str(tmpdir), 'dir_test', 'foo.txt'), 'w') as fh: 421 fh.write('hi') 422 with open(os.path.join(str(tmpdir), 'dir_test', 'foo2.txt'), 'w') as fh: 423 fh.write('hi 2') 424 425 # Test no-slash upload, dir is not in key name. 426 _responses = nasa_item.upload(os.path.join(str(tmpdir), 'dir_test') + '/', 427 access_key='a', 428 secret_key='b') 429 expected_eps = [ 430 '{0}nasa/foo.txt'.format(S3_URL), 431 '{0}nasa/foo2.txt'.format(S3_URL), 432 ] 433 for resp in _responses: 434 assert resp.request.url in expected_eps 435 436 # Test slash upload, dir is in key name. 437 _responses = nasa_item.upload(os.path.join(str(tmpdir), 'dir_test'), 438 access_key='a', 439 secret_key='b') 440 tmp_path = norm_filepath(str(tmpdir)) 441 expected_eps = [ 442 '{0}nasa{1}/dir_test/{2}'.format(S3_URL, tmp_path, 'foo.txt'), 443 '{0}nasa{1}/dir_test/{2}'.format(S3_URL, tmp_path, 'foo2.txt'), 444 ] 445 for resp in _responses: 446 assert resp.request.url in expected_eps 447 448 449def test_upload_queue_derive(nasa_item): 450 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 451 _expected_headers = deepcopy(EXPECTED_S3_HEADERS) 452 _expected_headers['x-archive-queue-derive'] = '1' 453 del _expected_headers['x-archive-meta00-scanner'] 454 rsps.add(responses.PUT, S3_URL_RE, adding_headers=_expected_headers) 455 _responses = nasa_item.upload(NASA_METADATA_PATH, access_key='a', secret_key='b') 456 for resp in _responses: 457 headers = dict((k.lower(), str(v)) for k, v in resp.request.headers.items()) 458 del headers['x-archive-meta00-scanner'] 459 assert 'user-agent' in headers 460 del headers['user-agent'] 461 assert headers == _expected_headers 462 463 464def test_upload_delete(tmpdir, nasa_item): 465 body = ("<?xml version='1.0' encoding='UTF-8'?>" 466 '<Error><Code>BadDigest</Code><Message>The Content-MD5 you specified did not ' 467 'match what we received.</Message><Resource>content-md5 submitted with PUT: ' 468 'foo != received data md5: 70871f9fce8dd23853d6e42417356b05also not equal to ' 469 'base64 version: cIcfn86N0jhT1uQkFzVrBQ==</Resource><RequestId>ec03fe7c-e123-' 470 '4133-a207-3141d4d74096</RequestId></Error>') 471 472 _expected_headers = deepcopy(EXPECTED_S3_HEADERS) 473 del _expected_headers['x-archive-meta00-scanner'] 474 tmpdir.chdir() 475 test_file = os.path.join(str(tmpdir), 'test.txt') 476 with open(test_file, 'w') as fh: 477 fh.write('test delete') 478 479 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 480 # Non-matching md5, should not delete. 481 rsps.add(responses.PUT, S3_URL_RE, 482 body=body, 483 adding_headers=_expected_headers, 484 status=400) 485 with pytest.raises(HTTPError): 486 nasa_item.upload(test_file, 487 access_key='a', 488 secret_key='b', 489 delete=True, 490 queue_derive=True) 491 492 assert len(tmpdir.listdir()) == 1 493 494 _expected_headers = deepcopy(EXPECTED_S3_HEADERS) 495 test_file = os.path.join(str(tmpdir), 'test.txt') 496 with open(test_file, 'w') as fh: 497 fh.write('test delete') 498 499 # Matching md5, should delete. 500 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 501 rsps.add(responses.PUT, S3_URL_RE, 502 adding_headers=_expected_headers) 503 resp = nasa_item.upload(test_file, 504 access_key='a', 505 secret_key='b', 506 delete=True, 507 queue_derive=True) 508 for r in resp: 509 headers = dict((k.lower(), str(v)) for k, v in r.headers.items()) 510 del headers['content-type'] 511 assert headers == _expected_headers 512 assert len(tmpdir.listdir()) == 0 513 514 515def test_upload_checksum(tmpdir, nasa_item): 516 with IaRequestsMock() as rsps: 517 rsps.add_metadata_mock('nasa') 518 nasa_item = get_item('nasa') 519 520 _expected_headers = deepcopy(EXPECTED_S3_HEADERS) 521 del _expected_headers['x-archive-meta00-scanner'] 522 _expected_headers['content-md5'] = '6f1834f5c70c0eabf93dea675ccf90c4' 523 524 test_file = os.path.join(str(tmpdir), 'checksum_test.txt') 525 with open(test_file, 'wb') as fh: 526 fh.write(b'test delete') 527 528 # No skip. 529 rsps.add(responses.PUT, S3_URL_RE, adding_headers=_expected_headers) 530 resp = nasa_item.upload(test_file, 531 access_key='a', 532 secret_key='b', 533 checksum=True) 534 for r in resp: 535 headers = dict((k.lower(), str(v)) for k, v in r.headers.items()) 536 del headers['content-type'] 537 assert headers == _expected_headers 538 assert r.status_code == 200 539 540 # Skip. 541 nasa_item.item_metadata['files'].append( 542 dict(name='checksum_test.txt', 543 md5='33213e7683c1e6d15b2a658f3c567717')) 544 resp = nasa_item.upload(test_file, 545 access_key='a', 546 secret_key='b', 547 checksum=True) 548 for r in resp: 549 headers = dict((k.lower(), str(v)) for k, v in r.headers.items()) 550 assert r.status_code is None 551 552 553def test_modify_metadata(nasa_item, nasa_metadata): 554 with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: 555 rsps.add(responses.POST, '{0}//archive.org/metadata/nasa'.format(PROTOCOL)) 556 557 # Test simple add. 558 md = {'foo': 'bar'} 559 p = nasa_item.modify_metadata(md, debug=True) 560 _patch = json.dumps([ 561 {"add": "/foo", "value": "bar"}, 562 ]) 563 expected_data = { 564 'priority': -5, 565 '-target': 'metadata', 566 '-patch': _patch, 567 } 568 assert set(p.data.keys()) == set(expected_data.keys()) 569 assert p.data['priority'] == expected_data['priority'] 570 assert p.data['-target'] == expected_data['-target'] 571 assert all(v in p.data['-patch'] for v in ['/foo', 'bar']) 572 # Test no changes. 573 md = {'title': 'NASA Images'} 574 p = nasa_item.modify_metadata(md, debug=True) 575 expected_data = {'priority': -5, '-target': 'metadata', '-patch': '[]'} 576 assert p.data == expected_data 577 578 md = {'title': 'REMOVE_TAG'} 579 p = nasa_item.modify_metadata(md, debug=True) 580 expected_data = { 581 'priority': -5, 582 '-target': 'metadata', 583 '-patch': json.dumps([{"remove": "/title"}]) 584 } 585 assert set(p.data.keys()) == set(expected_data.keys()) 586 assert p.data['priority'] == expected_data['priority'] 587 assert p.data['-target'] == expected_data['-target'] 588 assert '/title' in str(p.data['-patch']) 589 assert 'remove' in str(p.data['-patch']) 590 591 # Test add array. 592 md = {'subject': ['one', 'two', 'last']} 593 p = nasa_item.modify_metadata(md, debug=True, priority=-1) 594 expected_data = { 595 'priority': -1, 596 '-target': 'metadata', 597 '-patch': json.dumps([{"add": "/subject", "value": ["one", "two", "last"]}]) 598 } 599 assert set(p.data.keys()) == set(expected_data.keys()) 600 assert p.data['priority'] == expected_data['priority'] 601 assert p.data['-target'] == expected_data['-target'] 602 assert '["one", "two", "last"]' in str(p.data['-patch']) 603 604 # Test indexed mod. 605 nasa_item.item_metadata['metadata']['subject'] = ['first', 'middle', 'last'] 606 md = {'subject[2]': 'new first'} 607 p = nasa_item.modify_metadata(md, debug=True) 608 expected_data = { 609 'priority': -5, 610 '-target': 'metadata', 611 '-patch': json.dumps([{"value": "new first", "replace": "/subject/2"}]) 612 } 613 614 # Avoid comparing the json strings, because they are not in a canonical form 615 assert set(p.data.keys()) == set(expected_data.keys()) 616 assert all(p.data[k] == expected_data[k] for k in ['priority', '-target']) 617 assert '/subject/2' in p.data['-patch'] 618 619 # Test priority. 620 md = {'title': 'NASA Images'} 621 p = nasa_item.modify_metadata(md, priority=3, debug=True) 622 expected_data = {'priority': 3, '-target': 'metadata', '-patch': '[]'} 623 assert p.data == expected_data 624 625 # Test auth. 626 md = {'title': 'NASA Images'} 627 p = nasa_item.modify_metadata(md, 628 access_key='a', 629 secret_key='b', 630 debug=True) 631 assert 'access=a' in p.body 632 assert 'secret=b' in p.body 633 634 # Test change. 635 md = {'title': 'new title'} 636 nasa_metadata['metadata']['title'] = 'new title' 637 _item_metadata = json.dumps(nasa_metadata) 638 rsps.add(responses.GET, '{0}//archive.org/metadata/nasa'.format(PROTOCOL), 639 body=_item_metadata) 640 nasa_item.modify_metadata(md, 641 access_key='a', 642 secret_key='b') 643 # Test that item re-initializes 644 assert nasa_item.metadata['title'] == 'new title' 645