"""
These tests are kept as references from the ones that were ported to the itemloaders library.
Once we remove the references from scrapy, we can remove these tests.
"""

import unittest
import warnings
from functools import partial

from itemloaders.processors import (Compose, Identity, Join,
                                    MapCompose, SelectJmes, TakeFirst)

from scrapy.item import Item, Field
from scrapy.loader import ItemLoader
from scrapy.loader.common import wrap_loader_context
from scrapy.utils.deprecate import ScrapyDeprecationWarning
from scrapy.utils.misc import extract_regex


# test items
class NameItem(Item):
    """Item with a single ``name`` field."""
    name = Field()


class TestItem(NameItem):
    """``NameItem`` extended with ``url`` and ``summary`` fields."""
    url = Field()
    summary = Field()


# test item loaders
class NameItemLoader(ItemLoader):
    """Loader that builds ``TestItem`` instances and declares no processors."""
    default_item_class = TestItem


class TestItemLoader(NameItemLoader):
    """Loader whose ``name`` input processor title-cases every value."""
    name_in = MapCompose(lambda v: v.title())


class DefaultedItemLoader(NameItemLoader):
    """Loader whose default input processor drops the last character of every value."""
    default_input_processor = MapCompose(lambda v: v[:-1])


# test processors
def processor_with_args(value, other=None, loader_context=None):
    """Return ``loader_context['key']`` when that key is present, else ``value``.

    ``other`` is accepted but never read. ``loader_context`` may be left as
    ``None`` (direct call without a loader), in which case ``value`` is
    returned unchanged instead of failing on the membership test.
    """
    if loader_context is not None and 'key' in loader_context:
        return loader_context['key']
    return value


class BasicItemLoaderTest(unittest.TestCase):
    """Tests for value collection, input/output processors and loader context."""

    def test_load_item_using_default_loader(self):
        """A plain ItemLoader returns the very item it was given, with values as lists."""
        i = TestItem()
        i['summary'] = 'lala'
        il = ItemLoader(item=i)
        il.add_value('name', 'marta')
        item = il.load_item()
        self.assertIs(item, i)
        self.assertEqual(item['summary'], ['lala'])
        self.assertEqual(item['name'], ['marta'])

    def test_load_item_using_custom_loader(self):
        """The ``name_in`` processor declared on TestItemLoader is applied."""
        il = TestItemLoader()
        il.add_value('name', 'marta')
        item = il.load_item()
        self.assertEqual(item['name'], ['Marta'])

    def test_load_item_ignore_none_field_values(self):
        """Fields whose output is None are omitted; empty-but-not-None values are kept."""
        def validate_sku(value):
            # Let's assume a SKU is only digits.
            # Falls through (returning None) for anything else.
            if value.isdigit():
                return value

        class MyLoader(ItemLoader):
            name_out = Compose(lambda vs: vs[0])  # take first which allows empty values
            price_out = Compose(TakeFirst(), float)
            sku_out = Compose(TakeFirst(), validate_sku)

        valid_fragment = 'SKU: 1234'
        invalid_fragment = 'SKU: not available'
        sku_re = 'SKU: (.+)'

        il = MyLoader(item={})
        # Should not return "sku: None".
        il.add_value('sku', [invalid_fragment], re=sku_re)
        # Should not ignore empty values.
        il.add_value('name', '')
        il.add_value('price', ['0'])
        self.assertEqual(il.load_item(), {
            'name': '',
            'price': 0.0,
        })

        il.replace_value('sku', [valid_fragment], re=sku_re)
        self.assertEqual(il.load_item()['sku'], '1234')

    def test_self_referencing_loader(self):
        """An output processor defined as a method can read another field's output."""
        class MyLoader(ItemLoader):
            url_out = TakeFirst()

            def img_url_out(self, values):
                return (self.get_output_value('url') or '') + values[0]

        il = MyLoader(item={})
        il.add_value('url', 'http://example.com/')
        il.add_value('img_url', '1234.png')
        self.assertEqual(il.load_item(), {
            'url': 'http://example.com/',
            'img_url': 'http://example.com/1234.png',
        })

        # Without a 'url' value the prefix falls back to the empty string.
        il = MyLoader(item={})
        il.add_value('img_url', '1234.png')
        self.assertEqual(il.load_item(), {
            'img_url': '1234.png',
        })

    def test_add_value(self):
        """add_value() appends processed values, including via a None field name."""
        il = TestItemLoader()
        il.add_value('name', 'marta')
        self.assertEqual(il.get_collected_values('name'), ['Marta'])
        self.assertEqual(il.get_output_value('name'), ['Marta'])
        il.add_value('name', 'pepe')
        self.assertEqual(il.get_collected_values('name'), ['Marta', 'Pepe'])
        self.assertEqual(il.get_output_value('name'), ['Marta', 'Pepe'])

        # test add object value
        il.add_value('summary', {'key': 1})
        self.assertEqual(il.get_collected_values('summary'), [{'key': 1}])

        # With field name None, the processor returns a {field: value} dict.
        il.add_value(None, 'Jim', lambda x: {'name': x})
        self.assertEqual(il.get_collected_values('name'), ['Marta', 'Pepe', 'Jim'])

    def test_add_zero(self):
        """A falsy value such as 0 is still collected."""
        il = NameItemLoader()
        il.add_value('name', 0)
        self.assertEqual(il.get_collected_values('name'), [0])

    def test_replace_value(self):
        """replace_value() discards previously collected values for the field."""
        il = TestItemLoader()
        il.replace_value('name', 'marta')
        self.assertEqual(il.get_collected_values('name'), ['Marta'])
        self.assertEqual(il.get_output_value('name'), ['Marta'])
        il.replace_value('name', 'pepe')
        self.assertEqual(il.get_collected_values('name'), ['Pepe'])
        self.assertEqual(il.get_output_value('name'), ['Pepe'])

        il.replace_value(None, 'Jim', lambda x: {'name': x})
        self.assertEqual(il.get_collected_values('name'), ['Jim'])

    def test_get_value(self):
        """get_value() applies the ``re`` keyword and the given processors."""
        il = NameItemLoader()
        self.assertEqual('FOO', il.get_value(['foo', 'bar'], TakeFirst(), str.upper))
        self.assertEqual(['foo', 'bar'], il.get_value(['name:foo', 'name:bar'], re='name:(.*)$'))
        self.assertEqual('foo', il.get_value(['name:foo', 'name:bar'], TakeFirst(), re='name:(.*)$'))

        il.add_value('name', ['name:foo', 'name:bar'], TakeFirst(), re='name:(.*)$')
        self.assertEqual(['foo'], il.get_collected_values('name'))
        il.replace_value('name', 'name:bar', re='name:(.*)$')
        self.assertEqual(['bar'], il.get_collected_values('name'))

    def test_iter_on_input_processor_input(self):
        """Input processors receive an iterable, whether a scalar or a list was added."""
        class NameFirstItemLoader(NameItemLoader):
            name_in = TakeFirst()

        il = NameFirstItemLoader()
        il.add_value('name', 'marta')
        self.assertEqual(il.get_collected_values('name'), ['marta'])
        il = NameFirstItemLoader()
        il.add_value('name', ['marta', 'jose'])
        self.assertEqual(il.get_collected_values('name'), ['marta'])

        il = NameFirstItemLoader()
        il.replace_value('name', 'marta')
        self.assertEqual(il.get_collected_values('name'), ['marta'])
        il = NameFirstItemLoader()
        il.replace_value('name', ['marta', 'jose'])
        self.assertEqual(il.get_collected_values('name'), ['marta'])

        # TakeFirst runs once per add_value() call, so one value per call is kept.
        il = NameFirstItemLoader()
        il.add_value('name', 'marta')
        il.add_value('name', ['jose', 'pedro'])
        self.assertEqual(il.get_collected_values('name'), ['marta', 'jose'])

    def test_map_compose_filter(self):
        """MapCompose drops values for which a function returns None."""
        def filter_world(x):
            return None if x == 'world' else x

        proc = MapCompose(filter_world, str.upper)
        self.assertEqual(proc(['hello', 'world', 'this', 'is', 'scrapy']),
                         ['HELLO', 'THIS', 'IS', 'SCRAPY'])

    def test_map_compose_filter_multil(self):
        """MapCompose applies several functions in the order given."""
        class TestItemLoader(NameItemLoader):
            name_in = MapCompose(lambda v: v.title(), lambda v: v[:-1])

        il = TestItemLoader()
        il.add_value('name', 'marta')
        self.assertEqual(il.get_output_value('name'), ['Mart'])
        item = il.load_item()
        self.assertEqual(item['name'], ['Mart'])

    def test_default_input_processor(self):
        """``default_input_processor`` is used for fields without a ``<field>_in``."""
        il = DefaultedItemLoader()
        il.add_value('name', 'marta')
        self.assertEqual(il.get_output_value('name'), ['mart'])

    def test_inherited_default_input_processor(self):
        """``default_input_processor`` is inherited by subclasses."""
        class InheritDefaultedItemLoader(DefaultedItemLoader):
            pass

        il = InheritDefaultedItemLoader()
        il.add_value('name', 'marta')
        self.assertEqual(il.get_output_value('name'), ['mart'])

    def test_input_processor_inheritance(self):
        """Subclasses inherit ``<field>_in`` processors and may override them."""
        class ChildItemLoader(TestItemLoader):
            url_in = MapCompose(lambda v: v.lower())

        il = ChildItemLoader()
        il.add_value('url', 'HTTP://scrapy.ORG')
        self.assertEqual(il.get_output_value('url'), ['http://scrapy.org'])
        il.add_value('name', 'marta')
        self.assertEqual(il.get_output_value('name'), ['Marta'])

        class ChildChildItemLoader(ChildItemLoader):
            url_in = MapCompose(lambda v: v.upper())
            summary_in = MapCompose(lambda v: v)

        il = ChildChildItemLoader()
        il.add_value('url', 'http://scrapy.org')
        self.assertEqual(il.get_output_value('url'), ['HTTP://SCRAPY.ORG'])
        il.add_value('name', 'marta')
        self.assertEqual(il.get_output_value('name'), ['Marta'])

    def test_empty_map_compose(self):
        """An empty MapCompose() overrides the default and leaves values untouched."""
        class IdentityDefaultedItemLoader(DefaultedItemLoader):
            name_in = MapCompose()

        il = IdentityDefaultedItemLoader()
        il.add_value('name', 'marta')
        self.assertEqual(il.get_output_value('name'), ['marta'])

    def test_identity_input_processor(self):
        """Identity() overrides the default input processor and leaves values untouched."""
        class IdentityDefaultedItemLoader(DefaultedItemLoader):
            name_in = Identity()

        il = IdentityDefaultedItemLoader()
        il.add_value('name', 'marta')
        self.assertEqual(il.get_output_value('name'), ['marta'])

    def test_extend_custom_input_processors(self):
        """A parent's ``name_in`` can be reused as a step of a new MapCompose."""
        class ChildItemLoader(TestItemLoader):
            name_in = MapCompose(TestItemLoader.name_in, str.swapcase)

        il = ChildItemLoader()
        il.add_value('name', 'marta')
        self.assertEqual(il.get_output_value('name'), ['mARTA'])

    def test_extend_default_input_processors(self):
        """A parent's default input processor can be reused as a MapCompose step."""
        class ChildDefaultedItemLoader(DefaultedItemLoader):
            name_in = MapCompose(DefaultedItemLoader.default_input_processor, str.swapcase)

        il = ChildDefaultedItemLoader()
        il.add_value('name', 'marta')
        self.assertEqual(il.get_output_value('name'), ['MART'])

    def test_output_processor_using_function(self):
        """A plain callable (here ``" ".join``) can serve as an output processor."""
        il = TestItemLoader()
        il.add_value('name', ['mar', 'ta'])
        self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta'])

        class TakeFirstItemLoader(TestItemLoader):
            name_out = " ".join

        il = TakeFirstItemLoader()
        il.add_value('name', ['mar', 'ta'])
        self.assertEqual(il.get_output_value('name'), 'Mar Ta')

    def test_output_processor_error(self):
        """A failing output processor raises ValueError naming the field, value and cause."""
        class TestItemLoader(ItemLoader):
            default_item_class = TestItem
            name_out = MapCompose(float)

        il = TestItemLoader()
        il.add_value('name', ['$10'])

        # Capture the message float() itself produces, to look for it in the
        # error raised by the loader.
        with self.assertRaises(ValueError) as expected_cm:
            float('$10')
        expected_exc_str = str(expected_cm.exception)

        with self.assertRaises(ValueError) as cm:
            il.load_item()
        s = str(cm.exception)
        self.assertIn('name', s, s)
        self.assertIn('$10', s, s)
        self.assertIn('ValueError', s, s)
        self.assertIn(expected_exc_str, s, s)

    def test_output_processor_using_classes(self):
        """Join() joins with a space by default and accepts a custom separator."""
        il = TestItemLoader()
        il.add_value('name', ['mar', 'ta'])
        self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta'])

        class TakeFirstItemLoader(TestItemLoader):
            name_out = Join()

        il = TakeFirstItemLoader()
        il.add_value('name', ['mar', 'ta'])
        self.assertEqual(il.get_output_value('name'), 'Mar Ta')

        class TakeFirstItemLoader(TestItemLoader):
            name_out = Join("<br>")

        il = TakeFirstItemLoader()
        il.add_value('name', ['mar', 'ta'])
        self.assertEqual(il.get_output_value('name'), 'Mar<br>Ta')

    def test_default_output_processor(self):
        """Setting ``default_output_processor = Identity()`` yields the collected list."""
        il = TestItemLoader()
        il.add_value('name', ['mar', 'ta'])
        self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta'])

        class LalaItemLoader(TestItemLoader):
            default_output_processor = Identity()

        il = LalaItemLoader()
        il.add_value('name', ['mar', 'ta'])
        self.assertEqual(il.get_output_value('name'), ['Mar', 'Ta'])

    def test_loader_context_on_declaration(self):
        """Keyword arguments given to MapCompose reach the processor's loader_context."""
        class ChildItemLoader(TestItemLoader):
            url_in = MapCompose(processor_with_args, key='val')

        il = ChildItemLoader()
        il.add_value('url', 'text')
        self.assertEqual(il.get_output_value('url'), ['val'])
        il.replace_value('url', 'text2')
        self.assertEqual(il.get_output_value('url'), ['val'])

    def test_loader_context_on_instantiation(self):
        """Keyword arguments given to the loader reach the processor's loader_context."""
        class ChildItemLoader(TestItemLoader):
            url_in = MapCompose(processor_with_args)

        il = ChildItemLoader(key='val')
        il.add_value('url', 'text')
        self.assertEqual(il.get_output_value('url'), ['val'])
        il.replace_value('url', 'text2')
        self.assertEqual(il.get_output_value('url'), ['val'])

    def test_loader_context_on_assign(self):
        """Keys assigned to ``il.context`` reach the processor's loader_context."""
        class ChildItemLoader(TestItemLoader):
            url_in = MapCompose(processor_with_args)

        il = ChildItemLoader()
        il.context['key'] = 'val'
        il.add_value('url', 'text')
        self.assertEqual(il.get_output_value('url'), ['val'])
        il.replace_value('url', 'text2')
        self.assertEqual(il.get_output_value('url'), ['val'])

    def test_item_passed_to_input_processor_functions(self):
        """The item being loaded is available as ``loader_context['item']``."""
        def processor(value, loader_context):
            return loader_context['item']['name']

        class ChildItemLoader(TestItemLoader):
            url_in = MapCompose(processor)

        it = TestItem(name='marta')
        il = ChildItemLoader(item=it)
        il.add_value('url', 'text')
        self.assertEqual(il.get_output_value('url'), ['marta'])
        il.replace_value('url', 'text2')
        self.assertEqual(il.get_output_value('url'), ['marta'])

    def test_compose_processor(self):
        """Compose chains functions, feeding each result to the next."""
        class TestItemLoader(NameItemLoader):
            name_out = Compose(lambda v: v[0], lambda v: v.title(), lambda v: v[:-1])

        il = TestItemLoader()
        il.add_value('name', ['marta', 'other'])
        self.assertEqual(il.get_output_value('name'), 'Mart')
        item = il.load_item()
        self.assertEqual(item['name'], 'Mart')

    def test_partial_processor(self):
        """functools.partial objects work as processors, with their bound keywords kept."""
        def join(values, sep=None, loader_context=None, ignored=None):
            if sep is not None:
                return sep.join(values)
            elif loader_context and 'sep' in loader_context:
                return loader_context['sep'].join(values)
            else:
                return ''.join(values)

        class TestItemLoader(NameItemLoader):
            name_out = Compose(partial(join, sep='+'))
            url_out = Compose(partial(join, loader_context={'sep': '.'}))
            summary_out = Compose(partial(join, ignored='foo'))

        il = TestItemLoader()
        il.add_value('name', ['rabbit', 'hole'])
        il.add_value('url', ['rabbit', 'hole'])
        il.add_value('summary', ['rabbit', 'hole'])
        item = il.load_item()
        self.assertEqual(item['name'], 'rabbit+hole')
        self.assertEqual(item['url'], 'rabbit.hole')
        self.assertEqual(item['summary'], 'rabbithole')

    def test_error_input_processor(self):
        """A failing input processor makes add_value() raise ValueError."""
        class TestItem(Item):
            name = Field()

        class TestItemLoader(ItemLoader):
            default_item_class = TestItem
            name_in = MapCompose(float)

        il = TestItemLoader()
        self.assertRaises(ValueError, il.add_value, 'name',
                          ['marta', 'other'])

    def test_error_output_processor(self):
        """A failing output processor makes load_item() raise ValueError."""
        class TestItem(Item):
            name = Field()

        class TestItemLoader(ItemLoader):
            default_item_class = TestItem
            name_out = Compose(Join(), float)

        il = TestItemLoader()
        il.add_value('name', 'marta')
        with self.assertRaises(ValueError):
            il.load_item()

    def test_error_processor_as_argument(self):
        """A failing processor passed directly to add_value() raises ValueError."""
        class TestItem(Item):
            name = Field()

        class TestItemLoader(ItemLoader):
            default_item_class = TestItem

        il = TestItemLoader()
        self.assertRaises(ValueError, il.add_value, 'name',
                          ['marta', 'other'], Compose(float))


class InitializationFromDictTest(unittest.TestCase):
    """Tests for loaders initialized with an already populated item (a dict here)."""

    item_class = dict

    def test_keep_single_value(self):
        """Loaded item should contain values from the initial item"""
        input_item = self.item_class(name='foo')
        il = ItemLoader(item=input_item)
        loaded_item = il.load_item()
        self.assertIsInstance(loaded_item, self.item_class)
        self.assertEqual(dict(loaded_item), {'name': ['foo']})

    def test_keep_list(self):
        """Loaded item should contain values from the initial item"""
        input_item = self.item_class(name=['foo', 'bar'])
        il = ItemLoader(item=input_item)
        loaded_item = il.load_item()
        self.assertIsInstance(loaded_item, self.item_class)
        self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar']})

    def test_add_value_singlevalue_singlevalue(self):
        """Values added after initialization should be appended"""
        input_item = self.item_class(name='foo')
        il = ItemLoader(item=input_item)
        il.add_value('name', 'bar')
        loaded_item = il.load_item()
        self.assertIsInstance(loaded_item, self.item_class)
        self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar']})

    def test_add_value_singlevalue_list(self):
        """Values added after initialization should be appended"""
        input_item = self.item_class(name='foo')
        il = ItemLoader(item=input_item)
        il.add_value('name', ['item', 'loader'])
        loaded_item = il.load_item()
        self.assertIsInstance(loaded_item, self.item_class)
        self.assertEqual(dict(loaded_item), {'name': ['foo', 'item', 'loader']})

    def test_add_value_list_singlevalue(self):
        """Values added after initialization should be appended"""
        input_item = self.item_class(name=['foo', 'bar'])
        il = ItemLoader(item=input_item)
        il.add_value('name', 'qwerty')
        loaded_item = il.load_item()
        self.assertIsInstance(loaded_item, self.item_class)
        self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar', 'qwerty']})

    def test_add_value_list_list(self):
        """Values added after initialization should be appended"""
        input_item = self.item_class(name=['foo', 'bar'])
        il = ItemLoader(item=input_item)
        il.add_value('name', ['item', 'loader'])
        loaded_item = il.load_item()
        self.assertIsInstance(loaded_item, self.item_class)
        self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar', 'item', 'loader']})

    def test_get_output_value_singlevalue(self):
        """Getting output value must not remove value from item"""
        input_item = self.item_class(name='foo')
        il = ItemLoader(item=input_item)
        self.assertEqual(il.get_output_value('name'), ['foo'])
        loaded_item = il.load_item()
        self.assertIsInstance(loaded_item, self.item_class)
        self.assertEqual(dict(loaded_item), {'name': ['foo']})

    def test_get_output_value_list(self):
        """Getting output value must not remove value from item"""
        input_item = self.item_class(name=['foo', 'bar'])
        il = ItemLoader(item=input_item)
        self.assertEqual(il.get_output_value('name'), ['foo', 'bar'])
        loaded_item = il.load_item()
        self.assertIsInstance(loaded_item, self.item_class)
        self.assertEqual(dict(loaded_item), {'name': ['foo', 'bar']})

    def test_values_single(self):
        """Values from initial item must be added to loader._values"""
        input_item = self.item_class(name='foo')
        il = ItemLoader(item=input_item)
        self.assertEqual(il._values.get('name'), ['foo'])

    def test_values_list(self):
        """Values from initial item must be added to loader._values"""
        input_item = self.item_class(name=['foo', 'bar'])
        il = ItemLoader(item=input_item)
        self.assertEqual(il._values.get('name'), ['foo', 'bar'])


class BaseNoInputReprocessingLoader(ItemLoader):
    """Loader that upper-cases ``title`` on input and takes the first value on output."""
    title_in = MapCompose(str.upper)
    title_out = TakeFirst()


class NoInputReprocessingDictLoader(BaseNoInputReprocessingLoader):
    """``BaseNoInputReprocessingLoader`` producing plain dict items."""
    default_item_class = dict


class NoInputReprocessingFromDictTest(unittest.TestCase):
    """
    Loaders initialized from loaded items must not reprocess fields (dict instances)
    """
    def test_avoid_reprocessing_with_initial_values_single(self):
        """An initial scalar value is not run through the input processor."""
        il = NoInputReprocessingDictLoader(item=dict(title='foo'))
        il_loaded = il.load_item()
        self.assertEqual(il_loaded, dict(title='foo'))
        self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='foo'))

    def test_avoid_reprocessing_with_initial_values_list(self):
        """An initial list value is not run through the input processor."""
        il = NoInputReprocessingDictLoader(item=dict(title=['foo', 'bar']))
        il_loaded = il.load_item()
        self.assertEqual(il_loaded, dict(title='foo'))
        self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='foo'))

    def test_avoid_reprocessing_without_initial_values_single(self):
        """A value added via add_value() is processed once, not again on reload."""
        il = NoInputReprocessingDictLoader()
        il.add_value('title', 'foo')
        il_loaded = il.load_item()
        self.assertEqual(il_loaded, dict(title='FOO'))
        self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='FOO'))

    def test_avoid_reprocessing_without_initial_values_list(self):
        """A list added via add_value() is processed once, not again on reload."""
        il = NoInputReprocessingDictLoader()
        il.add_value('title', ['foo', 'bar'])
        il_loaded = il.load_item()
        self.assertEqual(il_loaded, dict(title='FOO'))
        self.assertEqual(NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title='FOO'))


class TestOutputProcessorDict(unittest.TestCase):
    """Tests output processing when the item class pre-populates itself."""

    def test_output_processor(self):
        """A scalar set by the item's own __init__ survives loading unchanged."""

        class TempDict(dict):
            def __init__(self, *args, **kwargs):
                # Forward the arguments as-is; ``self`` must not be passed
                # again, or any real positional argument raises TypeError.
                super().__init__(*args, **kwargs)
                self.setdefault('temp', 0.3)

        class TempLoader(ItemLoader):
            default_item_class = TempDict
            default_input_processor = Identity()
            default_output_processor = Compose(TakeFirst())

        loader = TempLoader()
        item = loader.load_item()
        self.assertIsInstance(item, TempDict)
        self.assertEqual(dict(item), {'temp': 0.3})


class ProcessorsTest(unittest.TestCase):
    """Direct tests of the processor classes, without a loader."""

    def test_take_first(self):
        """TakeFirst skips None and '' but returns 0."""
        proc = TakeFirst()
        self.assertEqual(proc([None, '', 'hello', 'world']), 'hello')
        self.assertEqual(proc([None, '', 0, 'hello', 'world']), 0)

    def test_identity(self):
        """Identity returns its input unchanged."""
        proc = Identity()
        self.assertEqual(proc([None, '', 'hello', 'world']),
                         [None, '', 'hello', 'world'])

    def test_join(self):
        """Join concatenates strings with a space and rejects None entries."""
        proc = Join()
        self.assertRaises(TypeError, proc, [None, '', 'hello', 'world'])
        self.assertEqual(proc(['', 'hello', 'world']), ' hello world')
        self.assertEqual(proc(['hello', 'world']), 'hello world')
        self.assertIsInstance(proc(['hello', 'world']), str)

    def test_compose(self):
        """Compose stops on None by default and reports function errors as ValueError."""
        proc = Compose(lambda v: v[0], str.upper)
        self.assertEqual(proc(['hello', 'world']), 'HELLO')
        proc = Compose(str.upper)
        self.assertEqual(proc(None), None)
        proc = Compose(str.upper, stop_on_none=False)
        self.assertRaises(ValueError, proc, None)
        proc = Compose(str.upper, lambda x: x + 1)
        self.assertRaises(ValueError, proc, 'hello')

    def test_mapcompose(self):
        """MapCompose filters None results, accepts None input, and raises ValueError on errors."""
        def filter_world(x):
            return None if x == 'world' else x
        proc = MapCompose(filter_world, str.upper)
        self.assertEqual(proc(['hello', 'world', 'this', 'is', 'scrapy']),
                         ['HELLO', 'THIS', 'IS', 'SCRAPY'])
        proc = MapCompose(filter_world, str.upper)
        self.assertEqual(proc(None), [])
        proc = MapCompose(filter_world, str.upper)
        self.assertRaises(ValueError, proc, [1])
        proc = MapCompose(filter_world, lambda x: x + 1)
        self.assertRaises(ValueError, proc, 'hello')


class SelectJmesTestCase(unittest.TestCase):
    """Table-driven tests for the SelectJmes processor."""

    # case name -> (expression, input data, expected result)
    test_list_equals = {
        'simple': ('foo.bar', {"foo": {"bar": "baz"}}, "baz"),
        'invalid': ('foo.bar.baz', {"foo": {"bar": "baz"}}, None),
        'top_level': ('foo', {"foo": {"bar": "baz"}}, {"bar": "baz"}),
        'double_vs_single_quote_string': ('foo.bar', {"foo": {"bar": "baz"}}, "baz"),
        'dict': (
            'foo.bar[*].name',
            {"foo": {"bar": [{"name": "one"}, {"name": "two"}]}},
            ['one', 'two']
        ),
        'list': ('[1]', [1, 2], 2)
    }

    def test_output(self):
        """Each expression in ``test_list_equals`` selects the expected value."""
        for tl, (expr, test_list, expected) in self.test_list_equals.items():
            # subTest reports every failing case instead of stopping at the first.
            with self.subTest(case=tl):
                test = SelectJmes(expr)(test_list)
                self.assertEqual(
                    test,
                    expected,
                    msg=f'test "{tl}" got {test} expected {expected}'
                )


# Functions as processors

def function_processor_strip(iterable):
    """Return a list with every element of ``iterable`` stripped of surrounding whitespace."""
    return [x.strip() for x in iterable]


def function_processor_upper(iterable):
    """Return a list with every element of ``iterable`` upper-cased."""
    return [x.upper() for x in iterable]


class FunctionProcessorItem(Item):
    """Item declaring plain functions as processors in its field metadata."""
    foo = Field(
        input_processor=function_processor_strip,
        output_processor=function_processor_upper,
    )


class FunctionProcessorDictLoader(ItemLoader):
    """Dict loader declaring plain functions as the ``foo`` processors."""
    default_item_class = dict
    foo_in = function_processor_strip
    foo_out = function_processor_upper


class FunctionProcessorTestCase(unittest.TestCase):
    """Tests plain functions used as loader-level processors."""

    def test_processor_defined_in_item_loader(self):
        """``foo_in``/``foo_out`` functions strip and then upper-case the values."""
        lo = FunctionProcessorDictLoader()
        lo.add_value('foo', '  bar  ')
        lo.add_value('foo', ['  asdf  ', '  qwerty  '])
        self.assertEqual(
            dict(lo.load_item()),
            {'foo': ['BAR', 'ASDF', 'QWERTY']}
        )


class DeprecatedUtilityFunctionsTestCase(unittest.TestCase):
    """Tests that the moved utility functions emit ScrapyDeprecationWarning."""

    def test_deprecated_wrap_loader_context(self):
        """wrap_loader_context() emits exactly one ScrapyDeprecationWarning."""
        def function(*args):
            return None

        with warnings.catch_warnings(record=True) as w:
            # Force the warning to be recorded whatever the ambient filters are.
            warnings.simplefilter('always', ScrapyDeprecationWarning)
            wrap_loader_context(function, context=dict())

            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[0].category, ScrapyDeprecationWarning))

    def test_deprecated_extract_regex(self):
        """extract_regex() emits exactly one ScrapyDeprecationWarning."""
        with warnings.catch_warnings(record=True) as w:
            # Force the warning to be recorded whatever the ambient filters are.
            warnings.simplefilter('always', ScrapyDeprecationWarning)
            extract_regex(r'\w+', 'this is a test')

            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[0].category, ScrapyDeprecationWarning))


if __name__ == "__main__":
    unittest.main()