#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This file contains pytests for find_urls() method of URLExtract

.. Licence MIT
.. codeauthor:: Jan Lipovský <janlipovsky@gmail.com>, janlipovsky.cz
"""
import pytest


@pytest.mark.parametrize("text, expected", [
    ("Let's have URL http://janlipovsky.cz",
     ['http://janlipovsky.cz']),

    ("Let's have text without URLs.",
     []),

    # The sentence-ending dot must not be returned as part of the URL.
    ("Dot after TLD: http://janlipovsky.cz.",
     ['http://janlipovsky.cz']),

    ("URL https://example.com/@eon01/asdsd-dummy",
     ['https://example.com/@eon01/asdsd-dummy']),

    # URL without a schema.
    ("ukrainian news pravda.com.ua",
     ['pravda.com.ua']),

    # URLs wrapped in HTML attribute quotes.
    ('<a href="https://www.example.com/">example</a>',
     ['https://www.example.com/']),

    ('<a href="https://www.example.com/path/">example1</a>',
     ['https://www.example.com/path/']),

    # A domain-like value inside the query string must not yield a second URL.
    ("https://bladomain.com/bla/?cid=74530889&h=bladomain.com",
     ['https://bladomain.com/bla/?cid=74530889&h=bladomain.com']),

    # Domain-like text inside the path must not yield a second URL.
    ("Hey hou we have URL containing https://example.com/what.com another URL",
     ['https://example.com/what.com']),

    ("https://i2.wp.com/siliconfilter.com/2011/06/example.jpg",
     ["https://i2.wp.com/siliconfilter.com/2011/06/example.jpg"]),

    ("https://www.test.org/paper/apostrophe'in-url",
     ["https://www.test.org/paper/apostrophe'in-url"]),

    # Several URLs are returned in order of appearance.
    ("http://aa.com/b.html https://aa.com/bb.html",
     ["http://aa.com/b.html", "https://aa.com/bb.html"]),

    # IPv4 hosts followed by a path that contains a TLD-like suffix.
    ("http://0.0.0.0/a.io",
     ['http://0.0.0.0/a.io']),

    ("http://123.56.234.210/struts_action.do",
     ['http://123.56.234.210/struts_action.do']),

    # Protocol-relative URL: the leading "//" is not part of the result.
    ("<script src='//www.example.com/somejsfile.js'>",
     ['www.example.com/somejsfile.js']),
])
def test_find_urls(urlextract, text, expected):
    """
    Testing find_urls returning all URLs

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: text in which we should find links
    :param list(str) expected: list of URLs that has to be found in text
    """
    assert urlextract.find_urls(text) == expected


@pytest.mark.parametrize("text, expected", [
    ("http://caseInsensitive.cOM",
     ['http://caseInsensitive.cOM']),

    ("http://caseInsensitive.COM",
     ['http://caseInsensitive.COM']),
])
def test_find_urls_case_insensitive(urlextract, text, expected):
    """
    Testing find_urls finding URLs whose TLD is written in mixed or upper
    case; the URL is expected back with its original letter case

    find_urls is called with ``only_unique=True``.

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: text in which we should find links
    :param list(str) expected: list of URLs that has to be found in text
    """
    assert urlextract.find_urls(text, only_unique=True) == expected


@pytest.mark.parametrize("text, expected", [
    # The same URL appearing twice is reported once.
    ("http://unique.com http://unique.com",
     ['http://unique.com']),

    # No URL is expected for this input.
    ("Get unique URL from: in.v_alid.cz",
     []),
])
def test_find_urls_unique(urlextract, text, expected):
    """
    Testing find_urls returning only unique URLs

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: text in which we should find links
    :param list(str) expected: list of URLs that has to be found in text
    """
    assert urlextract.find_urls(text, only_unique=True) == expected


@pytest.mark.parametrize("text, expected", [
    # Adjacent literals are concatenated into one string; the offsets below
    # are positions in that full string, with the end offset exclusive.
    ("Let's have URL http://janlipovsky.cz and a second URL "
     "https://example.com/@eon01/asdsd-dummy it's over.",
     [('http://janlipovsky.cz', (15, 36)),
      ('https://example.com/@eon01/asdsd-dummy', (54, 92))]),
])
def test_find_urls_with_indices(urlextract, text, expected):
    """
    Testing find_urls returning URLs together with their positions in text

    find_urls is called with ``get_indices=True``.

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: text in which we should find links
    :param list(tuple) expected: list of ``(url, (start, end))`` tuples that
        has to be found in text
    """
    assert urlextract.find_urls(text, get_indices=True) == expected


@pytest.mark.parametrize("text, expected", [
    ("Let's have URL http://janlipovsky.cz",
     ['http://janlipovsky.cz']),

    # A bare domain without a schema must be skipped.
    ("Without schema janlipovsky.cz",
     []),
])
def test_find_urls_schema_only(urlextract, text, expected):
    """
    Testing find_urls returning only URLs that have a schema

    find_urls is called with ``with_schema_only=True``.

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: text in which we should find links
    :param list(str) expected: list of URLs that has to be found in text
    """
    assert urlextract.find_urls(text, with_schema_only=True) == expected