1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3"""
This file contains pytests for the find_urls() method of URLExtract
5
6.. Licence MIT
7.. codeauthor:: Jan Lipovský <janlipovsky@gmail.com>, janlipovsky.cz
8"""
9import pytest
10
11
@pytest.mark.parametrize(("text", "expected"), [
    (
        "Let's have URL http://janlipovsky.cz",
        ["http://janlipovsky.cz"],
    ),
    (
        "Let's have text without URLs.",
        [],
    ),
    (
        "Dot after TLD: http://janlipovsky.cz.",
        ["http://janlipovsky.cz"],
    ),
    (
        "URL https://example.com/@eon01/asdsd-dummy",
        ["https://example.com/@eon01/asdsd-dummy"],
    ),
    (
        "ukrainian news pravda.com.ua",
        ["pravda.com.ua"],
    ),
    (
        '<a href="https://www.example.com/">example</a>',
        ["https://www.example.com/"],
    ),
    (
        '<a href="https://www.example.com/path/">example1</a>',
        ["https://www.example.com/path/"],
    ),
    (
        "https://bladomain.com/bla/?cid=74530889&h=bladomain.com",
        ["https://bladomain.com/bla/?cid=74530889&h=bladomain.com"],
    ),
    (
        "Hey hou we have URL containing https://example.com/what.com another URL",
        ["https://example.com/what.com"],
    ),
    (
        "https://i2.wp.com/siliconfilter.com/2011/06/example.jpg",
        ["https://i2.wp.com/siliconfilter.com/2011/06/example.jpg"],
    ),
    (
        "https://www.test.org/paper/apostrophe'in-url",
        ["https://www.test.org/paper/apostrophe'in-url"],
    ),
    (
        "http://aa.com/b.html https://aa.com/bb.html",
        ["http://aa.com/b.html", "https://aa.com/bb.html"],
    ),
    (
        "http://0.0.0.0/a.io",
        ["http://0.0.0.0/a.io"],
    ),
    (
        "http://123.56.234.210/struts_action.do",
        ["http://123.56.234.210/struts_action.do"],
    ),
    (
        "<script src='//www.example.com/somejsfile.js'>",
        ["www.example.com/somejsfile.js"],
    ),
])
def test_find_urls(urlextract, text, expected):
    """
    Check that find_urls called with default arguments returns exactly
    the expected list of URLs (list equality, so order matters).

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: input text that is searched for URLs
    :param list(str) expected: URLs that find_urls has to return for text
    """
    found = urlextract.find_urls(text)
    assert found == expected
67
68
@pytest.mark.parametrize(("text", "expected"), [
    (
        "http://caseInsensitive.cOM",
        ["http://caseInsensitive.cOM"],
    ),
    (
        "http://caseInsensitive.COM",
        ["http://caseInsensitive.COM"],
    ),
])
def test_find_urls_case_insensitive(urlextract, text, expected):
    """
    Check that URLs written with a mixed-case or upper-case TLD are found
    and returned with their original casing.

    find_urls is called with only_unique=True here.

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: input text that is searched for URLs
    :param list(str) expected: URLs that find_urls has to return for text
    """
    found = urlextract.find_urls(text, only_unique=True)
    assert found == expected
85
86
@pytest.mark.parametrize(("text", "expected"), [
    (
        "http://unique.com http://unique.com",
        ["http://unique.com"],
    ),
    (
        "Get unique URL from: in.v_alid.cz",
        [],
    ),
])
def test_find_urls_unique(urlextract, text, expected):
    """
    Check find_urls with only_unique=True: a URL repeated in the text is
    expected once, and "in.v_alid.cz" is expected to yield no URL at all.

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: input text that is searched for URLs
    :param list(str) expected: unique URLs that find_urls has to return
    """
    found = urlextract.find_urls(text, only_unique=True)
    assert found == expected
103
104
@pytest.mark.parametrize(("text", "expected"), [
    (
        "Let's have URL http://janlipovsky.cz and a second URL https://example.com/@eon01/asdsd-dummy it's over.",
        [
            ("http://janlipovsky.cz", (15, 36)),
            ("https://example.com/@eon01/asdsd-dummy", (54, 92)),
        ],
    ),
])
def test_find_urls_with_indices(urlextract, text, expected):
    """
    Check find_urls with get_indices=True: every URL is expected together
    with its position in the text.

    In the expected data each position is a (start, end) pair of character
    offsets, where end is the offset just past the last character of the
    URL.

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: input text that is searched for URLs
    :param list(tuple) expected: (url, (start, end)) tuples that find_urls
        has to return for text
    """
    found = urlextract.find_urls(text, get_indices=True)
    assert found == expected
119
120
@pytest.mark.parametrize(("text", "expected"), [
    (
        "Let's have URL http://janlipovsky.cz",
        ["http://janlipovsky.cz"],
    ),
    (
        "Without schema janlipovsky.cz",
        [],
    ),
])
def test_find_urls_schema_only(urlextract, text, expected):
    """
    Check find_urls with with_schema_only=True: a URL written with a
    schema (http://) is expected, a bare domain without one is not.

    :param fixture urlextract: fixture holding URLExtract object
    :param str text: input text that is searched for URLs
    :param list(str) expected: URLs that find_urls has to return for text
    """
    found = urlextract.find_urls(text, with_schema_only=True)
    assert found == expected
135