1import numpy as np
2import pytest
3
4import pandas as pd
5import pandas._testing as tm
6
7
def test_error():
    """Check that ``DataFrame.explode`` rejects invalid column arguments.

    Two failure modes are exercised:

    * a list that names the same column label twice, and
    * a frame whose own column labels are not unique.
    """
    df = pd.DataFrame(
        {"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1}
    )

    # The wording for a repeated label depends on the pandas release: releases
    # without multi-column explode reject any list with "column must be a
    # scalar", while releases that accept a list of labels report the
    # duplicate as "column must be unique".  Accept either wording so the
    # check is not tied to a single release.
    with pytest.raises(ValueError, match=r"column must be (a scalar|unique)"):
        df.explode(list("AA"))

    # Relabel both columns "A" so the frame itself carries duplicate labels.
    df.columns = list("AA")
    with pytest.raises(ValueError, match="columns must be unique"):
        df.explode("A")
18
19
def test_basic():
    """Explode a column mixing a list, NaN, an empty list and a tuple.

    The expected frame has one row per element, a single NaN row for both the
    NaN entry and the empty list, repeated index labels, and the scalar
    column ``B`` carried along unchanged.
    """
    nested = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"))
    frame = pd.DataFrame({"A": nested, "B": 1})

    exploded = frame.explode("A")

    flat = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
    )
    wanted = pd.DataFrame({"A": flat, "B": 1})
    tm.assert_frame_equal(exploded, wanted)
34
35
def test_multi_index_rows():
    """Explode a frame whose rows are labelled by a two-level MultiIndex.

    Each source row key is expected to be repeated once per produced row.
    """
    row_keys = [("a", 1), ("a", 2), ("b", 1), ("b", 2)]
    frame = pd.DataFrame(
        {"A": np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), "B": 1},
        index=pd.MultiIndex.from_tuples(row_keys),
    )

    exploded = frame.explode("A")

    # Position of the source row behind each of the seven expected rows:
    # three for the list, one each for NaN and the empty list, two for the tuple.
    source_positions = (0, 0, 0, 1, 2, 3, 3)
    wanted_index = pd.MultiIndex.from_tuples(
        [row_keys[pos] for pos in source_positions]
    )
    flat = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4], index=wanted_index, dtype=object
    )
    wanted = pd.DataFrame({"A": flat, "B": 1})
    tm.assert_frame_equal(exploded, wanted)
64
65
def test_multi_index_columns():
    """Explode a column that is addressed by a tuple label."""
    target, other = ("A", 1), ("A", 2)
    frame = pd.DataFrame(
        {target: np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), other: 1}
    )

    exploded = frame.explode(target)

    # The default integer row labels are repeated once per produced row.
    flat = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4],
        index=pd.Index([0, 0, 0, 1, 2, 3, 3]),
        dtype=object,
    )
    wanted = pd.DataFrame({target: flat, other: 1})
    tm.assert_frame_equal(exploded, wanted)
83
84
def test_usecase():
    """Run the two use cases referenced as gh-10511 and gh-8517."""
    # explode a single column
    # gh-10511
    raw = pd.DataFrame([[11, range(5), 10], [22, range(3), 20]], columns=list("ABC"))
    result = raw.set_index("C").explode("B")

    expected = pd.DataFrame(
        {
            "A": [11] * 5 + [22] * 3,
            "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=object),
            "C": [10] * 5 + [20] * 3,
        },
        columns=list("ABC"),
    ).set_index("C")
    tm.assert_frame_equal(result, expected)

    # gh-8517: split a text column on spaces, then explode the pieces
    labels = ["dt", "name", "text"]
    records = pd.DataFrame(
        [["2014-01-01", "Alice", "A B"], ["2014-01-02", "Bob", "C D"]],
        columns=labels,
    )
    pieces = records.text.str.split(" ")
    result = records.assign(text=pieces).explode("text")

    expected_rows = [
        ["2014-01-01", "Alice", "A"],
        ["2014-01-01", "Alice", "B"],
        ["2014-01-02", "Bob", "C"],
        ["2014-01-02", "Bob", "D"],
    ]
    expected = pd.DataFrame(expected_rows, columns=labels, index=[0, 0, 1, 1])
    tm.assert_frame_equal(result, expected)
121
122
@pytest.mark.parametrize(
    "input_dict, input_index, expected_dict, expected_index",
    [
        (
            {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]},
            index_before,
            {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]},
            index_after,
        )
        # The data is the same in every case; only the kind of duplicated
        # index changes (plain list, named Index, fully and partially named
        # MultiIndex).
        for index_before, index_after in [
            ([0, 0], [0, 0, 0, 0]),
            (
                pd.Index([0, 0], name="my_index"),
                pd.Index([0, 0, 0, 0], name="my_index"),
            ),
            (
                pd.MultiIndex.from_arrays(
                    [[0, 0], [1, 1]], names=["my_first_index", "my_second_index"]
                ),
                pd.MultiIndex.from_arrays(
                    [[0, 0, 0, 0], [1, 1, 1, 1]],
                    names=["my_first_index", "my_second_index"],
                ),
            ),
            (
                pd.MultiIndex.from_arrays([[0, 0], [1, 1]], names=["my_index", None]),
                pd.MultiIndex.from_arrays(
                    [[0, 0, 0, 0], [1, 1, 1, 1]], names=["my_index", None]
                ),
            ),
        ]
    ],
)
def test_duplicate_index(input_dict, input_index, expected_dict, expected_index):
    """Explode a frame whose row labels are all identical (GH 28005).

    Parameters
    ----------
    input_dict : dict
        Column data for the frame to explode; ``col1`` holds the lists.
    input_index : list, Index or MultiIndex
        Duplicated row labels for the input frame.
    expected_dict : dict
        Column data of the expected exploded frame.
    expected_index : list, Index or MultiIndex
        Row labels of the expected exploded frame.
    """
    # Pin the input to object dtype as well: the expected frame is built with
    # dtype=object, so leaving the input to dtype inference would make the
    # comparison depend on how pandas infers the string column.
    df = pd.DataFrame(input_dict, index=input_index, dtype=object)
    result = df.explode("col1")
    expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object)
    tm.assert_frame_equal(result, expected)
165
166
def test_ignore_index():
    """Explode with ``ignore_index=True`` and expect row labels 0 to 3 (GH 34932)."""
    source = {"id": range(0, 20, 10), "values": [list("ab"), list("cd")]}
    exploded = pd.DataFrame(source).explode("values", ignore_index=True)

    wanted = pd.DataFrame(
        {"id": [0, 0, 10, 10], "values": list("abcd")}, index=[0, 1, 2, 3]
    )
    tm.assert_frame_equal(exploded, wanted)
175
176
def test_explode_sets():
    """Explode a column whose single cell is a set.

    See https://github.com/pandas-dev/pandas/issues/35614
    """
    frame = pd.DataFrame({"a": [{"x", "y"}], "b": [1]}, index=[1])

    exploded = frame.explode(column="a")
    # Set iteration order is not fixed, so sort before comparing.
    ordered = exploded.sort_values(by="a")

    wanted = pd.DataFrame({"a": ["x", "y"], "b": [1, 1]}, index=[1, 1])
    tm.assert_frame_equal(ordered, wanted)
183