1from pathlib import Path
2
3import numpy as np
4import pytest
5
6import pandas as pd
7import pandas._testing as tm
8
9pyreadstat = pytest.importorskip("pyreadstat")
10
11
@pytest.mark.parametrize("path_klass", [lambda p: p, Path])
def test_spss_labelled_num(path_klass, datapath):
    # Reads the same file twice: once with value labels converted to
    # categoricals and once as raw codes. ``path_klass`` passes the path
    # either unchanged or wrapped in ``pathlib.Path``.
    # test file from the Haven project (https://haven.tidyverse.org/)
    sav_path = path_klass(datapath("io", "data", "spss", "labelled-num.sav"))
    column = "VAR00002"

    # convert_categoricals=True: expect the label as a categorical column
    labelled = pd.DataFrame({column: "This is one"}, index=[0])
    labelled[column] = pd.Categorical(labelled[column])
    result = pd.read_spss(sav_path, convert_categoricals=True)
    tm.assert_frame_equal(result, labelled)

    # convert_categoricals=False: expect the float code itself
    codes = pd.DataFrame({column: 1.0}, index=[0])
    result = pd.read_spss(sav_path, convert_categoricals=False)
    tm.assert_frame_equal(result, codes)
25
26
def test_spss_labelled_num_na(datapath):
    # Labelled numeric column with a missing value, read with and without
    # categorical conversion.
    # test file from the Haven project (https://haven.tidyverse.org/)
    sav_path = datapath("io", "data", "spss", "labelled-num-na.sav")
    column = "VAR00002"

    # Expected frame when labels are converted: the missing entry stays missing
    labelled = pd.DataFrame({column: ["This is one", None]})
    labelled[column] = pd.Categorical(labelled[column])

    # Expected frame when the raw codes are kept
    codes = pd.DataFrame({column: [1.0, np.nan]})

    # Same order as before: converted read first, raw read second
    for convert, expected in ((True, labelled), (False, codes)):
        result = pd.read_spss(sav_path, convert_categoricals=convert)
        tm.assert_frame_equal(result, expected)
39
40
def test_spss_labelled_str(datapath):
    # Labelled string column, read with and without categorical conversion.
    # test file from the Haven project (https://haven.tidyverse.org/)
    sav_path = datapath("io", "data", "spss", "labelled-str.sav")
    column = "gender"

    # Expected frame when value labels replace the stored strings
    labelled = pd.DataFrame({column: ["Male", "Female"]})
    labelled[column] = pd.Categorical(labelled[column])

    # Expected frame when the stored strings are returned as-is
    stored = pd.DataFrame({column: ["M", "F"]})

    # Same order as before: converted read first, raw read second
    for convert, expected in ((True, labelled), (False, stored)):
        result = pd.read_spss(sav_path, convert_categoricals=convert)
        tm.assert_frame_equal(result, expected)
53
54
def test_spss_umlauts(datapath):
    # Value labels containing non-ASCII characters, read with and without
    # categorical conversion.
    # test file from the Haven project (https://haven.tidyverse.org/)
    sav_path = datapath("io", "data", "spss", "umlauts.sav")
    column = "var1"

    # convert_categoricals=True: expect the labels as a categorical column
    labels = ["the ä umlaut", "the ü umlaut", "the ä umlaut", "the ö umlaut"]
    labelled = pd.DataFrame({column: labels})
    labelled[column] = pd.Categorical(labelled[column])
    result = pd.read_spss(sav_path, convert_categoricals=True)
    tm.assert_frame_equal(result, labelled)

    # convert_categoricals=False: expect the float codes for the same rows
    codes = pd.DataFrame({column: [1.0, 2.0, 1.0, 3.0]})
    result = pd.read_spss(sav_path, convert_categoricals=False)
    tm.assert_frame_equal(result, codes)
69
70
def test_spss_usecols(datapath):
    # usecols must be list-like: passing a bare string is expected to raise
    # a TypeError.
    fname = datapath("io", "data", "spss", "labelled-num.sav")

    # ``match`` is a regular expression, so the trailing period is escaped
    # (raw string) to check the literal message rather than any character.
    with pytest.raises(TypeError, match=r"usecols must be list-like\."):
        pd.read_spss(fname, usecols="VAR00002")
77