from pathlib import Path

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm

pyreadstat = pytest.importorskip("pyreadstat")


@pytest.mark.parametrize("path_klass", [lambda p: p, Path])
def test_spss_labelled_num(path_klass, datapath):
    """Read a labelled numeric column, with and without label conversion.

    The file path is passed both unchanged and wrapped in ``Path``.
    """
    # test file from the Haven project (https://haven.tidyverse.org/)
    sav_file = path_klass(datapath("io", "data", "spss", "labelled-num.sav"))
    column = "VAR00002"

    # Labels requested: the column is expected as a categorical of labels.
    result = pd.read_spss(sav_file, convert_categoricals=True)
    labelled = pd.DataFrame({column: "This is one"}, index=[0])
    labelled[column] = pd.Categorical(labelled[column])
    tm.assert_frame_equal(result, labelled)

    # Labels not requested: the stored numeric value is expected instead.
    result = pd.read_spss(sav_file, convert_categoricals=False)
    raw = pd.DataFrame({column: 1.0}, index=[0])
    tm.assert_frame_equal(result, raw)


def test_spss_labelled_num_na(datapath):
    """Read a labelled numeric column that also contains a missing value."""
    # test file from the Haven project (https://haven.tidyverse.org/)
    sav_file = datapath("io", "data", "spss", "labelled-num-na.sav")
    column = "VAR00002"

    # Labels requested: the missing entry stays missing in the categorical.
    result = pd.read_spss(sav_file, convert_categoricals=True)
    labelled = pd.DataFrame({column: ["This is one", None]})
    labelled[column] = pd.Categorical(labelled[column])
    tm.assert_frame_equal(result, labelled)

    # Labels not requested: the missing entry is expected as NaN.
    result = pd.read_spss(sav_file, convert_categoricals=False)
    raw = pd.DataFrame({column: [1.0, np.nan]})
    tm.assert_frame_equal(result, raw)


def test_spss_labelled_str(datapath):
    """Read a labelled string column, with and without label conversion."""
    # test file from the Haven project (https://haven.tidyverse.org/)
    sav_file = datapath("io", "data", "spss", "labelled-str.sav")
    column = "gender"

    # Labels requested: the full label text is expected, as a categorical.
    result = pd.read_spss(sav_file, convert_categoricals=True)
    labelled = pd.DataFrame({column: ["Male", "Female"]})
    labelled[column] = pd.Categorical(labelled[column])
    tm.assert_frame_equal(result, labelled)

    # Labels not requested: the stored short codes are expected.
    result = pd.read_spss(sav_file, convert_categoricals=False)
    raw = pd.DataFrame({column: ["M", "F"]})
    tm.assert_frame_equal(result, raw)


def test_spss_umlauts(datapath):
    """Read a column whose value labels contain non-ASCII characters."""
    # test file from the Haven project (https://haven.tidyverse.org/)
    sav_file = datapath("io", "data", "spss", "umlauts.sav")
    column = "var1"

    # Labels requested: the umlaut labels are expected, as a categorical.
    result = pd.read_spss(sav_file, convert_categoricals=True)
    labels = ["the ä umlaut", "the ü umlaut", "the ä umlaut", "the ö umlaut"]
    labelled = pd.DataFrame({column: labels})
    labelled[column] = pd.Categorical(labelled[column])
    tm.assert_frame_equal(result, labelled)

    # Labels not requested: the stored numeric values are expected.
    result = pd.read_spss(sav_file, convert_categoricals=False)
    raw = pd.DataFrame({column: [1.0, 2.0, 1.0, 3.0]})
    tm.assert_frame_equal(result, raw)


def test_spss_usecols(datapath):
    """Passing a bare string as ``usecols`` must raise ``TypeError``."""
    # usecols must be list-like
    sav_file = datapath("io", "data", "spss", "labelled-num.sav")
    expected_msg = "usecols must be list-like."

    with pytest.raises(TypeError, match=expected_msg):
        pd.read_spss(sav_file, usecols="VAR00002")