1import random
2
3import numpy as np
4import pytest
5
6from pandas.errors import PerformanceWarning
7
8import pandas as pd
9from pandas import Categorical, DataFrame, NaT, Timestamp, date_range
10import pandas._testing as tm
11
12
13class TestDataFrameSortValues:
14    def test_sort_values(self):
15        frame = DataFrame(
16            [[1, 1, 2], [3, 1, 0], [4, 5, 6]], index=[1, 2, 3], columns=list("ABC")
17        )
18
19        # by column (axis=0)
20        sorted_df = frame.sort_values(by="A")
21        indexer = frame["A"].argsort().values
22        expected = frame.loc[frame.index[indexer]]
23        tm.assert_frame_equal(sorted_df, expected)
24
25        sorted_df = frame.sort_values(by="A", ascending=False)
26        indexer = indexer[::-1]
27        expected = frame.loc[frame.index[indexer]]
28        tm.assert_frame_equal(sorted_df, expected)
29
30        sorted_df = frame.sort_values(by="A", ascending=False)
31        tm.assert_frame_equal(sorted_df, expected)
32
33        # GH4839
34        sorted_df = frame.sort_values(by=["A"], ascending=[False])
35        tm.assert_frame_equal(sorted_df, expected)
36
37        # multiple bys
38        sorted_df = frame.sort_values(by=["B", "C"])
39        expected = frame.loc[[2, 1, 3]]
40        tm.assert_frame_equal(sorted_df, expected)
41
42        sorted_df = frame.sort_values(by=["B", "C"], ascending=False)
43        tm.assert_frame_equal(sorted_df, expected[::-1])
44
45        sorted_df = frame.sort_values(by=["B", "A"], ascending=[True, False])
46        tm.assert_frame_equal(sorted_df, expected)
47
48        msg = "No axis named 2 for object type DataFrame"
49        with pytest.raises(ValueError, match=msg):
50            frame.sort_values(by=["A", "B"], axis=2, inplace=True)
51
52        # by row (axis=1): GH#10806
53        sorted_df = frame.sort_values(by=3, axis=1)
54        expected = frame
55        tm.assert_frame_equal(sorted_df, expected)
56
57        sorted_df = frame.sort_values(by=3, axis=1, ascending=False)
58        expected = frame.reindex(columns=["C", "B", "A"])
59        tm.assert_frame_equal(sorted_df, expected)
60
61        sorted_df = frame.sort_values(by=[1, 2], axis="columns")
62        expected = frame.reindex(columns=["B", "A", "C"])
63        tm.assert_frame_equal(sorted_df, expected)
64
65        sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=[True, False])
66        tm.assert_frame_equal(sorted_df, expected)
67
68        sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=False)
69        expected = frame.reindex(columns=["C", "B", "A"])
70        tm.assert_frame_equal(sorted_df, expected)
71
72        msg = r"Length of ascending \(5\) != length of by \(2\)"
73        with pytest.raises(ValueError, match=msg):
74            frame.sort_values(by=["A", "B"], axis=0, ascending=[True] * 5)
75
76    def test_sort_values_inplace(self):
77        frame = DataFrame(
78            np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"]
79        )
80
81        sorted_df = frame.copy()
82        return_value = sorted_df.sort_values(by="A", inplace=True)
83        assert return_value is None
84        expected = frame.sort_values(by="A")
85        tm.assert_frame_equal(sorted_df, expected)
86
87        sorted_df = frame.copy()
88        return_value = sorted_df.sort_values(by=1, axis=1, inplace=True)
89        assert return_value is None
90        expected = frame.sort_values(by=1, axis=1)
91        tm.assert_frame_equal(sorted_df, expected)
92
93        sorted_df = frame.copy()
94        return_value = sorted_df.sort_values(by="A", ascending=False, inplace=True)
95        assert return_value is None
96        expected = frame.sort_values(by="A", ascending=False)
97        tm.assert_frame_equal(sorted_df, expected)
98
99        sorted_df = frame.copy()
100        return_value = sorted_df.sort_values(
101            by=["A", "B"], ascending=False, inplace=True
102        )
103        assert return_value is None
104        expected = frame.sort_values(by=["A", "B"], ascending=False)
105        tm.assert_frame_equal(sorted_df, expected)
106
107    def test_sort_values_multicolumn(self):
108        A = np.arange(5).repeat(20)
109        B = np.tile(np.arange(5), 20)
110        random.shuffle(A)
111        random.shuffle(B)
112        frame = DataFrame({"A": A, "B": B, "C": np.random.randn(100)})
113
114        result = frame.sort_values(by=["A", "B"])
115        indexer = np.lexsort((frame["B"], frame["A"]))
116        expected = frame.take(indexer)
117        tm.assert_frame_equal(result, expected)
118
119        result = frame.sort_values(by=["A", "B"], ascending=False)
120        indexer = np.lexsort(
121            (frame["B"].rank(ascending=False), frame["A"].rank(ascending=False))
122        )
123        expected = frame.take(indexer)
124        tm.assert_frame_equal(result, expected)
125
126        result = frame.sort_values(by=["B", "A"])
127        indexer = np.lexsort((frame["A"], frame["B"]))
128        expected = frame.take(indexer)
129        tm.assert_frame_equal(result, expected)
130
131    def test_sort_values_multicolumn_uint64(self):
132        # GH#9918
133        # uint64 multicolumn sort
134
135        df = DataFrame(
136            {
137                "a": pd.Series([18446637057563306014, 1162265347240853609]),
138                "b": pd.Series([1, 2]),
139            }
140        )
141        df["a"] = df["a"].astype(np.uint64)
142        result = df.sort_values(["a", "b"])
143
144        expected = DataFrame(
145            {
146                "a": pd.Series([18446637057563306014, 1162265347240853609]),
147                "b": pd.Series([1, 2]),
148            },
149            index=pd.Index([1, 0]),
150        )
151
152        tm.assert_frame_equal(result, expected)
153
154    def test_sort_values_nan(self):
155        # GH#3917
156        df = DataFrame(
157            {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}
158        )
159
160        # sort one column only
161        expected = DataFrame(
162            {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 9, 2, np.nan, 5, 5, 4]},
163            index=[2, 0, 3, 1, 6, 4, 5],
164        )
165        sorted_df = df.sort_values(["A"], na_position="first")
166        tm.assert_frame_equal(sorted_df, expected)
167
168        expected = DataFrame(
169            {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 9, 2]},
170            index=[2, 5, 4, 6, 1, 0, 3],
171        )
172        sorted_df = df.sort_values(["A"], na_position="first", ascending=False)
173        tm.assert_frame_equal(sorted_df, expected)
174
175        expected = df.reindex(columns=["B", "A"])
176        sorted_df = df.sort_values(by=1, axis=1, na_position="first")
177        tm.assert_frame_equal(sorted_df, expected)
178
179        # na_position='last', order
180        expected = DataFrame(
181            {"A": [1, 1, 2, 4, 6, 8, np.nan], "B": [2, 9, np.nan, 5, 5, 4, 5]},
182            index=[3, 0, 1, 6, 4, 5, 2],
183        )
184        sorted_df = df.sort_values(["A", "B"])
185        tm.assert_frame_equal(sorted_df, expected)
186
187        # na_position='first', order
188        expected = DataFrame(
189            {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 2, 9, np.nan, 5, 5, 4]},
190            index=[2, 3, 0, 1, 6, 4, 5],
191        )
192        sorted_df = df.sort_values(["A", "B"], na_position="first")
193        tm.assert_frame_equal(sorted_df, expected)
194
195        # na_position='first', not order
196        expected = DataFrame(
197            {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 9, 2, np.nan, 5, 5, 4]},
198            index=[2, 0, 3, 1, 6, 4, 5],
199        )
200        sorted_df = df.sort_values(["A", "B"], ascending=[1, 0], na_position="first")
201        tm.assert_frame_equal(sorted_df, expected)
202
203        # na_position='last', not order
204        expected = DataFrame(
205            {"A": [8, 6, 4, 2, 1, 1, np.nan], "B": [4, 5, 5, np.nan, 2, 9, 5]},
206            index=[5, 4, 6, 1, 3, 0, 2],
207        )
208        sorted_df = df.sort_values(["A", "B"], ascending=[0, 1], na_position="last")
209        tm.assert_frame_equal(sorted_df, expected)
210
211    def test_sort_values_stable_descending_sort(self):
212        # GH#6399
213        df = DataFrame(
214            [[2, "first"], [2, "second"], [1, "a"], [1, "b"]],
215            columns=["sort_col", "order"],
216        )
217        sorted_df = df.sort_values(by="sort_col", kind="mergesort", ascending=False)
218        tm.assert_frame_equal(df, sorted_df)
219
220    def test_sort_values_stable_descending_multicolumn_sort(self):
221        df = DataFrame(
222            {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}
223        )
224        # test stable mergesort
225        expected = DataFrame(
226            {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 2, 9]},
227            index=[2, 5, 4, 6, 1, 3, 0],
228        )
229        sorted_df = df.sort_values(
230            ["A", "B"], ascending=[0, 1], na_position="first", kind="mergesort"
231        )
232        tm.assert_frame_equal(sorted_df, expected)
233
234        expected = DataFrame(
235            {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 9, 2]},
236            index=[2, 5, 4, 6, 1, 0, 3],
237        )
238        sorted_df = df.sort_values(
239            ["A", "B"], ascending=[0, 0], na_position="first", kind="mergesort"
240        )
241        tm.assert_frame_equal(sorted_df, expected)
242
243    def test_sort_values_stable_categorial(self):
244        # GH#16793
245        df = DataFrame({"x": Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True)})
246        expected = df.copy()
247        sorted_df = df.sort_values("x", kind="mergesort")
248        tm.assert_frame_equal(sorted_df, expected)
249
250    def test_sort_values_datetimes(self):
251
252        # GH#3461, argsort / lexsort differences for a datetime column
253        df = DataFrame(
254            ["a", "a", "a", "b", "c", "d", "e", "f", "g"],
255            columns=["A"],
256            index=date_range("20130101", periods=9),
257        )
258        dts = [
259            Timestamp(x)
260            for x in [
261                "2004-02-11",
262                "2004-01-21",
263                "2004-01-26",
264                "2005-09-20",
265                "2010-10-04",
266                "2009-05-12",
267                "2008-11-12",
268                "2010-09-28",
269                "2010-09-28",
270            ]
271        ]
272        df["B"] = dts[::2] + dts[1::2]
273        df["C"] = 2.0
274        df["A1"] = 3.0
275
276        df1 = df.sort_values(by="A")
277        df2 = df.sort_values(by=["A"])
278        tm.assert_frame_equal(df1, df2)
279
280        df1 = df.sort_values(by="B")
281        df2 = df.sort_values(by=["B"])
282        tm.assert_frame_equal(df1, df2)
283
284        df1 = df.sort_values(by="B")
285
286        df2 = df.sort_values(by=["C", "B"])
287        tm.assert_frame_equal(df1, df2)
288
289    def test_sort_values_frame_column_inplace_sort_exception(self, float_frame):
290        s = float_frame["A"]
291        with pytest.raises(ValueError, match="This Series is a view"):
292            s.sort_values(inplace=True)
293
294        cp = s.copy()
295        cp.sort_values()  # it works!
296
297    def test_sort_values_nat_values_in_int_column(self):
298
299        # GH#14922: "sorting with large float and multiple columns incorrect"
300
301        # cause was that the int64 value NaT was considered as "na". Which is
302        # only correct for datetime64 columns.
303
304        int_values = (2, int(NaT))
305        float_values = (2.0, -1.797693e308)
306
307        df = DataFrame(
308            {"int": int_values, "float": float_values}, columns=["int", "float"]
309        )
310
311        df_reversed = DataFrame(
312            {"int": int_values[::-1], "float": float_values[::-1]},
313            columns=["int", "float"],
314            index=[1, 0],
315        )
316
317        # NaT is not a "na" for int64 columns, so na_position must not
318        # influence the result:
319        df_sorted = df.sort_values(["int", "float"], na_position="last")
320        tm.assert_frame_equal(df_sorted, df_reversed)
321
322        df_sorted = df.sort_values(["int", "float"], na_position="first")
323        tm.assert_frame_equal(df_sorted, df_reversed)
324
325        # reverse sorting order
326        df_sorted = df.sort_values(["int", "float"], ascending=False)
327        tm.assert_frame_equal(df_sorted, df)
328
329        # and now check if NaT is still considered as "na" for datetime64
330        # columns:
331        df = DataFrame(
332            {"datetime": [Timestamp("2016-01-01"), NaT], "float": float_values},
333            columns=["datetime", "float"],
334        )
335
336        df_reversed = DataFrame(
337            {"datetime": [NaT, Timestamp("2016-01-01")], "float": float_values[::-1]},
338            columns=["datetime", "float"],
339            index=[1, 0],
340        )
341
342        df_sorted = df.sort_values(["datetime", "float"], na_position="first")
343        tm.assert_frame_equal(df_sorted, df_reversed)
344
345        df_sorted = df.sort_values(["datetime", "float"], na_position="last")
346        tm.assert_frame_equal(df_sorted, df)
347
348        # Ascending should not affect the results.
349        df_sorted = df.sort_values(["datetime", "float"], ascending=False)
350        tm.assert_frame_equal(df_sorted, df)
351
352    def test_sort_nat(self):
353        # GH 16836
354
355        d1 = [Timestamp(x) for x in ["2016-01-01", "2015-01-01", np.nan, "2016-01-01"]]
356        d2 = [
357            Timestamp(x)
358            for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"]
359        ]
360        df = DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3])
361
362        d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]]
363        d4 = [
364            Timestamp(x)
365            for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"]
366        ]
367        expected = DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2])
368        sorted_df = df.sort_values(by=["a", "b"])
369        tm.assert_frame_equal(sorted_df, expected)
370
371    def test_sort_values_na_position_with_categories(self):
372        # GH#22556
373        # Positioning missing value properly when column is Categorical.
374        categories = ["A", "B", "C"]
375        category_indices = [0, 2, 4]
376        list_of_nans = [np.nan, np.nan]
377        na_indices = [1, 3]
378        na_position_first = "first"
379        na_position_last = "last"
380        column_name = "c"
381
382        reversed_categories = sorted(categories, reverse=True)
383        reversed_category_indices = sorted(category_indices, reverse=True)
384        reversed_na_indices = sorted(na_indices)
385
386        df = DataFrame(
387            {
388                column_name: Categorical(
389                    ["A", np.nan, "B", np.nan, "C"], categories=categories, ordered=True
390                )
391            }
392        )
393        # sort ascending with na first
394        result = df.sort_values(
395            by=column_name, ascending=True, na_position=na_position_first
396        )
397        expected = DataFrame(
398            {
399                column_name: Categorical(
400                    list_of_nans + categories, categories=categories, ordered=True
401                )
402            },
403            index=na_indices + category_indices,
404        )
405
406        tm.assert_frame_equal(result, expected)
407
408        # sort ascending with na last
409        result = df.sort_values(
410            by=column_name, ascending=True, na_position=na_position_last
411        )
412        expected = DataFrame(
413            {
414                column_name: Categorical(
415                    categories + list_of_nans, categories=categories, ordered=True
416                )
417            },
418            index=category_indices + na_indices,
419        )
420
421        tm.assert_frame_equal(result, expected)
422
423        # sort descending with na first
424        result = df.sort_values(
425            by=column_name, ascending=False, na_position=na_position_first
426        )
427        expected = DataFrame(
428            {
429                column_name: Categorical(
430                    list_of_nans + reversed_categories,
431                    categories=categories,
432                    ordered=True,
433                )
434            },
435            index=reversed_na_indices + reversed_category_indices,
436        )
437
438        tm.assert_frame_equal(result, expected)
439
440        # sort descending with na last
441        result = df.sort_values(
442            by=column_name, ascending=False, na_position=na_position_last
443        )
444        expected = DataFrame(
445            {
446                column_name: Categorical(
447                    reversed_categories + list_of_nans,
448                    categories=categories,
449                    ordered=True,
450                )
451            },
452            index=reversed_category_indices + reversed_na_indices,
453        )
454
455        tm.assert_frame_equal(result, expected)
456
457    def test_sort_values_nat(self):
458
459        # GH#16836
460
461        d1 = [Timestamp(x) for x in ["2016-01-01", "2015-01-01", np.nan, "2016-01-01"]]
462        d2 = [
463            Timestamp(x)
464            for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"]
465        ]
466        df = DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3])
467
468        d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]]
469        d4 = [
470            Timestamp(x)
471            for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"]
472        ]
473        expected = DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2])
474        sorted_df = df.sort_values(by=["a", "b"])
475        tm.assert_frame_equal(sorted_df, expected)
476
477    def test_sort_values_na_position_with_categories_raises(self):
478        df = DataFrame(
479            {
480                "c": Categorical(
481                    ["A", np.nan, "B", np.nan, "C"],
482                    categories=["A", "B", "C"],
483                    ordered=True,
484                )
485            }
486        )
487
488        with pytest.raises(ValueError, match="invalid na_position: bad_position"):
489            df.sort_values(by="c", ascending=False, na_position="bad_position")
490
491    @pytest.mark.parametrize("inplace", [True, False])
492    @pytest.mark.parametrize(
493        "original_dict, sorted_dict, ignore_index, output_index",
494        [
495            ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]),
496            ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]),
497            (
498                {"A": [1, 2, 3], "B": [2, 3, 4]},
499                {"A": [3, 2, 1], "B": [4, 3, 2]},
500                True,
501                [0, 1, 2],
502            ),
503            (
504                {"A": [1, 2, 3], "B": [2, 3, 4]},
505                {"A": [3, 2, 1], "B": [4, 3, 2]},
506                False,
507                [2, 1, 0],
508            ),
509        ],
510    )
511    def test_sort_values_ignore_index(
512        self, inplace, original_dict, sorted_dict, ignore_index, output_index
513    ):
514        # GH 30114
515        df = DataFrame(original_dict)
516        expected = DataFrame(sorted_dict, index=output_index)
517        kwargs = {"ignore_index": ignore_index, "inplace": inplace}
518
519        if inplace:
520            result_df = df.copy()
521            result_df.sort_values("A", ascending=False, **kwargs)
522        else:
523            result_df = df.sort_values("A", ascending=False, **kwargs)
524
525        tm.assert_frame_equal(result_df, expected)
526        tm.assert_frame_equal(df, DataFrame(original_dict))
527
528    def test_sort_values_nat_na_position_default(self):
529        # GH 13230
530        expected = DataFrame(
531            {
532                "A": [1, 2, 3, 4, 4],
533                "date": pd.DatetimeIndex(
534                    [
535                        "2010-01-01 09:00:00",
536                        "2010-01-01 09:00:01",
537                        "2010-01-01 09:00:02",
538                        "2010-01-01 09:00:03",
539                        "NaT",
540                    ]
541                ),
542            }
543        )
544        result = expected.sort_values(["A", "date"])
545        tm.assert_frame_equal(result, expected)
546
547
548class TestDataFrameSortKey:  # test key sorting (issue 27237)
549    def test_sort_values_inplace_key(self, sort_by_key):
550        frame = DataFrame(
551            np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"]
552        )
553
554        sorted_df = frame.copy()
555        return_value = sorted_df.sort_values(by="A", inplace=True, key=sort_by_key)
556        assert return_value is None
557        expected = frame.sort_values(by="A", key=sort_by_key)
558        tm.assert_frame_equal(sorted_df, expected)
559
560        sorted_df = frame.copy()
561        return_value = sorted_df.sort_values(
562            by=1, axis=1, inplace=True, key=sort_by_key
563        )
564        assert return_value is None
565        expected = frame.sort_values(by=1, axis=1, key=sort_by_key)
566        tm.assert_frame_equal(sorted_df, expected)
567
568        sorted_df = frame.copy()
569        return_value = sorted_df.sort_values(
570            by="A", ascending=False, inplace=True, key=sort_by_key
571        )
572        assert return_value is None
573        expected = frame.sort_values(by="A", ascending=False, key=sort_by_key)
574        tm.assert_frame_equal(sorted_df, expected)
575
576        sorted_df = frame.copy()
577        sorted_df.sort_values(
578            by=["A", "B"], ascending=False, inplace=True, key=sort_by_key
579        )
580        expected = frame.sort_values(by=["A", "B"], ascending=False, key=sort_by_key)
581        tm.assert_frame_equal(sorted_df, expected)
582
583    def test_sort_values_key(self):
584        df = DataFrame(np.array([0, 5, np.nan, 3, 2, np.nan]))
585
586        result = df.sort_values(0)
587        expected = df.iloc[[0, 4, 3, 1, 2, 5]]
588        tm.assert_frame_equal(result, expected)
589
590        result = df.sort_values(0, key=lambda x: x + 5)
591        expected = df.iloc[[0, 4, 3, 1, 2, 5]]
592        tm.assert_frame_equal(result, expected)
593
594        result = df.sort_values(0, key=lambda x: -x, ascending=False)
595        expected = df.iloc[[0, 4, 3, 1, 2, 5]]
596        tm.assert_frame_equal(result, expected)
597
598    def test_sort_values_by_key(self):
599        df = DataFrame(
600            {
601                "a": np.array([0, 3, np.nan, 3, 2, np.nan]),
602                "b": np.array([0, 2, np.nan, 5, 2, np.nan]),
603            }
604        )
605
606        result = df.sort_values("a", key=lambda x: -x)
607        expected = df.iloc[[1, 3, 4, 0, 2, 5]]
608        tm.assert_frame_equal(result, expected)
609
610        result = df.sort_values(by=["a", "b"], key=lambda x: -x)
611        expected = df.iloc[[3, 1, 4, 0, 2, 5]]
612        tm.assert_frame_equal(result, expected)
613
614        result = df.sort_values(by=["a", "b"], key=lambda x: -x, ascending=False)
615        expected = df.iloc[[0, 4, 1, 3, 2, 5]]
616        tm.assert_frame_equal(result, expected)
617
618    def test_sort_values_by_key_by_name(self):
619        df = DataFrame(
620            {
621                "a": np.array([0, 3, np.nan, 3, 2, np.nan]),
622                "b": np.array([0, 2, np.nan, 5, 2, np.nan]),
623            }
624        )
625
626        def key(col):
627            if col.name == "a":
628                return -col
629            else:
630                return col
631
632        result = df.sort_values(by="a", key=key)
633        expected = df.iloc[[1, 3, 4, 0, 2, 5]]
634        tm.assert_frame_equal(result, expected)
635
636        result = df.sort_values(by=["a"], key=key)
637        expected = df.iloc[[1, 3, 4, 0, 2, 5]]
638        tm.assert_frame_equal(result, expected)
639
640        result = df.sort_values(by="b", key=key)
641        expected = df.iloc[[0, 1, 4, 3, 2, 5]]
642        tm.assert_frame_equal(result, expected)
643
644        result = df.sort_values(by=["a", "b"], key=key)
645        expected = df.iloc[[1, 3, 4, 0, 2, 5]]
646        tm.assert_frame_equal(result, expected)
647
648    def test_sort_values_key_string(self):
649        df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
650
651        result = df.sort_values(1)
652        expected = df[::-1]
653        tm.assert_frame_equal(result, expected)
654
655        result = df.sort_values([0, 1], key=lambda col: col.str.lower())
656        tm.assert_frame_equal(result, df)
657
658        result = df.sort_values(
659            [0, 1], key=lambda col: col.str.lower(), ascending=False
660        )
661        expected = df.sort_values(1, key=lambda col: col.str.lower(), ascending=False)
662        tm.assert_frame_equal(result, expected)
663
664    def test_sort_values_key_empty(self, sort_by_key):
665        df = DataFrame(np.array([]))
666
667        df.sort_values(0, key=sort_by_key)
668        df.sort_index(key=sort_by_key)
669
670    def test_changes_length_raises(self):
671        df = DataFrame({"A": [1, 2, 3]})
672        with pytest.raises(ValueError, match="change the shape"):
673            df.sort_values("A", key=lambda x: x[:1])
674
675    def test_sort_values_key_axes(self):
676        df = DataFrame({0: ["Hello", "goodbye"], 1: [0, 1]})
677
678        result = df.sort_values(0, key=lambda col: col.str.lower())
679        expected = df[::-1]
680        tm.assert_frame_equal(result, expected)
681
682        result = df.sort_values(1, key=lambda col: -col)
683        expected = df[::-1]
684        tm.assert_frame_equal(result, expected)
685
686    def test_sort_values_key_dict_axis(self):
687        df = DataFrame({0: ["Hello", 0], 1: ["goodbye", 1]})
688
689        result = df.sort_values(0, key=lambda col: col.str.lower(), axis=1)
690        expected = df.loc[:, ::-1]
691        tm.assert_frame_equal(result, expected)
692
693        result = df.sort_values(1, key=lambda col: -col, axis=1)
694        expected = df.loc[:, ::-1]
695        tm.assert_frame_equal(result, expected)
696
697    @pytest.mark.parametrize("ordered", [True, False])
698    def test_sort_values_key_casts_to_categorical(self, ordered):
699        # https://github.com/pandas-dev/pandas/issues/36383
700        categories = ["c", "b", "a"]
701        df = DataFrame({"x": [1, 1, 1], "y": ["a", "b", "c"]})
702
703        def sorter(key):
704            if key.name == "y":
705                return pd.Series(
706                    Categorical(key, categories=categories, ordered=ordered)
707                )
708            return key
709
710        result = df.sort_values(by=["x", "y"], key=sorter)
711        expected = DataFrame(
712            {"x": [1, 1, 1], "y": ["c", "b", "a"]}, index=pd.Index([2, 1, 0])
713        )
714
715        tm.assert_frame_equal(result, expected)
716
717
@pytest.fixture
def df_none():
    """Six-row frame with columns "outer", "inner", "A" and ("B", 5)."""
    columns = {
        "outer": list("aaabbb"),
        "inner": [1, 2, 2, 2, 1, 1],
        # "A" counts down from 6 to 1, so every row has a distinct value.
        "A": np.arange(6, 0, -1),
        # A tuple used as a single column label.
        ("B", 5): ["one", "one", "two", "two", "one", "one"],
    }
    return DataFrame(columns)
728
729
@pytest.fixture(params=[["outer"], ["outer", "inner"]])
def df_idx(request, df_none):
    """``df_none`` with the parametrized column(s) moved into the index."""
    return df_none.set_index(request.param)
734
735
@pytest.fixture(
    params=[
        "inner",  # index level
        ["outer"],  # list of index level
        "A",  # column
        [("B", 5)],  # list of column
        ["inner", "outer"],  # two index levels
        [("B", 5), "outer"],  # index level and column
        ["A", ("B", 5)],  # Two columns
        # Previously a verbatim repeat of ["inner", "outer"]; the column
        # that the description promises was missing.
        ["inner", "outer", "A"],  # two index levels and column
    ]
)
def sort_names(request):
    """Parametrized ``by`` argument: a single label or a list of labels.

    The trailing comments describe each entry for the ``df_idx`` variant
    indexed by both "outer" and "inner"; when ``df_idx`` is indexed by
    "outer" only, "inner" is an ordinary column.
    """
    return request.param
750
751
@pytest.fixture(params=[True, False])
def ascending(request):
    """Parametrized sort direction: True first, then False."""
    direction = request.param
    return direction
755
756
757class TestSortValuesLevelAsStr:
758    def test_sort_index_level_and_column_label(
759        self, df_none, df_idx, sort_names, ascending
760    ):
761        # GH#14353
762
763        # Get index levels from df_idx
764        levels = df_idx.index.names
765
766        # Compute expected by sorting on columns and the setting index
767        expected = df_none.sort_values(
768            by=sort_names, ascending=ascending, axis=0
769        ).set_index(levels)
770
771        # Compute result sorting on mix on columns and index levels
772        result = df_idx.sort_values(by=sort_names, ascending=ascending, axis=0)
773
774        tm.assert_frame_equal(result, expected)
775
776    def test_sort_column_level_and_index_label(
777        self, df_none, df_idx, sort_names, ascending
778    ):
779        # GH#14353
780
781        # Get levels from df_idx
782        levels = df_idx.index.names
783
784        # Compute expected by sorting on axis=0, setting index levels, and then
785        # transposing. For some cases this will result in a frame with
786        # multiple column levels
787        expected = (
788            df_none.sort_values(by=sort_names, ascending=ascending, axis=0)
789            .set_index(levels)
790            .T
791        )
792
793        # Compute result by transposing and sorting on axis=1.
794        result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)
795
796        if len(levels) > 1:
797            # Accessing multi-level columns that are not lexsorted raises a
798            # performance warning
799            with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False):
800                tm.assert_frame_equal(result, expected)
801        else:
802            tm.assert_frame_equal(result, expected)
803