1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18
19# The following S3 methods are registered on load if dplyr is present
20
21tbl_vars.arrow_dplyr_query <- function(x) names(x$selected_columns)
22
23select.arrow_dplyr_query <- function(.data, ...) {
24  check_select_helpers(enexprs(...))
25  column_select(as_adq(.data), !!!enquos(...))
26}
27select.Dataset <- select.ArrowTabular <- select.arrow_dplyr_query
28
29rename.arrow_dplyr_query <- function(.data, ...) {
30  check_select_helpers(enexprs(...))
31  column_select(as_adq(.data), !!!enquos(...), .FUN = vars_rename)
32}
33rename.Dataset <- rename.ArrowTabular <- rename.arrow_dplyr_query
34
35column_select <- function(.data, ..., .FUN = vars_select) {
36  # .FUN is either tidyselect::vars_select or tidyselect::vars_rename
37  # It operates on the names() of selected_columns, i.e. the column names
38  # factoring in any renaming that may already have happened
39  out <- .FUN(names(.data), !!!enquos(...))
40  # Make sure that the resulting selected columns map back to the original data,
41  # as in when there are multiple renaming steps
42  .data$selected_columns <- set_names(.data$selected_columns[out], names(out))
43
44  # If we've renamed columns, we need to project that renaming into other
45  # query parameters we've collected
46  renamed <- out[names(out) != out]
47  if (length(renamed)) {
48    # Massage group_by
49    gbv <- .data$group_by_vars
50    renamed_groups <- gbv %in% renamed
51    gbv[renamed_groups] <- names(renamed)[match(gbv[renamed_groups], renamed)]
52    .data$group_by_vars <- gbv
53    # No need to massage filters because those contain references to Arrow objects
54  }
55  .data
56}
57
58relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL) {
59  # The code in this function is adapted from the code in dplyr::relocate.data.frame
60  # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R
61  # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829
62
63  .data <- as_adq(.data)
64
65  # Assign the schema to the expressions
66  map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
67
68  # Create a mask for evaluating expressions in tidyselect helpers
69  mask <- new_environment(.cache$functions, parent = caller_env())
70
71  to_move <- eval_select(substitute(c(...)), .data$selected_columns, mask)
72
73  .before <- enquo(.before)
74  .after <- enquo(.after)
75  has_before <- !quo_is_null(.before)
76  has_after <- !quo_is_null(.after)
77
78  if (has_before && has_after) {
79    abort("Must supply only one of `.before` and `.after`.")
80  } else if (has_before) {
81    where <- min(unname(eval_select(quo_get_expr(.before), .data$selected_columns, mask)))
82    if (!where %in% to_move) {
83      to_move <- c(to_move, where)
84    }
85  } else if (has_after) {
86    where <- max(unname(eval_select(quo_get_expr(.after), .data$selected_columns, mask)))
87    if (!where %in% to_move) {
88      to_move <- c(where, to_move)
89    }
90  } else {
91    where <- 1L
92    if (!where %in% to_move) {
93      to_move <- c(to_move, where)
94    }
95  }
96
97  lhs <- setdiff(seq2(1, where - 1), to_move)
98  rhs <- setdiff(seq2(where + 1, length(.data$selected_columns)), to_move)
99
100  pos <- vec_unique(c(lhs, to_move, rhs))
101  new_names <- names(pos)
102  .data$selected_columns <- .data$selected_columns[pos]
103
104  if (!is.null(new_names)) {
105    names(.data$selected_columns)[new_names != ""] <- new_names[new_names != ""]
106  }
107  .data
108}
109relocate.Dataset <- relocate.ArrowTabular <- relocate.arrow_dplyr_query
110
111check_select_helpers <- function(exprs) {
112  # Throw an error if unsupported tidyselect selection helpers in `exprs`
113  exprs <- lapply(exprs, function(x) if (is_quosure(x)) quo_get_expr(x) else x)
114  unsup_select_helpers <- "where"
115  funs_in_exprs <- unlist(lapply(exprs, all_funs))
116  unsup_funs <- funs_in_exprs[funs_in_exprs %in% unsup_select_helpers]
117  if (length(unsup_funs)) {
118    stop(
119      "Unsupported selection ",
120      ngettext(length(unsup_funs), "helper: ", "helpers: "),
121      oxford_paste(paste0(unsup_funs, "()"), quote = FALSE),
122      call. = FALSE
123    )
124  }
125}
126