1# Licensed to the Apache Software Foundation (ASF) under one 2# or more contributor license agreements. See the NOTICE file 3# distributed with this work for additional information 4# regarding copyright ownership. The ASF licenses this file 5# to you under the Apache License, Version 2.0 (the 6# "License"); you may not use this file except in compliance 7# with the License. You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, 12# software distributed under the License is distributed on an 13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14# KIND, either express or implied. See the License for the 15# specific language governing permissions and limitations 16# under the License. 17 18 19# The following S3 methods are registered on load if dplyr is present 20 21tbl_vars.arrow_dplyr_query <- function(x) names(x$selected_columns) 22 23select.arrow_dplyr_query <- function(.data, ...) { 24 check_select_helpers(enexprs(...)) 25 column_select(as_adq(.data), !!!enquos(...)) 26} 27select.Dataset <- select.ArrowTabular <- select.arrow_dplyr_query 28 29rename.arrow_dplyr_query <- function(.data, ...) { 30 check_select_helpers(enexprs(...)) 31 column_select(as_adq(.data), !!!enquos(...), .FUN = vars_rename) 32} 33rename.Dataset <- rename.ArrowTabular <- rename.arrow_dplyr_query 34 35column_select <- function(.data, ..., .FUN = vars_select) { 36 # .FUN is either tidyselect::vars_select or tidyselect::vars_rename 37 # It operates on the names() of selected_columns, i.e. the column names 38 # factoring in any renaming that may already have happened 39 out <- .FUN(names(.data), !!!enquos(...)) 40 # Make sure that the resulting selected columns map back to the original data, 41 # as in when there are multiple renaming steps 42 .data$selected_columns <- set_names(.data$selected_columns[out], names(out)) 43 44 # If we've renamed columns, we need to project that renaming into other 45 # query parameters we've collected 46 renamed <- out[names(out) != out] 47 if (length(renamed)) { 48 # Massage group_by 49 gbv <- .data$group_by_vars 50 renamed_groups <- gbv %in% renamed 51 gbv[renamed_groups] <- names(renamed)[match(gbv[renamed_groups], renamed)] 52 .data$group_by_vars <- gbv 53 # No need to massage filters because those contain references to Arrow objects 54 } 55 .data 56} 57 58relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL) { 59 # The code in this function is adapted from the code in dplyr::relocate.data.frame 60 # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R 61 # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829 62 63 .data <- as_adq(.data) 64 65 # Assign the schema to the expressions 66 map(.data$selected_columns, ~ (.$schema <- .data$.data$schema)) 67 68 # Create a mask for evaluating expressions in tidyselect helpers 69 mask <- new_environment(.cache$functions, parent = caller_env()) 70 71 to_move <- eval_select(substitute(c(...)), .data$selected_columns, mask) 72 73 .before <- enquo(.before) 74 .after <- enquo(.after) 75 has_before <- !quo_is_null(.before) 76 has_after <- !quo_is_null(.after) 77 78 if (has_before && has_after) { 79 abort("Must supply only one of `.before` and `.after`.") 80 } else if (has_before) { 81 where <- min(unname(eval_select(quo_get_expr(.before), .data$selected_columns, mask))) 82 if (!where %in% to_move) { 83 to_move <- c(to_move, where) 84 } 85 } else if (has_after) { 86 where <- max(unname(eval_select(quo_get_expr(.after), .data$selected_columns, mask))) 87 if (!where %in% to_move) { 88 to_move <- c(where, to_move) 89 } 90 } else { 91 where <- 1L 92 if (!where %in% to_move) { 93 to_move <- c(to_move, where) 94 } 95 } 96 97 lhs <- setdiff(seq2(1, where - 1), to_move) 98 rhs <- setdiff(seq2(where + 1, length(.data$selected_columns)), to_move) 99 100 pos <- vec_unique(c(lhs, to_move, rhs)) 101 new_names <- names(pos) 102 .data$selected_columns <- .data$selected_columns[pos] 103 104 if (!is.null(new_names)) { 105 names(.data$selected_columns)[new_names != ""] <- new_names[new_names != ""] 106 } 107 .data 108} 109relocate.Dataset <- relocate.ArrowTabular <- relocate.arrow_dplyr_query 110 111check_select_helpers <- function(exprs) { 112 # Throw an error if unsupported tidyselect selection helpers in `exprs` 113 exprs <- lapply(exprs, function(x) if (is_quosure(x)) quo_get_expr(x) else x) 114 unsup_select_helpers <- "where" 115 funs_in_exprs <- unlist(lapply(exprs, all_funs)) 116 unsup_funs <- funs_in_exprs[funs_in_exprs %in% unsup_select_helpers] 117 if (length(unsup_funs)) { 118 stop( 119 "Unsupported selection ", 120 ngettext(length(unsup_funs), "helper: ", "helpers: "), 121 oxford_paste(paste0(unsup_funs, "()"), quote = FALSE), 122 call. = FALSE 123 ) 124 } 125} 126