# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


# The following S3 methods are registered on load if dplyr is present

# Shared implementation behind every *_join() method in this file.
#
# Both inputs are passed through as_adq(), the join keys are resolved by
# handle_join_by(), the join description is stored in `x$join`, and the
# result of collapse.arrow_dplyr_query(x) is returned.
#
# Arguments:
#   x, y       The two inputs to join; each is converted with as_adq().
#   by         NULL or a character vector of key columns; see
#              handle_join_by() for the accepted forms.
#   copy, suffix, keep, na_matches
#              Accepted but currently ignored (see the TODOs below).
#              `na_matches` has no default; because it is never evaluated,
#              callers may omit it.
#   ...        Not used by this function.
#   join_type  Name of the JoinType element to use, e.g. "LEFT_OUTER".
do_join <- function(x,
                    y,
                    by = NULL,
                    copy = FALSE,
                    suffix = c(".x", ".y"),
                    ...,
                    keep = FALSE,
                    na_matches,
                    join_type) {
  # TODO: handle `copy` arg: ignore?
  # TODO: handle `suffix` arg: Arrow does prefix
  # TODO: handle `keep` arg: "Should the join keys from both 'x' and 'y'
  #       be preserved in the output?"
  # TODO: handle `na_matches` arg
  x <- as_adq(x)
  y <- as_adq(y)
  by <- handle_join_by(by, x, y)

  # Record the join on the left-hand query; `by` is a character vector
  # whose names are the x columns and whose values are the y columns.
  x$join <- list(
    type = JoinType[[join_type]],
    right_data = y,
    by = by
  )
  collapse.arrow_dplyr_query(x)
}

# Each method below forwards its arguments to do_join() with a fixed
# `join_type`; the Dataset and ArrowTabular methods are aliases of the
# arrow_dplyr_query method.

# left_join: join_type "LEFT_OUTER"
left_join.arrow_dplyr_query <- function(x,
                                        y,
                                        by = NULL,
                                        copy = FALSE,
                                        suffix = c(".x", ".y"),
                                        ...,
                                        keep = FALSE) {
  do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "LEFT_OUTER")
}
left_join.Dataset <- left_join.ArrowTabular <- left_join.arrow_dplyr_query

# right_join: join_type "RIGHT_OUTER"
right_join.arrow_dplyr_query <- function(x,
                                         y,
                                         by = NULL,
                                         copy = FALSE,
                                         suffix = c(".x", ".y"),
                                         ...,
                                         keep = FALSE) {
  do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "RIGHT_OUTER")
}
right_join.Dataset <- right_join.ArrowTabular <- right_join.arrow_dplyr_query

# inner_join: join_type "INNER"
inner_join.arrow_dplyr_query <- function(x,
                                         y,
                                         by = NULL,
                                         copy = FALSE,
                                         suffix = c(".x", ".y"),
                                         ...,
                                         keep = FALSE) {
  do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "INNER")
}
inner_join.Dataset <- inner_join.ArrowTabular <- inner_join.arrow_dplyr_query

# full_join: join_type "FULL_OUTER"
full_join.arrow_dplyr_query <- function(x,
                                        y,
                                        by = NULL,
                                        copy = FALSE,
                                        suffix = c(".x", ".y"),
                                        ...,
                                        keep = FALSE) {
  do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "FULL_OUTER")
}
full_join.Dataset <- full_join.ArrowTabular <- full_join.arrow_dplyr_query

# semi_join: join_type "LEFT_SEMI"
semi_join.arrow_dplyr_query <- function(x,
                                        y,
                                        by = NULL,
                                        copy = FALSE,
                                        suffix = c(".x", ".y"),
                                        ...,
                                        keep = FALSE) {
  do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "LEFT_SEMI")
}
semi_join.Dataset <- semi_join.ArrowTabular <- semi_join.arrow_dplyr_query

# anti_join: join_type "LEFT_ANTI"
anti_join.arrow_dplyr_query <- function(x,
                                        y,
                                        by = NULL,
                                        copy = FALSE,
                                        suffix = c(".x", ".y"),
                                        ...,
                                        keep = FALSE) {
  do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "LEFT_ANTI")
}
anti_join.Dataset <- anti_join.ArrowTabular <- anti_join.arrow_dplyr_query

# Normalise a join `by` specification into a fully named character vector.
#
# Arguments:
#   by    NULL, or a character vector. Names (where given) are columns of
#         `x`; values are columns of `y`.
#   x, y  The join inputs; only names(x) and names(y) are used.
#
# Returns a character vector whose names are the key columns in `x` and
# whose values are the matching key columns in `y`:
#   * by = NULL            -> every column name common to x and y
#   * by = c("a", "b")     -> c(a = "a", b = "b")
#   * by = c(a = "b")      -> c(a = "b")
#   * by = c("a", b = "c") -> c(a = "a", b = "c")
#
# Errors if `by` is neither NULL nor character, or if any requested key
# column is absent from the corresponding input.
handle_join_by <- function(by, x, y) {
  x_cols <- names(x)
  y_cols <- names(y)

  if (is.null(by)) {
    # Natural join: key on the columns the two inputs have in common.
    shared <- intersect(x_cols, y_cols)
    names(shared) <- shared
    return(shared)
  }
  stopifnot(is.character(by))

  # An entry without a name refers to the same column on both sides. This
  # also covers partially named vectors such as c("a", b = "c"), whose
  # names() are c("", "b"); without this step the "" entry would be looked
  # up as a column of `x` and rejected.
  x_keys <- names(by)
  if (is.null(x_keys)) {
    x_keys <- by
  } else {
    unnamed <- is.na(x_keys) | !nzchar(x_keys)
    x_keys[unnamed] <- by[unnamed]
  }
  names(by) <- x_keys

  # Report exactly which key columns are missing and from which input.
  missing_x <- setdiff(x_keys, x_cols)
  missing_y <- setdiff(unname(by), y_cols)
  if (length(missing_x) > 0 || length(missing_y) > 0) {
    msg <- "Join columns must be present in data."
    if (length(missing_x) > 0) {
      msg <- c(
        msg,
        paste0("Missing from `x`: ", paste(missing_x, collapse = ", "))
      )
    }
    if (length(missing_y) > 0) {
      msg <- c(
        msg,
        paste0("Missing from `y`: ", paste(missing_y, collapse = ", "))
      )
    }
    stop(paste(msg, collapse = "\n"), call. = FALSE)
  }
  by
}