1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17
18
19# The following S3 methods are registered on load if dplyr is present
20
# Shared implementation behind the join methods defined in this file.
#
# Both inputs are converted with as_adq(), the join keys are resolved with
# handle_join_by(), and the join specification (type, right-hand data, keys)
# is stored in the `join` element of the left-hand query. The value returned
# is the result of collapse.arrow_dplyr_query() on that query.
#
# `join_type` is used to look up an entry of `JoinType` by name.
#
# Arguments accepted but not used by this function yet:
# TODO: `copy` - ignore?
# TODO: `suffix` - Arrow does prefix
# TODO: `keep` - "Should the join keys from both `x` and `y` be preserved in
#       the output?"
# TODO: `na_matches`
do_join <- function(
    x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...,
    keep = FALSE, na_matches, join_type) {
  left <- as_adq(x)
  right <- as_adq(y)
  keys <- handle_join_by(by, left, right)

  left$join <- list(type = JoinType[[join_type]], right_data = right, by = keys)

  collapse.arrow_dplyr_query(left)
}
45
# left_join() method: forwards all of its arguments to do_join() with the
# join type fixed to "LEFT_OUTER".
left_join.arrow_dplyr_query <- function(
    x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE) {
  do_join(
    x, y, by, copy, suffix, ...,
    keep = keep,
    join_type = "LEFT_OUTER"
  )
}
# The same function also serves as the ArrowTabular and Dataset methods.
left_join.ArrowTabular <- left_join.arrow_dplyr_query
left_join.Dataset <- left_join.arrow_dplyr_query
56
# right_join() method: forwards all of its arguments to do_join() with the
# join type fixed to "RIGHT_OUTER".
right_join.arrow_dplyr_query <- function(
    x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE) {
  do_join(
    x, y, by, copy, suffix, ...,
    keep = keep,
    join_type = "RIGHT_OUTER"
  )
}
# The same function also serves as the ArrowTabular and Dataset methods.
right_join.ArrowTabular <- right_join.arrow_dplyr_query
right_join.Dataset <- right_join.arrow_dplyr_query
67
# inner_join() method: forwards all of its arguments to do_join() with the
# join type fixed to "INNER".
inner_join.arrow_dplyr_query <- function(
    x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE) {
  do_join(
    x, y, by, copy, suffix, ...,
    keep = keep,
    join_type = "INNER"
  )
}
# The same function also serves as the ArrowTabular and Dataset methods.
inner_join.ArrowTabular <- inner_join.arrow_dplyr_query
inner_join.Dataset <- inner_join.arrow_dplyr_query
78
# full_join() method: forwards all of its arguments to do_join() with the
# join type fixed to "FULL_OUTER".
full_join.arrow_dplyr_query <- function(
    x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE) {
  do_join(
    x, y, by, copy, suffix, ...,
    keep = keep,
    join_type = "FULL_OUTER"
  )
}
# The same function also serves as the ArrowTabular and Dataset methods.
full_join.ArrowTabular <- full_join.arrow_dplyr_query
full_join.Dataset <- full_join.arrow_dplyr_query
89
# semi_join() method: forwards all of its arguments (including `suffix` and
# `keep`) to do_join() with the join type fixed to "LEFT_SEMI".
semi_join.arrow_dplyr_query <- function(
    x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE) {
  do_join(
    x, y, by, copy, suffix, ...,
    keep = keep,
    join_type = "LEFT_SEMI"
  )
}
# The same function also serves as the ArrowTabular and Dataset methods.
semi_join.ArrowTabular <- semi_join.arrow_dplyr_query
semi_join.Dataset <- semi_join.arrow_dplyr_query
100
# anti_join() method: forwards all of its arguments (including `suffix` and
# `keep`) to do_join() with the join type fixed to "LEFT_ANTI".
anti_join.arrow_dplyr_query <- function(
    x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ..., keep = FALSE) {
  do_join(
    x, y, by, copy, suffix, ...,
    keep = keep,
    join_type = "LEFT_ANTI"
  )
}
# The same function also serves as the ArrowTabular and Dataset methods.
anti_join.ArrowTabular <- anti_join.arrow_dplyr_query
anti_join.Dataset <- anti_join.arrow_dplyr_query
111
# Normalize the `by` argument of a join into a named character vector whose
# names are the key columns looked up in `x` and whose values are the key
# columns looked up in `y`.
#
# * `by = NULL`: every column name common to `x` and `y` is used as a key.
# * Unnamed character vector: each column is used under the same name on
#   both sides.
# * Named character vector: names refer to `x`, values refer to `y`.
#   Elements without a name (empty or NA name) use their value on both sides,
#   so c("a", b = "c") is treated as c(a = "a", b = "c").
#
# Signals an error if `by` is neither NULL nor a character vector, or if any
# key column is absent from names(x) / names(y).
handle_join_by <- function(by, x, y) {
  if (is.null(by)) {
    common <- intersect(names(x), names(y))
    names(common) <- common
    return(common)
  }
  stopifnot(is.character(by))

  x_cols <- names(by)
  if (is.null(x_cols)) {
    # No names at all: the same columns are used on both sides
    x_cols <- unname(by)
  } else {
    # Partially named: fill each missing name with the element's own value
    unnamed <- is.na(x_cols) | x_cols == ""
    x_cols[unnamed] <- by[unnamed]
  }
  names(by) <- x_cols

  missing_x <- setdiff(x_cols, names(x))
  missing_y <- setdiff(unname(by), names(y))
  if (length(missing_x) > 0 || length(missing_y) > 0) {
    # Report every missing column at once, grouped by the side it belongs to
    problems <- c(
      if (length(missing_x) > 0) {
        paste0(paste0("`", missing_x, "`", collapse = ", "), " not present in x")
      },
      if (length(missing_y) > 0) {
        paste0(paste0("`", missing_y, "`", collapse = ", "), " not present in y")
      }
    )
    stop(
      "Join columns must be present in data: ",
      paste(problems, collapse = "; "),
      call. = FALSE
    )
  }
  by
}
127