1#
2# Licensed to the Apache Software Foundation (ASF) under one or more
3# contributor license agreements.  See the NOTICE file distributed with
4# this work for additional information regarding copyright ownership.
5# The ASF licenses this file to You under the Apache License, Version 2.0
6# (the "License"); you may not use this file except in compliance with
7# the License.  You may obtain a copy of the License at
8#
9#    http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18# Utility functions to serialize R objects so they can be read in Java.
19
20# nolint start
21# Type mapping from R to Java
22#
23# NULL -> Void
24# integer -> Int
25# character -> String
26# logical -> Boolean
27# double, numeric -> Double
28# raw -> Array[Byte]
29# Date -> Date
30# POSIXct,POSIXlt -> Time
31#
32# list[T] -> Array[T], where T is one of above mentioned types
33# environment -> Map[String, T], where T is a native type
34# jobj -> Object, where jobj is an object created in the backend
35# nolint end
36
37getSerdeType <- function(object) {
38  type <- class(object)[[1]]
39  if (type != "list") {
40    type
41  } else {
42    # Check if all elements are of same type
43    elemType <- unique(sapply(object, function(elem) { getSerdeType(elem) }))
44    if (length(elemType) <= 1) {
45      "array"
46    } else {
47      "list"
48    }
49  }
50}
51
52writeObject <- function(con, object, writeType = TRUE) {
53  # NOTE: In R vectors have same type as objects. So we don't support
54  # passing in vectors as arrays and instead require arrays to be passed
55  # as lists.
56  type <- class(object)[[1]]  # class of POSIXlt is c("POSIXlt", "POSIXt")
57  # Checking types is needed here, since 'is.na' only handles atomic vectors,
58  # lists and pairlists
59  if (type %in% c("integer", "character", "logical", "double", "numeric")) {
60    if (is.na(object)) {
61      object <- NULL
62      type <- "NULL"
63    }
64  }
65
66  serdeType <- getSerdeType(object)
67  if (writeType) {
68    writeType(con, serdeType)
69  }
70  switch(serdeType,
71         NULL = writeVoid(con),
72         integer = writeInt(con, object),
73         character = writeString(con, object),
74         logical = writeBoolean(con, object),
75         double = writeDouble(con, object),
76         numeric = writeDouble(con, object),
77         raw = writeRaw(con, object),
78         array = writeArray(con, object),
79         list = writeList(con, object),
80         struct = writeList(con, object),
81         jobj = writeJobj(con, object),
82         environment = writeEnv(con, object),
83         Date = writeDate(con, object),
84         POSIXlt = writeTime(con, object),
85         POSIXct = writeTime(con, object),
86         stop(paste("Unsupported type for serialization", type)))
87}
88
89writeVoid <- function(con) {
90  # no value for NULL
91}
92
93writeJobj <- function(con, value) {
94  if (!isValidJobj(value)) {
95    stop("invalid jobj ", value$id)
96  }
97  writeString(con, value$id)
98}
99
100writeString <- function(con, value) {
101  utfVal <- enc2utf8(value)
102  writeInt(con, as.integer(nchar(utfVal, type = "bytes") + 1))
103  writeBin(utfVal, con, endian = "big", useBytes = TRUE)
104}
105
106writeInt <- function(con, value) {
107  writeBin(as.integer(value), con, endian = "big")
108}
109
110writeDouble <- function(con, value) {
111  writeBin(value, con, endian = "big")
112}
113
114writeBoolean <- function(con, value) {
115  # TRUE becomes 1, FALSE becomes 0
116  writeInt(con, as.integer(value))
117}
118
119writeRawSerialize <- function(outputCon, batch) {
120  outputSer <- serialize(batch, ascii = FALSE, connection = NULL)
121  writeRaw(outputCon, outputSer)
122}
123
124writeRowSerialize <- function(outputCon, rows) {
125  invisible(lapply(rows, function(r) {
126    bytes <- serializeRow(r)
127    writeRaw(outputCon, bytes)
128  }))
129}
130
131serializeRow <- function(row) {
132  rawObj <- rawConnection(raw(0), "wb")
133  on.exit(close(rawObj))
134  writeList(rawObj, row)
135  rawConnectionValue(rawObj)
136}
137
138writeRaw <- function(con, batch) {
139  writeInt(con, length(batch))
140  writeBin(batch, con, endian = "big")
141}
142
143writeType <- function(con, class) {
144  type <- switch(class,
145                 NULL = "n",
146                 integer = "i",
147                 character = "c",
148                 logical = "b",
149                 double = "d",
150                 numeric = "d",
151                 raw = "r",
152                 array = "a",
153                 list = "l",
154                 struct = "s",
155                 jobj = "j",
156                 environment = "e",
157                 Date = "D",
158                 POSIXlt = "t",
159                 POSIXct = "t",
160                 stop(paste("Unsupported type for serialization", class)))
161  writeBin(charToRaw(type), con)
162}
163
164# Used to pass arrays where all the elements are of the same type
165writeArray <- function(con, arr) {
166  # TODO: Empty lists are given type "character" right now.
167  # This may not work if the Java side expects array of any other type.
168  if (length(arr) == 0) {
169    elemType <- class("somestring")
170  } else {
171    elemType <- getSerdeType(arr[[1]])
172  }
173
174  writeType(con, elemType)
175  writeInt(con, length(arr))
176
177  if (length(arr) > 0) {
178    for (a in arr) {
179      writeObject(con, a, FALSE)
180    }
181  }
182}
183
184# Used to pass arrays where the elements can be of different types
185writeList <- function(con, list) {
186  writeInt(con, length(list))
187  for (elem in list) {
188    writeObject(con, elem)
189  }
190}
191
192# Used to pass in hash maps required on Java side.
193writeEnv <- function(con, env) {
194  len <- length(env)
195
196  writeInt(con, len)
197  if (len > 0) {
198    writeArray(con, as.list(ls(env)))
199    vals <- lapply(ls(env), function(x) { env[[x]] })
200    writeList(con, as.list(vals))
201  }
202}
203
204writeDate <- function(con, date) {
205  writeString(con, as.character(date))
206}
207
208writeTime <- function(con, time) {
209  writeDouble(con, as.double(time))
210}
211
212# Used to serialize in a list of objects where each
213# object can be of a different type. Serialization format is
214# <object type> <object> for each object
215writeArgs <- function(con, args) {
216  if (length(args) > 0) {
217    for (a in args) {
218      writeObject(con, a)
219    }
220  }
221}
222