1# 2# Licensed to the Apache Software Foundation (ASF) under one or more 3# contributor license agreements. See the NOTICE file distributed with 4# this work for additional information regarding copyright ownership. 5# The ASF licenses this file to You under the Apache License, Version 2.0 6# (the "License"); you may not use this file except in compliance with 7# the License. You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17 18# Utility functions to serialize R objects so they can be read in Java. 19 20# nolint start 21# Type mapping from R to Java 22# 23# NULL -> Void 24# integer -> Int 25# character -> String 26# logical -> Boolean 27# double, numeric -> Double 28# raw -> Array[Byte] 29# Date -> Date 30# POSIXct,POSIXlt -> Time 31# 32# list[T] -> Array[T], where T is one of above mentioned types 33# environment -> Map[String, T], where T is a native type 34# jobj -> Object, where jobj is an object created in the backend 35# nolint end 36 37getSerdeType <- function(object) { 38 type <- class(object)[[1]] 39 if (type != "list") { 40 type 41 } else { 42 # Check if all elements are of same type 43 elemType <- unique(sapply(object, function(elem) { getSerdeType(elem) })) 44 if (length(elemType) <= 1) { 45 "array" 46 } else { 47 "list" 48 } 49 } 50} 51 52writeObject <- function(con, object, writeType = TRUE) { 53 # NOTE: In R vectors have same type as objects. So we don't support 54 # passing in vectors as arrays and instead require arrays to be passed 55 # as lists. 56 type <- class(object)[[1]] # class of POSIXlt is c("POSIXlt", "POSIXt") 57 # Checking types is needed here, since 'is.na' only handles atomic vectors, 58 # lists and pairlists 59 if (type %in% c("integer", "character", "logical", "double", "numeric")) { 60 if (is.na(object)) { 61 object <- NULL 62 type <- "NULL" 63 } 64 } 65 66 serdeType <- getSerdeType(object) 67 if (writeType) { 68 writeType(con, serdeType) 69 } 70 switch(serdeType, 71 NULL = writeVoid(con), 72 integer = writeInt(con, object), 73 character = writeString(con, object), 74 logical = writeBoolean(con, object), 75 double = writeDouble(con, object), 76 numeric = writeDouble(con, object), 77 raw = writeRaw(con, object), 78 array = writeArray(con, object), 79 list = writeList(con, object), 80 struct = writeList(con, object), 81 jobj = writeJobj(con, object), 82 environment = writeEnv(con, object), 83 Date = writeDate(con, object), 84 POSIXlt = writeTime(con, object), 85 POSIXct = writeTime(con, object), 86 stop(paste("Unsupported type for serialization", type))) 87} 88 89writeVoid <- function(con) { 90 # no value for NULL 91} 92 93writeJobj <- function(con, value) { 94 if (!isValidJobj(value)) { 95 stop("invalid jobj ", value$id) 96 } 97 writeString(con, value$id) 98} 99 100writeString <- function(con, value) { 101 utfVal <- enc2utf8(value) 102 writeInt(con, as.integer(nchar(utfVal, type = "bytes") + 1)) 103 writeBin(utfVal, con, endian = "big", useBytes = TRUE) 104} 105 106writeInt <- function(con, value) { 107 writeBin(as.integer(value), con, endian = "big") 108} 109 110writeDouble <- function(con, value) { 111 writeBin(value, con, endian = "big") 112} 113 114writeBoolean <- function(con, value) { 115 # TRUE becomes 1, FALSE becomes 0 116 writeInt(con, as.integer(value)) 117} 118 119writeRawSerialize <- function(outputCon, batch) { 120 outputSer <- serialize(batch, ascii = FALSE, connection = NULL) 121 writeRaw(outputCon, outputSer) 122} 123 124writeRowSerialize <- function(outputCon, rows) { 125 invisible(lapply(rows, function(r) { 126 bytes <- serializeRow(r) 127 writeRaw(outputCon, bytes) 128 })) 129} 130 131serializeRow <- function(row) { 132 rawObj <- rawConnection(raw(0), "wb") 133 on.exit(close(rawObj)) 134 writeList(rawObj, row) 135 rawConnectionValue(rawObj) 136} 137 138writeRaw <- function(con, batch) { 139 writeInt(con, length(batch)) 140 writeBin(batch, con, endian = "big") 141} 142 143writeType <- function(con, class) { 144 type <- switch(class, 145 NULL = "n", 146 integer = "i", 147 character = "c", 148 logical = "b", 149 double = "d", 150 numeric = "d", 151 raw = "r", 152 array = "a", 153 list = "l", 154 struct = "s", 155 jobj = "j", 156 environment = "e", 157 Date = "D", 158 POSIXlt = "t", 159 POSIXct = "t", 160 stop(paste("Unsupported type for serialization", class))) 161 writeBin(charToRaw(type), con) 162} 163 164# Used to pass arrays where all the elements are of the same type 165writeArray <- function(con, arr) { 166 # TODO: Empty lists are given type "character" right now. 167 # This may not work if the Java side expects array of any other type. 168 if (length(arr) == 0) { 169 elemType <- class("somestring") 170 } else { 171 elemType <- getSerdeType(arr[[1]]) 172 } 173 174 writeType(con, elemType) 175 writeInt(con, length(arr)) 176 177 if (length(arr) > 0) { 178 for (a in arr) { 179 writeObject(con, a, FALSE) 180 } 181 } 182} 183 184# Used to pass arrays where the elements can be of different types 185writeList <- function(con, list) { 186 writeInt(con, length(list)) 187 for (elem in list) { 188 writeObject(con, elem) 189 } 190} 191 192# Used to pass in hash maps required on Java side. 193writeEnv <- function(con, env) { 194 len <- length(env) 195 196 writeInt(con, len) 197 if (len > 0) { 198 writeArray(con, as.list(ls(env))) 199 vals <- lapply(ls(env), function(x) { env[[x]] }) 200 writeList(con, as.list(vals)) 201 } 202} 203 204writeDate <- function(con, date) { 205 writeString(con, as.character(date)) 206} 207 208writeTime <- function(con, time) { 209 writeDouble(con, as.double(time)) 210} 211 212# Used to serialize in a list of objects where each 213# object can be of a different type. Serialization format is 214# <object type> <object> for each object 215writeArgs <- function(con, args) { 216 if (length(args) > 0) { 217 for (a in args) { 218 writeObject(con, a) 219 } 220 } 221} 222