1 // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
2 // Licensed under the MIT License:
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 // THE SOFTWARE.
21
22 // This file implements a simple serialization format for Cap'n Proto messages. The format
23 // is as follows:
24 //
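// * 32-bit little-endian segment count, minus one (4 bytes). (A message always has at least one
//   segment, so a stored value of 0 means one segment.)
// * 32-bit little-endian size of each segment, in words (4*(segment count) bytes).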
27 // * Padding so that subsequent data is 64-bit-aligned (0 or 4 bytes). (I.e., if there are an even
28 // number of segments, there are 4 bytes of zeros here, otherwise there is no padding.)
29 // * Data from each segment, in order (8*sum(segment sizes) bytes)
30 //
31 // This format has some important properties:
32 // - It is self-delimiting, so multiple messages may be written to a stream without any external
33 // delimiter.
34 // - The total size and position of each segment can be determined by reading only the first part
35 // of the message, allowing lazy and random-access reading of the segment data.
36 // - A message is always at least 8 bytes.
37 // - A single-segment message can be read entirely in two system calls with no buffering.
38 // - A multi-segment message can be read entirely in three system calls with no buffering.
39 // - The format is appropriate for mmap()ing since all data is aligned.
40
41 #pragma once
42
43 #include "message.h"
44 #include <kj/io.h>
45
46 CAPNP_BEGIN_HEADER
47
48 namespace capnp {
49
50 class FlatArrayMessageReader: public MessageReader {
51 // Parses a message from a flat array. Note that it makes sense to use this together with mmap()
52 // for extremely fast parsing.
53
54 public:
55 FlatArrayMessageReader(kj::ArrayPtr<const word> array, ReaderOptions options = ReaderOptions());
56 // The array must remain valid until the MessageReader is destroyed.
57
58 kj::ArrayPtr<const word> getSegment(uint id) override;
59
getEnd()60 const word* getEnd() const { return end; }
61 // Get a pointer just past the end of the message as determined by reading the message header.
62 // This could actually be before the end of the input array. This pointer is useful e.g. if
63 // you know that the input array has extra stuff appended after the message and you want to
64 // get at it.
65
66 private:
67 // Optimize for single-segment case.
68 kj::ArrayPtr<const word> segment0;
69 kj::Array<kj::ArrayPtr<const word>> moreSegments;
70 const word* end;
71 };
72
73 kj::ArrayPtr<const word> initMessageBuilderFromFlatArrayCopy(
74 kj::ArrayPtr<const word> array, MessageBuilder& target,
75 ReaderOptions options = ReaderOptions());
76 // Convenience function which reads a message using `FlatArrayMessageReader` then copies the
77 // content into the target `MessageBuilder`, verifying that the message structure is valid
78 // (although not necessarily that it matches the desired schema).
79 //
80 // Returns an ArrayPtr containing any words left over in the array after consuming the whole
81 // message. This is useful when reading multiple messages that have been concatenated. See also
82 // FlatArrayMessageReader::getEnd().
83 //
84 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
85 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
86 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
87
88 kj::Array<word> messageToFlatArray(MessageBuilder& builder);
89 // Constructs a flat array containing the entire content of the given message.
90 //
91 // To output the message as bytes, use `.asBytes()` on the returned word array. Keep in mind that
92 // `asBytes()` returns an ArrayPtr, so you have to save the Array as well to prevent it from being
93 // deleted. For example:
94 //
95 // kj::Array<capnp::word> words = messageToFlatArray(myMessage);
96 // kj::ArrayPtr<kj::byte> bytes = words.asBytes();
97 // write(fd, bytes.begin(), bytes.size());
98
99 kj::Array<word> messageToFlatArray(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
100 // Version of messageToFlatArray that takes a raw segment array.
101
102 size_t computeSerializedSizeInWords(MessageBuilder& builder);
103 // Returns the size, in words, that will be needed to serialize the message, including the header.
104
105 size_t computeSerializedSizeInWords(kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
106 // Version of computeSerializedSizeInWords that takes a raw segment array.
107
108 size_t expectedSizeInWordsFromPrefix(kj::ArrayPtr<const word> messagePrefix);
109 // Given a prefix of a serialized message, try to determine the expected total size of the message,
110 // in words. The returned size is based on the information known so far; it may be an underestimate
111 // if the prefix doesn't contain the full segment table.
112 //
113 // If the returned value is greater than `messagePrefix.size()`, then the message is not yet
114 // complete and the app cannot parse it yet. If the returned value is less than or equal to
115 // `messagePrefix.size()`, then the returned value is the exact total size of the message; any
116 // remaining bytes are part of the next message.
117 //
118 // This function is useful when reading messages from a stream in an asynchronous way, but when
119 // using the full KJ async infrastructure would be too difficult. Each time bytes are received,
120 // use this function to determine if an entire message is ready to be parsed.
121
122 // =======================================================================================
123
124 class InputStreamMessageReader: public MessageReader {
125 // A MessageReader that reads from an abstract kj::InputStream. See also StreamFdMessageReader
126 // for a subclass specific to file descriptors.
127
128 public:
129 InputStreamMessageReader(kj::InputStream& inputStream,
130 ReaderOptions options = ReaderOptions(),
131 kj::ArrayPtr<word> scratchSpace = nullptr);
132 ~InputStreamMessageReader() noexcept(false);
133
134 // implements MessageReader ----------------------------------------
135 kj::ArrayPtr<const word> getSegment(uint id) override;
136
137 private:
138 kj::InputStream& inputStream;
139 byte* readPos;
140
141 // Optimize for single-segment case.
142 kj::ArrayPtr<const word> segment0;
143 kj::Array<kj::ArrayPtr<const word>> moreSegments;
144
145 kj::Array<word> ownedSpace;
146 // Only if scratchSpace wasn't big enough.
147
148 kj::UnwindDetector unwindDetector;
149 };
150
151 void readMessageCopy(kj::InputStream& input, MessageBuilder& target,
152 ReaderOptions options = ReaderOptions(),
153 kj::ArrayPtr<word> scratchSpace = nullptr);
154 // Convenience function which reads a message using `InputStreamMessageReader` then copies the
155 // content into the target `MessageBuilder`, verifying that the message structure is valid
156 // (although not necessarily that it matches the desired schema).
157 //
158 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
159 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
160 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
161
162 void writeMessage(kj::OutputStream& output, MessageBuilder& builder);
163 // Write the message to the given output stream.
164
165 void writeMessage(kj::OutputStream& output, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
166 // Write the segment array to the given output stream.
167
168 // =======================================================================================
169 // Specializations for reading from / writing to file descriptors.
170
171 class StreamFdMessageReader: private kj::FdInputStream, public InputStreamMessageReader {
172 // A MessageReader that reads from a stream-based file descriptor.
173
174 public:
175 StreamFdMessageReader(int fd, ReaderOptions options = ReaderOptions(),
176 kj::ArrayPtr<word> scratchSpace = nullptr)
FdInputStream(fd)177 : FdInputStream(fd), InputStreamMessageReader(*this, options, scratchSpace) {}
178 // Read message from a file descriptor, without taking ownership of the descriptor.
179
180 StreamFdMessageReader(kj::AutoCloseFd fd, ReaderOptions options = ReaderOptions(),
181 kj::ArrayPtr<word> scratchSpace = nullptr)
FdInputStream(kj::mv (fd))182 : FdInputStream(kj::mv(fd)), InputStreamMessageReader(*this, options, scratchSpace) {}
183 // Read a message from a file descriptor, taking ownership of the descriptor.
184
185 ~StreamFdMessageReader() noexcept(false);
186 };
187
188 void readMessageCopyFromFd(int fd, MessageBuilder& target,
189 ReaderOptions options = ReaderOptions(),
190 kj::ArrayPtr<word> scratchSpace = nullptr);
191 // Convenience function which reads a message using `StreamFdMessageReader` then copies the
192 // content into the target `MessageBuilder`, verifying that the message structure is valid
193 // (although not necessarily that it matches the desired schema).
194 //
195 // (Note that it's also possible to initialize a `MessageBuilder` directly without a copy using one
196 // of `MessageBuilder`'s constructors. However, this approach skips the validation step and is not
197 // safe to use on untrusted input. Therefore, we do not provide a convenience method for it.)
198
199 void writeMessageToFd(int fd, MessageBuilder& builder);
200 // Write the message to the given file descriptor.
201 //
202 // This function throws an exception on any I/O error. If your code is not exception-safe, be sure
203 // you catch this exception at the call site. If throwing an exception is not acceptable, you
204 // can implement your own OutputStream with arbitrary error handling and then use writeMessage().
205
206 void writeMessageToFd(int fd, kj::ArrayPtr<const kj::ArrayPtr<const word>> segments);
207 // Write the segment array to the given file descriptor.
208 //
209 // This function throws an exception on any I/O error. If your code is not exception-safe, be sure
210 // you catch this exception at the call site. If throwing an exception is not acceptable, you
211 // can implement your own OutputStream with arbitrary error handling and then use writeMessage().
212
213 // =======================================================================================
214 // inline stuff
215
messageToFlatArray(MessageBuilder & builder)216 inline kj::Array<word> messageToFlatArray(MessageBuilder& builder) {
217 return messageToFlatArray(builder.getSegmentsForOutput());
218 }
219
computeSerializedSizeInWords(MessageBuilder & builder)220 inline size_t computeSerializedSizeInWords(MessageBuilder& builder) {
221 return computeSerializedSizeInWords(builder.getSegmentsForOutput());
222 }
223
writeMessage(kj::OutputStream & output,MessageBuilder & builder)224 inline void writeMessage(kj::OutputStream& output, MessageBuilder& builder) {
225 writeMessage(output, builder.getSegmentsForOutput());
226 }
227
writeMessageToFd(int fd,MessageBuilder & builder)228 inline void writeMessageToFd(int fd, MessageBuilder& builder) {
229 writeMessageToFd(fd, builder.getSegmentsForOutput());
230 }
231
232 } // namespace capnp
233
234 CAPNP_END_HEADER
235