1 /*
2 * ports-internal.h - internal-only declarations for ports.
3 *
4 * Copyright (C) 2013 Free Software Foundation, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public License
8 * as published by the Free Software Foundation; either version 3 of
9 * the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 * 02110-1301 USA
20 */
21
22 #ifndef SCM_PORTS_INTERNAL
23 #define SCM_PORTS_INTERNAL
24
25 #include <assert.h>
26 #include <iconv.h>
27
28 #include "libguile/_scm.h"
29 #include "libguile/ports.h"
30
/* Flag bits describing a port type.  Each enumerator occupies its own
   bit so that several flags can be combined in one integer.  */
typedef enum scm_t_port_type_flags {
  /* Indicates that the port should be closed if it is garbage collected
     while it is open.  */
  SCM_PORT_TYPE_NEEDS_CLOSE_ON_GC = 1 << 0
} scm_t_port_type_flags;
36
37 /* port-type description. */
38 struct scm_t_port_type
39 {
40 char *name;
41 int (*print) (SCM exp, SCM port, scm_print_state *pstate);
42
43 size_t (*c_read) (SCM port, SCM dst, size_t start, size_t count);
44 size_t (*c_write) (SCM port, SCM src, size_t start, size_t count);
45 SCM scm_read;
46 SCM scm_write;
47
48 int (*read_wait_fd) (SCM port);
49 int (*write_wait_fd) (SCM port);
50
51 scm_t_off (*seek) (SCM port, scm_t_off OFFSET, int WHENCE);
52 void (*close) (SCM port);
53
54 void (*get_natural_buffer_sizes) (SCM port, size_t *read_size,
55 size_t *write_size);
56 int (*random_access_p) (SCM port);
57
58 int (*input_waiting) (SCM port);
59
60 void (*truncate) (SCM port, scm_t_off length);
61
62 unsigned flags;
63
64 /* GOOPS tomfoolery. */
65 SCM input_class, output_class, input_output_class;
66 };
67
/* Port buffers.

   It's important to avoid calling into the kernel too many times.  For
   that reason we buffer the input and output, using "port buffer"
   objects.  Port buffers are represented as vectors containing the
   buffer (a bytevector), two cursors, a flag, and a reference to the
   port position object.  The bytes in a read buffer are laid out like
   this:

                    |already read | not yet | invalid
                    |    data     |  read   |  data
      readbuf: #vu8(|r r r r r r r|u u u u u|x x x x x|)
                    ^buf          ^cur      ^end      ^size(buf)

   Similarly for a write buffer:

                     |already written | not yet | invalid
                     |    data        | written |  data
      writebuf: #vu8(|w w w w w w w w |u u u u u|x x x x x|)
                     ^buf             ^cur      ^end      ^size(buf)

   We use the same port buffer data structure for both purposes.  Port
   buffers are implemented as their own object so that they can be
   atomically swapped in or out of ports, and as Scheme vectors so they
   can be manipulated from Scheme.  */
92
/* Indices of the slots of a port buffer vector.  */
enum scm_port_buffer_field {
  /* The bytevector holding the buffered bytes.  */
  SCM_PORT_BUFFER_FIELD_BYTEVECTOR,
  /* The `cur' cursor.  */
  SCM_PORT_BUFFER_FIELD_CUR,
  /* The `end' cursor.  */
  SCM_PORT_BUFFER_FIELD_END,
  /* The has-eof? flag.  */
  SCM_PORT_BUFFER_FIELD_HAS_EOF_P,
  /* The port position object.  */
  SCM_PORT_BUFFER_FIELD_POSITION,
  /* Not a slot: one past the last slot index, i.e. the number of
     slots.  */
  SCM_PORT_BUFFER_FIELD_COUNT
};
101
/* The port buffers are exposed to Scheme, which can mutate their
   fields.  We have to do dynamic checks to ensure that
   potentially-malicious Scheme doesn't invalidate our invariants.
   However these dynamic checks are slow, so we need to avoid them where
   they are unnecessary.  An unnecessary check is a check which has
   already been performed, or one which would already be performed by
   the time that memory is accessed.  Given that the "can_take",
   "can_put", or "can_putback" functions are eventually called before
   any access to the buffer, we hoist the necessary type checks into the
   can_foo and size functions, and otherwise assume that the cur and end
   values are inums within the right ranges.  */
113
114 static inline SCM
scm_port_buffer_bytevector(SCM buf)115 scm_port_buffer_bytevector (SCM buf)
116 {
117 return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_BYTEVECTOR);
118 }
119
120 static inline SCM
scm_port_buffer_cur(SCM buf)121 scm_port_buffer_cur (SCM buf)
122 {
123 return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_CUR);
124 }
125
126 static inline void
scm_port_buffer_set_cur(SCM buf,SCM cur)127 scm_port_buffer_set_cur (SCM buf, SCM cur)
128 {
129 SCM_SIMPLE_VECTOR_SET (buf, SCM_PORT_BUFFER_FIELD_CUR, cur);
130 }
131
132 static inline SCM
scm_port_buffer_end(SCM buf)133 scm_port_buffer_end (SCM buf)
134 {
135 return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_END);
136 }
137
138 static inline void
scm_port_buffer_set_end(SCM buf,SCM end)139 scm_port_buffer_set_end (SCM buf, SCM end)
140 {
141 SCM_SIMPLE_VECTOR_SET (buf, SCM_PORT_BUFFER_FIELD_END, end);
142 }
143
144 static inline SCM
scm_port_buffer_has_eof_p(SCM buf)145 scm_port_buffer_has_eof_p (SCM buf)
146 {
147 return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_HAS_EOF_P);
148 }
149
150 static inline void
scm_port_buffer_set_has_eof_p(SCM buf,SCM has_eof_p)151 scm_port_buffer_set_has_eof_p (SCM buf, SCM has_eof_p)
152 {
153 SCM_SIMPLE_VECTOR_SET (buf, SCM_PORT_BUFFER_FIELD_HAS_EOF_P,
154 has_eof_p);
155 }
156
157 /* The port position object is a pair that is referenced by the port.
158 To make things easier for Scheme port code, it is also referenced by
159 port buffers. */
160 static inline SCM
scm_port_buffer_position(SCM buf)161 scm_port_buffer_position (SCM buf)
162 {
163 return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_POSITION);
164 }
165
166 static inline SCM
scm_port_position_line(SCM position)167 scm_port_position_line (SCM position)
168 {
169 return scm_car (position);
170 }
171
172 static inline void
scm_port_position_set_line(SCM position,SCM line)173 scm_port_position_set_line (SCM position, SCM line)
174 {
175 scm_set_car_x (position, line);
176 }
177
178 static inline SCM
scm_port_position_column(SCM position)179 scm_port_position_column (SCM position)
180 {
181 return scm_cdr (position);
182 }
183
184 static inline void
scm_port_position_set_column(SCM position,SCM column)185 scm_port_position_set_column (SCM position, SCM column)
186 {
187 scm_set_cdr_x (position, column);
188 }
189
190 static inline size_t
scm_port_buffer_size(SCM buf)191 scm_port_buffer_size (SCM buf)
192 {
193 SCM bv = scm_port_buffer_bytevector (buf);
194 if (SCM_LIKELY (SCM_BYTEVECTOR_P (bv)))
195 return SCM_BYTEVECTOR_LENGTH (bv);
196 scm_misc_error (NULL, "invalid port buffer ~a", scm_list_1 (bv));
197 return -1;
198 }
199
200 static inline void
scm_port_buffer_reset(SCM buf)201 scm_port_buffer_reset (SCM buf)
202 {
203 scm_port_buffer_set_cur (buf, SCM_INUM0);
204 scm_port_buffer_set_end (buf, SCM_INUM0);
205 }
206
207 static inline void
scm_port_buffer_reset_end(SCM buf)208 scm_port_buffer_reset_end (SCM buf)
209 {
210 scm_port_buffer_set_cur (buf, scm_from_size_t (scm_port_buffer_size (buf)));
211 scm_port_buffer_set_end (buf, scm_from_size_t (scm_port_buffer_size (buf)));
212 }
213
214 static inline size_t
scm_port_buffer_can_take(SCM buf,size_t * cur_out)215 scm_port_buffer_can_take (SCM buf, size_t *cur_out)
216 {
217 size_t cur, end;
218 cur = scm_to_size_t (scm_port_buffer_cur (buf));
219 end = scm_to_size_t (scm_port_buffer_end (buf));
220 if (end > scm_port_buffer_size (buf))
221 scm_misc_error (NULL, "invalid port buffer ~a", scm_list_1 (buf));
222 /* If something races and we end up with end < cur, signal the caller
223 to do a fill_input and centralize there. */
224 *cur_out = cur;
225 return end < cur ? 0 : end - cur;
226 }
227
228 static inline size_t
scm_port_buffer_can_put(SCM buf,size_t * end_out)229 scm_port_buffer_can_put (SCM buf, size_t *end_out)
230 {
231 size_t end = scm_to_size_t (scm_port_buffer_end (buf));
232 if (end > scm_port_buffer_size (buf))
233 scm_misc_error (NULL, "invalid port buffer ~a", scm_list_1 (buf));
234 *end_out = end;
235 return scm_port_buffer_size (buf) - end;
236 }
237
238 static inline size_t
scm_port_buffer_can_putback(SCM buf)239 scm_port_buffer_can_putback (SCM buf)
240 {
241 size_t cur = scm_to_size_t (scm_port_buffer_cur (buf));
242 if (cur > scm_port_buffer_size (buf))
243 scm_misc_error (NULL, "invalid port buffer ~a", scm_list_1 (buf));
244 return cur;
245 }
246
247 static inline void
scm_port_buffer_did_take(SCM buf,size_t prev_cur,size_t count)248 scm_port_buffer_did_take (SCM buf, size_t prev_cur, size_t count)
249 {
250 scm_port_buffer_set_cur (buf, SCM_I_MAKINUM (prev_cur + count));
251 }
252
253 static inline void
scm_port_buffer_did_put(SCM buf,size_t prev_end,size_t count)254 scm_port_buffer_did_put (SCM buf, size_t prev_end, size_t count)
255 {
256 scm_port_buffer_set_end (buf, SCM_I_MAKINUM (prev_end + count));
257 }
258
259 static inline const scm_t_uint8 *
scm_port_buffer_take_pointer(SCM buf,size_t cur)260 scm_port_buffer_take_pointer (SCM buf, size_t cur)
261 {
262 signed char *ret = SCM_BYTEVECTOR_CONTENTS (scm_port_buffer_bytevector (buf));
263 return ((scm_t_uint8 *) ret) + cur;
264 }
265
266 static inline scm_t_uint8 *
scm_port_buffer_put_pointer(SCM buf,size_t end)267 scm_port_buffer_put_pointer (SCM buf, size_t end)
268 {
269 signed char *ret = SCM_BYTEVECTOR_CONTENTS (scm_port_buffer_bytevector (buf));
270 return ((scm_t_uint8 *) ret) + end;
271 }
272
273 static inline size_t
scm_port_buffer_take(SCM buf,scm_t_uint8 * dst,size_t count,size_t cur,size_t avail)274 scm_port_buffer_take (SCM buf, scm_t_uint8 *dst, size_t count,
275 size_t cur, size_t avail)
276 {
277 if (avail < count)
278 count = avail;
279 if (dst)
280 memcpy (dst, scm_port_buffer_take_pointer (buf, cur), count);
281 scm_port_buffer_did_take (buf, cur, count);
282 return count;
283 }
284
285 static inline size_t
scm_port_buffer_put(SCM buf,const scm_t_uint8 * src,size_t count,size_t end,size_t avail)286 scm_port_buffer_put (SCM buf, const scm_t_uint8 *src, size_t count,
287 size_t end, size_t avail)
288 {
289 if (avail < count)
290 count = avail;
291 if (src)
292 memcpy (scm_port_buffer_put_pointer (buf, end), src, count);
293 scm_port_buffer_did_put (buf, end, count);
294 return count;
295 }
296
297 static inline void
scm_port_buffer_putback(SCM buf,const scm_t_uint8 * src,size_t count,size_t cur)298 scm_port_buffer_putback (SCM buf, const scm_t_uint8 *src, size_t count,
299 size_t cur)
300 {
301 assert (count <= cur);
302
303 /* Sometimes used to move around data within a buffer, so we must use
304 memmove. */
305 cur -= count;
306 scm_port_buffer_set_cur (buf, scm_from_size_t (cur));
307 memmove (SCM_BYTEVECTOR_CONTENTS (scm_port_buffer_bytevector (buf)) + cur,
308 src, count);
309 }
310
/* Per-port state: source location, buffers, buffering and lifetime
   bookkeeping, character encoding, and port properties.  */
struct scm_t_port
{
  /* Source location information.  */
  SCM file_name;
  SCM position;

  /* Port buffers.  */
  SCM read_buf;
  SCM write_buf;
  SCM write_buf_aux;

  /* All ports have read and write buffers; an unbuffered port simply
     has a one-byte buffer.  However unreading bytes can expand the read
     buffer, but that doesn't mean that we want to increase the input
     buffering.  For that reason `read_buffering' is a separate
     indication of how many characters to buffer on the read side.
     There isn't a write_buf_size because there isn't an
     `unwrite-byte'.  */
  size_t read_buffering;

  /* Reads and writes can proceed concurrently, but we don't want to
     start any read or write after close() has been called.  So we have
     a refcount which is positive if close has not yet been called.
     Reading, writing, and the like temporarily increments this
     refcount, provided it was nonzero to start with.  */
  scm_t_uint32 refcount;

  /* True if the port is random access.  Implies that the buffers must
     be flushed before switching between reading and writing, seeking,
     and so on.  */
  scm_t_uint32 rw_random : 1;
  scm_t_uint32 at_stream_start_for_bom_read  : 1;
  scm_t_uint32 at_stream_start_for_bom_write : 1;

  /* Character encoding support.  */
  SCM encoding;  /* A symbol of upper-case ASCII.  */
  SCM conversion_strategy; /* A symbol; either substitute, error, or escape.  */

  /* This is the same as pt->encoding, except if `encoding' is UTF-16 or
     UTF-32, in which case this is UTF-16LE or a similar
     byte-order-specialized version of UTF-16 or UTF-32.  This is a
     separate field from `encoding' because being just plain UTF-16 or
     UTF-32 has an additional meaning, being that we should consume and
     produce byte order marker codepoints as appropriate.  Set to #f
     before the iconv descriptors have been opened.  */
  SCM precise_encoding;         /* with iconv_lock */
  iconv_t input_cd;             /* with iconv_lock */
  iconv_t output_cd;            /* with iconv_lock */

  /* Port properties.  */
  SCM alist;
};
363
#define SCM_UNICODE_BOM  0xFEFFUL  /* Unicode byte-order mark */

/* Read, respectively assign, the `file_name' member of the port
   structure that SCM_PORT yields for X.  SCM_SET_FILENAME evaluates to
   the assigned value N.  */
#define SCM_FILENAME(x)           (SCM_PORT (x)->file_name)
#define SCM_SET_FILENAME(x, n)    (SCM_PORT (x)->file_name = (n))
368
/* Library-internal declarations; the definitions are not part of this
   header.
   NOTE(review): judging by the names and signatures, the first one
   presumably hands PORT's iconv descriptors back through INPUT_CD and
   OUTPUT_CD, and the second one releases them again -- confirm against
   the definitions, including whether either pointer may be null.  */
SCM_INTERNAL void scm_port_acquire_iconv_descriptors (SCM port,
                                                      iconv_t *input_cd,
                                                      iconv_t *output_cd);
SCM_INTERNAL void scm_port_release_iconv_descriptors (SCM port);
373
374 #endif
375