1 /*
2  * ports-internal.h - internal-only declarations for ports.
3  *
4  * Copyright (C) 2013 Free Software Foundation, Inc.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public License
8  * as published by the Free Software Foundation; either version 3 of
9  * the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301 USA
20  */
21 
22 #ifndef SCM_PORTS_INTERNAL
23 #define SCM_PORTS_INTERNAL
24 
25 #include <assert.h>
26 #include <iconv.h>
27 
28 #include "libguile/_scm.h"
29 #include "libguile/ports.h"
30 
/* Flag bits describing a port type.  */
typedef enum scm_t_port_type_flags
  {
    /* Set when a port of this type should be closed if it is garbage
       collected while it is still open.  */
    SCM_PORT_TYPE_NEEDS_CLOSE_ON_GC = 0x1
  } scm_t_port_type_flags;
36 
37 /* port-type description.  */
38 struct scm_t_port_type
39 {
40   char *name;
41   int (*print) (SCM exp, SCM port, scm_print_state *pstate);
42 
43   size_t (*c_read) (SCM port, SCM dst, size_t start, size_t count);
44   size_t (*c_write) (SCM port, SCM src, size_t start, size_t count);
45   SCM scm_read;
46   SCM scm_write;
47 
48   int (*read_wait_fd) (SCM port);
49   int (*write_wait_fd) (SCM port);
50 
51   scm_t_off (*seek) (SCM port, scm_t_off OFFSET, int WHENCE);
52   void (*close) (SCM port);
53 
54   void (*get_natural_buffer_sizes) (SCM port, size_t *read_size,
55                                     size_t *write_size);
56   int (*random_access_p) (SCM port);
57 
58   int (*input_waiting) (SCM port);
59 
60   void (*truncate) (SCM port, scm_t_off length);
61 
62   unsigned flags;
63 
64   /* GOOPS tomfoolery.  */
65   SCM input_class, output_class, input_output_class;
66 };
67 
68 /* Port buffers.
69 
   It's important to avoid calling into the kernel too many times.  For
   that reason we buffer the input and output, using "port buffer"
   objects.  Port buffers are represented as vectors containing the
   buffer, two cursors, an EOF flag, and the port's position object.
   The bytes in a read buffer are laid out like this:
75 
76                     |already read | not yet | invalid
77                     |    data     |  read   |  data
78       readbuf: #vu8(|r r r r r r r|u u u u u|x x x x x|)
79                ^buf               ^cur      ^end      ^size(buf)
80 
81    Similarly for a write buffer:
82 
83                      |already written | not yet | invalid
84                      |    data        | written |  data
85       writebuf: #vu8(|w w w w w w w w |u u u u u|x x x x x|)
86                 ^buf                  ^cur      ^end      ^size(buf)
87 
88    We use the same port buffer data structure for both purposes.  Port
89    buffers are implemented as their own object so that they can be
90    atomically swapped in or out of ports, and as Scheme vectors so they
91    can be manipulated from Scheme.  */
92 
/* Indices of the slots in the vector that represents a port buffer.  */
enum scm_port_buffer_field
  {
    /* The bytevector holding the buffered bytes.  */
    SCM_PORT_BUFFER_FIELD_BYTEVECTOR = 0,
    /* The `cur' cursor.  */
    SCM_PORT_BUFFER_FIELD_CUR = 1,
    /* The `end' cursor.  */
    SCM_PORT_BUFFER_FIELD_END = 2,
    /* The has-EOF flag.  */
    SCM_PORT_BUFFER_FIELD_HAS_EOF_P = 3,
    /* The port position object.  */
    SCM_PORT_BUFFER_FIELD_POSITION = 4,
    /* Not a slot: the number of slots listed above.  */
    SCM_PORT_BUFFER_FIELD_COUNT = 5
  };
101 
102 /* The port buffers are exposed to Scheme, which can mutate their
103    fields.  We have to do dynamic checks to ensure that
104    potentially-malicious Scheme doesn't invalidate our invariants.
105    However these dynamic checks are slow, so we need to avoid them where
106    they are unnecessary.  An unnecessary check is a check which has
107    already been performed, or one which would already be performed by
108    the time that memory is accessed.  Given that the "can_take",
109    "can_put", or "can_putback" functions are eventually called before
   any access to the buffer, we hoist the necessary type checks into
   the can_foo and size functions, and otherwise assume that the cur and
   end values are inums within the right ranges.  */
113 
114 static inline SCM
scm_port_buffer_bytevector(SCM buf)115 scm_port_buffer_bytevector (SCM buf)
116 {
117   return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_BYTEVECTOR);
118 }
119 
120 static inline SCM
scm_port_buffer_cur(SCM buf)121 scm_port_buffer_cur (SCM buf)
122 {
123   return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_CUR);
124 }
125 
126 static inline void
scm_port_buffer_set_cur(SCM buf,SCM cur)127 scm_port_buffer_set_cur (SCM buf, SCM cur)
128 {
129   SCM_SIMPLE_VECTOR_SET (buf, SCM_PORT_BUFFER_FIELD_CUR, cur);
130 }
131 
132 static inline SCM
scm_port_buffer_end(SCM buf)133 scm_port_buffer_end (SCM buf)
134 {
135   return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_END);
136 }
137 
138 static inline void
scm_port_buffer_set_end(SCM buf,SCM end)139 scm_port_buffer_set_end (SCM buf, SCM end)
140 {
141   SCM_SIMPLE_VECTOR_SET (buf, SCM_PORT_BUFFER_FIELD_END, end);
142 }
143 
144 static inline SCM
scm_port_buffer_has_eof_p(SCM buf)145 scm_port_buffer_has_eof_p (SCM buf)
146 {
147   return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_HAS_EOF_P);
148 }
149 
150 static inline void
scm_port_buffer_set_has_eof_p(SCM buf,SCM has_eof_p)151 scm_port_buffer_set_has_eof_p (SCM buf, SCM has_eof_p)
152 {
153   SCM_SIMPLE_VECTOR_SET (buf, SCM_PORT_BUFFER_FIELD_HAS_EOF_P,
154                          has_eof_p);
155 }
156 
157 /* The port position object is a pair that is referenced by the port.
158    To make things easier for Scheme port code, it is also referenced by
159    port buffers.  */
160 static inline SCM
scm_port_buffer_position(SCM buf)161 scm_port_buffer_position (SCM buf)
162 {
163   return SCM_SIMPLE_VECTOR_REF (buf, SCM_PORT_BUFFER_FIELD_POSITION);
164 }
165 
166 static inline SCM
scm_port_position_line(SCM position)167 scm_port_position_line (SCM position)
168 {
169   return scm_car (position);
170 }
171 
172 static inline void
scm_port_position_set_line(SCM position,SCM line)173 scm_port_position_set_line (SCM position, SCM line)
174 {
175   scm_set_car_x (position, line);
176 }
177 
178 static inline SCM
scm_port_position_column(SCM position)179 scm_port_position_column (SCM position)
180 {
181   return scm_cdr (position);
182 }
183 
184 static inline void
scm_port_position_set_column(SCM position,SCM column)185 scm_port_position_set_column (SCM position, SCM column)
186 {
187   scm_set_cdr_x (position, column);
188 }
189 
190 static inline size_t
scm_port_buffer_size(SCM buf)191 scm_port_buffer_size (SCM buf)
192 {
193   SCM bv = scm_port_buffer_bytevector (buf);
194   if (SCM_LIKELY (SCM_BYTEVECTOR_P (bv)))
195     return SCM_BYTEVECTOR_LENGTH (bv);
196   scm_misc_error (NULL, "invalid port buffer ~a", scm_list_1 (bv));
197   return -1;
198 }
199 
200 static inline void
scm_port_buffer_reset(SCM buf)201 scm_port_buffer_reset (SCM buf)
202 {
203   scm_port_buffer_set_cur (buf, SCM_INUM0);
204   scm_port_buffer_set_end (buf, SCM_INUM0);
205 }
206 
207 static inline void
scm_port_buffer_reset_end(SCM buf)208 scm_port_buffer_reset_end (SCM buf)
209 {
210   scm_port_buffer_set_cur (buf, scm_from_size_t (scm_port_buffer_size (buf)));
211   scm_port_buffer_set_end (buf, scm_from_size_t (scm_port_buffer_size (buf)));
212 }
213 
214 static inline size_t
scm_port_buffer_can_take(SCM buf,size_t * cur_out)215 scm_port_buffer_can_take (SCM buf, size_t *cur_out)
216 {
217   size_t cur, end;
218   cur = scm_to_size_t (scm_port_buffer_cur (buf));
219   end = scm_to_size_t (scm_port_buffer_end (buf));
220   if (end > scm_port_buffer_size (buf))
221     scm_misc_error (NULL, "invalid port buffer ~a", scm_list_1 (buf));
222   /* If something races and we end up with end < cur, signal the caller
223      to do a fill_input and centralize there.  */
224   *cur_out = cur;
225   return end < cur ? 0 : end - cur;
226 }
227 
228 static inline size_t
scm_port_buffer_can_put(SCM buf,size_t * end_out)229 scm_port_buffer_can_put (SCM buf, size_t *end_out)
230 {
231   size_t end = scm_to_size_t (scm_port_buffer_end (buf));
232   if (end > scm_port_buffer_size (buf))
233     scm_misc_error (NULL, "invalid port buffer ~a", scm_list_1 (buf));
234   *end_out = end;
235   return scm_port_buffer_size (buf) - end;
236 }
237 
238 static inline size_t
scm_port_buffer_can_putback(SCM buf)239 scm_port_buffer_can_putback (SCM buf)
240 {
241   size_t cur = scm_to_size_t (scm_port_buffer_cur (buf));
242   if (cur > scm_port_buffer_size (buf))
243     scm_misc_error (NULL, "invalid port buffer ~a", scm_list_1 (buf));
244   return cur;
245 }
246 
247 static inline void
scm_port_buffer_did_take(SCM buf,size_t prev_cur,size_t count)248 scm_port_buffer_did_take (SCM buf, size_t prev_cur, size_t count)
249 {
250   scm_port_buffer_set_cur (buf, SCM_I_MAKINUM (prev_cur + count));
251 }
252 
253 static inline void
scm_port_buffer_did_put(SCM buf,size_t prev_end,size_t count)254 scm_port_buffer_did_put (SCM buf, size_t prev_end, size_t count)
255 {
256   scm_port_buffer_set_end (buf, SCM_I_MAKINUM (prev_end + count));
257 }
258 
259 static inline const scm_t_uint8 *
scm_port_buffer_take_pointer(SCM buf,size_t cur)260 scm_port_buffer_take_pointer (SCM buf, size_t cur)
261 {
262   signed char *ret = SCM_BYTEVECTOR_CONTENTS (scm_port_buffer_bytevector (buf));
263   return ((scm_t_uint8 *) ret) + cur;
264 }
265 
266 static inline scm_t_uint8 *
scm_port_buffer_put_pointer(SCM buf,size_t end)267 scm_port_buffer_put_pointer (SCM buf, size_t end)
268 {
269   signed char *ret = SCM_BYTEVECTOR_CONTENTS (scm_port_buffer_bytevector (buf));
270   return ((scm_t_uint8 *) ret) + end;
271 }
272 
273 static inline size_t
scm_port_buffer_take(SCM buf,scm_t_uint8 * dst,size_t count,size_t cur,size_t avail)274 scm_port_buffer_take (SCM buf, scm_t_uint8 *dst, size_t count,
275                       size_t cur, size_t avail)
276 {
277   if (avail < count)
278     count = avail;
279   if (dst)
280     memcpy (dst, scm_port_buffer_take_pointer (buf, cur), count);
281   scm_port_buffer_did_take (buf, cur, count);
282   return count;
283 }
284 
285 static inline size_t
scm_port_buffer_put(SCM buf,const scm_t_uint8 * src,size_t count,size_t end,size_t avail)286 scm_port_buffer_put (SCM buf, const scm_t_uint8 *src, size_t count,
287                      size_t end, size_t avail)
288 {
289   if (avail < count)
290     count = avail;
291   if (src)
292     memcpy (scm_port_buffer_put_pointer (buf, end), src, count);
293   scm_port_buffer_did_put (buf, end, count);
294   return count;
295 }
296 
297 static inline void
scm_port_buffer_putback(SCM buf,const scm_t_uint8 * src,size_t count,size_t cur)298 scm_port_buffer_putback (SCM buf, const scm_t_uint8 *src, size_t count,
299                          size_t cur)
300 {
301   assert (count <= cur);
302 
303   /* Sometimes used to move around data within a buffer, so we must use
304      memmove.  */
305   cur -= count;
306   scm_port_buffer_set_cur (buf, scm_from_size_t (cur));
307   memmove (SCM_BYTEVECTOR_CONTENTS (scm_port_buffer_bytevector (buf)) + cur,
308            src, count);
309 }
310 
311 struct scm_t_port
312 {
313   /* Source location information.  */
314   SCM file_name;
315   SCM position;
316 
317   /* Port buffers.  */
318   SCM read_buf;
319   SCM write_buf;
320   SCM write_buf_aux;
321 
322   /* All ports have read and write buffers; an unbuffered port simply
323      has a one-byte buffer.  However unreading bytes can expand the read
324      buffer, but that doesn't mean that we want to increase the input
325      buffering.  For that reason `read_buffering' is a separate
326      indication of how many characters to buffer on the read side.
327      There isn't a write_buf_size because there isn't an
328      `unwrite-byte'.  */
329   size_t read_buffering;
330 
331   /* Reads and writes can proceed concurrently, but we don't want to
332      start any read or write after close() has been called.  So we have
333      a refcount which is positive if close has not yet been called.
334      Reading, writing, and the like temporarily increments this
335      refcount, provided it was nonzero to start with.  */
336   scm_t_uint32 refcount;
337 
338   /* True if the port is random access.  Implies that the buffers must
339      be flushed before switching between reading and writing, seeking,
340      and so on.  */
341   scm_t_uint32 rw_random : 1;
342   scm_t_uint32 at_stream_start_for_bom_read  : 1;
343   scm_t_uint32 at_stream_start_for_bom_write : 1;
344 
345   /* Character encoding support.  */
346   SCM encoding;  /* A symbol of upper-case ASCII.  */
347   SCM conversion_strategy; /* A symbol; either substitute, error, or escape.  */
348 
349   /* This is the same as pt->encoding, except if `encoding' is UTF-16 or
350      UTF-32, in which case this is UTF-16LE or a similar
351      byte-order-specialed version of UTF-16 or UTF-32.  This is a
352      separate field from `encoding' because being just plain UTF-16 or
353      UTF-32 has an additional meaning, being that we should consume and
354      produce byte order marker codepoints as appropriate.  Set to #f
355      before the iconv descriptors have been opened.  */
356   SCM precise_encoding;  /* with iconv_lock */
357   iconv_t input_cd;      /* with iconv_lock */
358   iconv_t output_cd;     /* with iconv_lock */
359 
360   /* Port properties.  */
361   SCM alist;
362 };
363 
/* The Unicode byte-order mark codepoint.  */
#define SCM_UNICODE_BOM 0xFEFFUL

/* Read, respectively assign, the `file_name' field of the port X.  */
#define SCM_FILENAME(x) (SCM_PORT (x)->file_name)
#define SCM_SET_FILENAME(x, n) (SCM_PORT (x)->file_name = (n))
368 
369 SCM_INTERNAL void scm_port_acquire_iconv_descriptors (SCM port,
370                                                       iconv_t *input_cd,
371                                                       iconv_t *output_cd);
372 SCM_INTERNAL void scm_port_release_iconv_descriptors (SCM port);
373 
374 #endif
375