1This file is mapfile.def, from which is created mapfile.c.
2It implements the builtin "mapfile" in Bash.
3
4Copyright (C) 2005-2006 Rocky Bernstein for Free Software Foundation, Inc.
5Copyright (C) 2008-2020 Free Software Foundation, Inc.
6
7This file is part of GNU Bash, the Bourne Again SHell.
8
9Bash is free software: you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation, either version 3 of the License, or
12(at your option) any later version.
13
14Bash is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with Bash.  If not, see <http://www.gnu.org/licenses/>.
21
22$PRODUCES mapfile.c
23
24$BUILTIN mapfile
25$FUNCTION mapfile_builtin
26$SHORT_DOC mapfile [-d delim] [-n count] [-O origin] [-s count] [-t] [-u fd] [-C callback] [-c quantum] [array]
27Read lines from the standard input into an indexed array variable.
28
29Read lines from the standard input into the indexed array variable ARRAY, or
30from file descriptor FD if the -u option is supplied.  The variable MAPFILE
31is the default ARRAY.
32
33Options:
34  -d delim	Use DELIM to terminate lines, instead of newline
35  -n count	Copy at most COUNT lines.  If COUNT is 0, all lines are copied
36  -O origin	Begin assigning to ARRAY at index ORIGIN.  The default index is 0
37  -s count	Discard the first COUNT lines read
38  -t	Remove a trailing DELIM from each line read (default newline)
39  -u fd	Read lines from file descriptor FD instead of the standard input
40  -C callback	Evaluate CALLBACK each time QUANTUM lines are read
41  -c quantum	Specify the number of lines read between each call to
42			CALLBACK
43
44Arguments:
45  ARRAY	Array variable name to use for file data
46
47If -C is supplied without -c, the default quantum is 5000.  When
48CALLBACK is evaluated, it is supplied the index of the next array
49element to be assigned and the line to be assigned to that element
50as additional arguments.
51
52If not supplied with an explicit origin, mapfile will clear ARRAY before
53assigning to it.
54
55Exit Status:
56Returns success unless an invalid option is given or ARRAY is readonly or
57not an indexed array.
58$END
59
60$BUILTIN readarray
61$FUNCTION mapfile_builtin
62$SHORT_DOC readarray [-d delim] [-n count] [-O origin] [-s count] [-t] [-u fd] [-C callback] [-c quantum] [array]
63Read lines from a file into an array variable.
64
65A synonym for `mapfile'.
66$END
67
68#include <config.h>
69
70#include "builtins.h"
71#include "posixstat.h"
72
73#if defined (HAVE_UNISTD_H)
74#  include <unistd.h>
75#endif
76
77#include "bashansi.h"
78#include "bashintl.h"
79
80#include <stdio.h>
81#include <errno.h>
82
83#include "../bashintl.h"
84#include "../shell.h"
85#include "common.h"
86#include "bashgetopt.h"
87
88#if !defined (errno)
89extern int errno;
90#endif
91
92#if defined (ARRAY_VARS)
93
/* Forward declaration: run_callback evaluates the -C callback command with
   the next array index and the line about to be stored. */
static int run_callback PARAMS((const char *, unsigned int, const char *));

/* Array assigned to when no array name argument is supplied. */
#define DEFAULT_ARRAY_NAME	"MAPFILE"
#define DEFAULT_VARIABLE_NAME	"MAPLINE"	/* not used right now */

/* The default number of lines `mapfile' reads between calls to the
   callback; used unless overridden with -c. */
#define DEFAULT_QUANTUM 5000

/* Values for FLAGS */
/* MAPF_CLEARARRAY: flush the array before assigning to it.  Set by default
   and cleared when -O is given. */
#define MAPF_CLEARARRAY	0x01
/* MAPF_CHOP: remove a trailing delimiter from each line (set by -t). */
#define MAPF_CHOP	0x02

/* Line delimiter chosen by mapfile_builtin: newline by default, or the first
   character of the -d argument.  Note that do_chop and mapfile each take a
   parameter of the same name, which shadows this variable inside them. */
static int delim;
107
108static int
109run_callback (callback, curindex, curline)
110     const char *callback;
111     unsigned int curindex;
112     const char *curline;
113{
114  unsigned int execlen;
115  char  *execstr, *qline;
116  int flags;
117
118  qline = sh_single_quote (curline);
119  execlen = strlen (callback) + strlen (qline) + 10;
120  /* 1 for each space between %s and %d,
121     another 1 for the last nul char for C string. */
122  execlen += 3;
123  execstr = xmalloc (execlen);
124
125  flags = SEVAL_NOHIST;
126#if 0
127  if (interactive)
128    flags |= SEVAL_INTERACT;
129#endif
130  snprintf (execstr, execlen, "%s %d %s", callback, curindex, qline);
131  free (qline);
132  return evalstring (execstr, NULL, flags);
133}
134
/* Remove a single trailing DELIM from LINE, in place, if LINE ends with one.
   LINE must be a NUL-terminated string; an empty string is left untouched. */
static void
do_chop(line, delim)
     char *line;
     unsigned char delim;
{
  size_t length;

  length = strlen (line);
  /* Compare as unsigned char: where plain char is signed, a delimiter byte
     >= 0x80 stored in LINE would otherwise promote to a negative int and
     never match DELIM. */
  if (length && (unsigned char)line[length-1] == delim)
    line[length-1] = '\0';
}
146
147static int
148mapfile (fd, line_count_goal, origin, nskip, callback_quantum, callback, array_name, delim, flags)
149     int fd;
150     long line_count_goal, origin, nskip, callback_quantum;
151     char *callback, *array_name;
152     int delim;
153     int flags;
154{
155  char *line;
156  size_t line_length;
157  unsigned int array_index, line_count;
158  SHELL_VAR *entry;
159  int unbuffered_read;
160
161  line = NULL;
162  line_length = 0;
163  unbuffered_read = 0;
164
165  /* The following check should be done before reading any lines.  Doing it
166     here allows us to call bind_array_element instead of bind_array_variable
167     and skip the variable lookup on every call. */
168  entry = find_or_make_array_variable (array_name, 1);
169  if (entry == 0 || readonly_p (entry) || noassign_p (entry))
170    {
171      if (entry && readonly_p (entry))
172	err_readonly (array_name);
173
174      return (EXECUTION_FAILURE);
175    }
176  else if (array_p (entry) == 0)
177    {
178      builtin_error (_("%s: not an indexed array"), array_name);
179      return (EXECUTION_FAILURE);
180    }
181  else if (invisible_p (entry))
182    VUNSETATTR (entry, att_invisible);	/* no longer invisible */
183
184  if (flags & MAPF_CLEARARRAY)
185    array_flush (array_cell (entry));
186
187#ifndef __CYGWIN__
188  unbuffered_read = (lseek (fd, 0L, SEEK_CUR) < 0) && (errno == ESPIPE);
189#else
190  unbuffered_read = 1;
191#endif
192
193  if (delim != '\n')
194    unbuffered_read = 1;
195
196  zreset ();
197
198  /* Skip any lines at beginning of file? */
199  for (line_count = 0; line_count < nskip; line_count++)
200    if (zgetline (fd, &line, &line_length, delim, unbuffered_read) < 0)
201      break;
202
203  line = 0;
204  line_length = 0;
205
206  /* Reset the buffer for bash own stream */
207  for (array_index = origin, line_count = 1;
208 	zgetline (fd, &line, &line_length, delim, unbuffered_read) != -1;
209	array_index++)
210    {
211      /* Remove trailing newlines? */
212      if (flags & MAPF_CHOP)
213	do_chop (line, delim);
214
215      /* Has a callback been registered and if so is it time to call it? */
216      if (callback && line_count && (line_count % callback_quantum) == 0)
217	{
218	  run_callback (callback, array_index, line);
219
220	  /* Reset the buffer for bash own stream. */
221	  if (unbuffered_read == 0)
222	    zsyncfd (fd);
223	}
224
225      /* XXX - bad things can happen if the callback modifies ENTRY, e.g.,
226	 unsetting it or changing it to a non-indexed-array type. */
227      bind_array_element (entry, array_index, line, 0);
228
229      /* Have we exceeded # of lines to store? */
230      line_count++;
231      if (line_count_goal != 0 && line_count > line_count_goal)
232	break;
233    }
234
235  free (line);
236
237  if (unbuffered_read == 0)
238    zsyncfd (fd);
239
240  return EXECUTION_SUCCESS;
241}
242
243int
244mapfile_builtin (list)
245     WORD_LIST *list;
246{
247  int opt, code, fd, flags;
248  intmax_t intval;
249  long lines, origin, nskip, callback_quantum;
250  char *array_name, *callback;
251
252  fd = 0;
253  lines = origin = nskip = 0;
254  flags = MAPF_CLEARARRAY;
255  callback_quantum = DEFAULT_QUANTUM;
256  callback = 0;
257  delim = '\n';
258
259  reset_internal_getopt ();
260  while ((opt = internal_getopt (list, "d:u:n:O:tC:c:s:")) != -1)
261    {
262      switch (opt)
263	{
264	case 'd':
265	  delim = *list_optarg;
266	  break;
267	case 'u':
268	  code = legal_number (list_optarg, &intval);
269	  if (code == 0 || intval < 0 || intval != (int)intval)
270	    {
271	      builtin_error (_("%s: invalid file descriptor specification"), list_optarg);
272	      return (EXECUTION_FAILURE);
273	    }
274	  else
275	    fd = intval;
276
277	  if (sh_validfd (fd) == 0)
278	    {
279	      builtin_error (_("%d: invalid file descriptor: %s"), fd, strerror (errno));
280	      return (EXECUTION_FAILURE);
281	    }
282	  break;
283
284	case 'n':
285	  code = legal_number (list_optarg, &intval);
286	  if (code == 0 || intval < 0 || intval != (unsigned)intval)
287	    {
288	      builtin_error (_("%s: invalid line count"), list_optarg);
289	      return (EXECUTION_FAILURE);
290	    }
291	  else
292	    lines = intval;
293	  break;
294
295	case 'O':
296	  code = legal_number (list_optarg, &intval);
297	  if (code == 0 || intval < 0 || intval != (unsigned)intval)
298	    {
299	      builtin_error (_("%s: invalid array origin"), list_optarg);
300	      return (EXECUTION_FAILURE);
301	    }
302	  else
303	    origin = intval;
304	  flags &= ~MAPF_CLEARARRAY;
305	  break;
306	case 't':
307	  flags |= MAPF_CHOP;
308	  break;
309	case 'C':
310	  callback = list_optarg;
311	  break;
312	case 'c':
313	  code = legal_number (list_optarg, &intval);
314	  if (code == 0 || intval <= 0 || intval != (unsigned)intval)
315	    {
316	      builtin_error (_("%s: invalid callback quantum"), list_optarg);
317	      return (EXECUTION_FAILURE);
318	    }
319	  else
320	    callback_quantum = intval;
321	  break;
322	case 's':
323	  code = legal_number (list_optarg, &intval);
324	  if (code == 0 || intval < 0 || intval != (unsigned)intval)
325	    {
326	      builtin_error (_("%s: invalid line count"), list_optarg);
327	      return (EXECUTION_FAILURE);
328	    }
329	  else
330	    nskip = intval;
331	  break;
332	CASE_HELPOPT;
333	default:
334	  builtin_usage ();
335	  return (EX_USAGE);
336	}
337    }
338  list = loptend;
339
340  if (list == 0)
341    array_name = DEFAULT_ARRAY_NAME;
342  else if (list->word == 0 || list->word->word == 0)
343    {
344      builtin_error ("internal error: getting variable name");
345      return (EXECUTION_FAILURE);
346    }
347  else if (list->word->word[0] == '\0')
348    {
349      builtin_error (_("empty array variable name"));
350      return (EX_USAGE);
351    }
352  else
353    array_name = list->word->word;
354
355  if (legal_identifier (array_name) == 0)
356    {
357      sh_invalidid (array_name);
358      return (EXECUTION_FAILURE);
359    }
360
361  return mapfile (fd, lines, origin, nskip, callback_quantum, callback, array_name, delim, flags);
362}
363
364#else
365
366int
367mapfile_builtin (list)
368     WORD_LIST *list;
369{
370  builtin_error (_("array variable support required"));
371  return (EXECUTION_FAILURE);
372}
373
374#endif  /* ARRAY_VARS */
375