1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2004, 2006, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include "language/data-io/data-writer.h"
20
21 #include <assert.h>
22 #include <errno.h>
23 #include <stdint.h>
24 #include <stdlib.h>
25 #include <sys/stat.h>
26
27 #include "data/file-name.h"
28 #include "data/make-file.h"
29 #include "language/data-io/file-handle.h"
30 #include "libpspp/assertion.h"
31 #include "libpspp/encoding-guesser.h"
32 #include "libpspp/integer-format.h"
33 #include "libpspp/i18n.h"
34 #include "libpspp/message.h"
35 #include "libpspp/str.h"
36
37 #include "gl/minmax.h"
38 #include "gl/xalloc.h"
39
40 #include "gettext.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) (msgid)
43
44 /* Data file writer. */
45 struct dfm_writer
46 {
47 struct file_handle *fh; /* File handle. */
48 struct fh_lock *lock; /* Exclusive access to file. */
49 FILE *file; /* Associated file. */
50 struct replace_file *rf; /* Atomic file replacement support. */
51 char *encoding; /* Encoding. */
52 enum fh_line_ends line_ends; /* Line ends for text files. */
53
54 int unit; /* Unit width, in bytes. */
55 char cr[MAX_UNIT]; /* \r in encoding, 'unit' bytes long. */
56 char lf[MAX_UNIT]; /* \n in encoding, 'unit' bytes long. */
57 char spaces[32]; /* 32 bytes worth of ' ' in encoding. */
58 };
59
60 /* Opens a file handle for writing as a data file.
61
62 The encoding of the file written is by default that of FH itself. If
63 ENCODING is nonnull, then it overrides the default encoding.
64
65 *However*: ENCODING directly affects only text strings written by the data
66 writer code itself, that is, new-lines in FH_MODE_TEXT and space padding in
67 FH_MODE_FIXED mode. The client must do its own encoding translation for the
68 data that it writes. (This is unavoidable because sometimes the data
69 written includes binary data that reencoding would mangle.) The client can
70 obtain the encoding to re-encode into with dfm_writer_get_encoding(). */
71 struct dfm_writer *
dfm_open_writer(struct file_handle * fh,const char * encoding)72 dfm_open_writer (struct file_handle *fh, const char *encoding)
73 {
74 struct encoding_info ei;
75 struct dfm_writer *w;
76 struct fh_lock *lock;
77 int ofs;
78
79 lock = fh_lock (fh, FH_REF_FILE, N_("data file"), FH_ACC_WRITE, false);
80 if (lock == NULL)
81 return NULL;
82
83 w = fh_lock_get_aux (lock);
84 if (w != NULL)
85 return w;
86
87 encoding = encoding_guess_parse_encoding (encoding != NULL
88 ? encoding
89 : fh_get_encoding (fh));
90 get_encoding_info (&ei, encoding);
91
92 w = xmalloc (sizeof *w);
93 w->fh = fh_ref (fh);
94 w->lock = lock;
95 w->rf = replace_file_start (w->fh, "wb", 0666, &w->file);
96 w->encoding = xstrdup (encoding);
97 w->line_ends = fh_get_line_ends (fh);
98 w->unit = ei.unit;
99 memcpy (w->cr, ei.cr, sizeof w->cr);
100 memcpy (w->lf, ei.lf, sizeof w->lf);
101 for (ofs = 0; ofs + ei.unit <= sizeof w->spaces; ofs += ei.unit)
102 memcpy (&w->spaces[ofs], ei.space, ei.unit);
103
104 if (w->rf == NULL)
105 {
106 msg (ME, _("An error occurred while opening `%s' for writing "
107 "as a data file: %s."),
108 fh_get_file_name (w->fh), strerror (errno));
109 dfm_close_writer (w);
110 return NULL;
111 }
112 fh_lock_set_aux (lock, w);
113
114 return w;
115 }
116
117 /* Returns true if an I/O error occurred on WRITER, false otherwise. */
118 bool
dfm_write_error(const struct dfm_writer * writer)119 dfm_write_error (const struct dfm_writer *writer)
120 {
121 return ferror (writer->file);
122 }
123
124 /* Writes record REC (which need not be null-terminated) having
125 length LEN to the file corresponding to HANDLE. Adds any
126 needed formatting, such as a trailing new-line. Returns true
127 on success, false on failure. */
128 bool
dfm_put_record(struct dfm_writer * w,const char * rec,size_t len)129 dfm_put_record (struct dfm_writer *w, const char *rec, size_t len)
130 {
131 assert (w != NULL);
132
133 if (dfm_write_error (w))
134 return false;
135
136 switch (fh_get_mode (w->fh))
137 {
138 case FH_MODE_TEXT:
139 fwrite (rec, len, 1, w->file);
140 if (w->line_ends == FH_END_CRLF)
141 fwrite (w->cr, w->unit, 1, w->file);
142 fwrite (w->lf, w->unit, 1, w->file);
143 break;
144
145 case FH_MODE_FIXED:
146 {
147 size_t record_width = fh_get_record_width (w->fh);
148 size_t write_bytes = MIN (len, record_width);
149 size_t pad_bytes = record_width - write_bytes;
150 fwrite (rec, write_bytes, 1, w->file);
151 while (pad_bytes > 0)
152 {
153 size_t chunk = MIN (pad_bytes, sizeof w->spaces);
154 fwrite (w->spaces, chunk, 1, w->file);
155 pad_bytes -= chunk;
156 }
157 }
158 break;
159
160 case FH_MODE_VARIABLE:
161 {
162 uint32_t size = len;
163 integer_convert (INTEGER_NATIVE, &size, INTEGER_LSB_FIRST, &size,
164 sizeof size);
165 fwrite (&size, sizeof size, 1, w->file);
166 fwrite (rec, len, 1, w->file);
167 fwrite (&size, sizeof size, 1, w->file);
168 }
169 break;
170
171 case FH_MODE_360_VARIABLE:
172 case FH_MODE_360_SPANNED:
173 {
174 size_t ofs = 0;
175 if (fh_get_mode (w->fh) == FH_MODE_360_VARIABLE)
176 len = MIN (65527, len);
177 while (ofs < len)
178 {
179 size_t chunk = MIN (65527, len - ofs);
180 uint32_t bdw = (chunk + 8) << 16;
181 int scc = (ofs == 0 && chunk == len ? 0
182 : ofs == 0 ? 1
183 : ofs + chunk == len ? 2
184 : 3);
185 uint32_t rdw = ((chunk + 4) << 16) | (scc << 8);
186
187 integer_convert (INTEGER_NATIVE, &bdw, INTEGER_MSB_FIRST, &bdw,
188 sizeof bdw);
189 integer_convert (INTEGER_NATIVE, &rdw, INTEGER_MSB_FIRST, &rdw,
190 sizeof rdw);
191 fwrite (&bdw, 1, sizeof bdw, w->file);
192 fwrite (&rdw, 1, sizeof rdw, w->file);
193 fwrite (rec + ofs, 1, chunk, w->file);
194 ofs += chunk;
195 }
196 }
197 break;
198
199 default:
200 NOT_REACHED ();
201 }
202
203 return !dfm_write_error (w);
204 }
205
206 /* Closes data file writer W. */
207 bool
dfm_close_writer(struct dfm_writer * w)208 dfm_close_writer (struct dfm_writer *w)
209 {
210 bool ok;
211
212 if (w == NULL)
213 return true;
214 if (fh_unlock (w->lock))
215 return true;
216
217 ok = true;
218 if (w->file != NULL)
219 {
220 const char *file_name = fh_get_file_name (w->fh);
221 ok = !dfm_write_error (w) && !fn_close (w->fh, w->file);
222
223 if (!ok)
224 msg (ME, _("I/O error occurred writing data file `%s'."), file_name);
225
226 if (ok ? !replace_file_commit (w->rf) : !replace_file_abort (w->rf))
227 ok = false;
228 }
229 fh_unref (w->fh);
230 free (w->encoding);
231 free (w);
232
233 return ok;
234 }
235
236 /* Returns the encoding of data written to WRITER. */
237 const char *
dfm_writer_get_encoding(const struct dfm_writer * writer)238 dfm_writer_get_encoding (const struct dfm_writer *writer)
239 {
240 return writer->encoding;
241 }
242