1 // Copyright 2020 Michael Reilly (mreilly@resiliware.com).
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions
5 // are met:
6 // 1. Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // 2. Redistributions in binary form must reproduce the above copyright
9 // notice, this list of conditions and the following disclaimer in the
10 // documentation and/or other materials provided with the distribution.
11 // 3. Neither the names of the copyright holders nor the names of the
12 // contributors may be used to endorse or promote products derived from
13 // this software without specific prior written permission.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 // ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
18 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
19 // OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #define SRCNAME "hexpeek_files.c"
28
29 #include <hexpeek.h>
30
31 #include <stdlib.h>
32 #include <libgen.h>
33 #include <unistd.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <fcntl.h>
37 #include <string.h>
38 #include <errno.h>
39
whichfile(int fd)40 static int whichfile(int fd)
41 {
42 if(fd < 0)
43 return -1;
44 for(int fi = 0; fi < MAX_INFILES; fi++)
45 {
46 if(fd == Params.infiles[fi].fd)
47 return fi;
48 }
49 return -1;
50 }
51
fdname(int fd)52 char const *fdname(int fd)
53 {
54 if(fd >= 0)
55 {
56 for(int fi = 0; fi < MAX_INFILES; fi++)
57 {
58 if(Params.infiles[fi].fd >= 0)
59 {
60 if(fd == Params.infiles[fi].fd)
61 return DT_NAME(fi);
62 for(int bidx = 0; bidx < BACKUP_FILE_COUNT; bidx++)
63 {
64 if(fd == Params.infiles[fi].bk_fds[bidx])
65 return BK_NAME(fi, bidx);
66 }
67 }
68 }
69 }
70 return "";
71 }
72
isBackupFile(int fd)73 static int isBackupFile(int fd)
74 {
75 for(int fi = 0; fi < MAX_INFILES; fi++)
76 {
77 if(Params.infiles[fi].fd >= 0)
78 {
79 for(int bidx = 0; bidx < BACKUP_FILE_COUNT; bidx++)
80 {
81 if(fd == Params.infiles[fi].bk_fds[bidx])
82 {
83 return 1;
84 }
85 }
86 }
87 }
88 return 0;
89 }
90
hexpeek_open(char const * path,int flags,mode_t mode,int * fd)91 rc_t hexpeek_open(char const *path, int flags, mode_t mode, int *fd)
92 {
93 int tmpfd = open(path, flags, mode);
94 if(tmpfd < 0)
95 {
96 prerr("error opening path \"%s\": %s\n", cleanstring(path),
97 strerror(errno));
98 return RC_CRIT;
99 }
100 *fd = tmpfd;
101 return RC_OK;
102 }
103
104 #define _hexpeek_seek lseek
105
hexpeek_seek(int fd,hoff_t offset,int whence)106 hoff_t hexpeek_seek(int fd, hoff_t offset, int whence)
107 {
108 errno = 0;
109 hoff_t result = _hexpeek_seek(fd, offset, whence);
110 if(result < 0) switch(errno)
111 {
112 case EINVAL:
113 {
114 prerr("invalid file offset\n");
115 break;
116 }
117 case ESPIPE:
118 {
119 // Try a forward-only seek for non-seekable files
120 int wf = whichfile(fd);
121 if(wf >= 0 && whence == SEEK_SET && Params.infiles[wf].track <= offset)
122 {
123 uint8_t discard[PAGESZ];
124 while(Params.infiles[wf].track < offset)
125 {
126 ssize_t lcl_rd = read(fd,
127 discard,
128 min(sizeof discard,
129 offset - Params.infiles[wf].track));
130 if(lcl_rd <= 0)
131 goto fallthrough;
132 Params.infiles[wf].track += (hoff_t)lcl_rd;
133 }
134 result = Params.infiles[wf].track;
135 break;
136 }
137 }
138 default:
139 {
140 fallthrough:
141 prerr("error seeking in %s: %s\n", fdname(fd), strerror(errno));
142 break;
143 }
144 }
145 return result;
146 }
147
// Report whether the data descriptor of the given input file supports
// seeking, determined by querying its current offset.
bool isseekable(int file_index)
{
    if(_hexpeek_seek(DT_FD(file_index), 0, SEEK_CUR) < 0)
        return false;
    return true;
}
152
seekto(int fd,hoff_t offset)153 rc_t seekto(int fd, hoff_t offset)
154 {
155 // No current code path has offset < 0 here, but if it happens, fix it
156 // rather than failing.
157 int whence = (offset < 0 ? SEEK_END : SEEK_SET);
158 if(hexpeek_seek(fd, offset, whence) >= 0)
159 return RC_OK;
160 else if(errno == EINVAL)
161 return RC_USER;
162 else
163 return RC_CRIT;
164 }
165
readfull(int fd,void * buf,size_t count)166 ssize_t readfull(int fd, void *buf, size_t count)
167 {
168 int wf = whichfile(fd);
169 ssize_t result = -1;
170 size_t octets_read = 0;
171
172 while(octets_read < count)
173 {
174 ssize_t lcl_rd = read(fd,
175 buf + octets_read,
176 count - octets_read);
177 if(lcl_rd < 0)
178 goto end;
179 if(lcl_rd == 0)
180 break;
181 octets_read += lcl_rd;
182 }
183
184 result = (ssize_t)octets_read;
185
186 end:
187 if(wf >= 0)
188 {
189 if(Params.infiles[wf].track > HOFF_MAX - octets_read)
190 Params.infiles[wf].track = HOFF_MAX;
191 else
192 Params.infiles[wf].track += (hoff_t)octets_read;
193 }
194 return result;
195 }
196
// Read up to count octets from fd into buf via readfull().
// Returns the number of octets read, 0 when count is 0, or -1 when count is
// negative, does not fit in size_t, or the read fails (in which case a
// diagnostic is printed).
hoff_t hexpeek_read(int fd, void *buf, hoff_t count)
{
    if(count == 0)
        return 0;
    if(count < 0 || (uintmax_t)count > (uintmax_t)SIZE_MAX)
        return (hoff_t)-1;

    const ssize_t got = readfull(fd, buf, (size_t)count);
    if(got < 0)
        prerr("error reading from %s: %s\n", fdname(fd), strerror(errno));

    return (hoff_t)got;
}
208
// Like hexpeek_read(), but a short read is also a failure: if fewer than
// count octets were available, the end-of-file message is printed and -1 is
// returned. Returns count on success.
static hoff_t readstrict(int fd, void *buf, hoff_t count)
{
    const hoff_t got = hexpeek_read(fd, buf, count);

    // Errors pass through unchanged; full reads are returned as-is.
    if(got < 0 || got == count)
        return got;

    prerr(EofErrString, fdname(fd));
    return -1;
}
219
// Write exactly count octets from buf to fd.
//
// write() is allowed to transfer fewer octets than requested without that
// being an error, so the call is repeated until everything has been written.
// Returns count on success, 0 when count is 0, or -1 when count is negative,
// does not fit in size_t, or a write fails (a diagnostic is printed for
// write failures).
hoff_t hexpeek_write(int fd, const void *buf, hoff_t count)
{
    if(count < 0 || (uintmax_t)count > (uintmax_t)SIZE_MAX)
        return (hoff_t)-1;
    if(count == 0)
        return 0;

    const unsigned char *src = buf;
    size_t remaining = (size_t)count;

    while(remaining > 0)
    {
        ssize_t lcl_wr = write(fd, src, remaining);
        if(lcl_wr <= 0)
        {
            // A zero-length result for a nonzero request makes no progress
            // and does not set errno; treat it as an I/O error rather than
            // looping forever or printing a stale error string.
            if(lcl_wr == 0)
                errno = EIO;
            prerr("error writing to %s: %s\n", fdname(fd), strerror(errno));
            return (hoff_t)-1;
        }
        src += lcl_wr;
        remaining -= (size_t)lcl_wr;
    }

    return count;
}
231
hexpeek_stat(int fd,struct stat * fileinfo)232 rc_t hexpeek_stat(int fd, struct stat *fileinfo)
233 {
234 if(fstat(fd, fileinfo))
235 {
236 prerr("error retrieving file info for %s: %s\n", fdname(fd),
237 strerror(errno));
238 return RC_CRIT;
239 }
240 return RC_OK;
241 }
242
hexpeek_sync(int fd)243 rc_t hexpeek_sync(int fd)
244 {
245 if(fsync(fd))
246 {
247 prerr("error syncing %s: %s\n", fdname(fd), strerror(errno));
248 return RC_CRIT;
249 }
250 return RC_OK;
251 }
252
hexpeek_syncdir(char const * path)253 rc_t hexpeek_syncdir(char const *path)
254 {
255 rc_t rc = RC_UNSPEC;
256 const size_t len = strlen(path) + 1;
257 char buf[len];
258
259 strncpy(buf, path, len);
260 buf[len - 1] = '\0';
261
262 char *directory = dirname(buf);
263 int fd = open(directory, O_RDONLY);
264 if(fd < 0)
265 {
266 rc = RC_CRIT;
267 prerr("error opening path \"%s\": %s\n", cleanstring(directory),
268 strerror(errno));
269 goto end;
270 }
271
272 rc = hexpeek_sync(fd);
273
274 end:
275 if(fd >= 0)
276 close(fd);
277 return rc;
278 }
279
hexpeek_truncate(int fd,hoff_t len)280 rc_t hexpeek_truncate(int fd, hoff_t len)
281 {
282 if(ftruncate(fd, len))
283 {
284 prerr("error truncating %s: %s\n", fdname(fd), strerror(errno));
285 return RC_CRIT;
286 }
287 return RC_OK;
288 }
289
// Decide whether two descriptors refer to the same file, by comparing the
// descriptors themselves and then their device and inode numbers.
// Returns:
//   0 - the descriptors refer to different files
//   1 - the descriptors refer to the same file
//   2 - undetermined, because file information could not be retrieved
int sameness(int fd0, int fd1)
{
    struct stat info0, info1;

    if(fd0 == fd1)
        return 1;

    if(hexpeek_stat(fd0, &info0) || hexpeek_stat(fd1, &info1))
        return 2;

    if(info0.st_dev != info1.st_dev)
        return 0;

    return (info0.st_ino == info1.st_ino) ? 1 : 0;
}
315
filesize(int file_index)316 hoff_t filesize(int file_index)
317 {
318 struct stat info;
319 assert(hexpeek_stat(DT_FD(file_index), &info) == RC_OK);
320 assert(info.st_size >= 0);
321 return info.st_size;
322 }
323
pathsize(char const * path)324 hoff_t pathsize(char const *path)
325 {
326 struct stat info;
327 errno = 0;
328 if(stat(path, &info))
329 {
330 if(errno == ENOENT)
331 return -1;
332 prerr("error retrieving information about path \"%s\": %s\n",
333 cleanstring(path), strerror(errno));
334 die();
335 }
336 return info.st_size;
337 }
338
readat(int fd,hoff_t at,void * buf,hoff_t count)339 rc_t readat(int fd, hoff_t at, void *buf, hoff_t count)
340 {
341 if(hexpeek_seek(fd, at, SEEK_SET) != at)
342 return RC_CRIT;
343 if(readstrict(fd, buf, count) != count)
344 return RC_CRIT;
345 return RC_OK;
346 }
347
writeat(int fd,hoff_t at,const void * buf,hoff_t count)348 rc_t writeat(int fd, hoff_t at, const void *buf, hoff_t count)
349 {
350 if(hexpeek_seek(fd, at, SEEK_SET) != at)
351 return RC_CRIT;
352 if(hexpeek_write(fd, buf, count) != count)
353 return RC_CRIT;
354 return RC_OK;
355 }
356
357 // Copy data backwards. This function is safe for overlapping file regions
358 // when src_at is <= dst_at.
cpybk(int src_fd,hoff_t src_at,int dst_fd,hoff_t dst_at,hoff_t length)359 static rc_t cpybk(int src_fd, hoff_t src_at, int dst_fd, hoff_t dst_at,
360 hoff_t length)
361 {
362 rc_t rc = RC_UNSPEC;
363 hoff_t sz = (src_at + length) % PAGESZ;
364 uint8_t cpybuf[max(BUFSZ, PAGESZ)];
365
366 if(sz == 0)
367 sz = BUFSZ;
368
369 for(hoff_t rel = length; rel > 0; sz = BUFSZ)
370 {
371 sz = min(sz, rel);
372 rel -= sz;
373 progress(length - rel, length, 0);
374 rc = readat(src_fd, src_at + rel, cpybuf, sz);
375 checkrc(rc);
376 rc = writeat(dst_fd, dst_at + rel, cpybuf, sz);
377 checkrc(rc);
378 plugin(2, NULL);
379 }
380
381 progress(-1, length, 0);
382
383 rc = RC_OK;
384
385 end:
386 return rc;
387 }
388
389 // Copy data forwards. This function is safe for overlapping file regions
390 // when src_at is >= dst_at.
cpyfw(int src_fd,hoff_t src_at,int dst_fd,hoff_t dst_at,hoff_t length)391 static rc_t cpyfw(int src_fd, hoff_t src_at, int dst_fd, hoff_t dst_at,
392 hoff_t length)
393 {
394 rc_t rc = RC_UNSPEC;
395 hoff_t sz = distbound(src_at, PAGESZ);
396 uint8_t cpybuf[max(BUFSZ, PAGESZ)];
397
398 for(hoff_t rel = 0; rel < length; sz = BUFSZ)
399 {
400 sz = min(sz, length - rel);
401 progress(rel, length, 0);
402 rc = readat(src_fd, src_at + rel, cpybuf, sz);
403 checkrc(rc);
404 rc = writeat(dst_fd, dst_at + rel, cpybuf, sz);
405 checkrc(rc);
406 rel += sz;
407 plugin(2, NULL);
408 }
409
410 progress(-1, length, 0);
411
412 rc = RC_OK;
413
414 end:
415 return rc;
416 }
417
418 // This function is NOT safe to use on the same file. It exists because this
419 // restriction allows less seeking than the functions above.
cpyext(int src_fd,hoff_t src_at,int dst_fd,hoff_t dst_at,hoff_t length,int isbk)420 static rc_t cpyext(int src_fd, hoff_t src_at, int dst_fd, hoff_t dst_at,
421 hoff_t length, int isbk)
422 {
423 rc_t rc = RC_UNSPEC;
424 hoff_t sz = distbound(src_at, PAGESZ);
425 uint8_t cpybuf[max(BUFSZ, PAGESZ)];
426
427 rc = seekto(src_fd, src_at);
428 checkrc(rc);
429 rc = seekto(dst_fd, dst_at);
430 checkrc(rc);
431
432 for(hoff_t rel = 0; rel < length; sz = BUFSZ)
433 {
434 sz = min(sz, length - rel);
435 progress(rel, length, isbk);
436 if(readstrict(src_fd, cpybuf, sz) != sz)
437 {
438 rc = RC_CRIT;
439 goto end;
440 }
441 if(hexpeek_write(dst_fd, cpybuf, sz) != sz)
442 {
443 rc = RC_CRIT;
444 goto end;
445 }
446 rel += sz;
447 plugin(2, NULL);
448 }
449
450 progress(-1, length, isbk);
451
452 rc = RC_OK;
453
454 end:
455 return rc;
456 }
457
filecpy(int src_fd,hoff_t src_at,hoff_t src_len,int dst_fd,hoff_t dst_at,hoff_t dst_len)458 rc_t filecpy(int src_fd, hoff_t src_at, hoff_t src_len,
459 int dst_fd, hoff_t dst_at, hoff_t dst_len)
460 {
461 rc_t rc = RC_UNSPEC;
462 hoff_t src_before = -1, dst_before = -1, cpy_tot = 0;
463
464 traceEntry("%s, " TRC_hoff ", " TRC_hoff ", "
465 "%s, " TRC_hoff ", " TRC_hoff,
466 fdname(src_fd), trchoff(src_at), trchoff(src_len),
467 fdname(dst_fd), trchoff(dst_at), trchoff(dst_len));
468
469 assert(src_fd >= 0);
470 assert(dst_fd >= 0);
471 assert(src_at >= 0);
472 assert(src_len >= 0);
473 assert(dst_at >= 0);
474 assert(dst_len >= 0);
475 assert(src_len <= dst_len);
476
477 SAVE_OFFSET(src_fd, src_before);
478 SAVE_OFFSET(dst_fd, dst_before);
479
480 bool isbk = isBackupFile(src_fd) ^ isBackupFile(dst_fd);
481 bool uniq = isbk || (sameness(src_fd, dst_fd) == 0);
482
483 if(uniq)
484 {
485 // Copy src_len octets into start of dst_at
486 rc = cpyext(src_fd, src_at, dst_fd, dst_at, src_len, isbk);
487 checkrc(rc);
488 cpy_tot += src_len;
489
490 // Repeated write if needed
491 while(cpy_tot < dst_len)
492 {
493 hoff_t cpy_len = min(dst_len - cpy_tot, src_len);
494 rc = cpyext(src_fd, src_at, dst_fd, dst_at+cpy_tot, cpy_len, isbk);
495 checkrc(rc);
496 cpy_tot += cpy_len;
497 }
498 }
499 else
500 {
501 // Copy src_len octets into start of dst_at
502 if(src_at < dst_at && src_at + src_len > dst_at)
503 {
504 rc = cpybk(src_fd, src_at, dst_fd, dst_at, src_len);
505 checkrc(rc);
506 }
507 else
508 {
509 rc = cpyfw(src_fd, src_at, dst_fd, dst_at, src_len);
510 checkrc(rc);
511 }
512 cpy_tot += src_len;
513
514 // Repeated write if needed (data at src_at may have been overwritten
515 // depending on overlap, so read from dst_at).
516 while(cpy_tot < dst_len)
517 {
518 hoff_t cpy_len = min(dst_len - cpy_tot, src_len);
519 rc = cpyfw(dst_fd, dst_at, dst_fd, dst_at + cpy_tot, cpy_len);
520 checkrc(rc);
521 cpy_tot += cpy_len;
522 }
523 }
524
525 rc = RC_OK;
526
527 end:
528 if(src_before >= 0)
529 RESTORE_OFFSET(src_fd, src_before);
530 if(dst_before >= 0)
531 RESTORE_OFFSET(dst_fd, dst_before);
532 traceExit(TRC_rc, rc);
533 return rc;
534 }
535
lclcpy(int fd,hoff_t src_at,hoff_t dst_at,hoff_t length)536 rc_t lclcpy(int fd, hoff_t src_at, hoff_t dst_at, hoff_t length)
537 {
538 return filecpy(fd, src_at, length, fd, dst_at, length);
539 }
540
adjustSize(int data_fi,hoff_t pos,hoff_t amt,int backup_fd)541 rc_t adjustSize(int data_fi, hoff_t pos, hoff_t amt, int backup_fd)
542 {
543 rc_t rc = RC_UNSPEC;
544 hoff_t f_sz = filesize(data_fi);
545
546 traceEntry("%d, " TRC_hoff ", " TRC_hoff ", %d",
547 DT_FD(data_fi), trchoff(pos), trchoff(amt), backup_fd);
548
549 assert(pos >= 0);
550 if(amt < 0)
551 pos -= amt;
552 if(backup_fd < 0)
553 backup_fd = backupFd(data_fi);
554
555 rc = makeAdjBackup(data_fi, backup_fd, pos);
556 checkrc(rc);
557
558 if(pos < f_sz)
559 {
560 rc = lclcpy(DT_FD(data_fi), pos, pos + amt, f_sz - pos);
561 checkrc(rc);
562 }
563
564 if(amt < 0 && hexpeek_truncate(DT_FD(data_fi), f_sz + amt))
565 goto end;
566
567 rc = clearAdjBackup(backup_fd, NULL);
568 checkrc(rc);
569
570 rc = RC_OK;
571
572 end:
573 traceExit(TRC_rc, rc);
574 return rc;
575 }
576