1 // Copyright 2020 Michael Reilly (mreilly@resiliware.com).
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions
5 // are met:
6 // 1. Redistributions of source code must retain the above copyright
7 //    notice, this list of conditions and the following disclaimer.
8 // 2. Redistributions in binary form must reproduce the above copyright
9 //    notice, this list of conditions and the following disclaimer in the
10 //    documentation and/or other materials provided with the distribution.
11 // 3. Neither the names of the copyright holders nor the names of the
12 //    contributors may be used to endorse or promote products derived from
13 //    this software without specific prior written permission.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 // ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
18 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
19 // OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #define SRCNAME "hexpeek_files.c"
28 
29 #include <hexpeek.h>
30 
31 #include <stdlib.h>
32 #include <libgen.h>
33 #include <unistd.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <fcntl.h>
37 #include <string.h>
38 #include <errno.h>
39 
whichfile(int fd)40 static int whichfile(int fd)
41 {
42     if(fd < 0)
43         return -1;
44     for(int fi = 0; fi < MAX_INFILES; fi++)
45     {
46         if(fd == Params.infiles[fi].fd)
47             return fi;
48     }
49     return -1;
50 }
51 
fdname(int fd)52 char const *fdname(int fd)
53 {
54     if(fd >= 0)
55     {
56         for(int fi = 0; fi < MAX_INFILES; fi++)
57         {
58             if(Params.infiles[fi].fd >= 0)
59             {
60                 if(fd == Params.infiles[fi].fd)
61                     return DT_NAME(fi);
62                 for(int bidx = 0; bidx < BACKUP_FILE_COUNT; bidx++)
63                 {
64                     if(fd == Params.infiles[fi].bk_fds[bidx])
65                         return BK_NAME(fi, bidx);
66                 }
67             }
68         }
69     }
70     return "";
71 }
72 
isBackupFile(int fd)73 static int isBackupFile(int fd)
74 {
75     for(int fi = 0; fi < MAX_INFILES; fi++)
76     {
77         if(Params.infiles[fi].fd >= 0)
78         {
79             for(int bidx = 0; bidx < BACKUP_FILE_COUNT; bidx++)
80             {
81                 if(fd == Params.infiles[fi].bk_fds[bidx])
82                 {
83                     return 1;
84                 }
85             }
86         }
87     }
88     return 0;
89 }
90 
hexpeek_open(char const * path,int flags,mode_t mode,int * fd)91 rc_t hexpeek_open(char const *path, int flags, mode_t mode, int *fd)
92 {
93     int tmpfd = open(path, flags, mode);
94     if(tmpfd < 0)
95     {
96         prerr("error opening path \"%s\": %s\n", cleanstring(path),
97               strerror(errno));
98         return RC_CRIT;
99     }
100     *fd = tmpfd;
101     return RC_OK;
102 }
103 
104 #define _hexpeek_seek lseek
105 
hexpeek_seek(int fd,hoff_t offset,int whence)106 hoff_t hexpeek_seek(int fd, hoff_t offset, int whence)
107 {
108     errno = 0;
109     hoff_t result = _hexpeek_seek(fd, offset, whence);
110     if(result < 0) switch(errno)
111     {
112     case EINVAL:
113     {
114         prerr("invalid file offset\n");
115         break;
116     }
117     case ESPIPE:
118     {
119         // Try a forward-only seek for non-seekable files
120         int wf = whichfile(fd);
121         if(wf >= 0 && whence == SEEK_SET && Params.infiles[wf].track <= offset)
122         {
123             uint8_t discard[PAGESZ];
124             while(Params.infiles[wf].track < offset)
125             {
126                 ssize_t lcl_rd = read(fd,
127                                       discard,
128                                       min(sizeof discard,
129                                           offset - Params.infiles[wf].track));
130                 if(lcl_rd <= 0)
131                     goto fallthrough;
132                 Params.infiles[wf].track += (hoff_t)lcl_rd;
133             }
134             result = Params.infiles[wf].track;
135             break;
136         }
137     }
138     default:
139     {
140 fallthrough:
141         prerr("error seeking in %s: %s\n", fdname(fd), strerror(errno));
142         break;
143     }
144     }
145     return result;
146 }
147 
// Report whether the data descriptor of the given input file supports
// seeking, by probing it with a zero-distance SEEK_CUR seek. The raw seek
// primitive is used, so no error message is printed when the probe fails.
bool isseekable(int file_index)
{
    if(_hexpeek_seek(DT_FD(file_index), 0, SEEK_CUR) < 0)
        return false;
    return true;
}
152 
seekto(int fd,hoff_t offset)153 rc_t seekto(int fd, hoff_t offset)
154 {
155     // No current code path has offset < 0 here, but if it happens, fix it
156     // rather than failing.
157     int whence = (offset < 0 ? SEEK_END : SEEK_SET);
158     if(hexpeek_seek(fd, offset, whence) >= 0)
159         return RC_OK;
160     else if(errno == EINVAL)
161         return RC_USER;
162     else
163         return RC_CRIT;
164 }
165 
readfull(int fd,void * buf,size_t count)166 ssize_t readfull(int fd, void *buf, size_t count)
167 {
168     int wf = whichfile(fd);
169     ssize_t result = -1;
170     size_t octets_read = 0;
171 
172     while(octets_read < count)
173     {
174         ssize_t lcl_rd = read(fd,
175                               buf + octets_read,
176                               count - octets_read);
177         if(lcl_rd < 0)
178             goto end;
179         if(lcl_rd == 0)
180             break;
181         octets_read += lcl_rd;
182     }
183 
184     result = (ssize_t)octets_read;
185 
186 end:
187     if(wf >= 0)
188     {
189         if(Params.infiles[wf].track > HOFF_MAX - octets_read)
190             Params.infiles[wf].track = HOFF_MAX;
191         else
192             Params.infiles[wf].track += (hoff_t)octets_read;
193     }
194     return result;
195 }
196 
// Read count octets through readfull(), printing a message on read errors.
//
// Returns 0 immediately when count is 0, -1 when count is negative or does
// not fit in size_t, and otherwise whatever readfull() returned: the number
// of octets read, or a negative value after an error has been reported.
hoff_t hexpeek_read(int fd, void *buf, hoff_t count)
{
    if(count == 0)
        return 0;
    if(count < 0 || (uintmax_t)count > (uintmax_t)SIZE_MAX)
        return (hoff_t)-1;

    ssize_t got = readfull(fd, buf, (size_t)count);
    if(got >= 0)
        return (hoff_t)got;

    prerr("error reading from %s: %s\n", fdname(fd), strerror(errno));
    return (hoff_t)got;
}
208 
// Like hexpeek_read(), but a short read is treated as a failure.
//
// Returns count when exactly count octets were read. A negative result from
// hexpeek_read() is passed through unchanged; a non-negative result that
// differs from count is reported using EofErrString and turned into -1.
static hoff_t readstrict(int fd, void *buf, hoff_t count)
{
    hoff_t got = hexpeek_read(fd, buf, count);

    if(got < 0 || got == count)
        return got;

    prerr(EofErrString, fdname(fd));
    return -1;
}
219 
// Write count octets, continuing after short writes.
//
// write() may legitimately transfer fewer octets than requested without
// setting errno, so it is called repeatedly until everything has been written
// (the counterpart of what readfull() does for reads).
//
// Returns 0 immediately when count is 0 and -1 when count is negative or does
// not fit in size_t. Otherwise returns count on success. On failure an error
// message is printed and a value different from count is returned: -1 if
// write() failed, or the number of octets written so far if write() made no
// progress.
hoff_t hexpeek_write(int fd, const void *buf, hoff_t count)
{
    if(count < 0 || (uintmax_t)count > (uintmax_t)SIZE_MAX)
        return (hoff_t)-1;
    if(count == 0)
        return 0;

    const uint8_t *src = buf;
    const size_t total = (size_t)count;
    size_t written = 0;

    // Ensure a stale errno is not reported if write() stalls without failing.
    errno = 0;

    while(written < total)
    {
        ssize_t lcl_wr = write(fd, src + written, total - written);
        if(lcl_wr <= 0)
        {
            prerr("error writing to %s: %s\n", fdname(fd), strerror(errno));
            // Stop on zero progress as well, to avoid looping forever.
            return (lcl_wr < 0) ? (hoff_t)-1 : (hoff_t)written;
        }
        written += (size_t)lcl_wr;
    }

    return (hoff_t)written;
}
231 
hexpeek_stat(int fd,struct stat * fileinfo)232 rc_t hexpeek_stat(int fd, struct stat *fileinfo)
233 {
234     if(fstat(fd, fileinfo))
235     {
236         prerr("error retrieving file info for %s: %s\n", fdname(fd),
237               strerror(errno));
238         return RC_CRIT;
239     }
240     return RC_OK;
241 }
242 
hexpeek_sync(int fd)243 rc_t hexpeek_sync(int fd)
244 {
245     if(fsync(fd))
246     {
247         prerr("error syncing %s: %s\n", fdname(fd), strerror(errno));
248         return RC_CRIT;
249     }
250     return RC_OK;
251 }
252 
hexpeek_syncdir(char const * path)253 rc_t hexpeek_syncdir(char const *path)
254 {
255     rc_t rc  = RC_UNSPEC;
256     const size_t len = strlen(path) + 1;
257     char buf[len];
258 
259     strncpy(buf, path, len);
260     buf[len - 1] = '\0';
261 
262     char *directory = dirname(buf);
263     int fd = open(directory, O_RDONLY);
264     if(fd < 0)
265     {
266         rc = RC_CRIT;
267         prerr("error opening path \"%s\": %s\n", cleanstring(directory),
268               strerror(errno));
269         goto end;
270     }
271 
272     rc = hexpeek_sync(fd);
273 
274 end:
275     if(fd >= 0)
276         close(fd);
277     return rc;
278 }
279 
hexpeek_truncate(int fd,hoff_t len)280 rc_t hexpeek_truncate(int fd, hoff_t len)
281 {
282     if(ftruncate(fd, len))
283     {
284         prerr("error truncating %s: %s\n", fdname(fd), strerror(errno));
285         return RC_CRIT;
286     }
287     return RC_OK;
288 }
289 
// Decide whether two descriptors refer to the same file.
//
// Identical descriptor values are the same file by definition; otherwise the
// device and inode numbers reported by hexpeek_stat() are compared.
//
// Returns:
//   0 - unique (different files)
//   1 - same
//   2 - unknown (file information could not be retrieved)
int sameness(int fd0, int fd1)
{
    struct stat info0, info1;

    if(fd0 == fd1)
        return 1;

    // Short-circuit: the second descriptor is not examined if the first
    // lookup already failed.
    if(hexpeek_stat(fd0, &info0) || hexpeek_stat(fd1, &info1))
        return 2;

    if(info0.st_dev == info1.st_dev && info0.st_ino == info1.st_ino)
        return 1;

    return 0;
}
315 
filesize(int file_index)316 hoff_t filesize(int file_index)
317 {
318     struct stat info;
319     assert(hexpeek_stat(DT_FD(file_index), &info) == RC_OK);
320     assert(info.st_size >= 0);
321     return info.st_size;
322 }
323 
pathsize(char const * path)324 hoff_t pathsize(char const *path)
325 {
326     struct stat info;
327     errno = 0;
328     if(stat(path, &info))
329     {
330         if(errno == ENOENT)
331             return -1;
332         prerr("error retrieving information about path \"%s\": %s\n",
333               cleanstring(path), strerror(errno));
334         die();
335     }
336     return info.st_size;
337 }
338 
readat(int fd,hoff_t at,void * buf,hoff_t count)339 rc_t readat(int fd, hoff_t at, void *buf, hoff_t count)
340 {
341     if(hexpeek_seek(fd, at, SEEK_SET) != at)
342         return RC_CRIT;
343     if(readstrict(fd, buf, count) != count)
344         return RC_CRIT;
345     return RC_OK;
346 }
347 
writeat(int fd,hoff_t at,const void * buf,hoff_t count)348 rc_t writeat(int fd, hoff_t at, const void *buf, hoff_t count)
349 {
350     if(hexpeek_seek(fd, at, SEEK_SET) != at)
351         return RC_CRIT;
352     if(hexpeek_write(fd, buf, count) != count)
353         return RC_CRIT;
354     return RC_OK;
355 }
356 
357 // Copy data backwards. This function is safe for overlapping file regions
358 // when src_at is <= dst_at.
cpybk(int src_fd,hoff_t src_at,int dst_fd,hoff_t dst_at,hoff_t length)359 static rc_t cpybk(int src_fd, hoff_t src_at, int dst_fd, hoff_t dst_at,
360                   hoff_t length)
361 {
362     rc_t rc = RC_UNSPEC;
363     hoff_t sz = (src_at + length) % PAGESZ;
364     uint8_t cpybuf[max(BUFSZ, PAGESZ)];
365 
366     if(sz == 0)
367         sz = BUFSZ;
368 
369     for(hoff_t rel = length; rel > 0; sz = BUFSZ)
370     {
371         sz = min(sz, rel);
372         rel -= sz;
373         progress(length - rel, length, 0);
374         rc = readat(src_fd, src_at + rel, cpybuf, sz);
375         checkrc(rc);
376         rc = writeat(dst_fd, dst_at + rel, cpybuf, sz);
377         checkrc(rc);
378         plugin(2, NULL);
379     }
380 
381     progress(-1, length, 0);
382 
383     rc = RC_OK;
384 
385 end:
386     return rc;
387 }
388 
389 // Copy data forwards. This function is safe for overlapping file regions
390 // when src_at is >= dst_at.
cpyfw(int src_fd,hoff_t src_at,int dst_fd,hoff_t dst_at,hoff_t length)391 static rc_t cpyfw(int src_fd, hoff_t src_at, int dst_fd, hoff_t dst_at,
392                   hoff_t length)
393 {
394     rc_t rc = RC_UNSPEC;
395     hoff_t sz = distbound(src_at, PAGESZ);
396     uint8_t cpybuf[max(BUFSZ, PAGESZ)];
397 
398     for(hoff_t rel = 0; rel < length; sz = BUFSZ)
399     {
400         sz = min(sz, length - rel);
401         progress(rel, length, 0);
402         rc = readat(src_fd, src_at + rel, cpybuf, sz);
403         checkrc(rc);
404         rc = writeat(dst_fd, dst_at + rel, cpybuf, sz);
405         checkrc(rc);
406         rel += sz;
407         plugin(2, NULL);
408     }
409 
410     progress(-1, length, 0);
411 
412     rc = RC_OK;
413 
414 end:
415     return rc;
416 }
417 
418 // This function is NOT safe to use on the same file. It exists because this
419 // restriction allows less seeking than the functions above.
cpyext(int src_fd,hoff_t src_at,int dst_fd,hoff_t dst_at,hoff_t length,int isbk)420 static rc_t cpyext(int src_fd, hoff_t src_at, int dst_fd, hoff_t dst_at,
421                    hoff_t length, int isbk)
422 {
423     rc_t rc = RC_UNSPEC;
424     hoff_t sz = distbound(src_at, PAGESZ);
425     uint8_t cpybuf[max(BUFSZ, PAGESZ)];
426 
427     rc = seekto(src_fd, src_at);
428     checkrc(rc);
429     rc = seekto(dst_fd, dst_at);
430     checkrc(rc);
431 
432     for(hoff_t rel = 0; rel < length; sz = BUFSZ)
433     {
434         sz = min(sz, length - rel);
435         progress(rel, length, isbk);
436         if(readstrict(src_fd, cpybuf, sz) != sz)
437         {
438             rc = RC_CRIT;
439             goto end;
440         }
441         if(hexpeek_write(dst_fd, cpybuf, sz) != sz)
442         {
443             rc = RC_CRIT;
444             goto end;
445         }
446         rel += sz;
447         plugin(2, NULL);
448     }
449 
450     progress(-1, length, isbk);
451 
452     rc = RC_OK;
453 
454 end:
455     return rc;
456 }
457 
filecpy(int src_fd,hoff_t src_at,hoff_t src_len,int dst_fd,hoff_t dst_at,hoff_t dst_len)458 rc_t filecpy(int src_fd, hoff_t src_at, hoff_t src_len,
459              int dst_fd, hoff_t dst_at, hoff_t dst_len)
460 {
461     rc_t rc = RC_UNSPEC;
462     hoff_t src_before = -1, dst_before = -1, cpy_tot = 0;
463 
464     traceEntry("%s, " TRC_hoff ", " TRC_hoff ", "
465                "%s, " TRC_hoff ", " TRC_hoff,
466                fdname(src_fd), trchoff(src_at), trchoff(src_len),
467                fdname(dst_fd), trchoff(dst_at), trchoff(dst_len));
468 
469     assert(src_fd >= 0);
470     assert(dst_fd >= 0);
471     assert(src_at >= 0);
472     assert(src_len >= 0);
473     assert(dst_at >= 0);
474     assert(dst_len >= 0);
475     assert(src_len <= dst_len);
476 
477     SAVE_OFFSET(src_fd, src_before);
478     SAVE_OFFSET(dst_fd, dst_before);
479 
480     bool isbk = isBackupFile(src_fd) ^ isBackupFile(dst_fd);
481     bool uniq = isbk || (sameness(src_fd, dst_fd) == 0);
482 
483     if(uniq)
484     {
485         // Copy src_len octets into start of dst_at
486         rc = cpyext(src_fd, src_at, dst_fd, dst_at, src_len, isbk);
487         checkrc(rc);
488         cpy_tot += src_len;
489 
490         // Repeated write if needed
491         while(cpy_tot < dst_len)
492         {
493             hoff_t cpy_len = min(dst_len - cpy_tot, src_len);
494             rc = cpyext(src_fd, src_at, dst_fd, dst_at+cpy_tot, cpy_len, isbk);
495             checkrc(rc);
496             cpy_tot += cpy_len;
497         }
498     }
499     else
500     {
501         // Copy src_len octets into start of dst_at
502         if(src_at < dst_at && src_at + src_len > dst_at)
503         {
504             rc = cpybk(src_fd, src_at, dst_fd, dst_at, src_len);
505             checkrc(rc);
506         }
507         else
508         {
509             rc = cpyfw(src_fd, src_at, dst_fd, dst_at, src_len);
510             checkrc(rc);
511         }
512         cpy_tot += src_len;
513 
514         // Repeated write if needed (data at src_at may have been overwritten
515         // depending on overlap, so read from dst_at).
516         while(cpy_tot < dst_len)
517         {
518             hoff_t cpy_len = min(dst_len - cpy_tot, src_len);
519             rc = cpyfw(dst_fd, dst_at, dst_fd, dst_at + cpy_tot, cpy_len);
520             checkrc(rc);
521             cpy_tot += cpy_len;
522         }
523     }
524 
525     rc = RC_OK;
526 
527 end:
528     if(src_before >= 0)
529         RESTORE_OFFSET(src_fd, src_before);
530     if(dst_before >= 0)
531         RESTORE_OFFSET(dst_fd, dst_before);
532     traceExit(TRC_rc, rc);
533     return rc;
534 }
535 
lclcpy(int fd,hoff_t src_at,hoff_t dst_at,hoff_t length)536 rc_t lclcpy(int fd, hoff_t src_at, hoff_t dst_at, hoff_t length)
537 {
538     return filecpy(fd, src_at, length, fd, dst_at, length);
539 }
540 
adjustSize(int data_fi,hoff_t pos,hoff_t amt,int backup_fd)541 rc_t adjustSize(int data_fi, hoff_t pos, hoff_t amt, int backup_fd)
542 {
543     rc_t rc = RC_UNSPEC;
544     hoff_t f_sz = filesize(data_fi);
545 
546     traceEntry("%d, " TRC_hoff ", " TRC_hoff ", %d",
547                DT_FD(data_fi), trchoff(pos), trchoff(amt), backup_fd);
548 
549     assert(pos >= 0);
550     if(amt < 0)
551         pos -= amt;
552     if(backup_fd < 0)
553         backup_fd = backupFd(data_fi);
554 
555     rc = makeAdjBackup(data_fi, backup_fd, pos);
556     checkrc(rc);
557 
558     if(pos < f_sz)
559     {
560         rc = lclcpy(DT_FD(data_fi), pos, pos + amt, f_sz - pos);
561         checkrc(rc);
562     }
563 
564     if(amt < 0 && hexpeek_truncate(DT_FD(data_fi), f_sz + amt))
565         goto end;
566 
567     rc = clearAdjBackup(backup_fd, NULL);
568     checkrc(rc);
569 
570     rc = RC_OK;
571 
572 end:
573     traceExit(TRC_rc, rc);
574     return rc;
575 }
576