1 /* shred.c - overwrite files and devices to make it harder to recover data
2 
3    Copyright (C) 1999-2020 Free Software Foundation, Inc.
4    Copyright (C) 1997, 1998, 1999 Colin Plumb.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation, either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 
19    Written by Colin Plumb.  */
20 
21 /*
22  * Do a more secure overwrite of given files or devices, to make it harder
23  * for even very expensive hardware probing to recover the data.
24  *
25  * Although this process is also known as "wiping", I prefer the longer
26  * name both because I think it is more evocative of what is happening and
27  * because a longer name conveys a more appropriate sense of deliberateness.
28  *
29  * For the theory behind this, see "Secure Deletion of Data from Magnetic
30  * and Solid-State Memory", on line at
31  * https://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
32  *
33  * Just for the record, reversing one or two passes of disk overwrite
34  * is not terribly difficult with hardware help.  Hook up a good-quality
35  * digitizing oscilloscope to the output of the head preamplifier and copy
36  * the high-res digitized data to a computer for some off-line analysis.
37  * Read the "current" data and average all the pulses together to get an
38  * "average" pulse on the disk.  Subtract this average pulse from all of
39  * the actual pulses and you can clearly see the "echo" of the previous
40  * data on the disk.
41  *
42  * Real hard drives have to balance the cost of the media, the head,
43  * and the read circuitry.  They use better-quality media than absolutely
44  * necessary to limit the cost of the read circuitry.  By throwing that
45  * assumption out, and the assumption that you want the data processed
46  * as fast as the hard drive can spin, you can do better.
47  *
48  * If asked to wipe a file, this also unlinks it, renaming it in a
49  * clever way to try to leave no trace of the original filename.
50  *
51  * This was inspired by a desire to improve on some code titled:
52  * Wipe V1.0-- Overwrite and delete files.  S. 2/3/96
53  * but I've rewritten everything here so completely that no trace of
54  * the original remains.
55  *
56  * Thanks to:
57  * Bob Jenkins, for his good RNG work and patience with the FSF copyright
58  * paperwork.
59  * Jim Meyering, for his work merging this into the GNU fileutils while
60  * still letting me feel a sense of ownership and pride.  Getting me to
61  * tolerate the GNU brace style was quite a feat of diplomacy.
62  * Paul Eggert, for lots of useful discussion and code.  I disagree with
63  * an awful lot of his suggestions, but they're disagreements worth having.
64  *
65  * Things to think about:
66  * - Security: Is there any risk to the race
67  *   between overwriting and unlinking a file?  Will it do anything
68  *   drastically bad if told to attack a named pipe or socket?
69  */
70 
71 /* The official name of this program (e.g., no 'g' prefix).  */
72 #define PROGRAM_NAME "shred"
73 
74 #define AUTHORS proper_name ("Colin Plumb")
75 
76 #include <config.h>
77 
78 #include <getopt.h>
79 #include <stdio.h>
80 #include <assert.h>
81 #include <setjmp.h>
82 #include <sys/types.h>
83 #if defined __linux__ && HAVE_SYS_MTIO_H
84 # include <sys/mtio.h>
85 #endif
86 
87 #include "system.h"
88 #include "argmatch.h"
89 #include "xdectoint.h"
90 #include "die.h"
91 #include "error.h"
92 #include "fcntl--.h"
93 #include "human.h"
94 #include "randint.h"
95 #include "randread.h"
96 #include "renameatu.h"
97 #include "stat-size.h"
98 
99 /* Default number of times to overwrite.  */
100 enum { DEFAULT_PASSES = 3 };
101 
102 /* How many seconds to wait before checking whether to output another
103    verbose output line.  */
104 enum { VERBOSE_UPDATE = 5 };
105 
106 /* Sector size and corresponding mask, for recovering after write failures.
107    The size must be a power of 2.  */
108 enum { SECTOR_SIZE = 512 };
109 enum { SECTOR_MASK = SECTOR_SIZE - 1 };
110 verify (0 < SECTOR_SIZE && (SECTOR_SIZE & SECTOR_MASK) == 0);
111 
112 enum remove_method
113 {
114   remove_none = 0,      /* the default: only wipe data.  */
115   remove_unlink,        /* don't obfuscate name, just unlink.  */
116   remove_wipe,          /* obfuscate name before unlink.  */
117   remove_wipesync       /* obfuscate name, syncing each byte, before unlink.  */
118 };
119 
120 static char const *const remove_args[] =
121 {
122   "unlink", "wipe", "wipesync", NULL
123 };
124 
125 static enum remove_method const remove_methods[] =
126 {
127   remove_unlink, remove_wipe, remove_wipesync
128 };
129 
130 struct Options
131 {
132   bool force;		/* -f flag: chmod files if necessary */
133   size_t n_iterations;	/* -n flag: Number of iterations */
134   off_t size;		/* -s flag: size of file */
135   enum remove_method remove_file; /* -u flag: remove file after shredding */
136   bool verbose;		/* -v flag: Print progress */
137   bool exact;		/* -x flag: Do not round up file size */
138   bool zero_fill;	/* -z flag: Add a final zero pass */
139 };
140 
141 /* For long options that have no equivalent short option, use a
142    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
143 enum
144 {
145   RANDOM_SOURCE_OPTION = CHAR_MAX + 1
146 };
147 
148 static struct option const long_opts[] =
149 {
150   {"exact", no_argument, NULL, 'x'},
151   {"force", no_argument, NULL, 'f'},
152   {"iterations", required_argument, NULL, 'n'},
153   {"size", required_argument, NULL, 's'},
154   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
155   {"remove", optional_argument, NULL, 'u'},
156   {"verbose", no_argument, NULL, 'v'},
157   {"zero", no_argument, NULL, 'z'},
158   {GETOPT_HELP_OPTION_DECL},
159   {GETOPT_VERSION_OPTION_DECL},
160   {NULL, 0, NULL, 0}
161 };
162 
163 void
usage(int status)164 usage (int status)
165 {
166   if (status != EXIT_SUCCESS)
167     emit_try_help ();
168   else
169     {
170       printf (_("Usage: %s [OPTION]... FILE...\n"), program_name);
171       fputs (_("\
172 Overwrite the specified FILE(s) repeatedly, in order to make it harder\n\
173 for even very expensive hardware probing to recover the data.\n\
174 "), stdout);
175       fputs (_("\
176 \n\
177 If FILE is -, shred standard output.\n\
178 "), stdout);
179 
180       emit_mandatory_arg_note ();
181 
182       printf (_("\
183   -f, --force    change permissions to allow writing if necessary\n\
184   -n, --iterations=N  overwrite N times instead of the default (%d)\n\
185       --random-source=FILE  get random bytes from FILE\n\
186   -s, --size=N   shred this many bytes (suffixes like K, M, G accepted)\n\
187 "), DEFAULT_PASSES);
188       fputs (_("\
189   -u             deallocate and remove file after overwriting\n\
190       --remove[=HOW]  like -u but give control on HOW to delete;  See below\n\
191   -v, --verbose  show progress\n\
192   -x, --exact    do not round file sizes up to the next full block;\n\
193                    this is the default for non-regular files\n\
194   -z, --zero     add a final overwrite with zeros to hide shredding\n\
195 "), stdout);
196       fputs (HELP_OPTION_DESCRIPTION, stdout);
197       fputs (VERSION_OPTION_DESCRIPTION, stdout);
198       fputs (_("\
199 \n\
200 Delete FILE(s) if --remove (-u) is specified.  The default is not to remove\n\
201 the files because it is common to operate on device files like /dev/hda,\n\
202 and those files usually should not be removed.\n\
203 The optional HOW parameter indicates how to remove a directory entry:\n\
204 'unlink' => use a standard unlink call.\n\
205 'wipe' => also first obfuscate bytes in the name.\n\
206 'wipesync' => also sync each obfuscated byte to disk.\n\
207 The default mode is 'wipesync', but note it can be expensive.\n\
208 \n\
209 "), stdout);
210       fputs (_("\
211 CAUTION: shred assumes the file system and hardware overwrite data in place.\n\
212 Although this is common, many platforms operate otherwise.  Also, backups\n\
213 and mirrors may contain unremovable copies that will let a shredded file\n\
214 be recovered later.  See the GNU coreutils manual for details.\n\
215 "), stdout);
216       emit_ancillary_info (PROGRAM_NAME);
217     }
218   exit (status);
219 }
220 
221 /*
222  * Determine if pattern type is periodic or not.
223  */
224 static bool
periodic_pattern(int type)225 periodic_pattern (int type)
226 {
227   if (type <= 0)
228     return false;
229 
230   unsigned char r[3];
231   unsigned int bits = type & 0xfff;
232 
233   bits |= bits << 12;
234   r[0] = (bits >> 4) & 255;
235   r[1] = (bits >> 8) & 255;
236   r[2] = bits & 255;
237 
238   return (r[0] != r[1]) || (r[0] != r[2]);
239 }
240 
241 /*
242  * Fill a buffer with a fixed pattern.
243  *
244  * The buffer must be at least 3 bytes long, even if
245  * size is less.  Larger sizes are filled exactly.
246  */
247 static void
fillpattern(int type,unsigned char * r,size_t size)248 fillpattern (int type, unsigned char *r, size_t size)
249 {
250   size_t i;
251   unsigned int bits = type & 0xfff;
252 
253   bits |= bits << 12;
254   r[0] = (bits >> 4) & 255;
255   r[1] = (bits >> 8) & 255;
256   r[2] = bits & 255;
257   for (i = 3; i <= size / 2; i *= 2)
258     memcpy (r + i, r, i);
259   if (i < size)
260     memcpy (r + i, r, size - i);
261 
262   /* Invert the first bit of every sector. */
263   if (type & 0x1000)
264     for (i = 0; i < size; i += SECTOR_SIZE)
265       r[i] ^= 0x80;
266 }
267 
268 /*
269  * Generate a 6-character (+ nul) pass name string
270  * FIXME: allow translation of "random".
271  */
272 #define PASS_NAME_SIZE 7
273 static void
passname(unsigned char const * data,char name[PASS_NAME_SIZE])274 passname (unsigned char const *data, char name[PASS_NAME_SIZE])
275 {
276   if (data)
277     sprintf (name, "%02x%02x%02x", data[0], data[1], data[2]);
278   else
279     memcpy (name, "random", PASS_NAME_SIZE);
280 }
281 
282 /* Return true when it's ok to ignore an fsync or fdatasync
283    failure that set errno to ERRNO_VAL.  */
284 static bool
ignorable_sync_errno(int errno_val)285 ignorable_sync_errno (int errno_val)
286 {
287   return (errno_val == EINVAL
288           || errno_val == EBADF
289           /* HP-UX does this */
290           || errno_val == EISDIR);
291 }
292 
293 /* Request that all data for FD be transferred to the corresponding
294    storage device.  QNAME is the file name (quoted for colons).
295    Report any errors found.  Return 0 on success, -1
296    (setting errno) on failure.  It is not an error if fdatasync and/or
297    fsync is not supported for this file, or if the file is not a
298    writable file descriptor.  */
299 static int
dosync(int fd,char const * qname)300 dosync (int fd, char const *qname)
301 {
302   int err;
303 
304 #if HAVE_FDATASYNC
305   if (fdatasync (fd) == 0)
306     return 0;
307   err = errno;
308   if ( ! ignorable_sync_errno (err))
309     {
310       error (0, err, _("%s: fdatasync failed"), qname);
311       errno = err;
312       return -1;
313     }
314 #endif
315 
316   if (fsync (fd) == 0)
317     return 0;
318   err = errno;
319   if ( ! ignorable_sync_errno (err))
320     {
321       error (0, err, _("%s: fsync failed"), qname);
322       errno = err;
323       return -1;
324     }
325 
326   sync ();
327   return 0;
328 }
329 
330 /* Turn on or off direct I/O mode for file descriptor FD, if possible.
331    Try to turn it on if ENABLE is true.  Otherwise, try to turn it off.  */
332 static void
direct_mode(int fd,bool enable)333 direct_mode (int fd, bool enable)
334 {
335   if (O_DIRECT)
336     {
337       int fd_flags = fcntl (fd, F_GETFL);
338       if (0 < fd_flags)
339         {
340           int new_flags = (enable
341                            ? (fd_flags | O_DIRECT)
342                            : (fd_flags & ~O_DIRECT));
343           if (new_flags != fd_flags)
344             fcntl (fd, F_SETFL, new_flags);
345         }
346     }
347 
348 #if HAVE_DIRECTIO && defined DIRECTIO_ON && defined DIRECTIO_OFF
349   /* This is Solaris-specific.  */
350   directio (fd, enable ? DIRECTIO_ON : DIRECTIO_OFF);
351 #endif
352 }
353 
354 /* Rewind FD; its status is ST.  */
355 static bool
dorewind(int fd,struct stat const * st)356 dorewind (int fd, struct stat const *st)
357 {
358   if (S_ISCHR (st->st_mode))
359     {
360 #if defined __linux__ && HAVE_SYS_MTIO_H
361       /* In the Linux kernel, lseek does not work on tape devices; it
362          returns a randomish value instead.  Try the low-level tape
363          rewind operation first.  */
364       struct mtop op;
365       op.mt_op = MTREW;
366       op.mt_count = 1;
367       if (ioctl (fd, MTIOCTOP, &op) == 0)
368         return true;
369 #endif
370     }
371   off_t offset = lseek (fd, 0, SEEK_SET);
372   if (0 < offset)
373     errno = EINVAL;
374   return offset == 0;
375 }
376 
377 /* By convention, negative sizes represent unknown values.  */
378 
379 static bool
known(off_t size)380 known (off_t size)
381 {
382   return 0 <= size;
383 }
384 
385 /*
386  * Do pass number K of N, writing *SIZEP bytes of the given pattern TYPE
387  * to the file descriptor FD.  K and N are passed in only for verbose
388  * progress message purposes.  If N == 0, no progress messages are printed.
389  *
390  * If *SIZEP == -1, the size is unknown, and it will be filled in as soon
391  * as writing fails with ENOSPC.
392  *
393  * Return 1 on write error, -1 on other error, 0 on success.
394  */
395 static int
dopass(int fd,struct stat const * st,char const * qname,off_t * sizep,int type,struct randread_source * s,unsigned long int k,unsigned long int n)396 dopass (int fd, struct stat const *st, char const *qname, off_t *sizep,
397         int type, struct randread_source *s,
398         unsigned long int k, unsigned long int n)
399 {
400   off_t size = *sizep;
401   off_t offset;			/* Current file position */
402   time_t thresh IF_LINT ( = 0);	/* Time to maybe print next status update */
403   time_t now = 0;		/* Current time */
404   size_t lim;			/* Amount of data to try writing */
405   size_t soff;			/* Offset into buffer for next write */
406   ssize_t ssize;		/* Return value from write */
407 
408   /* Fill pattern buffer.  Aligning it to a page so we can do direct I/O.  */
409   size_t page_size = getpagesize ();
410 #define PERIODIC_OUTPUT_SIZE (60 * 1024)
411 #define NONPERIODIC_OUTPUT_SIZE (64 * 1024)
412   verify (PERIODIC_OUTPUT_SIZE % 3 == 0);
413   size_t output_size = periodic_pattern (type)
414                        ? PERIODIC_OUTPUT_SIZE : NONPERIODIC_OUTPUT_SIZE;
415 #define PAGE_ALIGN_SLOP (page_size - 1)                /* So directio works */
416 #define FILLPATTERN_SIZE (((output_size + 2) / 3) * 3) /* Multiple of 3 */
417 #define PATTERNBUF_SIZE (PAGE_ALIGN_SLOP + FILLPATTERN_SIZE)
418   void *fill_pattern_mem = xmalloc (PATTERNBUF_SIZE);
419   unsigned char *pbuf = ptr_align (fill_pattern_mem, page_size);
420 
421   char pass_string[PASS_NAME_SIZE];	/* Name of current pass */
422   bool write_error = false;
423   bool other_error = false;
424 
425   /* Printable previous offset into the file */
426   char previous_offset_buf[LONGEST_HUMAN_READABLE + 1];
427   char const *previous_human_offset IF_LINT ( = 0);
428 
429   /* As a performance tweak, avoid direct I/O for small sizes,
430      as it's just a performance rather then security consideration,
431      and direct I/O can often be unsupported for small non aligned sizes.  */
432   bool try_without_directio = 0 < size && size < output_size;
433   if (! try_without_directio)
434     direct_mode (fd, true);
435 
436   if (! dorewind (fd, st))
437     {
438       error (0, errno, _("%s: cannot rewind"), qname);
439       other_error = true;
440       goto free_pattern_mem;
441     }
442 
443   /* Constant fill patterns need only be set up once. */
444   if (type >= 0)
445     {
446       lim = known (size) && size < FILLPATTERN_SIZE ? size : FILLPATTERN_SIZE;
447       fillpattern (type, pbuf, lim);
448       passname (pbuf, pass_string);
449     }
450   else
451     {
452       passname (0, pass_string);
453     }
454 
455   /* Set position if first status update */
456   if (n)
457     {
458       error (0, 0, _("%s: pass %lu/%lu (%s)..."), qname, k, n, pass_string);
459       thresh = time (NULL) + VERBOSE_UPDATE;
460       previous_human_offset = "";
461     }
462 
463   offset = 0;
464   while (true)
465     {
466       /* How much to write this time? */
467       lim = output_size;
468       if (known (size) && size - offset < output_size)
469         {
470           if (size < offset)
471             break;
472           lim = size - offset;
473           if (!lim)
474             break;
475         }
476       if (type < 0)
477         randread (s, pbuf, lim);
478       /* Loop to retry partial writes. */
479       for (soff = 0; soff < lim; soff += ssize)
480         {
481           ssize = write (fd, pbuf + soff, lim - soff);
482           if (0 < ssize)
483             assume (ssize <= lim - soff);
484           else
485             {
486               if (! known (size) && (ssize == 0 || errno == ENOSPC))
487                 {
488                   /* We have found the end of the file.  */
489                   if (soff <= OFF_T_MAX - offset)
490                     *sizep = size = offset + soff;
491                   break;
492                 }
493               else
494                 {
495                   int errnum = errno;
496                   char buf[INT_BUFSIZE_BOUND (uintmax_t)];
497 
498                   /* Retry without direct I/O since this may not be supported
499                      at all on some (file) systems, or with the current size.
500                      I.e., a specified --size that is not aligned, or when
501                      dealing with slop at the end of a file with --exact.  */
502                   if (! try_without_directio && errno == EINVAL)
503                     {
504                       direct_mode (fd, false);
505                       ssize = 0;
506                       try_without_directio = true;
507                       continue;
508                     }
509                   error (0, errnum, _("%s: error writing at offset %s"),
510                          qname, umaxtostr (offset + soff, buf));
511 
512                   /* 'shred' is often used on bad media, before throwing it
513                      out.  Thus, it shouldn't give up on bad blocks.  This
514                      code works because lim is always a multiple of
515                      SECTOR_SIZE, except at the end.  This size constraint
516                      also enables direct I/O on some (file) systems.  */
517                   verify (PERIODIC_OUTPUT_SIZE % SECTOR_SIZE == 0);
518                   verify (NONPERIODIC_OUTPUT_SIZE % SECTOR_SIZE == 0);
519                   if (errnum == EIO && known (size)
520                       && (soff | SECTOR_MASK) < lim)
521                     {
522                       size_t soff1 = (soff | SECTOR_MASK) + 1;
523                       if (lseek (fd, offset + soff1, SEEK_SET) != -1)
524                         {
525                           /* Arrange to skip this block. */
526                           ssize = soff1 - soff;
527                           write_error = true;
528                           continue;
529                         }
530                       error (0, errno, _("%s: lseek failed"), qname);
531                     }
532                   other_error = true;
533                   goto free_pattern_mem;
534                 }
535             }
536         }
537 
538       /* Okay, we have written "soff" bytes. */
539 
540       if (OFF_T_MAX - offset < soff)
541         {
542           error (0, 0, _("%s: file too large"), qname);
543           other_error = true;
544           goto free_pattern_mem;
545         }
546 
547       offset += soff;
548 
549       bool done = offset == size;
550 
551       /* Time to print progress? */
552       if (n && ((done && *previous_human_offset)
553                 || thresh <= (now = time (NULL))))
554         {
555           char offset_buf[LONGEST_HUMAN_READABLE + 1];
556           char size_buf[LONGEST_HUMAN_READABLE + 1];
557           int human_progress_opts = (human_autoscale | human_SI
558                                      | human_base_1024 | human_B);
559           char const *human_offset
560             = human_readable (offset, offset_buf,
561                               human_floor | human_progress_opts, 1, 1);
562 
563           if (done || !STREQ (previous_human_offset, human_offset))
564             {
565               if (! known (size))
566                 error (0, 0, _("%s: pass %lu/%lu (%s)...%s"),
567                        qname, k, n, pass_string, human_offset);
568               else
569                 {
570                   uintmax_t off = offset;
571                   int percent = (size == 0
572                                  ? 100
573                                  : (off <= TYPE_MAXIMUM (uintmax_t) / 100
574                                     ? off * 100 / size
575                                     : off / (size / 100)));
576                   char const *human_size
577                     = human_readable (size, size_buf,
578                                       human_ceiling | human_progress_opts,
579                                       1, 1);
580                   if (done)
581                     human_offset = human_size;
582                   error (0, 0, _("%s: pass %lu/%lu (%s)...%s/%s %d%%"),
583                          qname, k, n, pass_string, human_offset, human_size,
584                          percent);
585                 }
586 
587               strcpy (previous_offset_buf, human_offset);
588               previous_human_offset = previous_offset_buf;
589               thresh = now + VERBOSE_UPDATE;
590 
591               /*
592                * Force periodic syncs to keep displayed progress accurate
593                * FIXME: Should these be present even if -v is not enabled,
594                * to keep the buffer cache from filling with dirty pages?
595                * It's a common problem with programs that do lots of writes,
596                * like mkfs.
597                */
598               if (dosync (fd, qname) != 0)
599                 {
600                   if (errno != EIO)
601                     {
602                       other_error = true;
603                       goto free_pattern_mem;
604                     }
605                   write_error = true;
606                 }
607             }
608         }
609     }
610 
611   /* Force what we just wrote to hit the media. */
612   if (dosync (fd, qname) != 0)
613     {
614       if (errno != EIO)
615         {
616           other_error = true;
617           goto free_pattern_mem;
618         }
619       write_error = true;
620     }
621 
622 free_pattern_mem:
623   free (fill_pattern_mem);
624 
625   return other_error ? -1 : write_error;
626 }
627 
628 /*
629  * The passes start and end with a random pass, and the passes in between
630  * are done in random order.  The idea is to deprive someone trying to
631  * reverse the process of knowledge of the overwrite patterns, so they
632  * have the additional step of figuring out what was done to the disk
633  * before they can try to reverse or cancel it.
634  *
635  * First, all possible 1-bit patterns.  There are two of them.
636  * Then, all possible 2-bit patterns.  There are four, but the two
637  * which are also 1-bit patterns can be omitted.
638  * Then, all possible 3-bit patterns.  Likewise, 8-2 = 6.
639  * Then, all possible 4-bit patterns.  16-4 = 12.
640  *
641  * The basic passes are:
642  * 1-bit: 0x000, 0xFFF
643  * 2-bit: 0x555, 0xAAA
644  * 3-bit: 0x249, 0x492, 0x924, 0x6DB, 0xB6D, 0xDB6 (+ 1-bit)
645  *        100100100100         110110110110
646  *           9   2   4            D   B   6
647  * 4-bit: 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
648  *        0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE (+ 1-bit, 2-bit)
649  * Adding three random passes at the beginning, middle and end
650  * produces the default 25-pass structure.
651  *
652  * The next extension would be to 5-bit and 6-bit patterns.
653  * There are 30 uncovered 5-bit patterns and 64-8-2 = 46 uncovered
654  * 6-bit patterns, so they would increase the time required
655  * significantly.  4-bit patterns are enough for most purposes.
656  *
657  * The main gotcha is that this would require a trickier encoding,
658  * since lcm(2,3,4) = 12 bits is easy to fit into an int, but
659  * lcm(2,3,4,5) = 60 bits is not.
660  *
661  * One extension that is included is to complement the first bit in each
662  * 512-byte block, to alter the phase of the encoded data in the more
663  * complex encodings.  This doesn't apply to MFM, so the 1-bit patterns
664  * are considered part of the 3-bit ones and the 2-bit patterns are
665  * considered part of the 4-bit patterns.
666  *
667  *
668  * How does the generalization to variable numbers of passes work?
669  *
670  * Here's how...
671  * Have an ordered list of groups of passes.  Each group is a set.
672  * Take as many groups as will fit, plus a random subset of the
673  * last partial group, and place them into the passes list.
674  * Then shuffle the passes list into random order and use that.
675  *
676  * One extra detail: if we can't include a large enough fraction of the
677  * last group to be interesting, then just substitute random passes.
678  *
679  * If you want more passes than the entire list of groups can
680  * provide, just start repeating from the beginning of the list.
681  */
682 static int const
683   patterns[] =
684 {
685   -2,				/* 2 random passes */
686   2, 0x000, 0xFFF,		/* 1-bit */
687   2, 0x555, 0xAAA,		/* 2-bit */
688   -1,				/* 1 random pass */
689   6, 0x249, 0x492, 0x6DB, 0x924, 0xB6D, 0xDB6,	/* 3-bit */
690   12, 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
691   0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE,	/* 4-bit */
692   -1,				/* 1 random pass */
693         /* The following patterns have the first bit per block flipped */
694   8, 0x1000, 0x1249, 0x1492, 0x16DB, 0x1924, 0x1B6D, 0x1DB6, 0x1FFF,
695   14, 0x1111, 0x1222, 0x1333, 0x1444, 0x1555, 0x1666, 0x1777,
696   0x1888, 0x1999, 0x1AAA, 0x1BBB, 0x1CCC, 0x1DDD, 0x1EEE,
697   -1,				/* 1 random pass */
698   0				/* End */
699 };
700 
701 /*
702  * Generate a random wiping pass pattern with num passes.
703  * This is a two-stage process.  First, the passes to include
704  * are chosen, and then they are shuffled into the desired
705  * order.
706  */
707 static void
genpattern(int * dest,size_t num,struct randint_source * s)708 genpattern (int *dest, size_t num, struct randint_source *s)
709 {
710   size_t randpasses;
711   int const *p;
712   int *d;
713   size_t n;
714   size_t accum, top, swap;
715   int k;
716 
717   if (!num)
718     return;
719 
720   /* Stage 1: choose the passes to use */
721   p = patterns;
722   randpasses = 0;
723   d = dest;			/* Destination for generated pass list */
724   n = num;			/* Passes remaining to fill */
725 
726   while (true)
727     {
728       k = *p++;			/* Block descriptor word */
729       if (!k)
730         {			/* Loop back to the beginning */
731           p = patterns;
732         }
733       else if (k < 0)
734         {			/* -k random passes */
735           k = -k;
736           if ((size_t) k >= n)
737             {
738               randpasses += n;
739               break;
740             }
741           randpasses += k;
742           n -= k;
743         }
744       else if ((size_t) k <= n)
745         {			/* Full block of patterns */
746           memcpy (d, p, k * sizeof (int));
747           p += k;
748           d += k;
749           n -= k;
750         }
751       else if (n < 2 || 3 * n < (size_t) k)
752         {			/* Finish with random */
753           randpasses += n;
754           break;
755         }
756       else
757         {			/* Pad out with n of the k available */
758           do
759             {
760               if (n == (size_t) k || randint_choose (s, k) < n)
761                 {
762                   *d++ = *p;
763                   n--;
764                 }
765               p++;
766               k--;
767             }
768           while (n);
769           break;
770         }
771     }
772   top = num - randpasses;	/* Top of initialized data */
773   /* assert (d == dest+top); */
774 
775   /*
776    * We now have fixed patterns in the dest buffer up to
777    * "top", and we need to scramble them, with "randpasses"
778    * random passes evenly spaced among them.
779    *
780    * We want one at the beginning, one at the end, and
781    * evenly spaced in between.  To do this, we basically
782    * use Bresenham's line draw (a.k.a DDA) algorithm
783    * to draw a line with slope (randpasses-1)/(num-1).
784    * (We use a positive accumulator and count down to
785    * do this.)
786    *
787    * So for each desired output value, we do the following:
788    * - If it should be a random pass, copy the pass type
789    *   to top++, out of the way of the other passes, and
790    *   set the current pass to -1 (random).
791    * - If it should be a normal pattern pass, choose an
792    *   entry at random between here and top-1 (inclusive)
793    *   and swap the current entry with that one.
794    */
795   randpasses--;			/* To speed up later math */
796   accum = randpasses;		/* Bresenham DDA accumulator */
797   for (n = 0; n < num; n++)
798     {
799       if (accum <= randpasses)
800         {
801           accum += num - 1;
802           dest[top++] = dest[n];
803           dest[n] = -1;
804         }
805       else
806         {
807           swap = n + randint_choose (s, top - n);
808           k = dest[n];
809           dest[n] = dest[swap];
810           dest[swap] = k;
811         }
812       accum -= randpasses;
813     }
814   /* assert (top == num); */
815 }
816 
817 /*
818  * The core routine to actually do the work.  This overwrites the first
819  * size bytes of the given fd.  Return true if successful.
820  */
821 static bool
do_wipefd(int fd,char const * qname,struct randint_source * s,struct Options const * flags)822 do_wipefd (int fd, char const *qname, struct randint_source *s,
823            struct Options const *flags)
824 {
825   size_t i;
826   struct stat st;
827   off_t size;		/* Size to write, size to read */
828   off_t i_size = 0;	/* For small files, initial size to overwrite inode */
829   unsigned long int n;	/* Number of passes for printing purposes */
830   int *passarray;
831   bool ok = true;
832   struct randread_source *rs;
833 
834   n = 0;		/* dopass takes n == 0 to mean "don't print progress" */
835   if (flags->verbose)
836     n = flags->n_iterations + flags->zero_fill;
837 
838   if (fstat (fd, &st))
839     {
840       error (0, errno, _("%s: fstat failed"), qname);
841       return false;
842     }
843 
844   /* If we know that we can't possibly shred the file, give up now.
845      Otherwise, we may go into an infinite loop writing data before we
846      find that we can't rewind the device.  */
847   if ((S_ISCHR (st.st_mode) && isatty (fd))
848       || S_ISFIFO (st.st_mode)
849       || S_ISSOCK (st.st_mode))
850     {
851       error (0, 0, _("%s: invalid file type"), qname);
852       return false;
853     }
854   else if (S_ISREG (st.st_mode) && st.st_size < 0)
855     {
856       error (0, 0, _("%s: file has negative size"), qname);
857       return false;
858     }
859 
860   /* Allocate pass array */
861   passarray = xnmalloc (flags->n_iterations, sizeof *passarray);
862 
863   size = flags->size;
864   if (size == -1)
865     {
866       if (S_ISREG (st.st_mode))
867         {
868           size = st.st_size;
869 
870           if (! flags->exact)
871             {
872               /* Round up to the nearest block size to clear slack space.  */
873               off_t remainder = size % ST_BLKSIZE (st);
874               if (size && size < ST_BLKSIZE (st))
875                 i_size = size;
876               if (remainder != 0)
877                 {
878                   off_t size_incr = ST_BLKSIZE (st) - remainder;
879                   size += MIN (size_incr, OFF_T_MAX - size);
880                 }
881             }
882         }
883       else
884         {
885           /* The behavior of lseek is unspecified, but in practice if
886              it returns a positive number that's the size of this
887              device.  */
888           size = lseek (fd, 0, SEEK_END);
889           if (size <= 0)
890             {
891               /* We are unable to determine the length, up front.
892                  Let dopass do that as part of its first iteration.  */
893               size = -1;
894             }
895         }
896     }
897   else if (S_ISREG (st.st_mode)
898            && st.st_size < MIN (ST_BLKSIZE (st), size))
899     i_size = st.st_size;
900 
901   /* Schedule the passes in random order. */
902   genpattern (passarray, flags->n_iterations, s);
903 
904   rs = randint_get_source (s);
905 
906   while (true)
907     {
908       off_t pass_size;
909       unsigned long int pn = n;
910 
911       if (i_size)
912         {
913           pass_size = i_size;
914           i_size = 0;
915           pn = 0;
916         }
917       else if (size)
918         {
919           pass_size = size;
920           size = 0;
921         }
922       /* TODO: consider handling tail packing by
923          writing the tail padding as a separate pass,
924          (that would not rewind).  */
925       else
926         break;
927 
928       for (i = 0; i < flags->n_iterations + flags->zero_fill; i++)
929         {
930           int err = 0;
931           int type = i < flags->n_iterations ? passarray[i] : 0;
932 
933           err = dopass (fd, &st, qname, &pass_size, type, rs, i + 1, pn);
934 
935           if (err)
936             {
937               ok = false;
938               if (err < 0)
939                 goto wipefd_out;
940             }
941         }
942     }
943 
944   /* Now deallocate the data.  The effect of ftruncate is specified
945      on regular files and shared memory objects (also directories, but
946      they are not possible here); don't worry about errors reported
947      for other file types.  */
948 
949   if (flags->remove_file && ftruncate (fd, 0) != 0
950       && (S_ISREG (st.st_mode) || S_TYPEISSHM (&st)))
951     {
952       error (0, errno, _("%s: error truncating"), qname);
953       ok = false;
954       goto wipefd_out;
955     }
956 
957 wipefd_out:
958   free (passarray);
959   return ok;
960 }
961 
962 /* A wrapper with a little more checking for fds on the command line */
963 static bool
wipefd(int fd,char const * qname,struct randint_source * s,struct Options const * flags)964 wipefd (int fd, char const *qname, struct randint_source *s,
965         struct Options const *flags)
966 {
967   int fd_flags = fcntl (fd, F_GETFL);
968 
969   if (fd_flags < 0)
970     {
971       error (0, errno, _("%s: fcntl failed"), qname);
972       return false;
973     }
974   if (fd_flags & O_APPEND)
975     {
976       error (0, 0, _("%s: cannot shred append-only file descriptor"), qname);
977       return false;
978     }
979   return do_wipefd (fd, qname, s, flags);
980 }
981 
982 /* --- Name-wiping code --- */
983 
984 /* Characters allowed in a file name - a safe universal set.  */
985 static char const nameset[] =
986 "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.";
987 
988 /* Increment NAME (with LEN bytes).  NAME must be a big-endian base N
989    number with the digits taken from nameset.  Return true if successful.
990    Otherwise, (because NAME already has the greatest possible value)
991    return false.  */
992 
993 static bool
incname(char * name,size_t len)994 incname (char *name, size_t len)
995 {
996   while (len--)
997     {
998       char const *p = strchr (nameset, name[len]);
999 
1000       /* Given that NAME is composed of bytes from NAMESET,
1001          P will never be NULL here.  */
1002       assert (p);
1003 
1004       /* If this character has a successor, use it.  */
1005       if (p[1])
1006         {
1007           name[len] = p[1];
1008           return true;
1009         }
1010 
1011       /* Otherwise, set this digit to 0 and increment the prefix.  */
1012       name[len] = nameset[0];
1013     }
1014 
1015   return false;
1016 }
1017 
1018 /*
1019  * Repeatedly rename a file with shorter and shorter names,
1020  * to obliterate all traces of the file name (and length) on any system
1021  * that adds a trailing delimiter to on-disk file names and reuses
1022  * the same directory slot.  Finally, unlink it.
1023  * The passed-in filename is modified in place to the new filename.
1024  * (Which is unlinked if this function succeeds, but is still present if
1025  * it fails for some reason.)
1026  *
1027  * The main loop is written carefully to not get stuck if all possible
1028  * names of a given length are occupied.  It counts down the length from
1029  * the original to 0.  While the length is non-zero, it tries to find an
1030  * unused file name of the given length.  It continues until either the
1031  * name is available and the rename succeeds, or it runs out of names
1032  * to try (incname wraps and returns 1).  Finally, it unlinks the file.
1033  *
1034  * The unlink is Unix-specific, as ANSI-standard remove has more
1035  * portability problems with C libraries making it "safe".  rename
1036  * is ANSI-standard.
1037  *
1038  * To force the directory data out, we try to open the directory and
1039  * invoke fdatasync and/or fsync on it.  This is non-standard, so don't
1040  * insist that it works: just fall back to a global sync in that case.
1041  * This is fairly significantly Unix-specific.  Of course, on any
1042  * file system with synchronous metadata updates, this is unnecessary.
1043  */
1044 static bool
wipename(char * oldname,char const * qoldname,struct Options const * flags)1045 wipename (char *oldname, char const *qoldname, struct Options const *flags)
1046 {
1047   char *newname = xstrdup (oldname);
1048   char *base = last_component (newname);
1049   char *dir = dir_name (newname);
1050   char *qdir = xstrdup (quotef (dir));
1051   bool first = true;
1052   bool ok = true;
1053   int dir_fd = -1;
1054 
1055   if (flags->remove_file == remove_wipesync)
1056     dir_fd = open (dir, O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK);
1057 
1058   if (flags->verbose)
1059     error (0, 0, _("%s: removing"), qoldname);
1060 
1061   if (flags->remove_file != remove_unlink)
1062     for (size_t len = base_len (base); len != 0; len--)
1063       {
1064         memset (base, nameset[0], len);
1065         base[len] = 0;
1066         bool rename_ok;
1067         while (! (rename_ok = (renameatu (AT_FDCWD, oldname, AT_FDCWD, newname,
1068                                           RENAME_NOREPLACE)
1069                                == 0))
1070                && errno == EEXIST && incname (base, len))
1071           continue;
1072         if (rename_ok)
1073           {
1074             if (0 <= dir_fd && dosync (dir_fd, qdir) != 0)
1075               ok = false;
1076             if (flags->verbose)
1077               {
1078                 /* People seem to understand this better than talking
1079                    about renaming OLDNAME.  NEWNAME doesn't need
1080                    quoting because we picked it.  OLDNAME needs to be
1081                    quoted only the first time.  */
1082                 char const *old = first ? qoldname : oldname;
1083                 error (0, 0,
1084                        _("%s: renamed to %s"), old, newname);
1085                 first = false;
1086               }
1087             memcpy (oldname + (base - newname), base, len + 1);
1088           }
1089       }
1090 
1091   if (unlink (oldname) != 0)
1092     {
1093       error (0, errno, _("%s: failed to remove"), qoldname);
1094       ok = false;
1095     }
1096   else if (flags->verbose)
1097     error (0, 0, _("%s: removed"), qoldname);
1098   if (0 <= dir_fd)
1099     {
1100       if (dosync (dir_fd, qdir) != 0)
1101         ok = false;
1102       if (close (dir_fd) != 0)
1103         {
1104           error (0, errno, _("%s: failed to close"), qdir);
1105           ok = false;
1106         }
1107     }
1108   free (newname);
1109   free (dir);
1110   free (qdir);
1111   return ok;
1112 }
1113 
1114 /*
1115  * Finally, the function that actually takes a filename and grinds
1116  * it into hamburger.
1117  *
1118  * FIXME
1119  * Detail to note: since we do not restore errno to EACCES after
1120  * a failed chmod, we end up printing the error code from the chmod.
1121  * This is actually the error that stopped us from proceeding, so
1122  * it's arguably the right one, and in practice it'll be either EACCES
1123  * again or EPERM, which both give similar error messages.
1124  * Does anyone disagree?
1125  */
1126 static bool
wipefile(char * name,char const * qname,struct randint_source * s,struct Options const * flags)1127 wipefile (char *name, char const *qname,
1128           struct randint_source *s, struct Options const *flags)
1129 {
1130   bool ok;
1131   int fd;
1132 
1133   fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY);
1134   if (fd < 0
1135       && (errno == EACCES && flags->force)
1136       && chmod (name, S_IWUSR) == 0)
1137     fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY);
1138   if (fd < 0)
1139     {
1140       error (0, errno, _("%s: failed to open for writing"), qname);
1141       return false;
1142     }
1143 
1144   ok = do_wipefd (fd, qname, s, flags);
1145   if (close (fd) != 0)
1146     {
1147       error (0, errno, _("%s: failed to close"), qname);
1148       ok = false;
1149     }
1150   if (ok && flags->remove_file)
1151     ok = wipename (name, qname, flags);
1152   return ok;
1153 }
1154 
1155 
1156 /* Buffers for random data.  */
1157 static struct randint_source *randint_source;
1158 
1159 /* Just on general principles, wipe buffers containing information
1160    that may be related to the possibly-pseudorandom values used during
1161    shredding.  */
1162 static void
clear_random_data(void)1163 clear_random_data (void)
1164 {
1165   randint_all_free (randint_source);
1166 }
1167 
1168 
1169 int
main(int argc,char ** argv)1170 main (int argc, char **argv)
1171 {
1172   bool ok = true;
1173   struct Options flags = { 0, };
1174   char **file;
1175   int n_files;
1176   int c;
1177   int i;
1178   char const *random_source = NULL;
1179 
1180   initialize_main (&argc, &argv);
1181   set_program_name (argv[0]);
1182   setlocale (LC_ALL, "");
1183   bindtextdomain (PACKAGE, LOCALEDIR);
1184   textdomain (PACKAGE);
1185 
1186   atexit (close_stdout);
1187 
1188   flags.n_iterations = DEFAULT_PASSES;
1189   flags.size = -1;
1190 
1191   while ((c = getopt_long (argc, argv, "fn:s:uvxz", long_opts, NULL)) != -1)
1192     {
1193       switch (c)
1194         {
1195         case 'f':
1196           flags.force = true;
1197           break;
1198 
1199         case 'n':
1200           flags.n_iterations = xdectoumax (optarg, 0,
1201                                            MIN (ULONG_MAX,
1202                                                 SIZE_MAX / sizeof (int)), "",
1203                                            _("invalid number of passes"), 0);
1204           break;
1205 
1206         case RANDOM_SOURCE_OPTION:
1207           if (random_source && !STREQ (random_source, optarg))
1208             die (EXIT_FAILURE, 0, _("multiple random sources specified"));
1209           random_source = optarg;
1210           break;
1211 
1212         case 'u':
1213           if (optarg == NULL)
1214             flags.remove_file = remove_wipesync;
1215           else
1216             flags.remove_file = XARGMATCH ("--remove", optarg,
1217                                            remove_args, remove_methods);
1218           break;
1219 
1220         case 's':
1221           flags.size = xnumtoumax (optarg, 0, 0, OFF_T_MAX, "cbBkKMGTPEZY0",
1222                                    _("invalid file size"), 0);
1223           break;
1224 
1225         case 'v':
1226           flags.verbose = true;
1227           break;
1228 
1229         case 'x':
1230           flags.exact = true;
1231           break;
1232 
1233         case 'z':
1234           flags.zero_fill = true;
1235           break;
1236 
1237         case_GETOPT_HELP_CHAR;
1238 
1239         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1240 
1241         default:
1242           usage (EXIT_FAILURE);
1243         }
1244     }
1245 
1246   file = argv + optind;
1247   n_files = argc - optind;
1248 
1249   if (n_files == 0)
1250     {
1251       error (0, 0, _("missing file operand"));
1252       usage (EXIT_FAILURE);
1253     }
1254 
1255   randint_source = randint_all_new (random_source, SIZE_MAX);
1256   if (! randint_source)
1257     die (EXIT_FAILURE, errno, "%s", quotef (random_source));
1258   atexit (clear_random_data);
1259 
1260   for (i = 0; i < n_files; i++)
1261     {
1262       char *qname = xstrdup (quotef (file[i]));
1263       if (STREQ (file[i], "-"))
1264         {
1265           ok &= wipefd (STDOUT_FILENO, qname, randint_source, &flags);
1266         }
1267       else
1268         {
1269           /* Plain filename - Note that this overwrites *argv! */
1270           ok &= wipefile (file[i], qname, randint_source, &flags);
1271         }
1272       free (qname);
1273     }
1274 
1275   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
1276 }
1277 /*
1278  * vim:sw=2:sts=2:
1279  */
1280