1 /*  granulate.c - version 0.2 - 2001/07/01
2 
3     Original code: Copyright (c) 2001 by Kurt Rosenfeld (hardware@cyberspace.org)
4     Win32 port: Copyright (c) 2001 by Dr.BiC (drbic@exe2bin.com)
5 
6     Granulate 0.2 Home Page: http://exe2bin.com/granulate/
7     Granulate 0.1 Official ftp: ftp://134.74.16.12/pub/granulate-0.1.tar.gz
8 
9     This program is free software; you can redistribute it and/or modify
10     it under the terms of the GNU General Public License as published by
11     the Free Software Foundation; either version 2 of the License, or
12     (at your option) any later version.
13 
14     This program is distributed in the hope that it will be useful,
15     but WITHOUT ANY WARRANTY; without even the implied warranty of
16     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17     GNU General Public License for more details.
18 
19     You should have received a copy of the GNU General Public License
20     along with this program; if not, write to the Free Software
21     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22 */
23 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>

#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/* Size in bytes of the copy buffer, i.e. the most data moved per read() call. */
#define BUFSIZE 1024
/* Number of distinct values each digit of the filename counter can take. */
#define RADIX 26
/* Character for digit value 0; digit d is emitted as BASE_SYMBOL + d. */
#define BASE_SYMBOL 'a'
35 
36 #ifdef _WIN32
37 #include <windows.h>
38 #include <direct.h>
39 #include <errno.h>
40 #include <ctype.h>
41 #ifdef _MSC_VER
42 # include <io.h>
43 #endif /* _MSC_VER */
44 #endif /* _WIN32 */
45 
46 
/* Write prefix followed by the counter's digits (as characters) into name. */
void makename(int *counter, char *name, int n_digits, char *prefix);

/* Step the n_digits-wide counter down by one. */
void decrement(int *counter, int n_digits);

/* Count the suffix digits needed for in_size bytes cut into seg_size pieces. */
int calculate_digits(int in_size, int seg_size);

#ifdef _WIN32
/* Replacement for the POSIX call; defined at the end of this file. */
int ftruncate(int fd, unsigned size);
#endif
54 
main(int argc,char ** argv)55 int main (int argc, char **argv) {
56 	struct stat in_stat;
57 	int in_fd, out_fd, bytes_read;
58 	int *counter, n_digits, i, seg_ptr, prefix_size;
59 	int n_segs, mod_segs, in_size, seg_size;
60 	char buf[BUFSIZE], *name;
61 
62 	if (argc != 4) {
63 		fprintf(stderr, "Usage: granulate seg_size infile outfile_prefix\n");
64 		exit(1);
65 	}
66 
67 	/* This is the maximum size for our output files. */
68 	seg_size = atoi(argv[1]);
69 	if (seg_size < 1) {
70 		fprintf(stderr, "seg_size must be >= 1\n");
71 		exit(1);
72 	}
73 
74 	/* Get stats on input file.  We need to know the size. */
75 	if	(stat(argv[2], &in_stat) == -1) {
76 		fprintf(stderr, "couldn't stat() %s\n", argv[2]);
77 		exit(1);
78 	}
79 
80 	/* A prefix for output filenames is specified on the command line. */
81 	prefix_size = strlen(argv[3]);
82 
83 	/* in_size holds the original size of the input file */
84 	in_size = in_stat.st_size;
85 
86 	/* the number of digits needed for uniqueness of output filenames */
87 	n_digits = calculate_digits(in_size, seg_size);
88 
89 	/* Counter is used to generate reverse sequential filenames. */
90 	if ((counter = (int *) malloc(n_digits * sizeof(int))) == NULL) {
91 		fprintf(stderr, "malloc() failed for counter\n");
92 		exit(1);
93 	}
94 
95 	/* Initialize counter to max value.  We'll count down. */
96 	for (i = 0; i < n_digits; i++) counter[i] = RADIX - 1;
97 
98 	/* Name is a string buffer for assembling filenames. */
99 	if ((name = (char *) malloc(prefix_size + n_digits + 1)) == NULL) {
100 		fprintf(stderr, "malloc() failed for name\n");
101 		exit(1);
102 	}
103 
104 	/* Open the input file. */
105 	#ifdef _WIN32
106 	if ((in_fd = open(argv[2], O_RDWR|O_BINARY)) == -1) {
107 	#else
108 	if ((in_fd = open(argv[2], O_RDWR)) == -1) {
109 	#endif
110 		fprintf(stderr, "open %s failed.\n", argv[1]);
111 		exit(1);
112 	}
113 
114 	if (seg_size > in_size) {
115 		fprintf(stderr, "error: seg_size greater than infile size\n");
116 		exit(1);
117 	}
118 
119 	/* set the pointer to the end of infile */
120 	seg_ptr = in_size;
121 
122 	/* *********************************************************** */
123 	/* In this loop, we copy segments of seg_size bytes from the   */
124 	/* end of the input file to (hopefully) uniquely named output  */
125 	/* files.  A new output file is created on each pass, and they */
126 	/* are named in reverse-lexicographical order.  Each time we   */
127 	/* finish writing to the output file, we truncate the input    */
128 	/* file, reducing its size by seg_size.  Maximum disk usage    */
129 	/* occurs just before truncating.  This is the size of the     */
130 	/* input file plus seg_size.  Making seg_size very small is    */
131 	/* not beneficial because filesystem overhead takes over.      */
132 	/* *********************************************************** */
133 	do {
134 		seg_ptr -= seg_size;
135 
136 		/* trap the pointer when we get near the end (beginning, actually) */
137 		if (seg_ptr < 0) seg_ptr = 0;
138 
139 		if (lseek(in_fd, seg_ptr, SEEK_SET) == -1) {
140 			fprintf(stderr, "lseek() failed\n");
141 			exit(1);
142 		}
143 
144 		/* after this call, name will be a filename, i.e. "zzxc" */
145 		makename(counter, name, n_digits, argv[3]);
146 
147 		/* after this call, name will be decremented, i.e. "zzxb" */
148 		decrement(counter, n_digits);
149 
150 		/* Open the output file */
151 		#ifdef _WIN32
152 		if ((out_fd = open(name, O_RDWR|O_CREAT|O_BINARY, S_IRUSR|S_IWUSR)) == -1) {
153 		#else
154 		if ((out_fd = open(name, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) == -1) {
155 		#endif
156 			fprintf(stderr, "open %s failed.\n", argv[3]);
157 			exit(1);
158 		}
159 
160 		/* Copy from sought position to the end, to the output file. */
161 		while((bytes_read = read(in_fd, buf, BUFSIZE)) != 0) {
162 			if (write (out_fd, buf, bytes_read) != bytes_read) {
163 				fprintf(stderr, "couldn't write to output file\n");
164 				exit(1);
165 			}
166 		}
167 		close(out_fd);
168 
169 		/* Truncate the input file at the point where we copied from. */
170 		if ((ftruncate(in_fd, seg_ptr)) == -1) {
171 			perror("ftruncate(): ");
172 			exit(1);
173 		}
174 	} while (seg_ptr != 0);
175 
176 	close(in_fd);
177 
178 	/* The input file has been truncated to nothing.  Now we delete. */
179 	if (remove(argv[2]) == -1) {
180 		fprintf(stderr, "remove() failed\n");
181 		exit(1);
182 	}
183 	return 0;
184 }
185 
186 void makename(int *counter, char *name, int n_digits, char *prefix) {
187 	int i, j, prefix_size;
188 	prefix_size = strlen(prefix);
189 
190 	/* Yes, I have heard of strncpy. */
191 	for (i = 0; i < prefix_size; i++) {
192 		name[i] = prefix[i];
193 	}
194 	/* At this point, i is the index of the first char after the prefix */
195 
196 	/* BASE_SYMBOL serves as an offset into the ASCII table */
197 	for (j = 0; j < n_digits; j++) {
198 		name[i + j] = (char) counter[j] + BASE_SYMBOL;
199 	}
200 	name[i + j]  = '\0';
201 }
202 
203 void decrement(int *counter, int n_digits) {
204 	/* This function implements a somewhat generalized down-counter */
205 	/* Each digit is stored as an int, and the radix is arbitrary;  */
206 	/* The least siginificant digit is stored in the element with   */
207 	/* the maximum index.  There is no profound reason for this.    */
208 
209 	int i, carry = 0;
210  	i = n_digits - 1;
211 
212 	/* This is the actual decrement operation */
213 	counter[i]--;
214 
215 	/* Now we just propagate the carry, if any */
216 	for (i = n_digits - 1; i >= 0; i--) {
217 		counter[i] -= carry;
218 		if (counter[i] < 0) {
219 			counter[i] += RADIX;
220 			carry = 1;
221 		}
222 		else carry = 0;
223 	}
224 }
225 
226 int calculate_digits(int in_size, int seg_size) {
227 	/* ********************************************************* */
228 	/* n_segs is the number of output files we will generate     */
229 	/* We need enough digits so that they can be uniquely named. */
230 	/* We calculate the base-RADIX log of n_segs by repeatedly   */
231 	/* dividing by RADIX until the quotient becomes zero.        */
232 	/* ********************************************************* */
233 
234 	int n_segs, n_digits = 0;
235 	n_segs = (in_size / seg_size) + 1;
236 	do {
237 		n_segs /= RADIX;
238 		n_digits++;
239 	} while (n_segs > 0);
240 	return n_digits;
241 }
242 
243 
244 #ifdef _WIN32
245 /* ftruncate for Win32 has been ripped off glib package:
246  * ---
247  * GLIB - Library of useful routines for C programming
248  * Copyright (C) 1995-1998  Peter Mattis, Spencer Kimball and Josh MacDonald
249  *
250  * Modified by the GLib Team and others 1997-1999.  See the AUTHORS
251  * file for a list of people on the GLib Team.  See the ChangeLog
252  * files for a list of changes.  These files are distributed with
253  * GLib at ftp://ftp.gtk.org/pub/gtk/.
254  *
255  * GLIB library under the GNU Library General Public License version 2 or later
256  * ---
257  * Code adapted for granulate utility by Dr.BiC (drbic@exe2bin.com)
258  */
259 
260 int ftruncate (int  fd, unsigned size)
261 {
262   HANDLE hfile;
263   int curpos;
264 
265   hfile = (HANDLE) _get_osfhandle (fd);
266   curpos = SetFilePointer (hfile, 0, NULL, FILE_CURRENT);
267   if (curpos == 0xFFFFFFFF
268       || SetFilePointer (hfile, size, NULL, FILE_BEGIN) == 0xFFFFFFFF
269       || !SetEndOfFile (hfile))
270     {
271       int error = GetLastError ();
272 
273       switch (error)
274         {
275         case ERROR_INVALID_HANDLE:
276           errno = EBADF;
277           break;
278         default:
279           errno = EIO;
280           break;
281         }
282 
283       return -1;
284     }
285 
286   return 0;
287 }
288 #endif
289