1 typedef struct _RcsWalker               RcsWalker;
2 typedef struct _RcsFile                 RcsFile;
3 typedef struct _RcsVersion              RcsVersion;
4 typedef struct _RcsStats                RcsStats;
5 typedef struct _IntStat                 IntStat;
6 typedef struct _DblStat                 DblStat;
7 typedef struct _BinCounter              BinCounter;
8 typedef struct _ConfigOption            ConfigOption;
9 
10 struct _RcsWalker {
11   void*    (* initialize)    (void);
12   int      (* finalize)      (RcsStats* stats, void* data);
13   int      (* onefile)       (RcsFile* rcs, RcsStats* stats, void* data);
14   int      (* dateorder)     (RcsFile* rcs, RcsVersion* v, void* data);
15   int      (* delta_orig)    (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data);
16   int      (* delta_date)    (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data);
17   int      min_versions;
18   int      max_versions;
19   gboolean write_files;
20 };
21 
22 struct _RcsVersion {
23   RcsFile    *rcs;
24   time_t      date;
25   int         dateseq;
26   int         chain_length;
27   char       *vname;
28   off_t       size;
29   int         cc;
30   guint8*     segment;
31   char       *filename;
32   RcsVersion *parent;
33   GSList     *children;
34   guint       on_trunk : 1;
35 };
36 
37 struct _RcsFile {
38   char       *filename;
39   char       *copyname;
40   char       *headname;
41 
42   int         version_count;
43   int         forward_count;
44   int         reverse_count;
45   int         branch_count;
46 
47   RcsVersion *versions;
48   RcsVersion **versions_date;
49 
50   RcsVersion *head_version;
51   RcsVersion *root_version;
52 
53   off_t       total_size;
54 
55   guint       atflag : 1;
56 };
57 
58 struct _RcsStats {
59   BinCounter *avg_version_size;
60   IntStat* version_stat;
61   IntStat* forward_stat;
62   IntStat* reverse_stat;
63   IntStat* branch_stat;
64   IntStat* unencoded_stat;
65   IntStat* literal_stat;
66 };
67 
68 struct _IntStat {
69   const char* name;
70   int count;
71   long long sum;
72   long long min;
73   long long max;
74 
75   GArray *values;
76 };
77 
78 struct _DblStat {
79   const char* name;
80   int count;
81   double sum;
82   double min;
83   double max;
84 
85   GArray *values;
86 };
87 
88 struct _BinCounter {
89   const char *name;
90   GPtrArray  *bins;
91 };
92 
/* Argument requirement of a configuration option.
 * NOTE(review): the meanings are inferred from the names only; confirm
 * against config_parse().
 */
typedef enum _ConfigArgument {
  CO_Required,  /* presumably: an argument must be given */
  CO_Optional,  /* presumably: an argument may be given */
  CO_None       /* presumably: the option takes no argument */
} ConfigArgument;
100 
/* Type of the variable a configuration option is stored into.
 * NOTE(review): the exact C types are assumed from the names; confirm
 * against config_parse().
 */
typedef enum _ConfigOptionType {
  CD_Bool,
  CD_Int32,
  CD_Double,
  CD_String
} ConfigOptionType;
109 
/* How an option takes part in naming the output location.
 * NOTE(review): semantics are not visible in this part of the file;
 * confirm against the code that builds the output directory name.
 */
typedef enum _ConfigStyle {
  CS_Ignore,
  CS_UseAsFile,
  CS_Use
} ConfigStyle;
117 
118 struct _ConfigOption {
119   const char       *name;
120   const char       *abbrev;
121   ConfigStyle       style;
122   ConfigArgument    arg;
123   ConfigOptionType  type;
124   void             *value;
125   gboolean          found;
126 };
127 
128 /* RCS inspection stuff
129  */
130 
131 void                rcswalk_init   (void);
132 int            rcswalk        (RcsWalker *walker, const char* copy_base);
133 void                rcswalk_report (RcsStats* stats);
134 
135 IntStat*            stat_int_new      (const char* name);
136 void                stat_int_add_item (IntStat* stat, long long v);
137 void                stat_int_report   (IntStat* stat);
138 
139 DblStat*            stat_dbl_new      (const char* name);
140 void                stat_dbl_add_item (DblStat* stat, double v);
141 void                stat_dbl_report   (DblStat* stat);
142 
143 BinCounter*         stat_bincount_new      (const char* name);
144 void                stat_bincount_add_item (BinCounter* bc, int bin, double val);
145 void                stat_bincount_report   (BinCounter* bc);
146 
147 /* Experiment configuration stuff
148  */
149 
150 void                config_register   (ConfigOption *opts, int nopts);
151 int            config_parse      (const char* config_file);
152 int            config_done       (void);
153 void                config_help       (void);
154 void                config_set_string (const char* var, const char* val);
155 int            config_clear_dir  (const char* dir);
156 int            config_create_dir (const char* dir);
157 FILE*               config_output     (const char* fmt, ...);
158 
159 #ifdef __cplusplus
160 }
161 #endif
162 
163 #endif
164 #include "rcswalk.h"
165 #include "edsio.h"
166 #include <stdio.h>
167 #include <stdlib.h>
168 #include <string.h>
169 #include <sys/types.h>
170 #include <sys/stat.h>
171 #include <sys/wait.h>
172 #include <fcntl.h>
173 #include <errno.h>
174 #include <dirent.h>
175 #include <unistd.h>
176 #include <math.h>
177 
178 #undef BUFSIZE
179 #define BUFSIZE (1<<14)
180 
181 char       *tmp_file_1;
182 gboolean    tmp_file_1_free = TRUE;
183 char       *tmp_file_2;
184 gboolean    tmp_file_2_free = TRUE;
185 
186 int         skip_count;
187 int         small_count;
188 int         large_count;
189 int         process_count;
190 
191 extern time_t str2time (char const *, time_t, long);
192 
193 static guint8 readbuf[BUFSIZE];
194 
195 static const char* rcswalk_input_dir = NULL;
196 static const char* config_output_base = NULL;
197 static const char* config_output_dir = NULL;
198 static const char* rcswalk_experiment = NULL;
199 
200 static ConfigOption rcswalk_options[] = {
201   { "rcswalk_experiment", "ex", CS_Use,       CO_Required, CD_String, & rcswalk_experiment },
202   { "rcs_input_dir",      "id", CS_UseAsFile, CO_Required, CD_String, & rcswalk_input_dir }
203 };
204 
205 static ConfigOption config_options[] = {
206   { "config_output_base", "ob", CS_Ignore, CO_Required, CD_String, & config_output_base }
207 };
208 
209 
210 void
211 rcswalk_free_segment (RcsVersion *v)
212 {
213   if (v->segment)
214     g_free (v->segment);
215 
216   if (v->filename == tmp_file_1)
217     tmp_file_1_free = TRUE;
218   else if (v->filename == tmp_file_2)
219     tmp_file_2_free = TRUE;
220   else if (v->filename)
221     g_free (v->filename);
222 
223   v->segment = NULL;
224   v->filename = NULL;
225 }
226 
227 int
228 rcswalk_checkout (RcsFile* rcs, RcsWalker* walker, RcsVersion *v)
229 {
230   FILE* out;
231   char cmdbuf[1024];
232   int nread;
233   int alloc = BUFSIZE;
234   int pos = 0;
235 
236   sprintf (cmdbuf, "co -ko -p%s %s 2>/dev/null\n", v->vname, rcs->filename);
237 
238   g_assert (! v->segment);
239 
240   v->segment = g_malloc (alloc);
241 
242   if (! (out = popen (cmdbuf, "r")))
243     {
244       g_warning ("popen failed: %s: %s", cmdbuf, g_strerror (errno));
245       return errno;
246     }
247 
248   for (;;)
249     {
250       nread = fread (readbuf, 1, BUFSIZE, out);
251 
252       if (nread == 0)
253 	break;
254 
255       if (nread < 0)
256 	{
257 	  g_warning ("fread failed: %s", g_strerror (errno));
258 	  return errno;
259 	}
260 
261       if (pos + nread > alloc)
262 	{
263 	  alloc *= 2;
264 	  v->segment = g_realloc (v->segment, alloc);
265 	}
266 
267       memcpy (v->segment + pos, readbuf, nread);
268 
269       pos += nread;
270     }
271 
272   if (pclose (out) < 0)
273     {
274       g_warning ("pclose failed");
275       return errno;
276     }
277 
278   v->size = pos;
279 
280   if (walker->write_files)
281     {
282       char* file = NULL;
283 
284       if (! file && tmp_file_1_free)
285 	{
286 	  file = tmp_file_1;
287 	  tmp_file_1_free = FALSE;
288 	}
289 
290       if (! file && tmp_file_2_free)
291 	{
292 	  file = tmp_file_2;
293 	  tmp_file_2_free = FALSE;
294 	}
295 
296       g_assert (file);
297 
298       v->filename = file;
299 
300       if (! (out = fopen (file, "w")))
301 	{
302 	  g_warning ("fopen failed: %s\n", file);
303 	  return errno;
304 	}
305 
306       if (fwrite (v->segment, v->size, 1, out) != 1)
307 	{
308 	  g_warning ("fwrite failed: %s\n", file);
309 	  return errno;
310 	}
311 
312       if (fclose (out) < 0)
313 	{
314 	  g_warning ("fclose failed: %s\n", file);
315 	  return errno;
316 	}
317     }
318 
319   return 0;
320 }
321 
322 int
323 rcswalk_delta_date (RcsFile* rcs, RcsWalker* walker, void* data)
324 {
325   int i;
326   int ret;
327   RcsVersion *vf = NULL;
328   RcsVersion *vt = NULL;
329 
330   for (i = 0; i < (rcs->version_count-1); i += 1)
331     {
332       vf = rcs->versions_date[i+1];
333       vt = rcs->versions_date[i];
334 
335       if (! vt->segment && (ret = rcswalk_checkout (rcs, walker, vt))) {
336 	return ret;
337       }
338 
339       if ((ret = rcswalk_checkout (rcs, walker, vf))) {
340 	return ret;
341       }
342 
343       if ((ret = walker->delta_date (rcs, vf, vt, data))) {
344 	return ret;
345       }
346 
347       rcswalk_free_segment (vt);
348     }
349 
350   if (vf) rcswalk_free_segment (vf);
351   if (vt) rcswalk_free_segment (vt);
352 
353   return 0;
354 }
355 
356 int
357 rcswalk_delta_orig (RcsFile* rcs, RcsWalker* walker, RcsVersion* version, int *count, void* data)
358 {
359   int ret;
360   GSList *c;
361   RcsVersion *child;
362 
363   for (c = version->children; c; c = c->next)
364     {
365       gboolean reverse;
366 
367       child = c->data;
368 
369       if (! version->segment)
370 	{
371 	  if ((ret = rcswalk_checkout (rcs, walker, version))) {
372 	    return ret;
373 	  }
374 	}
375 
376       if ((ret = rcswalk_checkout (rcs, walker, child))) {
377 	return ret;
378       }
379 
380       reverse = version->on_trunk && child->on_trunk;
381 
382       (* count) += 1;
383 
384       if ((ret = walker->delta_orig (rcs, reverse ? child : version, reverse ? version : child, data))) {
385 	return ret;
386       }
387 
388       rcswalk_free_segment (version);
389 
390       if ((ret = rcswalk_delta_orig (rcs, walker, child, count, data))) {
391 	return ret;
392       }
393     }
394 
395   rcswalk_free_segment (version);
396   return 0;
397 }
398 
399 int
400 rcswalk_dateorder (RcsFile* rcs, RcsWalker *walker, RcsStats *stats, void* data)
401 {
402   int i, ret;
403 
404   for (i = 0; i < rcs->version_count; i += 1)
405     {
406       RcsVersion *v = rcs->versions_date[i];
407 
408       if ((ret = rcswalk_checkout (rcs, walker, v))) {
409 	return ret;
410       }
411 
412       stat_bincount_add_item (stats->avg_version_size, i, v->size);
413 
414       if ((ret = walker->dateorder (rcs, v, data))) {
415 	return ret;
416       }
417 
418       rcswalk_free_segment (v);
419     }
420 
421   return 0;
422 }
423 
424 gboolean
425 rcswalk_match (char** line_p, char* str)
426 {
427   int len = strlen (str);
428 
429   if (strncmp (*line_p, str, len) == 0)
430     {
431       (*line_p) += len;
432       return TRUE;
433     }
434 
435   return FALSE;
436 }
437 
438 void
439 rcswalk_find_parent (RcsFile *rcs, GHashTable* hash, RcsVersion *v)
440 {
441   char *lastdot;
442   char  mbuf[1024];
443   int   lastn;
444   RcsVersion *p;
445 
446   strcpy (mbuf, v->vname);
447 
448   if (! (lastdot = strchr (mbuf, '.')))
449     abort ();
450 
451   if (! (lastdot = strchr (lastdot+1, '.')))
452     v->on_trunk = TRUE;
453 
454   lastdot = strrchr (mbuf, '.');
455   lastn = atoi (lastdot + 1);
456 
457   do
458     {
459       if (lastn == 1)
460 	{
461 	  (*lastdot) = 0;
462 
463 	  if (strcmp (mbuf, "1") == 0)
464 	    {
465 	      /* Assuming the first version is always "1.1".
466 	       */
467 	      rcs->root_version = v;
468 	      return;
469 	    }
470 	  else if (! (lastdot = strrchr (mbuf, '.')))
471 	    {
472 	      int i = 1;
473 	      int br = atoi (mbuf) - 1;
474 	      RcsVersion *p2 = NULL;
475 
476 	      /* Now we have something like "2.1" and need to
477 	       * search for the highest "1.x" version.
478 	       */
479 
480 	      do
481 		{
482 		  sprintf (mbuf, "%d.%d", br, i++);
483 		  p = p2;
484 		}
485 	      while ((p2 = g_hash_table_lookup (hash, mbuf)));
486 
487 	      if (p == NULL)
488 		{
489 		  rcs->root_version = v;
490 		  return;
491 		}
492 
493 	      break;
494 	    }
495 	  else
496 	    {
497 	      /* 1.2.3.1 => 1.2 */
498 	      (*lastdot) = 0;
499 	      lastdot = strrchr (mbuf, '.');
500 	      lastn = atoi (lastdot + 1);
501 	    }
502 	}
503       else
504 	{
505 	  lastn -= 1;
506 	  sprintf (lastdot, ".%d", lastn);
507 	}
508     }
509   while (! (p = g_hash_table_lookup (hash, mbuf)));
510 
511   g_assert (p);
512 
513   v->parent = p;
514 
515   p->children = g_slist_prepend (p->children, v);
516 }
517 
518 int
519 rcswalk_traverse_graph (RcsFile* rcs, RcsVersion* version, RcsVersion *parent)
520 {
521   GSList *c;
522   int distance = -1;
523 
524   version->cc = g_slist_length (version->children);
525 
526   if (version->cc > 1)
527     rcs->branch_count += (version->cc - 1);
528 
529   if (parent)
530     {
531       /* Insure that there is proper date ordering. */
532       if (version->date <= parent->date)
533 	version->date = parent->date + 1;
534 
535       if (parent->on_trunk && version->on_trunk)
536 	rcs->reverse_count += 1;
537       else
538 	rcs->forward_count += 1;
539     }
540 
541   for (c = version->children; c; c = c->next)
542     {
543       int c_dist = rcswalk_traverse_graph (rcs, c->data, version);
544 
545       distance = MAX (distance, c_dist);
546     }
547 
548   if (version == rcs->head_version)
549     distance = 0;
550 
551   if (distance >= 0)
552     {
553       version->chain_length = distance;
554 
555       return distance + 1;
556     }
557 
558   return -1;
559 }
560 
561 void
562 rcswalk_compute_chain_length (RcsFile* rcs, RcsVersion* version, RcsVersion *parent)
563 {
564   GSList *c;
565 
566   if (! parent)
567     {
568       g_assert (version->chain_length >= 0);
569     }
570   else if (version->chain_length < 0)
571     {
572       version->chain_length = parent->chain_length + 1;
573     }
574 
575   for (c = version->children; c; c = c->next)
576     {
577       rcswalk_compute_chain_length (rcs, c->data, version);
578     }
579 }
580 
581 int
582 rcswalk_date_compare (const void* a, const void* b)
583 {
584   RcsVersion **ra = (void*) a;
585   RcsVersion **rb = (void*) b;
586 
587   return (*ra)->date - (*rb)->date;
588 }
589 
590 int
591 rcswalk_build_graph (RcsFile* rcs)
592 {
593   GHashTable* hash = g_hash_table_new (g_str_hash, g_str_equal);
594   int i;
595 
596   for (i = 0; i < rcs->version_count; i += 1)
597     g_hash_table_insert (hash, rcs->versions[i].vname, rcs->versions + i);
598 
599   for (i = 0; i < rcs->version_count; i += 1)
600     {
601       RcsVersion *v = rcs->versions + i;
602 
603       v->chain_length = -1;
604       v->rcs = rcs;
605 
606       rcswalk_find_parent (rcs, hash, v);
607     }
608 
609   rcs->head_version = g_hash_table_lookup (hash, rcs->headname);
610 
611   rcswalk_traverse_graph (rcs, rcs->root_version, NULL);
612 
613   rcswalk_compute_chain_length (rcs, rcs->root_version, NULL);
614 
615   for (i = 0; i < rcs->version_count; i += 1)
616     rcs->versions_date[i] = rcs->versions + i;
617 
618   qsort (rcs->versions_date, rcs->version_count, sizeof (RcsVersion*), & rcswalk_date_compare);
619 
620   for (i = 0; i < rcs->version_count; i += 1)
621     {
622       RcsVersion *v = rcs->versions_date[i];
623 
624       v->dateseq = i;
625     }
626 
627   g_hash_table_destroy (hash);
628 
629   return 0;
630 }
631 
632 #define HEAD_STATE 0
633 #define BAR_STATE 1
634 #define REV_STATE 2
635 #define DATE_STATE 3
636 
637 int
638 rcswalk_load (RcsFile *rcs, gboolean *skip)
639 {
640   FILE* rlog;
641   char cmdbuf[1024];
642   char oneline[1024], *oneline_p;
643   char rbuf[1024];
644   int version_i = 0, ret;
645   int read_state = HEAD_STATE;
646 
647   sprintf (cmdbuf, "rlog %s", rcs->filename);
648 
649   if (! (rlog = popen (cmdbuf, "r")))
650     {
651       g_warning ("popen failed: %s", cmdbuf);
652       return errno;
653     }
654 
655   rcs->headname = NULL;
656 
657   while (fgets (oneline, 1024, rlog))
658     {
659       oneline_p = oneline;
660 
661       if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "total revisions: "))
662 	{
663 	  if (sscanf (oneline_p, "%d", & rcs->version_count) != 1)
664 	    goto badscan;
665 
666 	  rcs->versions = g_new0 (RcsVersion, rcs->version_count);
667 	  rcs->versions_date = g_new (RcsVersion*, rcs->version_count);
668 	  read_state = BAR_STATE;
669 	}
670       else if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "head: "))
671 	{
672 	  if (sscanf (oneline_p, "%s", rbuf) != 1)
673 	    goto badscan;
674 
675 	  rcs->headname = g_strdup (rbuf);
676 	  read_state = HEAD_STATE; /* no change */
677 	}
678       else if (read_state == BAR_STATE && rcswalk_match (& oneline_p, "----------------------------"))
679 	{
680 	  read_state = REV_STATE;
681 	}
682       else if (read_state == REV_STATE && rcswalk_match (& oneline_p, "revision "))
683 	{
684 	  if (version_i >= rcs->version_count)
685 	    {
686 	      /* jkh likes to insert the rlog of one RCS file into the log
687 	       * message of another, and this can confuse things.  Why, oh why,
688 	       * doesn't rlog have an option to not print the log?
689 	       */
690 	      fprintf (stderr, "rcswalk: too many versions: skipping file %s\n", rcs->filename);
691 	      *skip = TRUE;
692 	      skip_count += 1;
693 	      pclose (rlog);
694 	      return 0;
695 	    }
696 
697 	  if (sscanf (oneline_p, "%s", rbuf) != 1)
698 	    goto badscan;
699 
700 	  rcs->versions[version_i].vname = g_strdup (rbuf);
701 	  read_state = DATE_STATE;
702 
703 	  g_assert (rcs->versions[version_i].vname);
704 	}
705       else if (read_state == DATE_STATE && rcswalk_match (& oneline_p, "date: "))
706 	{
707 	  char* semi = strchr (oneline_p, ';');
708 
709 	  if (! semi)
710 	    goto badscan;
711 
712 	  strncpy (rbuf, oneline_p, semi - oneline_p);
713 
714 	  rbuf[semi - oneline_p] = 0;
715 
716 	  rcs->versions[version_i].date = str2time (rbuf, 0, 0);
717 
718 	  version_i += 1;
719 	  read_state = BAR_STATE;
720 	}
721     }
722 
723   if (! rcs->headname)
724     {
725       fprintf (stderr, "rcswalk: no head version: skipping file %s\n", rcs->filename);
726       *skip = TRUE;
727       skip_count += 1;
728       pclose (rlog);
729       return 0;
730     }
731 
732   if (pclose (rlog) < 0)
733     {
734       g_warning ("pclose failed: %s", cmdbuf);
735       return errno;
736     }
737 
738   if ((ret = rcswalk_build_graph (rcs))) {
739     return ret;
740   }
741 
742   return 0;
743 
744  badscan:
745 
746   pclose (rlog);
747 
748   g_warning ("rlog syntax error");
749   return -1;
750 }
751 
752 void
753 rcswalk_free (RcsFile* rcs)
754 {
755   int i;
756 
757   for (i = 0; i < rcs->version_count; i += 1)
758     {
759       g_free (rcs->versions[i].vname);
760       g_slist_free (rcs->versions[i].children);
761     }
762 
763   g_free (rcs->filename);
764   g_free (rcs->headname);
765   g_free (rcs->versions);
766   g_free (rcs->versions_date);
767   g_free (rcs);
768 }
769 
770 int
771 rcswalk_one (char* rcsfile, char* copyfile, RcsWalker* walker, RcsStats* stats, void* data)
772 {
773   RcsFile* rcs;
774   int i, ret;
775   long long maxsize = 0;
776   gboolean skip = FALSE;
777 
778   rcs = g_new0 (RcsFile, 1);
779 
780   rcs->filename = g_strdup (rcsfile);
781   rcs->copyname = copyfile;
782 
783   if ((ret = rcswalk_load (rcs, & skip))) {
784     return ret;
785   }
786 
787   if (walker->min_versions > rcs->version_count)
788     {
789       small_count += 1;
790       skip = TRUE;
791     }
792 
793   if (walker->max_versions < rcs->version_count)
794     {
795       large_count += 1;
796       skip = TRUE;
797     }
798 
799   if (! skip)
800     {
801       process_count += 1;
802 
803       if (walker->dateorder && (ret = rcswalk_dateorder (rcs, walker, stats, data))) {
804 	return ret;
805       }
806 
807       if (walker->delta_orig)
808 	{
809 	  int count = 0;
810 
811 	  if ((ret = rcswalk_delta_orig (rcs, walker, rcs->root_version, & count, data))) {
812 	    return ret;
813 	  }
814 
815 	  g_assert (count == (rcs->version_count - 1));
816 	}
817 
818       if (walker->delta_date && (ret = rcswalk_delta_date (rcs, walker, data))) {
819 	return ret;
820       }
821 
822       for (i = 0; i < rcs->version_count; i += 1)
823 	{
824 	  rcs->total_size += rcs->versions[i].size;
825 	  maxsize = MAX (rcs->versions[i].size, maxsize);
826 	}
827 
828       stat_int_add_item (stats->version_stat, rcs->version_count);
829       stat_int_add_item (stats->forward_stat, rcs->forward_count);
830       stat_int_add_item (stats->reverse_stat, rcs->reverse_count);
831       stat_int_add_item (stats->branch_stat, rcs->branch_count);
832       stat_int_add_item (stats->unencoded_stat, rcs->total_size);
833       stat_int_add_item (stats->literal_stat, maxsize);
834 
835       if (walker->onefile && (ret = walker->onefile (rcs, stats, data))) {
836 	return ret;
837       }
838     }
839 
840   rcswalk_free (rcs);
841 
842   return 0;
843 }
844 
845 int
846 rcswalk_dir (const char* dir, RcsWalker* walker, RcsStats* stats, void* data, const char* copy_dir)
847 {
848   int ret;
849   DIR* thisdir;
850   struct dirent* ent;
851 
852   if (copy_dir && (ret = config_create_dir (copy_dir))) {
853     return ret;
854   }
855 
856   if (! (thisdir = opendir (dir)))
857     {
858       g_warning ("opendir failed: %s", dir);
859       return errno;
860     }
861 
862   while ((ent = readdir (thisdir)))
863     {
864       char* name = ent->d_name;
865       int len;
866       struct stat buf;
867       char* fullname;
868       char* copyname = NULL;
869 
870       if (strcmp (name, ".") == 0)
871 	continue;
872 
873       if (strcmp (name, "..") == 0)
874 	continue;
875 
876       len = strlen (name);
877 
878       fullname = g_strdup_printf ("%s/%s", dir, name);
879 
880       if (copy_dir)
881 	copyname = g_strdup_printf ("%s/%s", copy_dir, name);
882 
883       if (len > 2 && strcmp (name + len - 2, ",v") == 0)
884 	{
885 	  if ((ret = rcswalk_one (fullname, copyname, walker, stats, data))) {
886 	    goto abort;
887 	  }
888 	}
889       else
890 	{
891 	  if (stat (fullname, & buf) < 0)
892 	    {
893 	      g_warning ("stat failed: %s\n", fullname);
894 	      goto abort;
895 	    }
896 
897 	  if (S_ISDIR (buf.st_mode))
898 	    {
899 	      if ((ret = rcswalk_dir (fullname, walker, stats, data, copyname))) {
900 		goto abort;
901 	      }
902 	    }
903 	}
904 
905       g_free (fullname);
906 
907       if (copyname)
908 	g_free (copyname);
909     }
910 
911   if (closedir (thisdir) < 0)
912     {
913       g_warning ("closedir failed: %s", dir);
914       return errno;
915     }
916 
917   return 0;
918 
919  abort:
920 
921   if (thisdir)
922     closedir (thisdir);
923 
924   return -1;
925 }
926 
927 void
928 rcswalk_init (void)
929 {
930   config_register (rcswalk_options, ARRAY_SIZE (rcswalk_options));
931 }
932 
933 int
934 rcswalk (RcsWalker *walker, const char* copy_base)
935 {
936   void* data = NULL;
937   RcsStats stats;
938   int ret;
939 
940   skip_count = 0;
941   small_count = 0;
942   process_count = 0;
943   large_count = 0;
944 
945   memset (& stats, 0, sizeof (stats));
946 
947   stats.avg_version_size = stat_bincount_new ("AvgVersionSize"); /* @@@ leak */
948   stats.version_stat = stat_int_new ("Version"); /* @@@ leak */
949   stats.forward_stat = stat_int_new ("Forward"); /* @@@ leak */
950   stats.reverse_stat = stat_int_new ("Reverse"); /* @@@ leak */
951   stats.branch_stat  = stat_int_new ("Branch"); /* @@@ leak */
952   stats.unencoded_stat = stat_int_new ("Unencoded"); /* @@@ leak */
953   stats.literal_stat   = stat_int_new ("Literal"); /* @@@ leak */
954 
955   tmp_file_1 = g_strdup_printf ("%s/rcs1.%d", g_get_tmp_dir (), (int) getpid ());
956   tmp_file_2 = g_strdup_printf ("%s/rcs2.%d", g_get_tmp_dir (), (int) getpid ());
957 
958   if (walker->initialize)
959     data = walker->initialize ();
960 
961   if ((ret = rcswalk_dir (rcswalk_input_dir, walker, & stats, data, copy_base))) {
962     return ret;
963   }
964 
965   if (walker->finalize)
966     {
967       if ((ret = walker->finalize (& stats, data))) {
968 	return ret;
969       }
970     }
971 
972   unlink (tmp_file_1);
973   unlink (tmp_file_2);
974 
975   fprintf (stderr, "rcswalk: processed %d files: too small %d; too large: %d; damaged: %d\n", process_count, small_count, large_count, skip_count);
976 
977   return 0;
978 }
979 
980 /* Statistics
981  */
982 
983 void
984 rcswalk_report (RcsStats* set)
985 {
986   stat_bincount_report (set->avg_version_size);
987   stat_int_report (set->version_stat);
988   stat_int_report (set->forward_stat);
989   stat_int_report (set->reverse_stat);
990   stat_int_report (set->branch_stat);
991   stat_int_report (set->unencoded_stat);
992   stat_int_report (set->literal_stat);
993 }
994 
995 /* Int stat
996  */
997 IntStat*
998 stat_int_new (const char* name)
999 {
1000   IntStat* s = g_new0 (IntStat, 1);
1001 
1002   s->name = name;
1003   s->values = g_array_new (FALSE, FALSE, sizeof (long long));
1004 
1005   return s;
1006 }
1007 
1008 void
1009 stat_int_add_item (IntStat* stat, long long v)
1010 {
1011   if (! stat->count)
1012     stat->min = v;
1013   stat->count += 1;
1014   stat->min = MIN (v, stat->min);
1015   stat->max = MAX (v, stat->max);
1016   stat->sum += v;
1017 
1018   g_array_append_val (stat->values, v);
1019 }
1020 
1021 double
1022 stat_int_stddev (IntStat *stat)
1023 {
1024   double f = 0;
1025   double m = (double) stat->sum / (double) stat->count;
1026   double v;
1027   int i;
1028 
1029   for (i = 0; i < stat->count; i += 1)
1030     {
1031       long long x = g_array_index (stat->values, long long, i);
1032 
1033       f += (m - (double) x) * (m - (double) x);
1034     }
1035 
1036   v = f / (double) stat->count;
1037 
1038   return sqrt (v);
1039 }
1040 
/* qsort() comparator for long long values, ascending.
 *
 * The values are compared rather than subtracted: the difference of two
 * long long values does not fit in an int in general, so returning it
 * could report the wrong sign, or 0 for unequal values.
 *
 * Returns -1, 0 or 1.
 */
int
ll_comp (const void* a, const void* b)
{
  const long long lla = *(const long long*) a;
  const long long llb = *(const long long*) b;

  return (lla > llb) - (lla < llb);
}
1048 
1049 void
1050 stat_int_histogram (IntStat *stat)
1051 {
1052   int i, consec;
1053   long long cum = 0;
1054 
1055   FILE* p_out;
1056   FILE* s_out;
1057 
1058   if (! (p_out = config_output ("%s.pop.hist", stat->name)))
1059     abort ();
1060 
1061   if (! (s_out = config_output ("%s.sum.hist", stat->name)))
1062     abort ();
1063 
1064   qsort (stat->values->data, stat->count, sizeof (long long), ll_comp);
1065 
1066   for (i = 0; i < stat->count; i += consec)
1067     {
1068       long long ix = g_array_index (stat->values, long long, i);
1069 
1070       for (consec = 1; (i+consec) < stat->count; consec += 1)
1071 	{
1072 	  long long jx = g_array_index (stat->values, long long, i+consec);
1073 
1074 	  if (ix != jx)
1075 	    break;
1076 	}
1077 
1078       cum += consec * g_array_index (stat->values, long long, i);
1079 
1080       fprintf (p_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) (i+consec) / (double) stat->count);
1081       fprintf (s_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) cum / (double) stat->sum);
1082     }
1083 
1084   if (fclose (p_out) < 0 || fclose (s_out) < 0)
1085     {
1086       g_error ("fclose failed\n");
1087     }
1088 }
1089 
1090 void
1091 stat_int_report (IntStat* stat)
1092 {
1093   FILE* out;
1094 
1095   if (! (out = config_output ("%s.stat", stat->name)))
1096     abort ();
1097 
1098   fprintf (out, "Name: %s\n", stat->name);
1099   fprintf (out, "Count: %d\n", stat->count);
1100   fprintf (out, "Min: %qd\n", stat->min);
1101   fprintf (out, "Max: %qd\n", stat->max);
1102   fprintf (out, "Sum: %qd\n", stat->sum);
1103   fprintf (out, "Mean: %0.2f\n", (double) stat->sum / (double) stat->count);
1104   fprintf (out, "Stddev: %0.2f\n", stat_int_stddev (stat));
1105 
1106   if (fclose (out) < 0)
1107     g_error ("fclose failed");
1108 
1109   stat_int_histogram (stat);
1110 }
1111 
1112 /* Dbl stat
1113  */
1114 
1115 DblStat*
1116 stat_dbl_new (const char* name)
1117 {
1118   DblStat* s = g_new0 (DblStat, 1);
1119 
1120   s->name = name;
1121   s->values = g_array_new (FALSE, FALSE, sizeof (double));
1122 
1123   return s;
1124 }
1125 
1126 void
1127 stat_dbl_add_item (DblStat* stat, double v)
1128 {
1129   if (! stat->count)
1130     stat->min = v;
1131   stat->count += 1;
1132   stat->min = MIN (v, stat->min);
1133   stat->max = MAX (v, stat->max);
1134   stat->sum += v;
1135 
1136   g_array_append_val (stat->values, v);
1137 }
1138 
1139 double
1140 stat_dbl_stddev (DblStat *stat)
1141 {
1142   double f = 0;
1143   double m = stat->sum / stat->count;
1144   double v;
1145   int i;
1146 
1147   for (i = 0; i < stat->count; i += 1)
1148     {
1149       double x = g_array_index (stat->values, double, i);
1150 
1151       f += (m - x) * (m - x);
1152     }
1153 
1154   v = f / stat->count;
1155 
1156   return sqrt (v);
1157 }
1158 
/* qsort() comparator for double values, ascending.
 * Returns 1, -1 or 0 according to the sign of (*a - *b); a difference
 * that is neither positive nor negative yields 0.
 */
int
dbl_comp (const void* a, const void* b)
{
  const double lhs = *(const double*) a;
  const double rhs = *(const double*) b;
  const double delta = lhs - rhs;

  if (delta > 0.0)
    return 1;

  if (delta < 0.0)
    return -1;

  return 0;
}
1173 
1174 void
1175 stat_dbl_histogram (DblStat *stat)
1176 {
1177   int i, consec;
1178   double cum = 0.0;
1179 
1180   FILE* p_out;
1181   FILE* s_out;
1182 
1183   if (! (p_out = config_output ("%s.pop.hist", stat->name)))
1184     abort ();
1185 
1186   if (! (s_out = config_output ("%s.sum.hist", stat->name)))
1187     abort ();
1188 
1189   qsort (stat->values->data, stat->count, sizeof (double), dbl_comp);
1190 
1191   for (i = 0; i < stat->count; i += consec)
1192     {
1193       double ix = g_array_index (stat->values, double, i);
1194 
1195       for (consec = 1; (i+consec) < stat->count; consec += 1)
1196 	{
1197 	  double jx = g_array_index (stat->values, double, i+consec);
1198 
1199 	  if (ix != jx)
1200 	    break;
1201 	}
1202 
1203       cum += ((double) consec) * g_array_index (stat->values, double, i);
1204 
1205       fprintf (p_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), (double) (i+consec) / (double) stat->count);
1206       fprintf (s_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), cum / stat->sum);
1207     }
1208 
1209   if (fclose (p_out) < 0 || fclose (s_out) < 0)
1210     {
1211       g_error ("fclose failed\n");
1212     }
1213 }
1214 
1215 void
1216 stat_dbl_report (DblStat* stat)
1217 {
1218   FILE* out;
1219 
1220   if (! (out = config_output ("%s.stat", stat->name)))
1221     abort ();
1222 
1223   fprintf (out, "Name:   %s\n", stat->name);
1224   fprintf (out, "Count:  %d\n", stat->count);
1225   fprintf (out, "Min:    %0.6f\n", stat->min);
1226   fprintf (out, "Max:    %0.6f\n", stat->max);
1227   fprintf (out, "Sum:    %0.6f\n", stat->sum);
1228   fprintf (out, "Mean:   %0.6f\n", stat->sum / stat->count);
1229   fprintf (out, "Stddev: %0.6f\n", stat_dbl_stddev (stat));
1230 
1231   if (fclose (out) < 0)
1232     g_error ("fclose failed");
1233 
1234   stat_dbl_histogram (stat);
1235 }
1236 
1237 /* Bincount
1238  */
1239 BinCounter*
1240 stat_bincount_new (const char* name)
1241 {
1242   BinCounter* bc = g_new0 (BinCounter, 1);
1243 
1244   bc->name = name;
1245   bc->bins = g_ptr_array_new ();
1246 
1247   return bc;
1248 }
1249 
1250 void
1251 stat_bincount_add_item (BinCounter* bc, int bin, double val)
1252 {
1253   GArray* one;
1254   int last;
1255 
1256   if (bin >= bc->bins->len)
1257     {
1258       g_ptr_array_set_size (bc->bins, bin+1);
1259     }
1260 
1261   if (! (one = bc->bins->pdata[bin]))
1262     {
1263       one = bc->bins->pdata[bin] = g_array_new (FALSE, TRUE, sizeof (double));
1264     }
1265 
1266   g_assert (one);
1267 
1268   last = one->len;
1269 
1270   g_array_set_size (one, last + 1);
1271 
1272   g_array_index (one, double, last) = val;
1273 }
1274 
1275 void
1276 stat_bincount_report (BinCounter* bc)
1277 {
1278   FILE *avg_out;
1279   FILE *raw_out;
1280   int i;
1281 
1282   if (! (avg_out = config_output ("%s.avg", bc->name)))
1283     abort ();
1284 
1285   if (! (raw_out = config_output ("%s.raw", bc->name)))
1286     abort ();
1287 
1288   for (i = 0; i < bc->bins->len; i += 1)
1289     {
1290       GArray* one = bc->bins->pdata[i];
1291 
1292       double sum = 0.0;
1293       int j;
1294 
1295       for (j = 0; j < one->len; j += 1)
1296 	{
1297 	  double d = g_array_index (one, double, j);
1298 
1299 	  sum += d;
1300 
1301 	  fprintf (raw_out, "%e ", d);
1302 	}
1303 
1304       fprintf (raw_out, "\n");
1305       fprintf (avg_out, "%e %d\n", sum / one->len, one->len);
1306     }
1307 
1308   if (fclose (avg_out) < 0)
1309     g_error ("fclose failed");
1310 
1311   if (fclose (raw_out) < 0)
1312     g_error ("fclose failed");
1313 }
1314 
1315 /* Config stuff
1316  */
1317 
/* Make sure DIRNAME exists and is a directory, creating it (mode 0777,
 * subject to the umask) when stat() cannot find it.
 *
 * Returns 0 on success.  On failure a message is printed to stderr and
 * an errno value is returned: the error from mkdir(), or ENOTDIR when
 * DIRNAME exists but is not a directory.
 */
int
config_create_dir (const char* dirname)
{
  struct stat buf;

  if (stat (dirname, & buf) < 0)
    {
      if (mkdir (dirname, 0777) < 0)
        {
          /* Capture errno first: fprintf() is allowed to overwrite it. */
          int err = errno;

          fprintf (stderr, "mkdir failed: %s\n", dirname);
          return err;
        }

      return 0;
    }

  if (! S_ISDIR (buf.st_mode))
    {
      /* No call failed on this path, so errno holds nothing meaningful
       * (possibly 0, which the caller would read as success). */
      fprintf (stderr, "not a directory: %s\n", dirname);
      return ENOTDIR;
    }

  return 0;
}
1342 
/* Build the shell command "rm -rf -- '<dir>'" in freshly malloc'ed
 * memory.  DIR is wrapped in single quotes and every embedded single
 * quote is written as '\'' so the shell sees the path as one literal
 * word.  Returns NULL when memory cannot be allocated.
 */
static char*
config_clear_dir_command (const char* dir)
{
  static const char prefix[] = "rm -rf -- ";
  size_t prefix_len = sizeof (prefix) - 1;
  /* Worst case: every byte expands to four, plus two quotes and a NUL. */
  char *cmd = malloc (prefix_len + 4 * strlen (dir) + 3);
  char *out;

  if (! cmd)
    return NULL;

  memcpy (cmd, prefix, prefix_len);
  out = cmd + prefix_len;

  *out++ = '\'';

  for (; *dir; dir += 1)
    {
      if (*dir == '\'')
        {
          memcpy (out, "'\\''", 4);
          out += 4;
        }
      else
        {
          *out++ = *dir;
        }
    }

  *out++ = '\'';
  *out = 0;

  return cmd;
}

/* Recursively delete DIR by running "rm -rf" through system().  A NULL
 * DIR is a no-op.
 *
 * The command is built in heap memory sized to the path (no fixed-size
 * buffer to overflow) and the path is shell-quoted, so directory names
 * containing spaces or shell metacharacters are removed as given.
 *
 * Returns 0 on success or when DIR is NULL, -1 (with a message on
 * stderr) when the command cannot be built or does not exit cleanly.
 */
int
config_clear_dir (const char* dir)
{
  char *cmd;
  int status;

  if (! dir)
    return 0;

  if (! (cmd = config_clear_dir_command (dir)))
    {
      fprintf (stderr, "out of memory clearing: %s\n", dir);
      return -1;
    }

  status = system (cmd);

  free (cmd);

  if (status != 0)
    {
      fprintf (stderr, "rm -rf failed: %s\n", dir);
      return -1;
    }

  return 0;
}
1357 
1358 static ConfigOption all_options[64];
1359 static int          option_count;
1360 
1361 void
1362 config_init ()
1363 {
1364   static gboolean once = FALSE;
1365   if (! once)
1366     {
1367       once = TRUE;
1368       config_register (config_options, ARRAY_SIZE (config_options));
1369     }
1370 }
1371 
1372 void
1373 config_register (ConfigOption *opts, int nopts)
1374 {
1375   int i;
1376 
1377   config_init ();
1378 
1379   for (i = 0; i < nopts; i += 1)
1380     {
1381       all_options[option_count++] = opts[i];
1382     }
1383 }
1384 
1385 void
1386 config_set_string (const char* var, const char* val)
1387 {
1388   int i;
1389 
1390   for (i = 0; i < option_count; i += 1)
1391     {
1392       ConfigOption *opt = all_options + i;
1393 
1394       if (strcmp (opt->name, var) == 0)
1395 	{
1396 	  (* (const char**) opt->value) = val;
1397 	  opt->found = TRUE;
1398 	  return;
1399 	}
1400     }
1401 }
1402 
1403 int
1404 config_parse (const char* config_file)
1405 {
1406   FILE *in;
1407   char oname[1024], value[1024];
1408   int i;
1409 
1410   if (! (in = fopen (config_file, "r")))
1411     {
1412       fprintf (stderr, "fopen failed: %s\n", config_file);
1413       return errno;
1414     }
1415 
1416   for (;;)
1417     {
1418       ConfigOption *opt = NULL;
1419 
1420       if (fscanf (in, "%s", oname) != 1)
1421 	break;
1422 
1423       for (i = 0; i < option_count; i += 1)
1424 	{
1425 	  if (strcmp (oname, all_options[i].name) == 0)
1426 	    {
1427 	      opt = all_options + i;
1428 	      break;
1429 	    }
1430 	}
1431 
1432       if (opt && opt->arg == CO_None)
1433 	{
1434 	  (* (gboolean*) opt->value) = TRUE;
1435 	  opt->found = TRUE;
1436 	  continue;
1437 	}
1438 
1439       if (fscanf (in, "%s", value) != 1)
1440 	{
1441 	  fprintf (stderr, "no value for option: %s; file: %s\n", oname, config_file);
1442 	  goto abort;
1443 	}
1444 
1445       if (! opt)
1446 	{
1447 	  /*fprintf (stderr, "unrecognized option: %s\n", oname);*/
1448 	  continue;
1449 	}
1450 
1451       switch (opt->type)
1452 	{
1453 	case CD_Bool:
1454 
1455 	  if (strcasecmp (value, "yes") == 0 ||
1456 	      strcasecmp (value, "true") == 0 ||
1457 	      strcmp     (value, "1") == 0 ||
1458 	      strcasecmp (value, "on") == 0)
1459 	    {
1460 	      ((gboolean*) opt->value) = TRUE;
1461 	    }
1462 	  else
1463 	    {
1464 	      ((gboolean*) opt->value) = FALSE;
1465 	    }
1466 
1467 	  break;
1468 	case CD_Int32:
1469 
1470 	  if (sscanf (value, "%d", (gint32*) opt->value) != 1)
1471 	    {
1472 	      fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file);
1473 	      goto abort;
1474 	    }
1475 
1476 	  break;
1477 	case CD_Double:
1478 
1479 	  if (sscanf (value, "%lf", (double*) opt->value) != 1)
1480 	    {
1481 	      fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file);
1482 	      goto abort;
1483 	    }
1484 
1485 	  break;
1486 	case CD_String:
1487 
1488 	  (* (const char**) opt->value) = g_strdup (value);
1489 
1490 	  break;
1491 	}
1492 
1493       opt->found = TRUE;
1494     }
1495 
1496   fclose (in);
1497 
1498   return 0;
1499 
1500  abort:
1501 
1502   fclose (in);
1503 
1504   return -1;
1505 }
1506 
1507 int
1508 config_compute_output_dir ()
1509 {
1510   char tmp[1024];
1511   char buf[1024];
1512   int i;
1513   gboolean last = FALSE;
1514 
1515   buf[0] = 0;
1516 
1517   for (i = 0; i < option_count; i += 1)
1518     {
1519       ConfigOption *opt = all_options + i;
1520 
1521       if (opt->style == CS_Ignore)
1522 	continue;
1523 
1524       if (! opt->found)
1525 	continue;
1526 
1527       if (last)
1528 	strcat (buf, ",");
1529 
1530       last = TRUE;
1531 
1532       strcat (buf, opt->abbrev);
1533       strcat (buf, "=");
1534 
1535       switch (opt->type)
1536 	{
1537 	case CD_Bool:
1538 
1539 	  if (* (gboolean*) opt->value)
1540 	    strcat (buf, "true");
1541 	  else
1542 	    strcat (buf, "false");
1543 
1544 	  break;
1545 	case CD_Int32:
1546 
1547 	  sprintf (tmp, "%d", (* (gint32*) opt->value));
1548 	  strcat (buf, tmp);
1549 
1550 	  break;
1551 	case CD_Double:
1552 
1553 	  sprintf (tmp, "%0.2f", (* (double*) opt->value));
1554 	  strcat (buf, tmp);
1555 
1556 	  break;
1557 	case CD_String:
1558 
1559 	  if (opt->style == CS_UseAsFile)
1560 	    {
1561 	      const char* str = (* (const char**) opt->value);
1562 	      const char* ls = strrchr (str, '/');
1563 
1564 	      strcat (buf, ls ? (ls + 1) : str);
1565 	    }
1566 	  else
1567 	    {
1568 	      strcat (buf, (* (const char**) opt->value));
1569 	    }
1570 
1571 	  break;
1572 	}
1573     }
1574 
1575   config_output_dir = g_strdup_printf ("%s/%s", config_output_base, buf);
1576 
1577   return 0;
1578 }
1579 
1580 int
1581 config_done (void)
1582 {
1583   int i, ret;
1584   FILE *out;
1585 
1586   for (i = 0; i < option_count; i += 1)
1587     {
1588       ConfigOption *opt = all_options + i;
1589 
1590       if (! opt->found && opt->arg == CO_Required)
1591 	{
1592 	  fprintf (stderr, "required option not found: %s\n", all_options[i].name);
1593 	  return -1;
1594 	}
1595     }
1596 
1597   if ((ret = config_compute_output_dir ())) {
1598     return ret;
1599   }
1600 
1601   if ((ret = config_clear_dir (config_output_dir))) {
1602     return ret;
1603   }
1604 
1605   if ((ret = config_create_dir (config_output_dir))) {
1606     return ret;
1607   }
1608 
1609   if (! (out = config_output ("Options")))
1610     abort ();
1611 
1612   for (i = 0; i < option_count; i += 1)
1613     {
1614       ConfigOption *opt = all_options + i;
1615 
1616       fprintf (out, "option: %s; value: ", all_options[i].name);
1617 
1618       switch (opt->type)
1619 	{
1620 	case CD_Bool:
1621 
1622 	  fprintf (out, "%s", (* (gboolean*) opt->value) ? "TRUE" : "FALSE");
1623 
1624 	  break;
1625 	case CD_Int32:
1626 
1627 	  fprintf (out, "%d", (* (gint32*) opt->value));
1628 
1629 	  break;
1630 	case CD_Double:
1631 
1632 	  fprintf (out, "%0.2f", (* (double*) opt->value));
1633 
1634 	  break;
1635 	case CD_String:
1636 
1637 	  fprintf (out, "%s", (* (const char**) opt->value));
1638 
1639 	  break;
1640 	}
1641 
1642       fprintf (out, "\n");
1643     }
1644 
1645   if (fclose (out))
1646     {
1647       fprintf (stderr, "fclose failed\n");
1648       return errno;
1649     }
1650 
1651   return 0;
1652 }
1653 
1654 const char*
1655 config_help_arg (ConfigOption *opt)
1656 {
1657   switch (opt->arg)
1658     {
1659     case CO_Required:
1660       return "required";
1661     case CO_Optional:
1662       return "optional";
1663     case CO_None:
1664       return "no value";
1665     }
1666 
1667   return "unknown";
1668 }
1669 
1670 const char*
1671 config_help_type (ConfigOption *opt)
1672 {
1673   switch (opt->arg)
1674     {
1675     case CO_None:
1676       return "boolean";
1677     default:
1678       break;
1679     }
1680 
1681   switch (opt->type)
1682     {
1683     case CD_Bool:
1684       return "boolean";
1685     case CD_Int32:
1686       return "int";
1687     case CD_Double:
1688       return "double";
1689     case CD_String:
1690       return "string";
1691     }
1692 
1693   return "unknown";
1694 }
1695 
1696 void
1697 config_help (void)
1698 {
1699   int i;
1700 
1701   fprintf (stderr, "Expecting the following options in one or more config files on the command line:\n");
1702 
1703   for (i = 0; i < option_count; i += 1)
1704     {
1705       ConfigOption *opt = all_options + i;
1706 
1707       fprintf (stderr, "%s: %s %s\n",
1708 	       opt->name,
1709 	       config_help_arg (opt),
1710 	       config_help_type (opt));
1711     }
1712 }
1713 
1714 FILE*
1715 config_output (const char* format, ...)
1716 {
1717   gchar *buffer;
1718   gchar *file;
1719   va_list args;
1720   FILE *f;
1721 
1722   va_start (args, format);
1723   buffer = g_strdup_vprintf (format, args);
1724   va_end (args);
1725 
1726   file = g_strdup_printf ("%s/%s", config_output_dir, buffer);
1727 
1728   if (! (f = fopen (file, "w")))
1729     g_error ("fopen failed: %s\n", buffer);
1730 
1731   g_free (file);
1732 
1733   g_free (buffer);
1734 
1735   return f;
1736 }
1737 
1738 
1739 #include <edsio.h>
1740 #include <edsiostdio.h>
1741 #include <ctype.h>
1742 #include "xdfs.h"
1743 
1744 /* Warning: very cheesy!
1745  */
1746 
#ifdef DEBUG_EXTRACT
  /* NOTE(review): these statements sit at file scope, outside any
   * function, and refer to `filename' and `buflen', which are only
   * declared inside rcs_count() below.  As written this cannot compile
   * when DEBUG_EXTRACT is defined.  Presumably a leftover debugging
   * fragment that once lived inside a function -- confirm before
   * enabling.
   */
  FileHandle *fh2 = handle_read_file (filename);

  guint8* debug_buf = g_malloc (buflen);

  if (! handle_read (fh2, debug_buf, buflen))
    g_error ("read failed");
#endif
1755 
1756 gboolean
1757 rcs_count (const char* filename, guint *encoded_size)
1758 {
1759   char *readbuf0, *readbuf;
1760   gboolean in_string = FALSE;
1761   gboolean in_text = FALSE;
1762   guint string_start = 0;
1763   guint string_end = 0;
1764   guint current_pos = 0;
1765   /*char *current_delta = NULL;*/
1766   FileHandle *fh = handle_read_file (filename);
1767   guint buflen = handle_length (fh);
1768 
1769   (* encoded_size) = 0;
1770 
1771   readbuf0 = g_new (guint8, buflen);
1772 
1773   for (;;)
1774     {
1775       int c = handle_gets (fh, readbuf0, buflen);
1776 
1777       readbuf = readbuf0;
1778 
1779       if (c < 0)
1780 	break;
1781 
1782       if (strncmp (readbuf, "text", 4) == 0)
1783 	in_text = TRUE;
1784 
1785       if (! in_string && readbuf[0] == '@')
1786 	{
1787 	  string_start = current_pos + 1;
1788 	  in_string = TRUE;
1789 	  readbuf += 1;
1790 	}
1791 
1792       current_pos += c;
1793 
1794       if (in_string)
1795 	{
1796 	  while ((readbuf = strchr (readbuf, '@')))
1797 	    {
1798 	      if (readbuf[1] == '@')
1799 		{
1800 		  string_start += 1; /* @@@ bogus, just counting. */
1801 		  readbuf += 2;
1802 		  continue;
1803 		}
1804 
1805 	      in_string = FALSE;
1806 	      break;
1807 	    }
1808 
1809 	  string_end = current_pos - 2;
1810 
1811 	  if (in_text && ! in_string)
1812 	    {
1813 	      in_text = FALSE;
1814 
1815 	      /*g_free (current_delta);
1816 		current_delta = NULL;*/
1817 
1818 	      (* encoded_size) += (string_end - string_start);
1819 	    }
1820 
1821 	  continue;
1822 	}
1823 
1824       if (isdigit (readbuf[0]))
1825 	{
1826 #if 0
1827 	  (* strchr (readbuf, '\n')) = 0;
1828 	  if (current_delta)
1829 	    g_free (current_delta);
1830 	  current_delta = g_strdup (readbuf);
1831 #endif
1832 	}
1833     }
1834 
1835   handle_close (fh);
1836 
1837   g_free (readbuf0);
1838 
1839 #if 0
1840   if (current_delta)
1841     g_free (current_delta);
1842 #endif
1843 
1844   return TRUE;
1845 }
1846 
/* Disabled standalone driver: takes one RCS file name on the command
 * line and runs rcs_count() on it.
 *
 * NOTE(review): the computed size is never printed, and the failure
 * message mentions "rcs_parse" although the function called is
 * rcs_count() -- worth fixing if this driver is ever re-enabled.
 */
#if 0
int
main (int argc, char** argv)
{
  guint size;

  if (argc != 2)
    g_error ("usage: %s RCS_file\n", argv[0]);

  if (! rcs_count (argv[1], &size))
    g_error ("rcs_parse failed");

  return 0;
}
#endif
1862