1 /* inv-ids.c:
2  *
3  ****************************************************************
4  * Copyright (C) 2002, 2003 Tom Lord
5  *
6  * See the file "COPYING" for further information about
7  * the copyright and warranty status of this work.
8  */
9 
10 
11 #include "hackerlab/bugs/panic.h"
12 #include "hackerlab/os/errno.h"
13 #include "hackerlab/os/errno-to-string.h"
14 #include "hackerlab/os/time.h"
15 #include "hackerlab/os/sys/types.h"
16 #include "hackerlab/os/unistd.h"
17 #include "hackerlab/mem/mem.h"
18 #include "hackerlab/char/char-class.h"
19 #include "hackerlab/char/str.h"
20 #include "hackerlab/fmt/cvt.h"
21 #include "hackerlab/fs/file-names.h"
22 #include "hackerlab/vu/safe.h"
23 #include "tla/libfsutils/ensure-dir.h"
24 #include "tla/libarch/my.h"
25 #include "tla/libarch/project-tree.h"
26 #include "tla/libarch/patch-logs.h"
27 #include "tla/libarch/invent.h"
28 #include "tla/libarch/inode-sig.h"
29 #include "tla/libarch/inv-ids.h"
30 
31 
32 
/* Internal selector telling `file_id' which id-tagging strategy to
 * apply.  `arch_inventory_id' translates each public
 * `enum arch_id_tagging_method' value into one of these before
 * calling `file_id'.
 */
enum ftag_method
{
  ftag_names = 0,       /* used for arch_names_id_tagging */
  ftag_implicit = 1,    /* used for arch_implicit_id_tagging */
  ftag_tagline = 2,     /* used for arch_tagline_id_tagging */
  ftag_explicit = 3     /* used for arch_explicit_id_tagging */
};
40 
41 
42 /* __STDC__ prototypes for static functions */
43 static t_uchar * file_id (int * errn,
44                           struct alloc_limits * limits,
45                           enum ftag_method method,
46                           int untagged_is_source,
47                           const t_uchar * path,
48                           assoc_table id_tagging_shortcut,
49                           struct stat * known_lstat,
50                           assoc_table * explicit_skips);
51 static int is_at_or_underneath_archdir (const char * rel_file);
52 static int filename_matches (regex_t * pattern,
53                              const char * filename);
54 static t_uchar * explicit_id (int * errn,
55                               assoc_table * skips,
56                               struct alloc_limits * limits,
57                               const t_uchar * arg_file,
58                               const t_uchar * id_file,
59                               const t_uchar * prefix,
60                               const t_uchar * postfix);
61 static t_uchar * implicit_id (int * errn,
62                               struct alloc_limits * limits,
63                               const t_uchar * file,
64                               const t_uchar * basename,
65                               const t_uchar * prefix,
66                               struct stat * statb,
67                               assoc_table id_tagging_shortcut);
68 static long smash_non_graphical (t_uchar * buf, long amt);
69 
70 
71 
72 t_uchar *
arch_log_file_id(const t_uchar * archive,const t_uchar * revision)73 arch_log_file_id (const t_uchar * archive,
74                   const t_uchar * revision)
75 {
76   t_uchar * log_file_path = 0;
77   t_uchar * answer = 0;
78 
79   log_file_path = arch_log_file (".", archive, revision);
80   answer = str_alloc_cat (0, "A_", log_file_path);
81 
82   lim_free (0, log_file_path);
83   return answer;
84 }
85 
86 
87 assoc_table
arch_filenames_ids(rel_table * file_list,const t_uchar * tree_root)88 arch_filenames_ids (rel_table * file_list,
89                     const t_uchar * tree_root)
90 {
91   struct arch_inventory_options options = {0, };
92   int i = 0;
93   int file_size = rel_n_records (* file_list);
94   assoc_table id_list = 0;
95 
96   options.categories = arch_inventory_source;
97   options.want_ids = 1;
98   options.include_excluded = 1;
99   arch_get_inventory_naming_conventions (&options, tree_root);
100 
101   for (i = 0; i != file_size; ++ i)
102     {
103       t_uchar * id = arch_inventory_id (options.method, 0, rel_peek_str (*file_list, i, 0), 0, 0, 0);
104       assoc_set_taking (&id_list, rel_make_field_str (id), rel_get_field (*file_list, i, 0));
105       lim_free (0, id);
106     }
107 
108   arch_free_inventory_naming_conventions (&options);
109   return id_list;
110 }
111 
112 
113 t_uchar *
arch_inventory_id(enum arch_id_tagging_method method,int untagged_is_source,const t_uchar * path,assoc_table id_tagging_shortcut,struct stat * known_lstat,assoc_table * explicit_skips)114 arch_inventory_id (enum arch_id_tagging_method method,
115                    int untagged_is_source,
116                    const t_uchar * path,
117                    assoc_table id_tagging_shortcut,
118                    struct stat * known_lstat,
119                    assoc_table * explicit_skips)
120 {
121   int errn;
122   t_uchar * answer;
123   enum ftag_method m;
124 
125   if (method == arch_unspecified_id_tagging)
126     {
127       t_uchar * dir = 0;
128       t_uchar * root = 0;
129 
130       dir = file_name_directory_file (0, path);
131       root = arch_tree_root (0, dir, 0);
132 
133       if (!root)
134         {
135           method = arch_names_id_tagging;
136           untagged_is_source = 1;
137         }
138       else
139         {
140           enum arch_inventory_category untagged_category;
141 
142           method = arch_tree_id_tagging_method (&untagged_category, root, 0);
143           untagged_is_source = (untagged_category == arch_inventory_source);
144         }
145 
146       lim_free (0, dir);
147       lim_free (0, root);
148     }
149 
150   switch (method)
151     {
152     default:
153       panic ("unrecognized method in arch_inventory_id");
154       break;
155 
156     case arch_names_id_tagging:
157       {
158         m = ftag_names;
159         break;
160       }
161     case arch_explicit_id_tagging:
162       {
163         m = ftag_explicit;
164         break;
165       }
166     case arch_implicit_id_tagging:
167       {
168         m = ftag_implicit;
169         break;
170       }
171     case arch_tagline_id_tagging:
172       {
173         m = ftag_tagline;
174         break;
175       }
176     }
177 
178 
179 
180   errn = 0;
181 
182   answer = file_id (&errn, 0, m, untagged_is_source, path, id_tagging_shortcut, known_lstat, explicit_skips);
183 
184   if (!answer && errn)
185     {
186       safe_printfmt (2, "error finding file id (%d: %s)\n path: %s\n", errn, errno_to_string(errn), path);
187       panic ("arch_inventory_id");
188     }
189 
190   return answer;
191 }
192 
193 
194 t_uchar *
arch_id_tagging_method_name(enum arch_id_tagging_method m)195 arch_id_tagging_method_name (enum arch_id_tagging_method m)
196 {
197   switch (m)
198     {
199     default:
200       panic ("unknown id tagging method (arch_id_tagging_method_name)");
201       return 0;                 /* not reached */
202 
203     case arch_names_id_tagging:            return str_save (0, "names");
204     case arch_implicit_id_tagging:         return str_save (0, "implicit");
205     case arch_tagline_id_tagging:          return str_save (0, "tagline");
206     case arch_explicit_id_tagging:         return str_save (0, "explicit");
207     }
208 }
209 
210 
211 enum arch_id_tagging_method
arch_id_tagging_method_from_name(const t_uchar * name)212 arch_id_tagging_method_from_name (const t_uchar * name)
213 {
214   if (!str_casecmp (name, "explicit"))
215     return arch_explicit_id_tagging;
216   else if (!str_casecmp (name, "implicit"))
217     return arch_implicit_id_tagging;
218   else if (!str_casecmp (name, "tagline"))
219     return arch_tagline_id_tagging;
220   else if (!str_casecmp (name, "names"))
221     return arch_names_id_tagging;
222   else
223     {
224       safe_printfmt (2, "no such id tagging method (%s)\n", name);
225       exit (2);
226       return arch_names_id_tagging; /* notreached */
227     }
228 }
229 
230 
231 t_uchar *
arch_default_id_tagging_method_contents(enum arch_id_tagging_method method)232 arch_default_id_tagging_method_contents (enum arch_id_tagging_method method)
233 {
234   t_uchar * method_name = 0;
235   t_uchar * excludes_regexp = 0;
236   t_uchar * junk_regexp = 0;
237   t_uchar * backup_regexp = 0;
238   t_uchar * precious_regexp = 0;
239   t_uchar * unrecognized_regexp = 0;
240   t_uchar * source_regexp = 0;
241   t_uchar * answer = 0;
242 
243 
244   if (method == arch_unspecified_id_tagging)
245     method = arch_tagline_id_tagging;
246 
247   method_name = arch_id_tagging_method_name (method);
248   excludes_regexp = arch_default_naming_conventions_regexp (arch_inventory_excludes);
249   backup_regexp = arch_default_naming_conventions_regexp (arch_inventory_backup);
250   junk_regexp = arch_default_naming_conventions_regexp (arch_inventory_junk);
251   precious_regexp = arch_default_naming_conventions_regexp (arch_inventory_precious);
252   unrecognized_regexp = arch_default_naming_conventions_regexp (arch_inventory_unrecognized);
253   source_regexp = arch_default_naming_conventions_regexp (arch_inventory_source);
254 
255 
256   answer = str_alloc_cat_many (0,
257                                ("# id tagging method\n"
258                                 "#\n"
259                                 "# This determines how \"inventory ids\", strings conveying\n"
260                                 "# logical file identity, are computed for each file, directory\n"
261                                 "# and symbolic link.\n"
262                                 "#\n"
263                                 "# The choices are:\n"
264                                 "#\n"
265                                 "# tagline: inventory ids may be set using add-id, or omitted\n"
266                                 "#          (though tree-lint warns about omitted ids), or in\n"
267                                 "#          text files, set in a comment line near the top or\n"
268                                 "#          bottom of the file of a form like \"<PUNCT> arch-tag: <STRING>\".\n"
269                                 "#          Renames of files with no id are treated as a combined\n"
270                                 "#          add and delete (e.g., local changes can be lost).\n"
271                                 "#\n"
272                                 "# explicit: ids must be set using add-id.  Files passing the naming\n"
273                                 "#          conventions for source, but lacking add-id ids, are treated\n"
274                                 "#          as unrecognized files (see below).\n"
275                                 "#\n"
276                                 "# names: ids are not used.  All renames are treated as add+delete\n"
277                                 "#\n"
278                                 "# implicit: similar to tagline, but in addition, the id comment\n"
279                                 "#          may be of the form \"<PUNCT> <BASENAME> - <STRING>\", where\n"
280                                 "#          <BASENAME> is the basename of the file.   This method\n"
281                                 "#          is not recommended, but is retained for backwards\n"
282                                 "#          compatibility.\n"
283                                 "#\n"
284                                 "\n"),
285                                ("explicit\n"
286                                 "\n"),
287                                ("# disposition of untagged source files\n"
288                                 "#\n"
289                                 "# (NOTE: this option must follow the tagline/explicit/names/implicit\n"
290                                 "# directive.)\n"
291                                 "#\n"
292                                 "# By default, the explicit method treats untagged files matching the naming\n"
293                                 "# conventions for source files as unrecognized and the implicit and tagline\n"
294                                 "# methods treat such untagged files as source.\n"
295                                 "#\n"
296                                 "# You can override those default treatments of untagged files by specifying\n"
297                                 "# which inventory category (see below) should be used for files whose names\n"
298                                 "# suggest they are source but which lack ids.\n"
299                                 "#\n"
300                                 "# This feature may be especially convenient when importing sources that do\n"
301                                 "# not use file naming conventions that can be conveniently described with\n"
302                                 "# the regexps below.\n"
303                                 "#\n"
304                                 "# Uncomment one of these lines as appropriate to override the default:\n"
305                                 "#\n"
306                                 "# untagged-source source\n"
307                                 "untagged-source precious\n"
308                                 "# untagged-source backup\n"
309                                 "# untagged-source junk\n"
310                                 "# untagged-source unrecognized\n"
311                                 "#\n"
312                                 "\n"),
313                                ("# naming convention regexps\n"
314                                 "#\n"
315                                 "# For various commands, arch traverses your project trees, categorizing\n"
316                                 "# the files found there.  For example, when importing a project for\n"
317                                 "# the first time, this traversal determines which files are included\n"
318                                 "# in the import.\n"
319                                 "#\n"
320                                 "# The categories of greatest importance are defined in terms of three\n"
321                                 "# questions:\n"
322                                 "#\n"
323                                 "# 1) If arch makes a local copy of this tree, should this file be included\n"
324                                 "#    in the copy?\n"
325                                 "#\n"
326                                 "# 2) Is it generally safe to remove this file based only on how it is named?\n"
327                                 "#    For example, can it be safely clobbered by a new file of the same name?\n"
328                                 "#\n"
329                                 "# 3) Should this file be archived along with the project?  For example,\n"
330                                 "#    should it be included when importing the project for the first time?\n"
331                                 "#\n"
332                                 "# The primary categories are:\n"
333                                 "#\n"
334                                 "# category:      copy locally?       safe to clobber?      archive?\n"
335                                 "#\n"
336                                 "# junk           no                  yes                   no\n"
337                                 "# backup         no                  no                    no\n"
338                                 "# precious       yes                 no                    no\n"
339                                 "# source         yes                 no                    yes\n"
340                                 "#\n"
341                                 "# There are two additional categories, unrelated to those questions:\n"
342                                 "#\n"
343                                 "# excluded -- during a traversal by inventory, this file (and,\n"
344                                 "#             if a directory, its contents) are simply ignored unless the\n"
345                                 "#             --all flag is specified.   This category is usually used to\n"
346                                 "#             omit arch's own control files from a listing.\n"
347                                 "#\n"
348                                 "# unrecognized -- a category for files whose name fits no other pattern.\n"
349                                 "#             Usually, the presence of unrecognized files is treated as an\n"
350                                 "#             error.   You can use the naming conventions to define certain\n"
351                                 "#             names as \"deliberately unrecognized\" -- i.e., filenames whose\n"
352                                 "#             presence in a source tree you _want_ to be treated as an error\n"
353                                 "#\n"
354                                 "# The traveral algorithm is described here, along with lines you can edit to\n"
355                                 "# customize the naming conventions.\n"
356                                 "#\n"
357                                 "# Starting at \".\" within a project tree (usually at the root of the\n"
358                                 "# project tree) consider each filename in that directory.\n"
359                                 "#\n"
360                                 "# The files \".\" and \"..\" are simply ignored.\n"
361                                 "#\n"
362                                 "# Files containing \"illegal characters\" are characterized as unrecognized.\n"
363                                 "# If they are directories, traversal does _not_ descend into those directories.\n"
364                                 "# Currently, the illegal characters are *, ?, [, ], \\, space, and tab.\n"
365                                 "# (The set of illegal characters may shrink in future releases.)\n"
366                                 "#\n"
367                                 "# In an interactive call to inventory _without_ the --all flag,\n"
368                                 "# names are next compared to the exclude regexp defined here.  Those that\n"
369                                 "# are ignored and not descended below.  (Most arch operations performing\n"
370                                 "# traversals internally, e.g. import, do not use this pattern\n"
371                                 "# and skip this step of the algorithm.\n"
372                                 "#\n"),
373                                "\n",
374                                "exclude ", excludes_regexp, "\n",
375                                "\n",
376                                ("# If the file has a name that begins with \"++\", it is categorized as\n"
377                                 "# _precious_.  Names of this form are hard-wired and reserved for use by arch\n"
378                                 "# itself.  Traversal does not descend into precious directories, but when a\n"
379                                 "# precious directory is copied, its contents are recursively copied.\n"
380                                 "#\n"
381                                 "# Files and directories that reach this stage and which arch recognizes as its\n"
382                                 "# own control files are classified at this step as source.   Traversal _does_\n"
383                                 "# descend into source directories.\n"
384                                 "#\n"
385                                 "# If the file has a name that begins with \",,\", it is categorized as _junk_.\n"
386                                 "# Names of this form are hard-wired and reserved for use by arch and other tools,\n"
387                                 "# and arch may clobber such files without warning.  In a project tree, when no \n"
388                                 "# arch commands are running, it is safe for users to delete any \",,\" files. \n"
389                                 "# Although the general rule for junk files is that arch is free to clobber them,\n"
390                                 "# in fact, arch will only ever clobber files starting with \",,\".\n"
391                                 "#\n"
392                                 "# Traversal does not descend into junk directories.\n"
393                                 "#\n"
394                                 "# For your convenience, at this step of the traversal, you can classify\n"
395                                 "# additional files as junk or precious:\n"
396                                 "#\n"),
397                                "\n",
398                                "junk ", junk_regexp, "\n",
399                                "\n",
400                                "precious ", precious_regexp, "\n",
401                                "\n",
402                                ("# Files matching the following regexp are classified as backup files, and\n"
403                                 "# traversal does not descend into backup directories:\n"
404                                 "#\n"),
405                                "\n",
406                                "backup ", backup_regexp, "\n",
407                                "\n",
408                                ("# If you want to force certain filenames to be treated as errors when present,\n"
409                                 "# you can add them to the regexp for deliberately unrecognized files.  Traversal\n"
410                                 "# does not descend into unrecognized directories.\n"),
411                                "\n",
412                                "unrecognized ", unrecognized_regexp, "\n",
413                                "\n",
414                                ("# Files which match the following pattern are treated as source files.\n"
415                                 "# Traversal _does_ descend into source directories:\n"),
416                                "\n",
417                                "source ", source_regexp, "\n",
418                                "\n",
419                                ("# Any files not classified by the above rules are classified as unrecognized.\n"
420                                 "# Traversal does not descend into unrecognized directories.\n"
421                                 "\n"),
422                                str_end);
423 
424 
425   lim_free (0, method_name);
426   lim_free (0, excludes_regexp);
427   lim_free (0, junk_regexp);
428   lim_free (0, backup_regexp);
429   lim_free (0, precious_regexp);
430   lim_free (0, unrecognized_regexp);
431   lim_free (0, source_regexp);
432 
433   return answer;
434 }
435 
436 
437 t_uchar *
arch_tree_id_tagging_method_file(const t_uchar * tree_root)438 arch_tree_id_tagging_method_file (const t_uchar * tree_root)
439 {
440   t_uchar * ctl_dir;
441   t_uchar * answer;
442 
443   ctl_dir = arch_tree_ctl_dir (tree_root);
444   answer = file_name_in_vicinity (0, ctl_dir, "=tagging-method");
445   lim_free (0, ctl_dir);
446   return answer;
447 }
448 
449 
450 enum arch_id_tagging_method
arch_tree_id_tagging_method(enum arch_inventory_category * cat_var,const t_uchar * tree_root,int strict)451 arch_tree_id_tagging_method (enum arch_inventory_category * cat_var,
452                              const t_uchar * tree_root,
453                              int strict)
454 {
455   struct arch_inventory_options options;
456   enum arch_id_tagging_method answer;
457 
458   mem_set0 ((t_uchar *)&options, sizeof (options));
459   arch_get_inventory_naming_conventions (&options, tree_root);
460   if (cat_var)
461     {
462       *cat_var = options.untagged_source_category;
463     }
464   answer = options.method;
465   arch_free_inventory_naming_conventions (&options);
466   return answer;
467 }
468 
469 
470 void
arch_set_tree_id_tagging_method(const t_uchar * tree_root,enum arch_id_tagging_method method)471 arch_set_tree_id_tagging_method (const t_uchar * tree_root,
472                                  enum arch_id_tagging_method method)
473 {
474   int errn;
475   t_uchar * method_name;
476   t_uchar * method_file;
477   t_uchar * method_dir;
478   t_uchar * method_tmp;
479   int out_fd;
480 
481   method_name = arch_id_tagging_method_name (method);
482   method_file = arch_tree_id_tagging_method_file (tree_root);
483   method_dir = file_name_directory_file (0, method_file);
484   method_tmp = file_name_in_vicinity (0, method_dir, ",,tagging-method");
485 
486   vu_unlink (&errn, method_tmp);
487   out_fd = safe_open (method_tmp, O_WRONLY | O_CREAT | O_EXCL, 0666);
488 
489   if (safe_access (method_file, F_OK))
490     {
491       safe_printfmt (out_fd, "%s\n", method_name);
492     }
493   else
494     {
495       int in_fd;
496       t_uchar * line;
497       long len;
498       int emitted_method;
499 
500       in_fd = safe_open (method_file, O_RDONLY, 0);
501 
502       emitted_method = 0;
503 
504       while (1)
505         {
506           t_uchar * pos;
507           t_uchar * lim;
508           int line_maybe_specifies_method;
509           enum arch_id_tagging_method maybe_method;
510           int replace_with_method_name;
511 
512           safe_next_line (&line, &len, in_fd);
513           if (!line)
514             break;
515 
516           lim = line + len;
517           pos = line;
518 
519           line_maybe_specifies_method = 0;
520           replace_with_method_name = 0;
521 
522           while ((pos < lim) && char_is_blank (*pos))
523             ++pos;
524 
525           if (((lim - pos) >= (sizeof ("names") - 1)) && !str_casecmp_n ("names", sizeof ("names") - 1, pos, sizeof ("names") - 1))
526             {
527               line_maybe_specifies_method = 1;
528               maybe_method = arch_names_id_tagging;
529               pos += sizeof ("names") - 1;
530             }
531           else if (((lim - pos) >= (sizeof ("explicit") - 1)) && !str_casecmp_n ("explicit", sizeof ("explicit") - 1, pos, sizeof ("explicit") - 1))
532             {
533               line_maybe_specifies_method = 1;
534               maybe_method = arch_explicit_id_tagging;
535               pos += sizeof ("explicit") - 1;
536             }
537           else if (((lim - pos) >= (sizeof ("implicit") - 1)) && !str_casecmp_n ("implicit", sizeof ("implicit") - 1, pos, sizeof ("implicit") - 1))
538             {
539               line_maybe_specifies_method = 1;
540               maybe_method = arch_implicit_id_tagging;
541               pos += sizeof ("implicit") - 1;
542             }
543           else if (((lim - pos) >= (sizeof ("tagline") - 1)) && !str_casecmp_n ("tagline", sizeof ("tagline") - 1, pos, sizeof ("tagline") - 1))
544             {
545               line_maybe_specifies_method = 1;
546               maybe_method = arch_tagline_id_tagging;
547               pos += sizeof ("tagline") - 1;
548             }
549 
550           if (line_maybe_specifies_method)
551             {
552               while ((pos < lim) && char_is_space (*pos))
553                 ++pos;
554               if (pos == lim)
555                 replace_with_method_name = 1;
556             }
557 
558           if (replace_with_method_name)
559             {
560               safe_printfmt (out_fd, "%s\n", method_name);
561               emitted_method = 1;
562             }
563           else
564             {
565               safe_printfmt (out_fd, "%.*s", (int)len, line);
566               if (len && (line[len - 1] != '\n'))
567                 safe_printfmt (out_fd, "\n");
568             }
569         }
570 
571       if (!emitted_method)
572         safe_printfmt (out_fd, "%s\n", method_name);
573 
574       safe_close (in_fd);
575     }
576 
577   safe_close (out_fd);
578   safe_rename (method_tmp, method_file);
579 
580   lim_free (0, method_name);
581   lim_free (0, method_file);
582   lim_free (0, method_dir);
583   lim_free (0, method_tmp);
584 }
585 
586 
587 t_uchar *
arch_explicit_id_file_for(const t_uchar * path)588 arch_explicit_id_file_for (const t_uchar * path)
589 {
590   int errn;
591   struct stat stat_buf;
592   int is_file;
593   t_uchar * parent_dir;
594   t_uchar * dot_arch_dir;
595   t_uchar * id_file_basename;
596   t_uchar * id_file_path;
597 
598   if (vu_lstat (&errn, path, &stat_buf))
599     {
600       if (errn != ENOENT)
601         {
602           safe_printfmt (2, "i/o error (%d: %s) for vu_lstat of  %s\n", errn, errno_to_string (errn), path);
603           exit (2);
604         }
605       is_file = 1;
606     }
607   else
608     {
609       is_file = !S_ISDIR (stat_buf.st_mode);
610     }
611 
612   if (is_file)
613     parent_dir = file_name_directory_file (0, path);
614   else
615     parent_dir = str_save (0, path);
616 
617   dot_arch_dir = file_name_in_vicinity (0, parent_dir, ".arch-ids");
618 
619   if (is_file)
620     {
621       id_file_basename = file_name_tail (0, path);
622       id_file_basename = str_realloc_cat (0, id_file_basename, ".id");
623     }
624   else
625     id_file_basename = str_save (0, "=id");
626 
627 
628   id_file_path = file_name_in_vicinity (0, dot_arch_dir, id_file_basename);
629 
630   lim_free (0, parent_dir);
631   lim_free (0, dot_arch_dir);
632   lim_free (0, id_file_basename);
633 
634   return id_file_path;
635 }
636 
637 
638 t_uchar *
arch_generate_id(void)639 arch_generate_id (void)
640 {
641   static unsigned long seq = 0;
642 
643   time_t now;
644   char * now_str;
645   t_uchar * nl;
646   t_uchar * my_id;
647   pid_t my_pid;
648   t_uchar my_pid_str[128];
649   t_uchar seq_str[128];
650   t_uchar * id;
651 
652   if (0 > time (&now))
653     panic ("unable to get time of day in arch_generate_id");
654 
655   now_str = ctime (&now);
656   nl = str_chr_index (now_str, '\n');
657   if (nl)
658     *nl = 0;
659   my_id = arch_my_id ();
660   my_pid = getpid ();
661   cvt_ulong_to_decimal (my_pid_str, (unsigned long)my_pid);
662   cvt_ulong_to_decimal (seq_str, (unsigned long)seq);
663   ++seq;
664 
665   id = str_alloc_cat_many (0, my_id, " ", now_str, " ", my_pid_str, ".", seq_str, str_end);
666 
667   lim_free (0, my_id);
668   return id;
669 }
670 
671 
672 int
arch_add_explicit_id(const t_uchar * path,const t_uchar * id)673 arch_add_explicit_id (const t_uchar * path,
674                       const t_uchar * id)
675 {
676   t_uchar * id_file;
677   t_uchar * id_dir;
678   int out_fd;
679   int r = 1;
680 
681   id_file = arch_explicit_id_file_for (path);
682 
683   if (!safe_access (id_file, F_OK))
684     {
685       safe_printfmt (2, "attempt to id already tagged file: %s\n", path);
686       r = 0;
687     }
688   else
689     {
690       id_dir = file_name_directory_file (0, id_file);
691 
692       ensure_directory_exists (id_dir);
693       out_fd = safe_open (id_file, O_WRONLY | O_CREAT | O_EXCL, 0666);
694       safe_printfmt (out_fd, "%s\n", id);
695       safe_close (out_fd);
696       lim_free (0, id_dir);
697     }
698   lim_free (0, id_file);
699   return r;
700 
701 }
702 
703 
704 void
arch_delete_explicit_id(const t_uchar * path)705 arch_delete_explicit_id (const t_uchar * path)
706 {
707   t_uchar * id_file;
708 
709   id_file = arch_explicit_id_file_for (path);
710 
711   if (!safe_access (id_file, F_OK))
712     safe_unlink (id_file);
713   else
714     {
715       safe_printfmt (2, "attempt to remove non-existent id for %s\n", path);
716       exit (2);
717     }
718 
719   lim_free (0, id_file);
720 }
721 
722 
723 void
arch_move_explicit_id(const t_uchar * from,const t_uchar * to)724 arch_move_explicit_id (const t_uchar * from,
725                        const t_uchar * to)
726 {
727   t_uchar * old_id_file;
728   t_uchar * new_id_file;
729   t_uchar * new_id_dir;
730 
731   old_id_file = arch_explicit_id_file_for (from);
732   new_id_file = arch_explicit_id_file_for (to);
733   new_id_dir = file_name_directory_file (0, new_id_file);
734 
735   ensure_directory_exists (new_id_dir);
736   safe_rename (old_id_file, new_id_file);
737 
738   lim_free (0, old_id_file);
739   lim_free (0, new_id_file);
740   lim_free (0, new_id_dir);
741 }
742 
743 
744 t_uchar *
arch_strong_explicit_dflt_file(const t_uchar * dir)745 arch_strong_explicit_dflt_file (const t_uchar * dir)
746 {
747   t_uchar * dot_arch_dir;
748   t_uchar * answer;
749 
750   dot_arch_dir = file_name_in_vicinity (0, dir, ".arch-ids");
751   answer = file_name_in_vicinity (0, dot_arch_dir, "=all");
752 
753   lim_free (0, dot_arch_dir);
754   return answer;
755 }
756 
757 
758 t_uchar *
arch_weak_explicit_dflt_file(const t_uchar * dir)759 arch_weak_explicit_dflt_file (const t_uchar * dir)
760 {
761   t_uchar * dot_arch_dir;
762   t_uchar * answer;
763 
764   dot_arch_dir = file_name_in_vicinity (0, dir, ".arch-ids");
765   answer = file_name_in_vicinity (0, dot_arch_dir, "=default");
766 
767   lim_free (0, dot_arch_dir);
768   return answer;
769 }
770 
771 
772 t_uchar *
arch_dont_care_explicit_dflt_file(const t_uchar * dir)773 arch_dont_care_explicit_dflt_file (const t_uchar * dir)
774 {
775   t_uchar * dot_arch_dir;
776   t_uchar * answer;
777 
778   dot_arch_dir = file_name_in_vicinity (0, dir, ".arch-ids");
779   answer = file_name_in_vicinity (0, dot_arch_dir, "=dont-care");
780 
781   lim_free (0, dot_arch_dir);
782   return answer;
783 }
784 
785 
786 int
arch_is_dont_care_explicit_dflt_dir(const t_uchar * dir)787 arch_is_dont_care_explicit_dflt_dir (const t_uchar * dir)
788 {
789   t_uchar * file = 0;
790   int answer;
791 
792   file = arch_dont_care_explicit_dflt_file (dir);
793   answer = !safe_access (file, F_OK);
794 
795   lim_free (0, file);
796   return answer;
797 }
798 
799 
800 void
arch_delete_strong_explicit_default(const t_uchar * dir)801 arch_delete_strong_explicit_default (const t_uchar * dir)
802 {
803   t_uchar * file;
804 
805   file = arch_strong_explicit_dflt_file (dir);
806 
807   if (safe_access (file, F_OK))
808     {
809       safe_printfmt (2, "attempt to delete non-existing strong explicit default in %s\n", dir);
810       exit (2);
811     }
812 
813   safe_unlink (dir);
814   lim_free (0, file);
815 }
816 
817 
818 void
arch_delete_weak_explicit_default(const t_uchar * dir)819 arch_delete_weak_explicit_default (const t_uchar * dir)
820 {
821   t_uchar * file;
822 
823   file = arch_weak_explicit_dflt_file (dir);
824 
825   if (safe_access (file, F_OK))
826     {
827       safe_printfmt (2, "attempt to delete non-existing weak explicit default in %s\n", dir);
828       exit (2);
829     }
830 
831   safe_unlink (dir);
832   lim_free (0, file);
833 }
834 
835 
836 void
arch_delete_dont_care_explicit_default(const t_uchar * dir)837 arch_delete_dont_care_explicit_default (const t_uchar * dir)
838 {
839   t_uchar * file;
840 
841   file = arch_dont_care_explicit_dflt_file (dir);
842 
843   if (safe_access (file, F_OK))
844     {
845       safe_printfmt (2, "attempt to delete non-existing dont-care explicit default in %s\n", dir);
846       exit (2);
847     }
848 
849   safe_unlink (dir);
850   lim_free (0, file);
851 }
852 
853 
854 void
arch_set_strong_explicit_default(const t_uchar * dir,const t_uchar * id)855 arch_set_strong_explicit_default (const t_uchar * dir,
856                                   const t_uchar * id)
857 {
858   t_uchar * file;
859   t_uchar * file_dir;
860   int out_fd;
861 
862   file = arch_strong_explicit_dflt_file (dir);
863   file_dir = file_name_directory_file (0, file);
864 
865   if (!safe_access (file, F_OK))
866     {
867       safe_printfmt (2, "attempt to overwrite strong explicit default in %s\n", dir);
868       exit (2);
869     }
870 
871   ensure_directory_exists (file_dir);
872   out_fd = safe_open (file, O_WRONLY | O_CREAT | O_EXCL, 0666);
873   safe_printfmt (out_fd, "%s\n", id);
874   safe_close (out_fd);
875 
876   lim_free (0, file);
877   lim_free (0, file_dir);
878 }
879 
880 
881 void
arch_set_weak_explicit_default(const t_uchar * dir,const t_uchar * id)882 arch_set_weak_explicit_default (const t_uchar * dir,
883                                 const t_uchar * id)
884 {
885   t_uchar * file;
886   t_uchar * file_dir;
887   int out_fd;
888 
889   file = arch_weak_explicit_dflt_file (dir);
890   file_dir = file_name_directory_file (0, file);
891 
892   if (!safe_access (file, F_OK))
893     {
894       safe_printfmt (2, "attempt to overwrite weak explicit default in %s\n", dir);
895       exit (2);
896     }
897 
898   ensure_directory_exists (file_dir);
899   out_fd = safe_open (file, O_WRONLY | O_CREAT | O_EXCL, 0666);
900   safe_printfmt (out_fd, "%s\n", id);
901   safe_close (out_fd);
902 
903   lim_free (0, file);
904   lim_free (0, file_dir);
905 }
906 
907 
908 void
arch_set_dont_care_explicit_default(const t_uchar * dir)909 arch_set_dont_care_explicit_default (const t_uchar * dir)
910 {
911   t_uchar * file;
912   t_uchar * file_dir;
913   int out_fd;
914 
915   file = arch_dont_care_explicit_dflt_file (dir);
916   file_dir = file_name_directory_file (0, file);
917 
918   if (safe_access (file, F_OK))
919     {
920       ensure_directory_exists (file_dir);
921       out_fd = safe_open (file, O_WRONLY | O_CREAT | O_EXCL, 0666);
922       safe_close (out_fd);
923     }
924 
925   lim_free (0, file);
926   lim_free (0, file_dir);
927 }
928 
929 
930 
931 
932 /*(c file_id)
933  * static t_uchar * file_id (int * errn,
934  *                           struct alloc_limits * limits,
935  *                           enum ftag_method method,
936  *                           const t_uchar * path,
937  *                           assoc_table id_tagging_shortcut,
938  *                           struct stat * known_lstat,
939  *                           assoc_table * explicit_skips)
940  *
941  * Return a newly allocated string containing the inventory id
942  * of the file `path' using inventory method `method'.
943  *
944  * Return 0 and set `*errn' if the id can not be computed.
945  * If no I/O error occurs, the file does not have an explicit
946  * id, but `method' is `ftag_explicit', `*errn' is set to 0.
947  */
948 static t_uchar *
file_id(int * errn,struct alloc_limits * limits,enum ftag_method method,int untagged_is_source,const t_uchar * path,assoc_table id_tagging_shortcut,struct stat * known_lstat,assoc_table * explicit_skips)949 file_id (int * errn,
950          struct alloc_limits * limits,
951          enum ftag_method method,
952          int untagged_is_source,
953          const t_uchar * path,
954          assoc_table id_tagging_shortcut,
955          struct stat * known_lstat,
956          assoc_table * explicit_skips)
957 {
958   t_uchar * answer = 0;
959   t_uchar * as_file = 0;
960   t_uchar * basename = 0;
961   t_uchar * dir = 0;
962   t_uchar * dir_as_file = 0;
963   t_uchar * dir_basename = 0;
964   t_uchar * id_file = 0;
965   struct stat stat_buf;
966   int is_dir;
967   int is_symlink;
968 
969   if (!path)
970     {
971       *errn = EINVAL;
972 
973     return_answer:
974 
975       lim_free (limits, as_file);
976       lim_free (limits, basename);
977       lim_free (limits, dir);
978       lim_free (limits, dir_as_file);
979       lim_free (limits, dir_basename);
980       lim_free (limits, id_file);
981 
982       return answer;
983     }
984 
985   as_file = file_name_from_directory (limits, path);
986   if (!as_file)
987     {
988     enomem_error:
989       *errn = ENOMEM;
990       goto return_answer;
991     }
992 
993   if (method == ftag_names)
994     {
995       answer = str_alloc_cat (limits, "?", as_file);
996       if (!answer)
997         goto enomem_error;
998       else
999         goto return_answer;
1000     }
1001 
1002 
1003   basename = file_name_tail (limits, as_file);
1004   dir = file_name_directory (limits, as_file);
1005   if (!dir)
1006     dir = str_save (limits, ".");
1007   if (!(basename && dir))
1008     goto enomem_error;
1009 
1010   dir_as_file = file_name_from_directory (limits, dir);
1011   if (!dir_as_file)
1012     goto enomem_error;
1013 
1014   dir_basename = file_name_tail (limits, dir_as_file);
1015   if (!dir_basename)
1016     goto enomem_error;
1017 
1018   /* Explicit id files use their contents as id, with the
1019    * prefix 'E'.
1020    */
1021   if (!str_cmp (dir_basename, ".arch-ids"))
1022     {
1023       answer = explicit_id (errn, 0, limits, path, as_file, "E_", 0);
1024       goto return_answer;
1025     }
1026 
1027   /* Explicit id file directories:
1028    */
1029   if (!str_cmp (basename, ".arch-ids"))
1030     {
1031       long amt;
1032       answer = str_alloc_cat (limits, "D_", as_file);
1033       if (!answer)
1034         goto enomem_error;
1035       amt = smash_non_graphical (answer, str_length (answer));
1036       answer[amt] = 0;
1037       goto return_answer;
1038     }
1039 
1040   /* Paths beginning with "./{arch}" are tagged with their own
1041    * path name, with the prefix "A_".  The presumptions are that these
1042    * files never move, and that if a file is present, its contents are
1043    * invariant.
1044    */
1045   if (is_at_or_underneath_archdir (as_file))
1046     {
1047       long amt;
1048       answer = str_alloc_cat (limits, "A_", as_file);
1049       if (!answer)
1050         goto enomem_error;
1051       amt = smash_non_graphical (answer, str_length (answer));
1052       answer[amt] = 0;
1053       goto return_answer;
1054     }
1055 
1056 
1057   /* Try for an explicit id:
1058    */
1059   if (known_lstat)
1060     stat_buf = *known_lstat;
1061   else if (0 > vu_lstat (errn, as_file, &stat_buf))
1062     goto return_answer;
1063 
1064   if (S_ISDIR (stat_buf.st_mode))
1065     {
1066       is_dir = 1;
1067       is_symlink = 0;
1068       id_file = file_name_in_vicinity (limits, as_file, ".arch-ids/=id");
1069       if (!id_file)
1070         goto enomem_error;
1071     }
1072   else
1073     {
1074       is_dir = 0;
1075       is_symlink = S_ISLNK (stat_buf.st_mode);
1076 
1077       id_file = file_name_in_vicinity (limits, dir, ".arch-ids/");
1078       if (!id_file)
1079         goto enomem_error;
1080       id_file = str_realloc_cat (limits, id_file, basename);
1081       if (!id_file)
1082         goto enomem_error;
1083       id_file = str_realloc_cat (limits, id_file, ".id");
1084       if (!id_file)
1085         goto enomem_error;
1086     }
1087 
1088   *errn = 0;
1089   answer = explicit_id (errn, 0, limits, path, id_file, "x_", 0);
1090   if (answer || (*errn != ENOENT))
1091     goto return_answer;
1092   else
1093     {
1094       /* Is there a .arch-ids/=all file here?
1095        */
1096       lim_free (limits, id_file);
1097       if (is_dir)
1098         id_file = file_name_in_vicinity (limits, as_file, ".arch-ids/=all");
1099       else
1100         id_file = file_name_in_vicinity (limits, dir, ".arch-ids/=all");
1101 
1102       if (!id_file)
1103         goto enomem_error;
1104 
1105       *errn = 0;
1106       answer = explicit_id (errn, explicit_skips, limits, path, id_file, "a_", (is_dir ? (t_uchar *)"./." : basename));
1107       if (answer || (*errn != ENOENT))
1108         goto return_answer;
1109 
1110       if ((method == ftag_implicit) && !is_dir && !is_symlink)
1111         {
1112           *errn = 0;
1113           answer = implicit_id (errn, limits, path, basename, "i_", &stat_buf, id_tagging_shortcut);
1114           if (answer || *errn)
1115             goto return_answer;
1116         }
1117 
1118       if ((method == ftag_tagline) && !is_dir && !is_symlink)
1119         {
1120           *errn = 0;
1121           answer = implicit_id (errn, limits, path, 0, "i_", &stat_buf, id_tagging_shortcut);
1122           if (answer || *errn)
1123             goto return_answer;
1124         }
1125 
1126 
1127       /* is there an "=default" id?
1128        */
1129       lim_free (limits, id_file);
1130       if (is_dir)
1131         id_file = file_name_in_vicinity (limits, as_file, ".arch-ids/=default");
1132       else
1133         id_file = file_name_in_vicinity (limits, dir, ".arch-ids/=default");
1134 
1135       if (!id_file)
1136         goto enomem_error;
1137 
1138       *errn = 0;
1139       answer = explicit_id (errn, explicit_skips, limits, path, id_file, "w_", (is_dir ?  (t_uchar *)"./." : basename));
1140       if (answer || (*errn != ENOENT))
1141         goto return_answer;
1142 
1143       /* no explicit, =all, implicit, tagline, or =default id.
1144        */
1145       if (untagged_is_source)
1146         {
1147           int skipped;
1148 
1149           lim_free (limits, id_file);
1150           id_file = file_name_in_vicinity (limits, dir, ".arch-ids/=dont-care");
1151           if (!id_file)
1152             goto enomem_error;
1153 
1154           skipped = (explicit_skips && !!assoc_get_str_taking (*explicit_skips, rel_make_field_str (id_file)));
1155 
1156           if (!skipped && (0 <= vu_lstat (errn, id_file, &stat_buf)))
1157             {
1158               long amt;
1159               answer = str_alloc_cat (limits, "k_", as_file);
1160               if (!answer)
1161                 goto enomem_error;
1162               amt = smash_non_graphical (answer, str_length (answer));
1163               answer[amt] = 0;
1164               goto return_answer;
1165             }
1166 
1167           if (!skipped && explicit_skips)
1168             assoc_set_taking (explicit_skips, rel_make_field_str (id_file), rel_make_field_str ("yes"));
1169 
1170           if (*errn == ENOENT)
1171             {
1172               long amt;
1173               answer = str_alloc_cat (limits, "?_", as_file);
1174               if (!answer)
1175                 goto enomem_error;
1176               amt = smash_non_graphical (answer, str_length (answer));
1177               answer[amt] = 0;
1178               goto return_answer;
1179             }
1180           else
1181             goto return_answer;
1182         }
1183       else
1184         {
1185           *errn = 0;
1186           goto return_answer;
1187         }
1188     }
1189 
1190 }
1191 
1192 
1193 
1194 t_uchar *
arch_id_from_explicit_file(int * errn,const t_uchar * path)1195 arch_id_from_explicit_file (int *errn,
1196                             const t_uchar * path)
1197 {
1198   return explicit_id (errn, 0, 0, 0, path, "x_", 0);
1199 }
1200 
1201 
/* Return 1 if `rel_file' names "{arch}" or something beneath it,
 * 0 otherwise.
 *
 * The pattern accepts an optional leading directory prefix, then
 * "{arch}" followed by any number of components that each start
 * with a letter or '=' and contain no '/' or '~'; or the specific
 * file "{arch}/.arch-project-tree".
 *
 * The regexp is compiled on the first call and kept in static
 * storage for later calls.
 */
static int
is_at_or_underneath_archdir (const char * rel_file)
{
  static regex_t archdir_pattern = {0,};
  static int archdir_pattern_ready = 0;

  if (!archdir_pattern_ready)
    {
      int re_error = regcomp (&archdir_pattern, "^(.*/)?(\\{arch\\}(/[a-zA-Z=][^/~]*)*|\\{arch\\}/\\.arch-project-tree)$", REG_EXTENDED);

      invariant (!re_error);
      archdir_pattern_ready = 1;
    }

  return filename_matches (&archdir_pattern, rel_file);
}
1219 
1220 
/* Run the compiled regexp `pattern' against `filename'.
 *
 * Return 1 on a match and 0 on REG_NOMATCH.  Any other result
 * from `regexec' is reported through `panic'.
 */
static int
filename_matches (regex_t * pattern,
                  const char * filename)
{
  switch (regexec (pattern, filename, 0, 0, 0))
    {
    case REG_NOMATCH:
      return 0;

    case REG_NOERROR:
      return 1;

    default:
      break;
    }

  panic ("unexpected regexec error in arch_inventory_traversal");
  return -1;
}
1238 
1239 
1240 static t_uchar *
explicit_id(int * errn,assoc_table * skips,struct alloc_limits * limits,const t_uchar * arg_file,const t_uchar * id_file,const t_uchar * prefix,const t_uchar * postfix)1241 explicit_id (int * errn,
1242              assoc_table * skips,
1243              struct alloc_limits * limits,
1244              const t_uchar * arg_file,
1245              const t_uchar * id_file,
1246              const t_uchar * prefix,
1247              const t_uchar * postfix)
1248 {
1249   int id_fd;
1250   t_uchar * answer;
1251   char buf[1024];
1252   long amt;
1253   int ign;
1254 
1255 
1256   if (skips && assoc_get_str_taking (*skips, rel_make_field_str (id_file)))
1257     {
1258       *errn = ENOENT;
1259       return 0;
1260     }
1261 
1262   id_fd = vu_open (errn, id_file, O_RDONLY, 0);
1263 
1264   if (id_fd < 0)
1265     {
1266       if (skips)
1267         assoc_set_taking (skips, rel_make_field_str (id_file), rel_make_field_str ("yes"));
1268       return 0;
1269     }
1270 
1271   answer = str_save (limits, prefix);
1272   if (!answer)
1273     {
1274     enomem_error:
1275       *errn = ENOMEM;
1276       if (answer)
1277         lim_free (limits, answer);
1278       return 0;
1279     }
1280 
1281   while (1)
1282     {
1283       t_uchar * eol;
1284 
1285       amt = vu_read_retry (errn, id_fd, buf, sizeof (buf));
1286 
1287       if (amt < 0)
1288         {
1289           lim_free (limits, answer);
1290           vu_close (&ign, id_fd);
1291           return 0;
1292         }
1293 
1294       if (!amt)
1295         break;
1296 
1297       eol = str_chr_index_n (buf, amt, '\n');
1298       if (!eol)
1299         {
1300           t_uchar * old_answer;
1301           amt = smash_non_graphical (buf, amt);
1302           old_answer = answer;
1303           answer = str_realloc_cat_n (limits, answer, buf, amt);
1304           if (!answer)
1305             goto enomem_error;
1306         }
1307       else
1308         {
1309           t_uchar * old_answer;
1310           amt = eol - (t_uchar *)buf;
1311           amt = smash_non_graphical (buf, amt);
1312           old_answer = answer;
1313           answer = str_realloc_cat_n (limits, answer, buf, amt);
1314           if (!answer)
1315             goto enomem_error;
1316           break;
1317         }
1318     }
1319 
1320   answer = str_realloc_cat (limits, answer, (postfix ? postfix : (t_uchar *)""));
1321   if (!answer)
1322     goto enomem_error;
1323   vu_close (&ign, id_fd);
1324   return answer;
1325 }
1326 
1327 
1328 static t_uchar *
implicit_id(int * errn,struct alloc_limits * limits,const t_uchar * file,const t_uchar * basename,const t_uchar * prefix,struct stat * statb,assoc_table id_tagging_shortcut)1329 implicit_id (int * errn,
1330              struct alloc_limits * limits,
1331              const t_uchar * file,
1332              const t_uchar * basename,
1333              const t_uchar * prefix,
1334              struct stat * statb,
1335              assoc_table id_tagging_shortcut)
1336 {
1337   int file_fd;
1338   struct stat file_stat_buf;
1339   char buf[1025];
1340   int amt;
1341   int line;
1342   int bottom;
1343 
1344   if (id_tagging_shortcut)
1345     {
1346       t_uchar * signature = arch_statb_inode_sig (statb);
1347       const t_uchar * cached_id = assoc_get_str_taking (id_tagging_shortcut, rel_make_field_str (signature));
1348 
1349       lim_free (0, signature);
1350       if (cached_id && (cached_id[0] == 'i') && (cached_id[1] == '_'))
1351         return str_save (0, cached_id);
1352       else if (cached_id)
1353         return 0;
1354     }
1355 
1356   /* This is a slightly screwy, historic interface.
1357    *
1358    * Passing `base != 0' means the old, larch-style tag syntax.
1359    *
1360    * Passing `base == 0' means tagline syntax.
1361    */
1362 
1363   /* Search the file itself (last, then first 1K) for a line beginning:
1364    *
1365    * tla-style tagline id tagging (basename == 0)
1366    * -----------------------------------------
1367    *
1368    * <punct>arch-tag:<blanks>
1369    *
1370    *
1371    * larch-style implicit id tagging (basename != 0)
1372    * --------------------------------------------
1373    *
1374    * <punct>basename<blanks>-
1375    *
1376    * or
1377    *
1378    * <punct>tag:<blanks>
1379    *
1380    * after the dash, skip any blanks -- the rest is the id.
1381    */
1382 
1383   file_fd = vu_open (errn, file, O_RDONLY, 0);
1384   if (file_fd < 0)
1385     return 0;
1386 
1387   if (0 > vu_fstat (errn, file_fd, &file_stat_buf))
1388       goto error_return;
1389 
1390   for (bottom = 1; bottom >= 0; --bottom)
1391     {
1392       if (!bottom)
1393         {
1394           if (0 > vu_lseek (errn, file_fd, 0, SEEK_SET))
1395             {
1396               int ign;
1397             error_return:
1398               vu_close (&ign, file_fd);
1399               return 0;
1400             }
1401           amt = vu_read_retry (errn, file_fd, buf, sizeof (buf) - 1);
1402           if (amt < 0)
1403             goto error_return;
1404         }
1405       else
1406         {
1407           char * x;
1408 
1409           if (file_stat_buf.st_size > sizeof (buf))
1410             amt = sizeof (buf);
1411           else
1412             continue;
1413           /* Yes, this is a off by one error. However changing it
1414            * breaks existing file-ids
1415            */
1416           if (0 > vu_lseek (errn, file_fd, -1026, SEEK_END))
1417             goto error_return;
1418           amt = vu_read_retry (errn, file_fd, buf, sizeof (buf));
1419           if (amt < 0)
1420             goto error_return;
1421           x = str_chr_index_n (buf, amt, '\n');
1422           if (!x)
1423             continue;
1424           amt = amt - (1 + x - buf);
1425           mem_move (buf, x + 1, amt);
1426         }
1427 
1428       buf[amt] = 0;
1429       line = 0;
1430 
1431       while (1)
1432         {
1433           int is_inventory_id;
1434 
1435           /* skip punctuation and blanks at the start of the line
1436            */
1437           while ((line < amt) && (char_is_punct (buf[line]) || char_is_blank (buf[line])))
1438             ++line;
1439 
1440           if (line == amt)
1441             break;
1442 
1443           if (buf[line] == '\n')
1444             {
1445               ++line;
1446               continue;
1447             }
1448 
1449           is_inventory_id = (basename ? !str_cmp_prefix ("tag:", buf + line) : !str_cmp_prefix ("arch-tag:", buf + line));
1450 
1451           if (   !is_inventory_id
1452               && (!basename || str_cmp_prefix (basename, buf + line)))
1453             {
1454               t_uchar * eol;
1455 
1456             not_this_line:
1457               eol = str_chr_index_n (buf + line, amt - line, '\n');
1458               if (!eol)
1459                 break;
1460               line = eol - (t_uchar *)buf;
1461             }
1462           else
1463             {
1464               t_uchar * eol;
1465 
1466               if (is_inventory_id)
1467                 line += (basename ? str_length ("tag:") : str_length ("arch-tag:"));
1468               else
1469                 line += str_length (basename);
1470 
1471               if (!is_inventory_id)
1472                 {
1473                   while ((line < amt) && char_is_blank (buf[line]))
1474                     ++line;
1475 
1476                   if (line == amt)
1477                     break;
1478 
1479                   if (buf[line] != '-')
1480                     goto not_this_line;
1481 
1482                   ++line;
1483                 }
1484 
1485               if (line == amt)
1486                 break;
1487 
1488               /* This is the tag line.
1489                */
1490               while ((line < amt) && char_is_blank (buf[line]))
1491                 ++line;
1492 
1493               eol = str_chr_index_n (buf + line, amt - line, '\n');
1494               if (!eol)
1495                 /* end of buffer no eol */
1496                 {
1497                   eol = buf + amt;
1498                   if (!bottom)
1499                     if (file_stat_buf.st_size > sizeof (buf))
1500                       safe_printfmt (2, "Warning: top-of-file truncated tag in: %s\n", file);
1501                 }
1502 
1503               if (0 == (eol - (t_uchar *)(buf + line)))
1504                 {
1505                   /* an empty id
1506                    */
1507                   break;
1508                 }
1509 
1510               {
1511                 long size;
1512                 t_uchar * answer;
1513 
1514                 size = smash_non_graphical (buf + line, eol - (t_uchar *)(buf + line));
1515                 answer = str_alloc_cat_n (limits, prefix, buf + line, size);
1516                 if (0 > vu_close (errn, file_fd))
1517                   goto error_return;
1518                 if (!answer)
1519                   *errn = ENOMEM;
1520                 return answer;
1521               }
1522             }
1523         }
1524     }
1525 
1526   if (0 > vu_close (errn, file_fd))
1527     goto error_return;
1528   *errn = 0;
1529   return 0;
1530 }
1531 
1532 
1533 
1534 static long
smash_non_graphical(t_uchar * buf,long amt)1535 smash_non_graphical (t_uchar * buf, long amt)
1536 {
1537   long x;
1538 
1539   while (amt > 0)
1540     {
1541       if (!char_is_graph (buf[amt - 1]))
1542         --amt;
1543       else
1544         break;
1545     }
1546 
1547   for (x = 0; x < amt; ++x)
1548     {
1549       if (!char_is_graph (buf[x]))
1550         buf[x] = '_';
1551     }
1552 
1553   return amt;
1554 }
1555 
1556 
1557 int
str_cmp_suffix_n(t_uchar * a_string,t_uchar * b_string,int length)1558 str_cmp_suffix_n(t_uchar * a_string, t_uchar * b_string, int length)
1559 {
1560     int len_a = str_length(a_string);
1561     int len_b = str_length(b_string);
1562     int cmp_length = (len_a < len_b) ? len_a : len_b;
1563     cmp_length = (cmp_length < length) ? cmp_length : length;
1564     return str_cmp(a_string + len_a - cmp_length, b_string + len_b - cmp_length);
1565 }
1566 
1567 
1568 
1569 /* tag: Tom Lord Wed May 14 07:20:26 2003 (inv-tags.c)
1570  */
1571