1 static char rcsid[] = "$Id: datadir.c 218159 2019-01-17 05:49:10Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "datadir.h"
7 #include <stdio.h>
8 #include <stdlib.h>		/* For getenv */
9 #include <string.h>		/* For strlen */
10 #include <strings.h>		/* For rindex */
11 #include <pwd.h>
12 #ifdef HAVE_SYS_TYPES_H
13 #include <sys/types.h>		/* Needed for dirent.h */
14 #endif
15 #if HAVE_DIRENT_H
16 # include <dirent.h>
17 # define NAMLEN(dirent) strlen((dirent)->d_name)
18 #else
19 # define dirent direct
20 # define NAMLEN(dirent) (dirent)->d_namlen
21 # if HAVE_SYS_NDIR_H
22 #  include <sys/ndir.h>
23 # endif
24 # if HAVE_SYS_DIR_H
25 #  include <sys/dir.h>
26 # endif
27 # if HAVE_NDIR_H
28 #  include <ndir.h>
29 # endif
30 #endif
31 
32 #include <math.h>		/* for qsort */
33 #include <string.h>		/* for strcmp */
34 #include "mem.h"
35 #include "fopen.h"
36 #include "getline.h"
37 #include "access.h"
38 #include "list.h"
39 
40 
41 /* Note: GMAPDB is defined externally by configure */
42 #ifndef GMAPDB
43 #error A default value for GMAPDB was not provided to configure.  Please do so, or edit the Makefile
44 #endif
45 
/* Scans the open configuration stream FP, one line at a time, for an
   entry of the form TAG=VALUE (whitespace around '=' is tolerated).
   Returns a newly allocated copy of VALUE from the first line whose tag
   equals TAG, or NULL if no line matches.  The caller owns the returned
   string and releases it with FREE.  The stream is not closed here. */
static char *
read_config_file (FILE *fp, char *tag) {
  char *directory = NULL, *seentag, *dirbuffer, *line;
  int line_length;

  while ((line = Getline_wlength(&line_length,fp)) != NULL) {
    /* Both tokens are substrings of the line, so line_length+1 bytes
       are enough for either (same sizing as before; presumes that
       line_length is the string length of line) */
    seentag = (char *) MALLOC((line_length+1)*sizeof(char));
    dirbuffer = (char *) MALLOC((line_length+1)*sizeof(char));

    /* A plain "%s=%s" cannot work: the first %s consumes the '=' and
       everything after it.  The scanset stops the tag at '=' or
       whitespace, and both fields must be converted before either
       buffer is read. */
    if (sscanf(line," %[^= \t\r\n] =%s",seentag,dirbuffer) == 2 &&
        strcmp(seentag,tag) == 0) {
      directory = (char *) MALLOC((strlen(dirbuffer)+1)*sizeof(char));
      strcpy(directory,dirbuffer);
    }

    FREE(dirbuffer);
    FREE(seentag);
    FREE(line);

    if (directory != NULL) {
      return directory;
    }
  }

  return NULL;
}
71 
72 
/* Tries to open ".gmaprc" in the home directory of the user named by the
   USER environment variable.  Returns NULL when USER is unset or has no
   passwd entry; otherwise returns whatever FOPEN_READ_TEXT yields for
   <home>/.gmaprc. */
static FILE *
find_homedir_config () {
  const char *username;
  struct passwd *pwentry;
  char *rcpath;
  FILE *stream;

  username = getenv("USER");
  if (username == NULL) {
    return NULL;
  }

  pwentry = getpwnam(username);
  if (pwentry == NULL) {
    return NULL;
  }

  /* Room for the home directory, "/.gmaprc" and the terminating NUL */
  rcpath = (char *) CALLOC(strlen(pwentry->pw_dir)+strlen("/.gmaprc")+1,sizeof(char));
  strcpy(rcpath,pwentry->pw_dir);
  strcat(rcpath,"/.gmaprc");

  stream = FOPEN_READ_TEXT(rcpath);
  FREE(rcpath);

  return stream;
}
89 
90 
91 
/* Determines the file root of the genome stored in GENOMESUBDIR by
   scanning it for files whose last extension is ".version".

   - If a file named DBROOT.version is present, a copy of DBROOT is
     returned immediately.
   - Otherwise, if exactly one .version file is present, its name without
     the extension is returned.
   - If GENOMESUBDIR cannot be opened, or if zero or several candidate
     .version files are found, a diagnostic is written to stderr and the
     program exits with status 9.  GENOMEDIR is only used to decide which
     diagnostic to print when GENOMESUBDIR cannot be opened.

   The returned string is allocated with MALLOC and owned by the caller. */
static char *
find_fileroot (char *genomesubdir, char *genomedir, char *dbroot) {
  char *fileroot = NULL, *filename, *p;
  struct dirent *entry;
  DIR *dp;
  int nchoices = 0;
  size_t rootlen;

  if ((dp = opendir(genomesubdir)) == NULL) {
    /* Problem found.  Try to diagnose */
    if ((dp = opendir(genomedir)) == NULL) {
      fprintf(stderr,"Unable to find genome directory %s\n",genomedir);
      /* Trailing space keeps "the" and "correct" apart in the output */
      fprintf(stderr,"Either recompile the GMAP package to have the ");
      fprintf(stderr,"correct default directory (seen by doing gmap --version),\n");
      fprintf(stderr,"or use the -D flag to gmap to specify the correct genome directory.\n");
      exit(9);
    } else {
      fprintf(stderr,"Unable to find genome %s in directory %s\n",dbroot,genomedir);
      fprintf(stderr,"Make sure you have typed the genome correctly, or use the -D flag\n");
      fprintf(stderr,"(or the -F flag for cmetindex or atoiindex) to specify a directory.\n");
      fprintf(stderr,"For example, '-D .' specifies this directory.\n");
      exit(9);
    }
  }

  /* Determine the number of .version files */
  while ((entry = readdir(dp)) != NULL) {
    filename = entry->d_name;
    p = strrchr(filename,'.');
    if (p == NULL || strcmp(p,".version") != 0) {
      continue;
    }

    /* Keep only the most recent candidate: drop the previous one and
       copy the part of the name that precedes ".version" */
    rootlen = (size_t) (p - filename);
    FREE(fileroot);
    fileroot = (char *) MALLOC((rootlen + 1)*sizeof(char));
    memcpy(fileroot,filename,rootlen);
    fileroot[rootlen] = '\0';

    if (!strcmp(fileroot,dbroot)) {
      /* However, if dbroot exists, just take that */
      if (closedir(dp) < 0) {
        fprintf(stderr,"Unable to close directory %s\n",genomesubdir);
      }
      return fileroot;
    }

    nchoices += 1;
  }

  /* Did not find dbroot */
  if (closedir(dp) < 0) {
    fprintf(stderr,"Unable to close directory %s\n",genomesubdir);
  }

  if (nchoices == 1) {
    return fileroot;
  } else if (nchoices == 0) {
    fprintf(stderr,"Unable to find file ending with .version in directory %s\n",genomesubdir);
    exit(9);
  } else {
    fprintf(stderr,"Found multiple files (%d) ending with .version in directory %s\n",nchoices,genomesubdir);
    FREE(fileroot);
    exit(9);
  }
}
156 
157 
158 
/* Reads the first line of the file FILENAME through Getline and returns
   it.  Returns NULL when the file cannot be opened or when Getline
   returns NULL.  The stream is always closed before returning; the
   caller owns the returned line. */
static char *
get_dbversion (char *filename) {
  FILE *stream;
  char *firstline;

  stream = FOPEN_READ_TEXT(filename);
  if (stream == NULL) {
    return (char *) NULL;
  }

  /* Whether or not a line was obtained, the stream is no longer needed */
  firstline = Getline(stream);
  fclose(stream);

  return firstline;
}
181 
182 
183 char *
Datadir_find_genomedir(char * user_genomedir)184 Datadir_find_genomedir (char *user_genomedir) {
185   FILE *fp;
186   char *genomedir;
187 
188   if (user_genomedir != NULL) {
189     genomedir = (char *) CALLOC(strlen(user_genomedir)+1,sizeof(char));
190     strcpy(genomedir,user_genomedir);
191 
192   } else if (getenv("GMAPDB") != NULL) {
193     /* Use genomedir provided by environment variable */
194     genomedir = (char *) CALLOC(strlen(getenv("GMAPDB"))+1,sizeof(char));
195     strcpy(genomedir,getenv("GMAPDB"));
196 
197   } else if ((fp = FOPEN_READ_TEXT("./.gmaprc")) != NULL) {
198     genomedir = read_config_file(fp,"GMAPDB");
199     fclose(fp);
200 
201   } else if ((fp = find_homedir_config()) != NULL) {
202     genomedir = read_config_file(fp,"GMAPDB");
203     fclose(fp);
204 
205   } else {
206     genomedir = (char *) CALLOC(strlen(GMAPDB)+1,sizeof(char));
207     strcpy(genomedir,GMAPDB);
208   }
209 
210   return genomedir;
211 }
212 
213 
/* Locates the directory that holds the files of genome DBROOT and returns
   it.  Allocates space for the returned directory, *fileroot, and
   *dbversion; the caller owns all three.

   The base directory comes from Datadir_find_genomedir(user_genomedir).
   If DBROOT contains a '/', the part before the last '/' is appended to
   the base directory and only the part after it is used as the genome
   name.  Note that this split is done in place: the last '/' in the
   caller's DBROOT buffer is overwritten with a NUL.

   If <base>/<dbroot>.version can be opened, the base directory itself is
   returned and *fileroot is a copy of dbroot.  Otherwise <base>/<dbroot>
   is returned and *fileroot is determined by find_fileroot.

   *dbversion receives the result of get_dbversion on the .version file,
   or a copy of dbroot when get_dbversion returns NULL. */
char *
Datadir_find_genomesubdir (char **fileroot, char **dbversion,
			   char *user_genomedir, char *dbroot) {
  FILE *probe;
  char *basedir, *subdir, *versionfile, *slash, *prefix, *joined;

  /* First get genomedir */
  basedir = Datadir_find_genomedir(user_genomedir);

  /* Append directory part of dbroot */
  slash = strrchr(dbroot,'/');
  if (slash != NULL) {
    *slash = '\0';
    prefix = dbroot;
    dbroot = slash + 1;

    joined = (char *) CALLOC(strlen(basedir)+strlen("/")+strlen(prefix)+1,sizeof(char));
    sprintf(joined,"%s/%s",basedir,prefix);
    FREE(basedir);
    basedir = joined;
  }

  /* Find version file */
  versionfile = (char *) CALLOC(strlen(basedir) + strlen("/") + strlen(dbroot) +
				strlen(".version") + 1,sizeof(char));
  sprintf(versionfile,"%s/%s.version",basedir,dbroot);
  probe = FOPEN_READ_TEXT(versionfile);
  FREE(versionfile);

  if (probe != NULL) {
    /* Found in top-level genomedir */
    fclose(probe);
    subdir = basedir;
    *fileroot = (char *) CALLOC(strlen(dbroot)+1,sizeof(char));
    strcpy(*fileroot,dbroot);

  } else {
    subdir = (char *) CALLOC(strlen(basedir) + strlen("/") + strlen(dbroot) + 1,sizeof(char));
    sprintf(subdir,"%s/%s",basedir,dbroot);

    *fileroot = find_fileroot(subdir,basedir,dbroot);
    if (*fileroot == NULL) {
      fprintf(stderr,"Error: Can't open genome files in %s or %s.\n",basedir,subdir);
      fprintf(stderr,"       Please specify directory using -D flag, GMAPDB environment variable,\n");
      fprintf(stderr,"       or a configuration file .gmaprc with the line GMAPDB=<directory>;\n");
      fprintf(stderr,"       or recompile the GMAP package using the --with-gmapdb flag to configure.\n");
      Datadir_avail_gmap_databases(stderr,user_genomedir);
      exit(9);
    }

    /* Found in subdirectory; the base directory string is no longer needed */
    FREE(basedir);
  }

  versionfile = (char *) CALLOC(strlen(subdir) + strlen("/") + strlen(*fileroot) + strlen(".version") + 1,
				sizeof(char));
  sprintf(versionfile,"%s/%s.version",subdir,*fileroot);
  *dbversion = get_dbversion(versionfile);
  if (*dbversion == NULL) {
    /* Something wrong with version file.  Use dbroot instead */
    *dbversion = (char *) CALLOC(strlen(dbroot)+1,sizeof(char));
    strcpy(*dbversion,dbroot);
  }
  FREE(versionfile);

  return subdir;
}
280 
281 
/* Returns a newly allocated string naming the map directory: a copy of
   USER_MAPDIR when it is non-NULL, otherwise
   "<genomesubdir>/<fileroot>.maps".  The caller owns the result. */
char *
Datadir_find_mapdir (char *user_mapdir, char *genomesubdir, char *fileroot) {
  char *result;

  if (user_mapdir == NULL) {
    /* No override: derive the default location from the genome */
    result = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
			     strlen(".maps")+1,sizeof(char));
    sprintf(result,"%s/%s.maps",genomesubdir,fileroot);
    return result;
  }

  result = (char *) CALLOC(strlen(user_mapdir)+1,sizeof(char));
  strcpy(result,user_mapdir);
  return result;
}
297 
298 
/* Writes the names of the entries of DIRECTORY to FP in a multi-column
   layout.  Entries whose name starts with '.' are skipped.  Each output
   row starts with five spaces; later names in a row are preceded by at
   least one space and padded so that they begin at a column that is a
   multiple of 10.  A row is ended once it grows beyond 60 characters,
   and a final newline is always written.

   Exits with status 9 if DIRECTORY cannot be opened. */
void
Datadir_list_directory_multicol (FILE *fp, char *directory) {
  char *filename;
  struct dirent *entry;
  DIR *dp;
  int pos = 0;

  if ((dp = opendir(directory)) == NULL) {
    fprintf(stderr,"Unable to open directory %s\n",directory);
    exit(9);
  }

  while ((entry = readdir(dp)) != NULL) {
    filename = entry->d_name;
    if (filename[0] == '.') {
      continue;
    }

    if (pos == 0) {
      fprintf(fp,"     ");
      pos += (int) strlen("     ");
    } else {
      /* At least one separating space, then pad up to the next column.
         All of it must go to fp (not stdout), otherwise the alignment
         is lost whenever fp is some other stream. */
      do {
        fputc(' ',fp);
        pos++;
      } while (pos % 10 != 0);
    }

    fprintf(fp,"%s",filename);
    pos += (int) strlen(filename);
    if (pos > 60) {
      fprintf(fp,"\n");
      pos = 0;
    }
  }

  fprintf(fp,"\n");
  if (closedir(dp) < 0) {
    fprintf(stderr,"Unable to close directory %s\n",directory);
  }

  return;
}
339 
/* Writes the name of every entry returned by readdir for DIRECTORY to FP,
   one per line, without filtering.  Exits with status 9 if DIRECTORY
   cannot be opened; a failure to close it is only reported on stderr. */
void
Datadir_list_directory (FILE *fp, char *directory) {
  DIR *dir;
  struct dirent *item;

  dir = opendir(directory);
  if (dir == NULL) {
    fprintf(stderr,"Unable to open directory %s\n",directory);
    exit(9);
  }

  for (item = readdir(dir); item != NULL; item = readdir(dir)) {
    fprintf(fp,"%s\n",item->d_name);
  }

  if (closedir(dir) < 0) {
    fprintf(stderr,"Unable to close directory %s\n",directory);
  }
}
360 
/* qsort comparator for arrays of C strings: A and B each point to a
   char * element, and the pointed-to strings are ordered by strcmp. */
static int
strcmp_cmp (const void *a, const void *b) {
  char * const *lhs = (char * const *) a;
  char * const *rhs = (char * const *) b;

  return strcmp(*lhs,*rhs);
}
365 
366 
367 void
Datadir_avail_gmap_databases(FILE * fp,char * user_genomedir)368 Datadir_avail_gmap_databases (FILE *fp, char *user_genomedir) {
369   char *genomedir;
370   struct dirent *entry;
371   char *filename;
372   DIR *dp;
373   List_T databases = NULL;
374   char **array;
375   int n, i;
376 
377   genomedir = Datadir_find_genomedir(user_genomedir);
378   fprintf(fp,"Available gmap databases in directory %s:\n",genomedir);
379 
380   if ((dp = opendir(genomedir)) == NULL) {
381     fprintf(stderr,"Unable to open genomedir %s\n",genomedir);
382     exit(9);
383   }
384   while ((entry = readdir(dp)) != NULL) {
385     filename = (char *) CALLOC(strlen(genomedir)+strlen("/")+strlen(entry->d_name)+strlen("/")+
386 			       strlen(entry->d_name)+strlen(".version")+1,sizeof(char));
387     sprintf(filename,"%s/%s/%s.version",genomedir,entry->d_name,entry->d_name);
388     if (Access_file_exists_p(filename) == true) {
389       FREE(filename);
390       filename = (char *) CALLOC(strlen(entry->d_name)+1,sizeof(char));
391       strcpy(filename,entry->d_name);
392       databases = List_push(databases,(void *) filename);
393     } else {
394       FREE(filename);
395     }
396   }
397   if (closedir(dp) < 0) {
398     fprintf(stderr,"Unable to close genomedir %s\n",genomedir);
399   }
400 
401   if ((n = List_length(databases)) == 0) {
402     fprintf(fp,"  (none found)\n");
403   } else {
404     array = (char **) List_to_array(databases,NULL);
405     qsort(array,n,sizeof(char *),strcmp_cmp);
406     for (i = 0; i < n; i++) {
407       fprintf(fp,"%s\n",array[i]);
408       FREE(array[i]);
409     }
410     FREE(array);
411     List_free(&databases);
412   }
413 
414   FREE(genomedir);
415   return;
416 }
417 
418 void
Datadir_avail_maps(FILE * fp,char * user_mapdir,char * genomesubdir,char * fileroot)419 Datadir_avail_maps (FILE *fp, char *user_mapdir, char *genomesubdir, char *fileroot) {
420   char *mapdir;
421   struct dirent *entry;
422   char *filename;
423   DIR *dp;
424   List_T maps = NULL;
425   char **array;
426   int n, i;
427 
428   mapdir = Datadir_find_mapdir(user_mapdir,genomesubdir,fileroot);
429   fprintf(fp,"Available maps in directory %s:\n",mapdir);
430 
431   if ((dp = opendir(mapdir)) == NULL) {
432     fprintf(stderr,"Unable to open mapdir %s\n",mapdir);
433     exit(9);
434   }
435   while ((entry = readdir(dp)) != NULL) {
436     if (entry->d_name[0] != '.') {
437       filename = (char *) CALLOC(strlen(mapdir)+strlen("/")+strlen(entry->d_name)+1,
438 				 sizeof(char));
439       sprintf(filename,"%s/%s",mapdir,entry->d_name);
440 
441       if (Access_file_exists_p(filename) == true) {
442 	FREE(filename);
443 	filename = (char *) CALLOC(strlen(entry->d_name)+1,sizeof(char));
444 	strcpy(filename,entry->d_name);
445 	maps = List_push(maps,(void *) filename);
446       } else {
447 	FREE(filename);
448       }
449     }
450   }
451   if (closedir(dp) < 0) {
452     fprintf(stderr,"Unable to close mapdir %s\n",mapdir);
453   }
454 
455   if ((n = List_length(maps)) == 0) {
456     fprintf(fp,"  (none found)\n");
457   } else {
458     array = (char **) List_to_array(maps,NULL);
459     qsort(array,n,sizeof(char *),strcmp_cmp);
460     for (i = 0; i < n; i++) {
461       fprintf(fp,"%s\n",array[i]);
462       FREE(array[i]);
463     }
464     FREE(array);
465     List_free(&maps);
466   }
467 
468   FREE(mapdir);
469   return;
470 
471 }
472 
473