1 static char rcsid[] = "$Id: datadir.c 218159 2019-01-17 05:49:10Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "datadir.h"
7 #include <stdio.h>
8 #include <stdlib.h> /* For getenv */
9 #include <string.h> /* For strlen */
10 #include <strings.h> /* For rindex */
11 #include <pwd.h>
12 #ifdef HAVE_SYS_TYPES_H
13 #include <sys/types.h> /* Needed for dirent.h */
14 #endif
15 #if HAVE_DIRENT_H
16 # include <dirent.h>
17 # define NAMLEN(dirent) strlen((dirent)->d_name)
18 #else
19 # define dirent direct
20 # define NAMLEN(dirent) (dirent)->d_namlen
21 # if HAVE_SYS_NDIR_H
22 # include <sys/ndir.h>
23 # endif
24 # if HAVE_SYS_DIR_H
25 # include <sys/dir.h>
26 # endif
27 # if HAVE_NDIR_H
28 # include <ndir.h>
29 # endif
30 #endif
31
32 #include <math.h> /* for qsort */
33 #include <string.h> /* for strcmp */
34 #include "mem.h"
35 #include "fopen.h"
36 #include "getline.h"
37 #include "access.h"
38 #include "list.h"
39
40
41 /* Note: GMAPDB is defined externally by configure */
42 #ifndef GMAPDB
43 #error A default value for GMAPDB was not provided to configure. Please do so, or edit the Makefile
44 #endif
45
/* Scans an already-open configuration file for a line of the form
   TAG=VALUE and returns a newly allocated copy of VALUE taken from the
   first line whose TAG equals the given tag.  Returns NULL when no line
   matches.

   Whitespace before the tag and on either side of the '=' is accepted,
   and VALUE ends at the first whitespace character.  The returned string
   is allocated with MALLOC and is handed to the caller; fp is left open. */
static char *
read_config_file (FILE *fp, char *tag) {
  char *directory, *seentag, *dirbuffer, *line;
  int line_length;
  int nfields;
  size_t bufsize;

  while ((line = Getline_wlength(&line_length,fp)) != NULL) {
    /* Neither field can be longer than the line it was parsed from */
    bufsize = strlen(line) + 1;
    seentag = (char *) MALLOC(bufsize*sizeof(char));
    dirbuffer = (char *) MALLOC(bufsize*sizeof(char));

    /* A plain "%s=%s" cannot work: the first %s consumes the '=' and the
       value as well.  Stop the tag at '=' or whitespace instead, and
       insist on both conversions so dirbuffer is never read while
       uninitialized. */
    nfields = sscanf(line," %[^= \t\r\n] = %s",seentag,dirbuffer);

    if (nfields == 2 && !strcmp(seentag,tag)) {
      directory = (char *) MALLOC((strlen(dirbuffer)+1)*sizeof(char));
      strcpy(directory,dirbuffer);
      FREE(dirbuffer);
      FREE(seentag);
      FREE(line);
      return directory;

    } else {
      FREE(dirbuffer);
      FREE(seentag);
      FREE(line);
    }
  }

  return NULL;
}
71
72
/* Tries to open the file ".gmaprc" in the home directory of the user
   named by the USER environment variable.  The home directory is taken
   from the password database entry for that user.  Returns NULL when
   USER is unset or has no password entry; otherwise returns whatever
   FOPEN_READ_TEXT yields for the path. */
static FILE *
find_homedir_config () {
  struct passwd *pwentry;
  char *username, *path;
  FILE *config;

  username = getenv("USER");
  if (username == NULL) {
    return NULL;
  }

  pwentry = getpwnam(username);
  if (pwentry == NULL) {
    return NULL;
  }

  /* Room for <home> + "/.gmaprc" + terminator */
  path = (char *) CALLOC(strlen(pwentry->pw_dir)+strlen("/.gmaprc")+1,sizeof(char));
  sprintf(path,"%s/.gmaprc",pwentry->pw_dir);
  config = FOPEN_READ_TEXT(path);
  FREE(path);

  return config;
}
89
90
91
/* Determines the file root of the genome stored in genomesubdir by
   looking for files named <root>.version.

   If a file <dbroot>.version exists, a copy of dbroot is returned right
   away.  Otherwise, if exactly one *.version file exists, its root is
   returned.  The returned string is allocated with MALLOC and belongs to
   the caller.

   This function does not return on failure: it prints a diagnostic to
   stderr and calls exit(9) when genomesubdir cannot be opened, or when it
   contains no *.version file or more than one (none of them matching
   dbroot).  genomedir is used only to diagnose why genomesubdir could not
   be opened. */
static char *
find_fileroot (char *genomesubdir, char *genomedir, char *dbroot) {
  char *fileroot = NULL, *filename, *p;
  struct dirent *entry;
  DIR *dp;
  size_t rootlen;
  int nchoices = 0;

  if ((dp = opendir(genomesubdir)) == NULL) {
    /* Problem found.  Try to diagnose */
    if ((dp = opendir(genomedir)) == NULL) {
      fprintf(stderr,"Unable to find genome directory %s\n",genomedir);
      /* Trailing space keeps "the" and "correct" apart in the joined sentence */
      fprintf(stderr,"Either recompile the GMAP package to have the ");
      fprintf(stderr,"correct default directory (seen by doing gmap --version),\n");
      fprintf(stderr,"or use the -D flag to gmap to specify the correct genome directory.\n");
      exit(9);
    } else {
      /* The parent exists, so only the genome itself is missing */
      closedir(dp);
      fprintf(stderr,"Unable to find genome %s in directory %s\n",dbroot,genomedir);
      fprintf(stderr,"Make sure you have typed the genome correctly, or use the -D flag\n");
      fprintf(stderr,"(or the -F flag for cmetindex or atoiindex) to specify a directory.\n");
      fprintf(stderr,"For example, '-D .' specifies this directory.\n");
      exit(9);
    }
  }

  /* Determine the number of .version files */
  while ((entry = readdir(dp)) != NULL) {
    filename = entry->d_name;
    if ((p = strrchr(filename,'.')) == NULL || strcmp(p,".version") != 0) {
      continue;
    }

    /* Keep only the most recent candidate; its root is the text before ".version" */
    rootlen = (size_t) (p - filename);
    FREE(fileroot);
    fileroot = (char *) MALLOC((rootlen + 1)*sizeof(char));
    memcpy(fileroot,filename,rootlen);
    fileroot[rootlen] = '\0';

    if (!strcmp(fileroot,dbroot)) {
      /* However, if dbroot exists, just take that */
      if (closedir(dp) < 0) {
        fprintf(stderr,"Unable to close directory %s\n",genomesubdir);
      }
      return fileroot;
    }

    nchoices += 1;
  }

  /* Did not find dbroot */
  if (closedir(dp) < 0) {
    fprintf(stderr,"Unable to close directory %s\n",genomesubdir);
  }

  if (nchoices == 1) {
    return fileroot;
  } else if (nchoices == 0) {
    fprintf(stderr,"Unable to find file ending with .version in directory %s\n",genomesubdir);
    exit(9);
  } else {
    fprintf(stderr,"Found multiple files (%d) ending with .version in directory %s\n",nchoices,genomesubdir);
    FREE(fileroot);
    exit(9);
  }
}
156
157
158
/* Opens the given file and returns the result of a single Getline call
   on it (presumably the first line; the caller takes over the returned
   string).  Returns NULL when the file cannot be opened or Getline yields
   NULL.  The file is closed before returning. */
static char *
get_dbversion (char *filename) {
  FILE *versionfp;
  char *firstline;

  versionfp = FOPEN_READ_TEXT(filename);
  if (versionfp == NULL) {
    return (char *) NULL;
  }

  /* A NULL result from Getline is passed straight through to the caller */
  firstline = Getline(versionfp);
  fclose(versionfp);

  return firstline;
}
181
182
183 char *
Datadir_find_genomedir(char * user_genomedir)184 Datadir_find_genomedir (char *user_genomedir) {
185 FILE *fp;
186 char *genomedir;
187
188 if (user_genomedir != NULL) {
189 genomedir = (char *) CALLOC(strlen(user_genomedir)+1,sizeof(char));
190 strcpy(genomedir,user_genomedir);
191
192 } else if (getenv("GMAPDB") != NULL) {
193 /* Use genomedir provided by environment variable */
194 genomedir = (char *) CALLOC(strlen(getenv("GMAPDB"))+1,sizeof(char));
195 strcpy(genomedir,getenv("GMAPDB"));
196
197 } else if ((fp = FOPEN_READ_TEXT("./.gmaprc")) != NULL) {
198 genomedir = read_config_file(fp,"GMAPDB");
199 fclose(fp);
200
201 } else if ((fp = find_homedir_config()) != NULL) {
202 genomedir = read_config_file(fp,"GMAPDB");
203 fclose(fp);
204
205 } else {
206 genomedir = (char *) CALLOC(strlen(GMAPDB)+1,sizeof(char));
207 strcpy(genomedir,GMAPDB);
208 }
209
210 return genomedir;
211 }
212
213
/* Locates the directory holding the files for genome dbroot and returns
   its path as a newly allocated string.  Also allocates and stores the
   file root in *fileroot and the version string in *dbversion.

   The base directory comes from Datadir_find_genomedir(user_genomedir).
   If dbroot contains a '/', the part before the last '/' is appended to
   that base directory and only the part after it is used as the genome
   name.  Note that dbroot is modified in place: the last '/' is
   overwritten with a terminator.

   If <dir>/<dbroot>.version can be opened, <dir> itself is returned;
   otherwise <dir>/<dbroot> is used and find_fileroot picks the file root.
   When the version file yields no line, a copy of dbroot is used as the
   version. */
char *
Datadir_find_genomesubdir (char **fileroot, char **dbversion,
                           char *user_genomedir, char *dbroot) {
  FILE *probe;
  char *basedir, *subdir, *versionfile, *slash, *prefix, *extended;

  /* First get genomedir */
  basedir = Datadir_find_genomedir(user_genomedir);

  /* Append directory part of dbroot */
  slash = strrchr(dbroot,'/');
  if (slash != NULL) {
    *slash = '\0';
    prefix = dbroot;
    dbroot = slash + 1;

    extended = (char *) CALLOC(strlen(basedir)+strlen("/")+strlen(prefix)+1,sizeof(char));
    sprintf(extended,"%s/%s",basedir,prefix);
    FREE(basedir);
    basedir = extended;
  }

  /* Find version file */
  versionfile = (char *) CALLOC(strlen(basedir) + strlen("/") + strlen(dbroot) +
                                strlen(".version") + 1,sizeof(char));
  sprintf(versionfile,"%s/%s.version",basedir,dbroot);
  probe = FOPEN_READ_TEXT(versionfile);
  FREE(versionfile);

  if (probe != NULL) {
    /* Found in top-level genomedir */
    fclose(probe);
    subdir = basedir;
    *fileroot = (char *) CALLOC(strlen(dbroot)+1,sizeof(char));
    strcpy(*fileroot,dbroot);

  } else {
    subdir = (char *) CALLOC(strlen(basedir) + strlen("/") + strlen(dbroot) + 1,sizeof(char));
    sprintf(subdir,"%s/%s",basedir,dbroot);

    *fileroot = find_fileroot(subdir,basedir,dbroot);
    if (*fileroot == NULL) {
      fprintf(stderr,"Error: Can't open genome files in %s or %s.\n",basedir,subdir);
      fprintf(stderr," Please specify directory using -D flag, GMAPDB environment variable,\n");
      fprintf(stderr," or a configuration file .gmaprc with the line GMAPDB=<directory>;\n");
      fprintf(stderr," or recompile the GMAP package using the --with-gmapdb flag to configure.\n");
      Datadir_avail_gmap_databases(stderr,user_genomedir);
      exit(9);
    }

    /* Found in subdirectory */
    FREE(basedir);
  }

  /* Read the version from the file that was located above */
  versionfile = (char *) CALLOC(strlen(subdir) + strlen("/") + strlen(*fileroot) + strlen(".version") + 1,
                                sizeof(char));
  sprintf(versionfile,"%s/%s.version",subdir,*fileroot);
  *dbversion = get_dbversion(versionfile);
  if (*dbversion == NULL) {
    /* Something wrong with version file.  Use dbroot instead */
    *dbversion = (char *) CALLOC(strlen(dbroot)+1,sizeof(char));
    strcpy(*dbversion,dbroot);
  }
  FREE(versionfile);

  return subdir;
}
280
281
/* Returns a newly allocated string naming the map directory: a copy of
   user_mapdir when one is given, otherwise
   "<genomesubdir>/<fileroot>.maps".  The caller frees the result. */
char *
Datadir_find_mapdir (char *user_mapdir, char *genomesubdir, char *fileroot) {
  char *result;

  if (user_mapdir == NULL) {
    /* No override: derive the location from the genome directory */
    result = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+strlen(fileroot)+
                             strlen(".maps")+1,sizeof(char));
    sprintf(result,"%s/%s.maps",genomesubdir,fileroot);
    return result;
  }

  result = (char *) CALLOC(strlen(user_mapdir)+1,sizeof(char));
  strcpy(result,user_mapdir);
  return result;
}
297
298
/* Writes the names of the entries in directory to fp in a multi-column
   layout.  Entries whose name begins with '.' are skipped.

   Within an output line, every entry after the first is separated from
   the previous one by at least one space and padded so that it starts at
   a column that is a multiple of 10.  Once the current column exceeds 60
   the line is ended.  A final newline is always written.

   Prints a message to stderr and calls exit(9) if the directory cannot be
   opened. */
void
Datadir_list_directory_multicol (FILE *fp, char *directory) {
  char *filename;
  struct dirent *entry;
  DIR *dp;
  int pos = 0;   /* current output column on this line */

  if ((dp = opendir(directory)) == NULL) {
    fprintf(stderr,"Unable to open directory %s\n",directory);
    exit(9);
  }

  while ((entry = readdir(dp)) != NULL) {
    filename = entry->d_name;
    if (filename[0] != '.') {
      if (pos == 0) {
        /* Start of a line: leading indent only */
        fprintf(fp," ");
        pos += (int) strlen(" ");
      } else {
        fprintf(fp," ");
        pos++;
        /* Padding must go to fp, like everything else, not to stdout */
        while (pos % 10 != 0) {
          fprintf(fp," ");
          pos++;
        }
      }

      fprintf(fp,"%s",filename);
      pos += (int) strlen(filename);
      if (pos > 60) {
        fprintf(fp,"\n");
        pos = 0;
      }
    }
  }
  fprintf(fp,"\n");

  if (closedir(dp) < 0) {
    fprintf(stderr,"Unable to close directory %s\n",directory);
  }

  return;
}
339
/* Writes the name of every entry returned by readdir for the given
   directory to fp, one per line and without any filtering.  Prints a
   message to stderr and calls exit(9) if the directory cannot be
   opened. */
void
Datadir_list_directory (FILE *fp, char *directory) {
  DIR *dirhandle;
  struct dirent *item;

  dirhandle = opendir(directory);
  if (dirhandle == NULL) {
    fprintf(stderr,"Unable to open directory %s\n",directory);
    exit(9);
  }

  for (item = readdir(dirhandle); item != NULL; item = readdir(dirhandle)) {
    fprintf(fp,"%s\n",item->d_name);
  }

  if (closedir(dirhandle) < 0) {
    fprintf(stderr,"Unable to close directory %s\n",directory);
  }

  return;
}
360
/* qsort comparison function for an array of C strings: each argument
   points to a char * element, and the two strings are ordered by
   strcmp. */
static int
strcmp_cmp (const void *a, const void *b) {
  char * const *lhs = (char * const *) a;
  char * const *rhs = (char * const *) b;

  return strcmp(*lhs,*rhs);
}
365
366
367 void
Datadir_avail_gmap_databases(FILE * fp,char * user_genomedir)368 Datadir_avail_gmap_databases (FILE *fp, char *user_genomedir) {
369 char *genomedir;
370 struct dirent *entry;
371 char *filename;
372 DIR *dp;
373 List_T databases = NULL;
374 char **array;
375 int n, i;
376
377 genomedir = Datadir_find_genomedir(user_genomedir);
378 fprintf(fp,"Available gmap databases in directory %s:\n",genomedir);
379
380 if ((dp = opendir(genomedir)) == NULL) {
381 fprintf(stderr,"Unable to open genomedir %s\n",genomedir);
382 exit(9);
383 }
384 while ((entry = readdir(dp)) != NULL) {
385 filename = (char *) CALLOC(strlen(genomedir)+strlen("/")+strlen(entry->d_name)+strlen("/")+
386 strlen(entry->d_name)+strlen(".version")+1,sizeof(char));
387 sprintf(filename,"%s/%s/%s.version",genomedir,entry->d_name,entry->d_name);
388 if (Access_file_exists_p(filename) == true) {
389 FREE(filename);
390 filename = (char *) CALLOC(strlen(entry->d_name)+1,sizeof(char));
391 strcpy(filename,entry->d_name);
392 databases = List_push(databases,(void *) filename);
393 } else {
394 FREE(filename);
395 }
396 }
397 if (closedir(dp) < 0) {
398 fprintf(stderr,"Unable to close genomedir %s\n",genomedir);
399 }
400
401 if ((n = List_length(databases)) == 0) {
402 fprintf(fp," (none found)\n");
403 } else {
404 array = (char **) List_to_array(databases,NULL);
405 qsort(array,n,sizeof(char *),strcmp_cmp);
406 for (i = 0; i < n; i++) {
407 fprintf(fp,"%s\n",array[i]);
408 FREE(array[i]);
409 }
410 FREE(array);
411 List_free(&databases);
412 }
413
414 FREE(genomedir);
415 return;
416 }
417
418 void
Datadir_avail_maps(FILE * fp,char * user_mapdir,char * genomesubdir,char * fileroot)419 Datadir_avail_maps (FILE *fp, char *user_mapdir, char *genomesubdir, char *fileroot) {
420 char *mapdir;
421 struct dirent *entry;
422 char *filename;
423 DIR *dp;
424 List_T maps = NULL;
425 char **array;
426 int n, i;
427
428 mapdir = Datadir_find_mapdir(user_mapdir,genomesubdir,fileroot);
429 fprintf(fp,"Available maps in directory %s:\n",mapdir);
430
431 if ((dp = opendir(mapdir)) == NULL) {
432 fprintf(stderr,"Unable to open mapdir %s\n",mapdir);
433 exit(9);
434 }
435 while ((entry = readdir(dp)) != NULL) {
436 if (entry->d_name[0] != '.') {
437 filename = (char *) CALLOC(strlen(mapdir)+strlen("/")+strlen(entry->d_name)+1,
438 sizeof(char));
439 sprintf(filename,"%s/%s",mapdir,entry->d_name);
440
441 if (Access_file_exists_p(filename) == true) {
442 FREE(filename);
443 filename = (char *) CALLOC(strlen(entry->d_name)+1,sizeof(char));
444 strcpy(filename,entry->d_name);
445 maps = List_push(maps,(void *) filename);
446 } else {
447 FREE(filename);
448 }
449 }
450 }
451 if (closedir(dp) < 0) {
452 fprintf(stderr,"Unable to close mapdir %s\n",mapdir);
453 }
454
455 if ((n = List_length(maps)) == 0) {
456 fprintf(fp," (none found)\n");
457 } else {
458 array = (char **) List_to_array(maps,NULL);
459 qsort(array,n,sizeof(char *),strcmp_cmp);
460 for (i = 0; i < n; i++) {
461 fprintf(fp,"%s\n",array[i]);
462 FREE(array[i]);
463 }
464 FREE(array);
465 List_free(&maps);
466 }
467
468 FREE(mapdir);
469 return;
470
471 }
472
473