1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <sra/path-extern.h>
28 
29 struct SRARunlist;
30 #define KNAMELIST_IMPL struct SRARunlist
31 #include <klib/impl.h>
32 
33 struct NCBISRAPath;
34 #define SRAPATH_IMPL struct NCBISRAPath
35 #include <sra/impl.h>
36 
37 #include "libsrapath.vers.h"
38 
39 #include <vfs/manager.h>
40 #include <sra/srapath.h>
41 #include <kfs/directory.h>
42 #include <kfs/file.h>
43 #include <kfs/mmap.h>
44 #include <kfs/dyload.h>
45 #include <klib/container.h>
46 #include <klib/vector.h>
47 #include <klib/text.h>
48 #include <klib/rc.h>
49 #include <klib/printf.h>
50 #include <kfg/config.h>
51 #include <kfg/kfg-priv.h>
52 #include <klib/debug.h>
53 #include <klib/log.h>
54 #include <sysalloc.h>
55 #include <stdlib.h>
56 #include <stdio.h>
57 #include <string.h>
58 #include <ctype.h>
59 #include <os-native.h>
60 #include <assert.h>
61 
62 #include "path-priv.h"
63 
64 /*--------------------------------------------------------------------------
65  * SRAPathString
66  */
/* identifiers for the path-construction algorithm attached to a volume
 * (SRAPathString.alg) and for the repository type (NCBIRepository.type).
 * alg_none is used for replication-server entries, which carry no algorithm.
 */
enum
{
    alg_none   = 0,
    alg_ncbi   = 1,
    alg_ddbj   = 2,
    alg_ebi    = 3,
    alg_refseq = 4,
    alg_wgs    = 5
};
76 
77 typedef struct SRAPathString SRAPathString;
78 struct SRAPathString
79 {
80     DLNode n;
81     uint8_t alg;
82     char path [ 1 ];
83 };
84 
85 /* Whack
86  */
87 static
SRAPathStringWhack(DLNode * n,void * ignore)88 void CC SRAPathStringWhack ( DLNode *n, void *ignore )
89 {
90     free ( n );
91 }
92 
93 /* Make
94  */
95 static
SRAPathStringMake(DLList * list,const char * path,size_t sz,uint8_t alg)96 rc_t SRAPathStringMake ( DLList *list, const char *path, size_t sz, uint8_t alg )
97 {
98     SRAPathString *s = (SRAPathString*) malloc ( sizeof * s + sz );
99     if ( s == NULL )
100         return RC ( rcSRA, rcMgr, rcUpdating, rcMemory, rcExhausted );
101 
102     s -> alg = alg;
103     string_copy ( s -> path, sz + 1, path, sz );
104     DLListPushTail ( list, & s -> n );
105 
106     return 0;
107 }
108 
109 
/* Find
 *  search parameters and result for SRAPathStringFind
 */
typedef struct SRAPathFindInfo
{
    /* [ IN ] text being searched for; not necessarily NUL-terminated */
    const char *path;

    /* [ IN ] number of bytes of "path" to compare */
    size_t size;

    /* [ OUT ] algorithm of the matching node, valid only when "found" */
    uint8_t alg;

    /* [ OUT ] set to true when a matching node is located */
    bool found;
} SRAPathFindInfo;
120 
121 static
SRAPathStringFind(DLNode * n,void * data)122 bool CC SRAPathStringFind ( DLNode *n, void *data )
123 {
124     const SRAPathString *p = ( const SRAPathString* ) n;
125     SRAPathFindInfo *pb = ( SRAPathFindInfo* ) data;
126     if ( memcmp ( p -> path, pb -> path, pb -> size ) == 0 )
127     {
128         if ( p -> path [ pb -> size ] == 0 )
129         {
130             pb -> alg = p -> alg;
131             return pb -> found = true;
132         }
133     }
134     return false;
135 }
136 
137 /*--------------------------------------------------------------------------
138  * NCBIRepository
139  *  represents a repository: replication server(s) / volume(s)
140  */
141 typedef struct NCBIRepository NCBIRepository;
142 struct NCBIRepository
143 {
144     DLNode n;
145 
146     uint8_t type; /* repository type: alg_ncbi (sra: ncbi/ebi/ddbj), alg_wgs, alg_refseq*/
147 
148     /* replication servers */
149     DLList repsrv;
150 
151     /* volumes upon each repserver */
152     DLList vols;
153 };
154 /* Init
155  */
156 static
SRARepoMake(NCBIRepository ** repo)157 rc_t CC SRARepoMake(NCBIRepository** repo)
158 {
159     *repo = (NCBIRepository*)malloc(sizeof(NCBIRepository));
160     if (* repo == 0)
161     {
162         return RC ( rcSRA, rcMgr, rcInitializing, rcMemory, rcExhausted );
163     }
164     (*repo)->type = alg_ncbi;
165     (*repo)->n.next = NULL;
166     (*repo)->n.prev = NULL;
167     DLListInit(&(*repo)->repsrv);
168     DLListInit(&(*repo)->vols);
169     return 0;
170 }
171 /* Whack
172  */
173 static
SRARepoWhack(DLNode * n,void * ignore)174 void CC SRARepoWhack ( DLNode *n, void *ignore )
175 {
176     if (n != NULL)
177     {
178         DLListWhack ( & ((NCBIRepository*) n) -> repsrv, SRAPathStringWhack, NULL );
179         DLListWhack ( & ((NCBIRepository*) n) -> vols, SRAPathStringWhack, NULL );
180         free ( (NCBIRepository*) n );
181     }
182 }
183 
184 static
AlgToStr(uint8_t alg)185 const char* AlgToStr(uint8_t alg)
186 {
187     switch (alg)
188     {
189     case alg_ncbi:
190     case alg_ddbj:
191     case alg_ebi: return "SRA";
192     case alg_wgs: return "WGS";
193     case alg_refseq: return "REFSEQ";
194     default: return "<unknown>";
195     }
196 }
197 
198 
199 static
LogVolume(DLNode * n,void * data)200 void CC LogVolume( DLNode *n, void *data )
201 {
202     const SRAPathString* self = (const SRAPathString*)n;
203     PATH_DEBUG (("        \"%s\", type=%s\n",
204                   self->path,
205                   AlgToStr(self->alg)));
206 }
207 
208 static
LogServer(DLNode * n,void * data)209 void CC LogServer( DLNode *n, void *data )
210 {
211     const SRAPathString* self = (const SRAPathString*)n;
212     PATH_DEBUG (("        \"%s\"\n", self->path));
213 }
214 
215 static
LogRepository(DLNode * n,void * data)216 void CC LogRepository ( DLNode *n, void *data )
217 {
218     if (n != NULL)
219     {
220         const NCBIRepository* self = (const NCBIRepository*)n;
221         PATH_DEBUG (("    type=%s\n", AlgToStr(self->type)));
222         PATH_DEBUG (("    servers:\n"));
223         DLListForEach ( & self->repsrv, false, LogServer, NULL );
224         PATH_DEBUG (("    volumes:\n"));
225         DLListForEach ( & self->vols, false, LogVolume, NULL );
226     }
227 }
228 
229 /*--------------------------------------------------------------------------
230  * NCBISRAPath
231  *  manages accession -> path conversion
232  */
233 typedef struct NCBISRAPath NCBISRAPath;
234 struct NCBISRAPath
235 {
236     struct SRAPath dad;
237 
238     /* working directory */
239     const KDirectory *dir;
240 
241     /* repositories */
242     DLList repos;
243     NCBIRepository* dflt_repo; /* default repository (likely to be removed in the future versions) */
244 
245     atomic32_t refcount;
246 };
247 
LogPathInfo(const NCBISRAPath * self)248 static void LogPathInfo(const NCBISRAPath* self)
249 {
250     PATH_DEBUG (("NCBISRAPath configuration:\n"));
251     DLListForEach ( & self->repos, false, LogRepository, NULL );
252     PATH_DEBUG (("default repository:\n"));
253     LogRepository( (DLNode*) self->dflt_repo, NULL );
254 }
255 
256 /* Whack
257  */
258 static
SRAPathWhack(NCBISRAPath * self)259 rc_t SRAPathWhack ( NCBISRAPath *self )
260 {
261     rc_t rc = KDirectoryRelease ( self -> dir );
262     if ( rc == 0 )
263     {
264         DLListWhack ( & self -> repos, SRARepoWhack, NULL );
265         SRARepoWhack ( (DLNode*) self -> dflt_repo, NULL );
266         free ( self );
267     }
268     return rc;
269 }
270 
271 
272 /* AddRef
273  * Release
274  */
275 static
NCBISRAPathAddRef(const NCBISRAPath * cself)276 rc_t CC NCBISRAPathAddRef ( const NCBISRAPath *cself )
277 {
278     atomic32_inc ( & ( ( NCBISRAPath* ) cself ) -> refcount );
279     return 0;
280 }
281 
282 static
NCBISRAPathRelease(const NCBISRAPath * cself)283 rc_t CC NCBISRAPathRelease ( const NCBISRAPath *cself )
284 {
285     NCBISRAPath *self = ( NCBISRAPath* ) cself;
286     if ( atomic32_dec_and_test ( & self -> refcount ) )
287         return SRAPathWhack ( self );
288     return 0;
289 }
290 
291 
292 /* Version
293  *  returns the library version
294  */
295 static
NCBISRAPathVersion(const NCBISRAPath * self,uint32_t * version)296 rc_t CC NCBISRAPathVersion ( const NCBISRAPath *self, uint32_t *version )
297 {
298     * version = LIBSRAPATH_VERS;
299     return 0;
300 }
301 
302 
303 /* Clear
304  *  forget existing server and volume paths for the default repository
305  */
306 static
NCBISRAPathClear(NCBISRAPath * self)307 rc_t CC NCBISRAPathClear ( NCBISRAPath *self )
308 {
309     DLListWhack ( & self -> dflt_repo -> repsrv, SRAPathStringWhack, NULL );
310     DLListWhack ( & self -> dflt_repo -> vols, SRAPathStringWhack, NULL );
311     DLListInit ( & self -> dflt_repo -> repsrv );
312     DLListInit ( & self -> dflt_repo -> vols );
313 
314     return 0;
315 }
316 
317 
318 /* AddPath
319  *  add an alternate replication or volume path
320  *
321  *  "path" [ IN ] and "size" [ IN ] - sets a search path
322  *
323  *  "alt" [ IN ] - use std or alternate volume path algorithm
324  */
325 static
SRAPathAddSubPath(DLList * list,const char * path,size_t size,uint8_t alg)326 rc_t SRAPathAddSubPath ( DLList *list, const char *path, size_t size, uint8_t alg )
327 {
328     /* see if it's already there */
329     SRAPathFindInfo pb;
330     pb . path = path;
331     pb . size = size;
332     pb . found = 0;
333     DLListDoUntil ( list, 0, SRAPathStringFind, & pb );
334     if ( pb . found )
335         return 0;
336 
337     /* create a new one */
338     return SRAPathStringMake ( list, path, size, alg );
339 }
340 
341 static
SRAPathAddPath(DLList * list,const char * path,uint8_t alg)342 rc_t SRAPathAddPath ( DLList *list, const char *path, uint8_t alg )
343 {
344     rc_t rc;
345     if ( path == NULL )
346         rc = RC ( rcSRA, rcMgr, rcUpdating, rcString, rcNull );
347     else if ( path [ 0 ] == 0 )
348         rc = RC ( rcSRA, rcMgr, rcUpdating, rcString, rcEmpty );
349     else
350     {
351         /* treat path as a Unix-style multi-path */
352         size_t size = string_size ( path );
353         while ( 1 )
354         {
355             /* find separator */
356             const char *sep = string_chr ( path, size, ':' );
357             if ( sep == NULL )
358                 break;
359 
360             /* add sub-path */
361             rc = SRAPathAddSubPath ( list, path, sep - path, alg );
362             if ( rc != 0 )
363                 return rc;
364 
365             /* consume ':' */
366             ++ sep;
367 
368             /* pop from string */
369             size -= sep - path;
370             path = sep;
371         }
372 
373         return SRAPathAddSubPath ( list, path, size, alg );
374     }
375     return rc;
376 }
377 
378 /* AddRepPath
379  *  add a replication path to a repository
380  *
381  *  "rep" [ IN ] - NUL-terminated server search path
382  *  may be a compound path with ':' separator characters, e.g.
383  *  "/panfs/traces01:/panfs/traces31"
384  *
385  *  NB - servers are searched in the order provided,
386  *  first to last, until one of them satisfies a request,
387  *  at which time the successful server is placed at the
388  *  head of the search path.
389  */
390 static
NCBISRAPathAddRepPath(NCBIRepository * repo,const char * rep)391 rc_t CC NCBISRAPathAddRepPath ( NCBIRepository* repo, const char *rep )
392 {
393     return SRAPathAddPath ( & repo -> repsrv, rep, alg_none );
394 }
395 static
NCBISRAPathAddRepPathDefault(NCBISRAPath * self,const char * rep)396 rc_t CC NCBISRAPathAddRepPathDefault ( NCBISRAPath *self, const char *rep )
397 {
398     return NCBISRAPathAddRepPath ( self -> dflt_repo, rep );
399 }
400 
401 /* AddVolPath
402  *  add a volume path to the default repository
403  *
404  *  "vol" [ IN ] - NUL-terminated volume search path
405  *  may be a compound path with ':' separator characters, e.g.
406  *  "sra2:sra1:sra0"
407  *
408  *  NB - volumes are searched in the order provided,
409  *  first to last. they are never re-ordered.
410  */
411 static
SRAPathAddAlgVolPath(NCBIRepository * repo,const char * vol,uint8_t alg)412 rc_t SRAPathAddAlgVolPath ( NCBIRepository* repo, const char *vol, uint8_t alg )
413 {
414     return SRAPathAddPath ( & repo -> vols, vol, alg );
415 }
416 static
NCBISRAPathAddVolPathDefault(NCBISRAPath * self,const char * vol)417 rc_t CC NCBISRAPathAddVolPathDefault ( NCBISRAPath *self, const char *vol )
418 {
419     return SRAPathAddAlgVolPath ( self -> dflt_repo, vol, alg_ncbi);
420 }
421 
422 /* Config
423  *  configure an existing path manager
424  */
425 static
SRAPathConfigValue(const KConfig * kfg,const char * node_path,char * value,size_t value_size,const char * dflt)426 rc_t SRAPathConfigValue ( const KConfig *kfg, const char *node_path,
427     char *value, size_t value_size, const char *dflt )
428 {
429     const KConfigNode *node;
430     rc_t rc = KConfigOpenNodeRead ( kfg, & node, "%s", node_path );
431     if ( rc == 0 )
432     {
433         size_t num_read, remaining;
434         rc = KConfigNodeRead ( node, 0, value, value_size - 1, & num_read,  & remaining );
435         if ( rc == 0 )
436         {
437             if ( remaining != 0 )
438                 rc = RC ( rcSRA, rcMgr, rcConstructing, rcString, rcExcessive );
439             else
440                 value [ num_read ] = 0;
441         }
442 
443         KConfigNodeRelease ( node );
444     }
445 
446     if ( rc != 0 )
447     {
448         if ( dflt != NULL && dflt [ 0 ] != 0 )
449         {
450             size_t num_read = string_copy_measure ( value, value_size, dflt );
451             rc = 0;
452             if ( num_read == value_size )
453                 rc = RC ( rcSRA, rcMgr, rcConstructing, rcString, rcExcessive );
454         }
455         else
456 	    	value[0] = 0;
457     }
458     return rc;
459 }
460 
461 static
ConfigVolume(NCBIRepository * repo,KConfig * kfg,const char * keyPref,const char * keySuff,uint8_t alg)462 rc_t ConfigVolume(NCBIRepository* repo, KConfig * kfg, const char* keyPref, const char* keySuff, uint8_t alg )
463 {
464     char value [ 4096 ];
465     char key [ 4096 ];
466     size_t pSize = string_size(keyPref);
467     size_t sSize = string_size(keySuff);
468     if ( pSize + sSize >= sizeof(key))
469     {
470         return RC ( rcSRA, rcMgr, rcConstructing, rcString, rcExcessive );
471     }
472     string_copy(key, sizeof(key), keyPref, pSize);
473     string_copy(key + pSize, sizeof(key) - pSize, keySuff, sSize);
474     key[pSize+sSize] = 0;
475     value[0] = '\0';
476     if ( SRAPathConfigValue ( kfg, key, value, sizeof value, NULL ) == 0 )
477         SRAPathAddAlgVolPath ( repo, value, alg );
478     return 0;
479 }
480 
481 static
ConfigRepo(KConfig * kfg,const char * dflt,const char * reps,const char * volPref,uint8_t type,NCBIRepository ** repo)482 rc_t ConfigRepo(KConfig * kfg, const char *dflt, const char* reps, const char* volPref, uint8_t type, NCBIRepository** repo)
483 {
484     char value [ 4096 ] = "";
485     rc_t rc;
486 
487     /* set up a new repo */
488     rc = SRARepoMake(repo);
489     if ( rc == 0 )
490     {
491         (*repo)->type = type;
492 
493         /* set up servers */
494         if ( SRAPathConfigValue ( kfg, reps, value, sizeof value, dflt ) == 0 )
495             NCBISRAPathAddRepPath ( *repo, value );
496 
497         if ( type == alg_ncbi )
498         {
499             /* set up NCBI volumes */
500             rc = ConfigVolume( *repo, kfg, volPref, "/ncbi/volumes", alg_ncbi );
501 
502             /* set up EBI volumes */
503             if (rc == 0)
504                 rc = ConfigVolume( *repo, kfg, volPref, "/ebi/volumes", alg_ebi );
505 
506             /* set up DDBJ volumes */
507             if (rc == 0)
508                 rc = ConfigVolume( *repo, kfg, volPref, "/ddbj/volumes", alg_ddbj );
509         }
510         else if (rc == 0)
511         {
512                 rc = ConfigVolume( *repo, kfg, volPref, "/volumes", type);
513         }
514     }
515     return rc;
516 }
517 
518 static
ConfigRepoSet(DLList * repos,KConfig * kfg,const char * kfgPath,const char * dflt,uint8_t type)519 rc_t ConfigRepoSet(DLList* repos, KConfig * kfg, const char* kfgPath, const char *dflt, uint8_t type)
520 {
521     const KConfigNode *node;
522 
523     rc_t rc = KConfigOpenNodeRead ( kfg, & node, "%s", kfgPath );
524     if ( rc == 0 )
525     {
526         KNamelist* children;
527         rc = KConfigNodeListChild ( node, &children );
528         if ( rc == 0 )
529         {
530             uint32_t count;
531             rc = KNamelistCount ( children, &count );
532             if ( rc == 0 )
533             {
534                 uint32_t i;
535                 for (i = 0; i < count; ++i)
536                 {
537                     const char* name;
538                     rc = KNamelistGet ( children, i, &name );
539                     if ( rc == 0 )
540                     {
541                         #define BufSize 4096
542                         char buf[ BufSize ];
543                         size_t bSize = string_size(kfgPath);
544                         string_copy(buf, BufSize, kfgPath, bSize);
545                         if (bSize + string_size(name) < sizeof(buf))
546                         {
547                             NCBIRepository* repo;
548                             string_copy(buf + bSize, sizeof(buf) - bSize, name, string_size(name) + 1);
549                             rc = ConfigRepo( kfg, dflt, buf, buf, type, &repo );
550                             DLListPushTail( repos, (DLNode*) repo );
551                         }
552                         #undef BufSize
553                     }
554                     else
555                     {
556                         rc = RC ( rcSRA, rcMgr, rcConstructing, rcString, rcExcessive );
557                     }
558                     if ( rc != 0 )
559                     {
560                         break;
561                     }
562                 }
563             }
564             KNamelistRelease ( children );
565         }
566 
567         KConfigNodeRelease ( node );
568     }
569     if (GetRCState(rc) == rcNotFound)
570     {
571         return 0;
572     }
573     return rc;
574 }
575 
576 static
SRAPathConfig(NCBISRAPath * self)577 rc_t SRAPathConfig ( NCBISRAPath *self )
578 {
579     const char *dflt;
580     KConfig * kfg;
581     rc_t rc = KConfigMakeLocal ( & kfg, NULL );
582     assert ( ( rc == 0 && kfg != NULL ) || ( rc != 0 && kfg == NULL ) );
583 
584     /* look for defaults */
585     dflt = getenv ( "SRAPATH" );
586 
587     /* locate and configure all repositories */
588     if ( rc == 0 )
589         rc = ConfigRepoSet( & self->repos, kfg, "/sra/repository/", dflt, alg_ncbi );
590     if ( rc == 0 )
591         rc = ConfigRepoSet( & self->repos, kfg, "/wgs/repository/", dflt, alg_wgs );
592     if ( rc == 0 )
593         rc = ConfigRepoSet( & self->repos, kfg, "/refseq/repository/", dflt, alg_refseq );
594 
595     if ( rc == 0 )
596     {   /* default repository for backwards compatibility */
597         rc = ConfigRepo( kfg, dflt, "sra/servers", "sra", alg_ncbi, &self->dflt_repo );
598 
599         /* set up REFSEQ volumes */
600         if (rc == 0)
601             rc = ConfigVolume( self->dflt_repo, kfg, "", "/refseq/volumes", alg_refseq );
602 
603         /* set up WGS volumes */
604         if (rc == 0)
605             rc = ConfigVolume( self->dflt_repo, kfg, "sra", "/wgs/volumes", alg_wgs );
606     }
607 
608     LogPathInfo(self);
609 
610     /* kfg may be NULL */
611     KConfigRelease ( kfg );
612 
613     return rc;
614 }
615 
616 
617 /* ParseRunAccession
618  *  accession is a string matching <prefix><number>
619  *  extract prefix length and accession number
620  */
621 static
SRAPathParseRunAccession(const char * accession,uint32_t * prefix,uint32_t * number)622 rc_t SRAPathParseRunAccession ( const char *accession, uint32_t *prefix, uint32_t *number )
623 {
624     char *end;
625     uint32_t i;
626 
627     for ( i = 0; accession [ i ] != 0; ++ i )
628     {
629         if ( isdigit ( accession [ i ] ) )
630             break;
631     }
632 
633     if ( i == 0 )
634         return RC ( rcSRA, rcMgr, rcResolving, rcPath, rcInvalid );
635 
636     * prefix = i;
637     * number = ( uint32_t ) strtoul ( & accession [ i ], & end, 10 );
638 
639     if (( end [ 0 ] == 0 ) || ( end [ 0 ] == '.' ))
640     {
641         return 0;
642     }
643     return RC ( rcSRA, rcMgr, rcResolving, rcPath, rcInvalid );
644 }
645 
646 
647 /* Full
648  *  creates full path from server, volume & accession
649  *
650  *  "rep" [ IN ] - NUL terminated full path of replication
651  *  server, e.g. "/panfs/traces01"
652  *
653  *  "vol" [ IN ] - NUL terminated relative path of volume,
654  *  e.g. "sra2"
655  *
656  *  "accession" [ IN ] - NUL terminated run accession,
657  *  e.g. "SRR000001"
658  *
659  *  "path" [ OUT ] and "path_max" [ IN ] - return buffer for
660  *  NUL-terminated full path to accession.
661  */
662 static
SRAPathFullInt(const NCBISRAPath * self,const char * rep,const char * vol,const char * accession,char * path,size_t path_max,size_t bank_size)663 rc_t SRAPathFullInt ( const NCBISRAPath *self, const char *rep, const char *vol,
664     const char *accession, char *path, size_t path_max, size_t bank_size )
665 {
666     /* pick apart accession */
667     uint32_t prefix, number;
668     rc_t rc = SRAPathParseRunAccession ( accession, & prefix, & number );
669     if ( rc == 0 )
670     {
671         size_t len;
672         size_t total = 0;
673         uint32_t bank = number / bank_size;
674 
675         if ( rep [ 0 ] != 0 && vol [ 0 ] != 0 )
676         {
677             /* normally write all parts at once */
678             if ( rep [ strlen (rep) - 1 ] == '/') /* check for need or not a '/' between rep and vol */
679                 rc= string_printf ( path, path_max, &len, "%s%s/%.*s/%06u/%s"
680                                  , rep
681                                  , vol
682                                  , ( int ) prefix, accession
683                                  , bank
684                                  , accession );
685             else
686                 rc= string_printf  ( path, path_max, &len, "%s/%s/%.*s/%06u/%s"
687                                  , rep
688                                  , vol
689                                  , ( int ) prefix, accession
690                                  , bank
691                                  , accession );
692         }
693         else
694         {
695             /* allow for individual rep-server and volume parts */
696             if ( rep [ 0 ] != 0 )
697             {
698                 rc = string_printf ( path, path_max, &len, "%s/", rep );
699                 if ( rc != 0 || ( total = len ) >= path_max )
700                     return RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
701             }
702             else if ( vol [ 0 ] != 0 )
703             {
704                 rc = string_printf ( & path [ total ], path_max - total, &len, "%s/", vol );
705                 if ( rc != 0 || ( total = len ) >= path_max )
706                     return RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
707             }
708 
709             /* append the accession */
710             rc = string_printf ( & path [ total ], path_max - total, &len, "%.*s/%06u/%s"
711                              , ( int ) prefix, accession
712                              , bank
713                              , accession );
714         }
715 
716         /* common detection of buffer overflow */
717         if ( rc != 0 || ( total += len ) >= path_max )
718             rc = RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
719     }
720 
721     return rc;
722 }
723 
724 static
SRAPathFullEBI(const NCBISRAPath * self,const char * rep,const char * vol,const char * accession,char * path,size_t path_max)725 rc_t SRAPathFullEBI ( const NCBISRAPath *self, const char *rep, const char *vol,
726     const char *accession, char *path, size_t path_max )
727 {
728     /* pick apart accession */
729     uint32_t prefix, number;
730     rc_t rc = SRAPathParseRunAccession ( accession, & prefix, & number );
731     if ( rc == 0 )
732     {
733         size_t len;
734         size_t total = 0;
735         uint32_t bank = number / 1000;
736 
737         if ( rep [ 0 ] != 0 && vol [ 0 ] != 0 )
738         {
739             /* normally write all parts at once */
740             if ( rep [ strlen (rep) - 1 ] == '/') /* check for need or not a '/' between rep and vol */
741                 rc= string_printf ( path, path_max, &len, "%s%s/%.*s/%.*s%03u/%s"
742                                  , rep
743                                  , vol
744                                  , ( int ) prefix, accession
745                                  , ( int ) prefix, accession
746                                  , bank
747                                  , accession );
748             else
749                 rc= string_printf  ( path, path_max, &len, "%s/%s/%.*s/%.*s%03u/%s"
750                                  , rep
751                                  , vol
752                                  , ( int ) prefix, accession
753                                  , ( int ) prefix, accession
754                                  , bank
755                                  , accession );
756         }
757         else
758         {
759             /* allow for individual rep-server and volume parts */
760             if ( rep [ 0 ] != 0 )
761             {
762                 rc = string_printf ( path, path_max, &len, "%s/", rep );
763                 if ( rc < 0 || ( total = len ) >= path_max )
764                     return RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
765             }
766             else if ( vol [ 0 ] != 0 )
767             {
768                 rc = string_printf ( & path [ total ], path_max - total, &len, "%s/", vol );
769                 if ( rc != 0 || ( total = len ) >= path_max )
770                     return RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
771             }
772 
773             /* append the accession */
774             rc = string_printf ( & path [ total ], path_max - total, &len, "%.*s/%.*s%03u/%s"
775                              , ( int ) prefix, accession
776                              , ( int ) prefix, accession
777                              , bank
778                              , accession );
779         }
780 
781         /* common detection of buffer overflow */
782         if ( len < 0 || ( total += len ) >= path_max )
783             rc = RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
784     }
785 
786     return rc;
787 }
788 
789 /*
790 * Refseq-style accessions come in 2 naming flavors, flat or SRA-like.
791 * If an accession with a flat name does not exist, use SRAPathFullInt to create an SRA-like path
792 */
793 static
SRAPathFullREFSEQ(const NCBISRAPath * self,const char * rep,const char * vol,const char * accession,char * path,size_t path_max)794 rc_t SRAPathFullREFSEQ ( const NCBISRAPath *self, const char *rep, const char *vol,
795     const char *accession, char *path, size_t path_max )
796 {
797     /* pick apart accession */
798     uint32_t prefix, number;
799     rc_t rc = SRAPathParseRunAccession ( accession, & prefix, & number );
800     if ( rc == 0 )
801     {
802         size_t len;
803         size_t total = 0;
804 
805         if ( rep [ 0 ] != 0 && vol [ 0 ] != 0 )
806         {
807             /* normally write all parts at once */
808             if ( rep [ strlen (rep) - 1 ] == '/') /* check for need or not a '/' between rep and vol */
809                 rc= string_printf ( path, path_max, &len, "%s%s/%s"
810                                  , rep
811                                  , vol
812                                  , accession );
813             else
814                 rc= string_printf  ( path, path_max, &len, "%s/%s/%s"
815                                  , rep
816                                  , vol
817                                  , accession );
818         }
819         else
820         {
821             /* allow for individual rep-server and volume parts */
822             if ( rep [ 0 ] != 0 )
823             {
824                 rc = string_printf ( path, path_max, &len, "%s/", rep );
825                 if ( rc != 0 || ( total = len ) >= path_max )
826                     return RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
827             }
828             else if ( vol [ 0 ] != 0 )
829             {
830                 rc = string_printf ( & path [ total ], path_max - total, &len, "%s/", vol );
831                 if ( rc != 0 || ( total = len ) >= path_max )
832                     return RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
833             }
834 
835             /* append the accession */
836             rc = string_printf ( & path [ total ], path_max - total, &len, "%s"
837                              , accession );
838         }
839 
840         /* common detection of buffer overflow */
841         if ( rc != 0 || ( total += len ) >= path_max )
842             rc = RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
843     }
844 
845     return rc;
846 }
847 
848 /*
849  * Try to locate in RefSeq Archives:
850  * check for pattern '(\w{4}\d{2})[\.\d]+'; the archive is $1
851  * use the scheme "x-ncbi-legrefseq" for vfs to recognize special case
852  */
853 static
SRAPathFullREFSEQArchive(NCBISRAPath const * self,char const rep[],char const vol[],char const accession[],char path[],size_t path_max)854 rc_t SRAPathFullREFSEQArchive(NCBISRAPath const *self,
855                               char const rep[],
856                               char const vol[],
857                               char const accession[],
858                               char path[],
859                               size_t path_max
860                              )
861 {
862     size_t const rep_sz = strlen(rep);
863     size_t const vol_sz = strlen(vol);
864     char const *const rep_sep = (rep_sz > 0 && rep[rep_sz - 1] != '/') ? "/" : "";
865     char const *const vol_sep = (vol_sz > 0 && vol[vol_sz - 1] != '/') ? "/" : "";
866     size_t sz;
867     unsigned i;
868     VFSManager *vfs;
869     rc_t rc = VFSManagerMake(&vfs);
870     VPath *vpath;
871     KDirectory const *dir;
872     KPathType type;
873 
874     if (rc)
875         return rc;
876 
877     for (i = 0; i < 4; ++i) {
878         int const ch = accession[i];
879 
880         if (ch == 0 || !isalpha(ch))
881             return RC(rcSRA, rcMgr, rcAccessing, rcPath, rcIncorrect);
882     }
883     for ( ; ; ++i) {
884         int const ch = accession[i];
885 
886         if (ch == 0)
887             break;
888         if (ch != '.' && !isdigit(ch))
889             return RC(rcSRA, rcMgr, rcAccessing, rcPath, rcIncorrect);
890     }
891     if (i < 8)
892         return RC(rcSRA, rcMgr, rcAccessing, rcPath, rcIncorrect);
893 
894     rc = string_printf(path, path_max, &sz, "x-ncbi-legrefseq:%s%s%s%s%.6s", rep, rep_sep, vol, vol_sep, accession);
895     if (rc) return rc;
896     i = sz;
897 
898     rc = VPathMake(&vpath, path + 17);
899     if (rc) return rc;
900 
901     rc = VFSManagerOpenDirectoryRead(vfs, &dir, vpath);
902     VPathRelease(vpath);
903     VFSManagerRelease(vfs);
904     if (rc) return rc;
905 
906     type = KDirectoryPathType(dir, "tbl/%s", accession);
907     KDirectoryRelease(dir);
908 
909     if (type != kptDir)
910         return RC(rcSRA, rcMgr, rcAccessing, rcPath, rcIncorrect);
911 
912     rc = string_printf(path + i, path_max - i, &sz, "#tbl/%s", accession);
913 
914     return rc;
915 }
916 
917 /*
918 * WGS style naming: accession name ABCD01 resolves into WGS/AB/CD/ABCD01
919 */
920 static
SRAPathFullWGS(const NCBISRAPath * self,const char * rep,const char * vol,const char * accession,char * path,size_t path_max)921 rc_t SRAPathFullWGS( const NCBISRAPath *self, const char *rep, const char *vol,
922     const char *accession, char *path, size_t path_max )
923 {
924     /* pick apart accession */
925     uint32_t prefix, number;
926     rc_t rc = SRAPathParseRunAccession ( accession, & prefix, & number );
927     if ( rc == 0 )
928     {
929         size_t len;
930         size_t total = 0;
931 
932         if ( prefix < 4 )
933         {
934             return RC ( rcSRA, rcMgr, rcResolving, rcName, rcTooShort );
935         }
936 
937         if ( rep [ 0 ] != 0 && vol [ 0 ] != 0 )
938         {
939             /* normally write all parts at once */
940             if ( rep [ strlen (rep) - 1 ] == '/') /* check for need or not a '/' between rep and vol */
941                 rc= string_printf ( path, path_max, &len, "%s%s/WGS/%.2s/%.2s/%s"
942                                  , rep
943                                  , vol
944                                  , accession
945                                  , accession+2
946                                  , accession );
947             else
948                 rc= string_printf ( path, path_max, &len, "%s/%s/WGS/%.2s/%.2s/%s"
949                                  , rep
950                                  , vol
951                                  , accession
952                                  , accession+2
953                                  , accession );
954         }
955         else
956         {
957             /* allow for individual rep-server and volume parts */
958             if ( rep [ 0 ] != 0 )
959             {
960                 rc = string_printf ( path, path_max, &len, "%s/", rep );
961                 if ( rc != 0 || ( total = len ) >= path_max )
962                     return RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
963             }
964             else if ( vol [ 0 ] != 0 )
965             {
966                 rc = string_printf ( & path [ total ], path_max - total, &len, "%s/", vol );
967                 if ( rc != 0 || ( total = len ) >= path_max )
968                     return RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
969             }
970 
971             /* append the accession */
972                 rc= string_printf ( & path [ total ], path_max - total, &len, "WGS/%.2s/%.2s/%s"
973                                  , accession
974                                  , accession+2
975                                  , accession );
976         }
977 
978         /* common detection of buffer overflow */
979         if ( rc != 0 || ( total += len ) >= path_max )
980             rc = RC ( rcSRA, rcMgr, rcAccessing, rcBuffer, rcInsufficient );
981     }
982 
983     return rc;
984 }
985 
986 static
ApplyAlg(const NCBISRAPath * self,const char * rep,const char * vol,const char * accession,char * path,size_t path_max,NCBIRepository * repo,bool * found)987 rc_t ApplyAlg( const NCBISRAPath *self, const char *rep, const char *vol,
988     const char *accession, char *path, size_t path_max, NCBIRepository *repo, bool* found)
989 {
990     SRAPathFindInfo pb;
991     /* see if we recognize volume */
992     pb . path = vol;
993     pb . size = strlen ( vol );
994     pb . found = 0;
995 
996     DLListDoUntil ( & repo -> vols, 0, SRAPathStringFind, & pb );
997     if ( pb . found )
998     {
999         *found = true;
1000         switch (pb . alg)
1001         {
1002         case alg_ebi:
1003             return SRAPathFullEBI ( self, rep, vol, accession, path, path_max );
1004         case alg_refseq:
1005         {
1006             rc_t rc = SRAPathFullREFSEQ ( self, rep, vol, accession, path, path_max );
1007             if ( rc == 0) /* check for existence of accession at the root of the volume, and if not found try to apply the default path-building scheme */
1008             {
1009                 switch ( KDirectoryPathType ( self -> dir, "%s", path ) )
1010                 {
1011                 case kptNotFound:
1012                 case kptBadPath:
1013                     return SRAPathFullREFSEQArchive( self, rep, vol, accession, path, path_max );
1014                 default:
1015                 	return 0;
1016                 }
1017             }
1018 		}
1019         case alg_wgs:
1020             return SRAPathFullWGS ( self, rep, vol, accession, path, path_max );
1021         default:
1022             break;
1023         }
1024     }
1025     *found = false;
1026     return 0;
1027 }
1028 
1029 static
NCBISRAPathFull(const NCBISRAPath * self,const char * rep,const char * vol,const char * accession,char * path,size_t path_max)1030 rc_t CC NCBISRAPathFull ( const NCBISRAPath *self, const char *rep, const char *vol,
1031     const char *accession, char *path, size_t path_max )
1032 {
1033     NCBIRepository *repo;
1034     bool found;
1035     rc_t rc;
1036 
1037     /* loop through repositories */
1038     for ( repo = ( NCBIRepository* ) DLListHead ( & self -> repos );
1039           repo != NULL; repo = ( NCBIRepository* ) DLNodeNext ( & repo -> n ) )
1040     {
1041         rc = ApplyAlg( self, rep, vol, accession, path, path_max, repo, &found);
1042         if (found)
1043         {
1044             return rc;
1045         }
1046     }
1047     /* try the default repository */
1048     rc = ApplyAlg( self, rep, vol, accession, path, path_max, self -> dflt_repo, &found);
1049     if (found)
1050     {
1051         return rc;
1052     }
1053     /* internal version */
1054     return SRAPathFullInt ( self, rep, vol, accession, path, path_max, 1024 );
1055 }
1056 
1057 
1058 /* Test
1059  *  returns true if path appears to be accession
1060  *  the test is a heuristic, and may return false positives
1061  *
1062  *  "path" [ IN ] - NUL terminated path to run
1063  */
1064 static
SRAPathTestTable(const KDirectory * dir,uint32_t type,const char * path)1065 bool SRAPathTestTable ( const KDirectory *dir, uint32_t type, const char *path )
1066 {
1067     /* differentiate between legacy and current structure */
1068     switch ( KDirectoryPathType ( dir, "%s/idx", path ) )
1069     {
1070     case kptNotFound:
1071         switch ( KDirectoryPathType ( dir, "%s/meta", path ) )
1072         {
1073         case kptFile:
1074         case kptFile | kptAlias:
1075             switch ( KDirectoryPathType ( dir, "%s/skey", path ) )
1076             {
1077             case kptFile:
1078             case kptFile | kptAlias:
1079                 return true;
1080             }
1081             break;
1082         }
1083         break;
1084 
1085     case kptDir:
1086     case kptDir | kptAlias:
1087         switch ( KDirectoryPathType ( dir, "%s/md/cur", path ) )
1088         {
1089         case kptFile:
1090         case kptFile | kptAlias:
1091             return true;
1092         }
1093         break;
1094     }
1095 
1096     /* not a table */
1097     return false;
1098 }
1099 
1100 static
SRAPathTestDatabase(const KDirectory * dir,uint32_t type,const char * path)1101 bool SRAPathTestDatabase ( const KDirectory *dir, uint32_t type, const char *path )
1102 {
1103     /* don't have a more stringent requirement for database at this time */
1104     return true;
1105 }
1106 
1107 static
SRAPathTestFile(const KDirectory * dir,uint32_t type,const char * path)1108 bool SRAPathTestFile ( const KDirectory *dir, uint32_t type, const char *path )
1109 {
1110     /* TBD - need to look at magic information */
1111     return false;
1112 }
1113 
1114 static
SRAPathTestInt(const KDirectory * dir,uint32_t type,const char * path)1115 bool SRAPathTestInt ( const KDirectory *dir, uint32_t type, const char *path )
1116 {
1117     switch ( type )
1118     {
1119     case kptDir:
1120     case kptDir | kptAlias:
1121         break;
1122 
1123     case kptFile:
1124     case kptFile | kptAlias:
1125         return SRAPathTestFile ( dir, type, path );
1126 
1127     default:
1128         return false;
1129     }
1130 
1131     /* detect apparent tables */
1132     switch ( KDirectoryPathType ( dir, "%s/col", path ) )
1133     {
1134     case kptDir:
1135     case kptDir | kptAlias:
1136         return SRAPathTestTable ( dir, type, path );
1137     }
1138 
1139     /* detect apparent databases */
1140     switch ( KDirectoryPathType ( dir, "%s/tbl", path ) )
1141     {
1142     case kptDir:
1143     case kptDir | kptAlias:
1144         return SRAPathTestDatabase ( dir, type, path );
1145     }
1146 
1147     /* not a recognized accession */
1148     return false;
1149 }
1150 
1151 static
NCBISRAPathTest(const NCBISRAPath * self,const char * path)1152 bool CC NCBISRAPathTest ( const NCBISRAPath *self, const char *path )
1153 {
1154     return SRAPathTestInt ( self -> dir,
1155                             KDirectoryPathType ( self -> dir, "%s", path ), path );
1156 }
1157 
1158 
1159 /* FindOnServer
1160  *  find accession on rep-server
1161  */
1162 static
SRAPathFindOnServer(const NCBISRAPath * self,const NCBIRepository * repo,const SRAPathString * srv,const char * accession,char * path,size_t path_max,int vol_type)1163 rc_t SRAPathFindOnServer ( const NCBISRAPath *self, const NCBIRepository *repo, const SRAPathString *srv,
1164     const char *accession, char *path, size_t path_max, int vol_type )
1165 {
1166     const SRAPathString *vol;
1167 
1168     PATH_DEBUG (("SRAPathFindOnServer(%s)\n", srv->path));
1169 
1170     for ( vol = ( const SRAPathString* ) DLListHead ( & repo -> vols );
1171           vol != NULL; vol = ( const SRAPathString* ) DLNodeNext ( & vol -> n ) )
1172     {
1173         if ( vol_type == alg_none || vol_type == vol->alg )
1174         {
1175             rc_t rc;
1176 
1177             PATH_DEBUG (("SRAPathFindOnServer trying volume %s\n", vol->path));
1178 
1179             switch ( vol -> alg )
1180             {
1181             case alg_ebi:
1182                 rc = SRAPathFullEBI ( self, srv -> path, vol -> path, accession, path, path_max );
1183                 break;
1184             case alg_refseq:
1185                 rc = SRAPathFullREFSEQ ( self, srv -> path, vol -> path, accession, path, path_max );
1186                 if ( rc == 0) /* check for existence of accession at the root of the volume, and if not found try to apply the default path-building scheme */
1187                 {
1188                     switch ( KDirectoryPathType ( self -> dir, "%s", path ) )
1189                     {
1190                     case kptNotFound:
1191                     case kptBadPath:
1192                         rc = SRAPathFullREFSEQArchive( self, srv -> path, vol -> path, accession, path, path_max );
1193                         if (rc == 0) {
1194                             PATH_DEBUG (("SRAPathFindOnServer: found(%s)\n", path));
1195                             return 0;
1196                         }
1197                         break;
1198                     default:
1199                     	return 0;
1200                     }
1201                 }
1202                 break;
1203             case alg_wgs:
1204                 rc = SRAPathFullWGS ( self, srv -> path, vol -> path, accession, path, path_max );
1205                 break;
1206             default:
1207                 rc = SRAPathFullInt ( self, srv -> path, vol -> path, accession, path, path_max, 1024 );
1208                 break;
1209             }
1210             if ( rc == 0 )
1211             {
1212                 switch ( KDirectoryPathType ( self -> dir, "%s", path ) )
1213                 {
1214                 case kptNotFound:
1215                 case kptBadPath:
1216                     break;
1217                 default:
1218                     PATH_DEBUG (("SRAPathFindOnServer: found(%s)\n", path));
1219                     return 0;
1220                 }
1221             }
1222             else
1223             {
1224                 if ( GetRCState( rc ) == rcInsufficient )
1225                     return rc;
1226             }
1227         }
1228     }
1229 
1230     return RC ( rcSRA, rcMgr, rcSelecting, rcPath, rcNotFound );
1231 }
1232 
1233 /* FindInRepo
1234  *  find accession in a repository
1235  */
1236 static
SRAPathFindInRepo(const NCBISRAPath * self,NCBIRepository * repo,const char * accession,char * path,size_t path_max,size_t * rep_len,int vol_type)1237 rc_t SRAPathFindInRepo ( const NCBISRAPath *self, NCBIRepository *repo, const char *accession,
1238                          char *path, size_t path_max, size_t *rep_len, int vol_type )
1239 {
1240     SRAPathString *srv;
1241 
1242     PATH_DEBUG (("SRAPathFindInRepo(%s)\n", AlgToStr(repo->type)));
1243 
1244     /* look for accession on a rep-server */
1245     for ( srv = ( SRAPathString* ) DLListHead ( & repo -> repsrv ); srv != NULL; srv = ( SRAPathString* ) DLNodeNext ( & srv -> n ) )
1246     {
1247         /* try with this server */
1248         rc_t rc = SRAPathFindOnServer ( self, repo, srv, accession, path, path_max, vol_type );
1249         if ( rc == 0 )
1250         {
1251             /* make sure server is at head of list */
1252             if ( DLNodePrev ( & srv -> n ) != NULL )
1253             {
1254                 DLListUnlink ( & repo -> repsrv, & srv -> n );
1255                 DLListPushHead ( & repo -> repsrv, & srv -> n );
1256             }
1257 
1258             if ( rep_len != NULL )
1259                 * rep_len = strlen ( srv -> path );
1260 
1261             return 0;
1262         }
1263 
1264         if ( GetRCState ( rc ) != rcNotFound )
1265             return rc;
1266     }
1267 
1268     return RC ( rcSRA, rcMgr, rcSelecting, rcPath, rcNotFound );
1269 }
1270 
1271 /* FindInRepoByType
1272  *  find accession in a repository of a given type
1273  */
1274 static
SRAPathFindInRepoByType(const NCBISRAPath * self,const char * accession,char * path,size_t path_max,size_t * rep_len,int repo_type,int vol_type)1275 rc_t SRAPathFindInRepoByType ( const NCBISRAPath *self, const char *accession, char *path, size_t path_max, size_t *rep_len, int repo_type, int vol_type )
1276 {
1277     /* loop through all repositories */
1278     NCBIRepository *repo;
1279     for ( repo = ( NCBIRepository* ) DLListHead ( & self -> repos ); repo != NULL; repo = ( NCBIRepository* ) DLNodeNext ( & repo -> n ) )
1280     {
1281         if ( repo->type == repo_type && SRAPathFindInRepo(self, repo, accession, path, path_max, rep_len, vol_type) == 0 )
1282             return 0;
1283     }
1284     return RC ( rcSRA, rcMgr, rcSelecting, rcPath, rcNotFound );
1285 }
1286 
1287 /* FastFind
1288  * Uses heuristics to select the repository most likely to contain the accession, then tries to locate the acecssion in the repository.
1289 */
1290 static
FindFast(const NCBISRAPath * cself,const char * accession,char * path,size_t path_max,size_t * rep_len)1291 rc_t FindFast( const NCBISRAPath *cself, const char *accession, char *path, size_t path_max, size_t *rep_len )
1292 {
1293     /*TODO: look up cache first */
1294 
1295     /* recognize known naming schemes */
1296     size_t size = string_size(accession);
1297     if ( string_cmp(accession, size, "SRR", 3, 3) == 0 )
1298         return SRAPathFindInRepoByType(cself, accession, path, path_max, rep_len, alg_ncbi, alg_ncbi);
1299 
1300     if ( string_cmp(accession, size, "ERR", 3, 3) == 0 )
1301         return SRAPathFindInRepoByType(cself, accession, path, path_max, rep_len, alg_ncbi, alg_ebi);
1302 
1303     if ( string_cmp(accession, size, "DRR", 3, 3) == 0 )
1304         return SRAPathFindInRepoByType(cself, accession, path, path_max, rep_len, alg_ncbi, alg_ddbj);
1305 
1306     if ( string_chr(accession, size, '.') != NULL )
1307         return SRAPathFindInRepoByType(cself, accession, path, path_max, rep_len, alg_refseq, alg_none);
1308 
1309     if ( size > 2 && isdigit(accession[size-1]) && isdigit(accession[size-2]) && ! isdigit(accession[size-3]) )
1310         return SRAPathFindInRepoByType(cself, accession, path, path_max, rep_len, alg_wgs, alg_none);
1311 
1312     return RC ( rcSRA, rcMgr, rcSelecting, rcPath, rcNotFound );
1313 }
1314 
1315 /* Find
1316  *  finds location of run within rep-server/volume matrix
1317  *
1318  *  "accession" [ IN ] - NUL terminated run accession,
1319  *   e.g. "SRR000001"
1320  *
1321  *  "path" [ OUT ] and "path_max" [ IN ] - return buffer for
1322  *  NUL-terminated full path to accession.
1323  *
1324  *  returns 0 if path exists, rc state rcNotFound if
1325  *  path cannot be found, and rcInsufficient if buffer is
1326  *  too small.
1327  */
1328 static
NCBISRAPathFindWithRepLen(const NCBISRAPath * cself,const char * accession,char * path,size_t path_max,size_t * rep_len)1329 rc_t CC NCBISRAPathFindWithRepLen ( const NCBISRAPath *cself, const char *accession, char *path, size_t path_max, size_t *rep_len )
1330 {
1331     rc_t rc;
1332     NCBIRepository *repo;
1333 
1334     PATH_DEBUG(("NCBISRAPathFindWithRepLen(%s)\n", accession));
1335 
1336     rc = FindFast( cself, accession, path, path_max, rep_len );
1337     if ( rc == 0 )
1338         return 0;
1339 
1340     /* loop through all repositories */
1341     for ( repo = ( NCBIRepository* ) DLListHead ( & cself -> repos ); repo != NULL; repo = ( NCBIRepository* ) DLNodeNext ( & repo -> n ) )
1342     {
1343         rc = SRAPathFindInRepo(cself, repo, accession, path, path_max, rep_len, alg_none);
1344         if ( rc == 0 )
1345             return 0;
1346     }
1347     /* default repository */
1348     return SRAPathFindInRepo(cself, cself -> dflt_repo, accession, path, path_max, rep_len, alg_none);
1349 }
1350 
/* vtSRAPath
 *  dispatch table for the NCBI implementation of SRAPath;
 *  SRAPathMakeImpl stores its address in the "dad . vt" member of every
 *  object it creates.
 *
 *  The initializer is positional, so the entry order must match the member
 *  order of struct SRAPath_vt_v2.
 *  NOTE(review): the leading "2, 1" is presumably the interface
 *  major/minor version - confirm against sra/impl.h.
 */
struct SRAPath_vt_v2 vtSRAPath =
{
    2, 1,
    NCBISRAPathAddRef,
    NCBISRAPathRelease,
    NCBISRAPathVersion,
    NCBISRAPathClear,
    NCBISRAPathAddRepPathDefault,
    NCBISRAPathAddVolPathDefault,
    NCBISRAPathFull,
    NCBISRAPathTest,
    NCBISRAPathFindWithRepLen
};
1364 
1365 /* Make
1366  *  create path manager
1367  *
1368  *  the path manager should already be configured with
1369  *  standard search paths, but can be augmented by using
1370  *  the Add*Path messages.
1371  *
1372  *  "dir" [ IN, NULL OKAY ] - optional root directory to use
1373  *  attaches a new reference
1374  */
1375 MOD_EXPORT
SRAPathMakeImpl(SRAPath ** pm,const KDirectory * dir)1376 rc_t CC SRAPathMakeImpl ( SRAPath **pm, const KDirectory *dir )
1377 {
1378     rc_t rc;
1379 
1380     if ( pm == NULL )
1381         rc = RC ( rcSRA, rcMgr, rcConstructing, rcParam, rcNull );
1382     else
1383     {
1384         NCBISRAPath *p = (NCBISRAPath *) malloc ( sizeof * p );
1385         if ( p == NULL )
1386             rc = RC ( rcSRA, rcMgr, rcConstructing, rcMemory, rcExhausted );
1387         else
1388         {
1389             p -> dad . vt = ( SRAPath_vt* ) & vtSRAPath;
1390             p -> dir = dir;
1391             if ( dir != NULL )
1392                 rc = KDirectoryAddRef ( dir );
1393             else
1394             {
1395                 KDirectory *wd;
1396                 rc = KDirectoryNativeDir ( & wd );
1397                 p -> dir = wd;
1398             }
1399 
1400             if ( rc != 0 )
1401                 free ( p );
1402             else
1403             {
1404                 DLListInit ( & p -> repos );
1405                 p -> dflt_repo = NULL;
1406                 atomic32_set ( & p -> refcount, 1 );
1407 
1408                 /* the object is now complete */
1409                 rc = SRAPathConfig ( p );
1410                 if ( rc == 0 )
1411                 {
1412                     * pm = & p -> dad;
1413                     return 0;
1414                 }
1415 
1416                 SRAPathWhack ( p );
1417             }
1418         }
1419 
1420         * pm = NULL;
1421     }
1422 
1423     return rc;
1424 }
1425