1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #include "runtime.H"
19 #include "sqStore.H"
20 #include "clearRangeFile.H"
21 
22 #include <vector>
23 
24 
25 int
main(int argc,char ** argv)26 main (int argc, char **argv) {
27   char const                *seqName       = NULL;
28 
29   std::vector<char const *>  clrName;
30   std::vector<uint32>        bgnID;
31   std::vector<uint32>        endID;
32 
33   char const                *outName       = NULL;
34 
35   bool                       verbose       = false;
36 
37   argc = AS_configure(argc, argv, 1);
38 
39   std::vector<char const *>  err;
40   for (int32 arg=1; arg < argc; arg++) {
41     if        (strcmp(argv[arg], "-S") == 0) {
42       seqName = argv[++arg];
43 
44     } else if (strcmp(argv[arg], "-c") == 0) {
45       clrName.push_back(argv[++arg]);
46       bgnID.push_back(strtouint32(argv[++arg]));
47       endID.push_back(strtouint32(argv[++arg]));
48 
49     } else if (strcmp(argv[arg], "-o") == 0) {
50       outName = argv[++arg];
51 
52     } else if (strcmp(argv[arg], "-v") == 0) {
53       verbose = true;
54 
55     } else {
56       char *s = new char [1024];
57       snprintf(s, 1024, "ERROR:  Unknown option '%s'.\n", argv[arg]);
58       err.push_back(s);
59     }
60   }
61 
62   if (seqName == NULL)
63     err.push_back("ERROR:  no sequence store (-S) supplied.\n");
64 
65   if (err.size() > 0) {
66     fprintf(stderr, "usage: %s -S <seqStore> -c <bgnID> <endID> <clearRangeFile> -o <clearRangeFile>\n", argv[0]);
67     fprintf(stderr, "\n");
68     fprintf(stderr, "  -S <seqStore>                    Path to the sequence store\n");
69     fprintf(stderr, "  -c <clearRangeFile> <bgn> <end>  Path to the file of clear ranges,\n");
70     fprintf(stderr, "                                   along with the (inclusive) range of\n");
71     fprintf(stderr, "                                   read IDs that have clear ranges set\n");
72     fprintf(stderr, "  -o <clearRangeFile>              Path to output clear ranges.\n");
73     fprintf(stderr, "\n");
74     fprintf(stderr, "  -v                    Report clear range changes to stderr\n");
75     fprintf(stderr, "\n");
76     fprintf(stderr, "  Merges multiple clear range files into one.\n");
77     fprintf(stderr, "\n");
78 
79     for (uint32 ii=0; ii<err.size(); ii++)
80       if (err[ii])
81         fputs(err[ii], stderr);
82 
83     exit(1);
84   }
85 
86   sqStore        *seqStore = new sqStore(seqName, sqStore_extend);
87   uint32          numReads  = seqStore->sqStore_lastReadID();
88   uint32          numLibs   = seqStore->sqStore_lastLibraryID();
89 
90   clearRangeFile *outRange = new clearRangeFile(outName, seqStore);
91 
92   for (uint32 ii=0; ii<clrName.size(); ii++) {
93     clearRangeFile *clrRange = new clearRangeFile(clrName[ii], seqStore);
94 
95     for (uint32 rid=bgnID[ii]; rid<=endID[ii]; rid++) {
96       if (verbose == true)
97         fprintf(stderr, "%u\t%7u-%-7u\t%7u-%-7u\n",
98                 rid,
99                 seqStore->sqStore_getClearBgn(rid), seqStore->sqStore_getClearEnd(rid),
100                 clrRange->bgn(rid), clrRange->end(rid));
101 
102       outRange->setbgn(rid) = clrRange->bgn(rid);
103       outRange->setend(rid) = clrRange->end(rid);
104     }
105 
106     delete clrRange;
107   }
108 
109   delete outRange;
110 
111   delete seqStore;
112 
113   exit(0);
114 }
115