1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #include "overlapInCore.H"
19 #include "strings.H"
20 
21 oicParameters  G;
22 
23 
24 uint32 STRING_NUM_BITS       = 31;  //  MUST BE EXACTLY THIS
25 uint32 OFFSET_BITS           = 31;
26 
27 uint64 TRUELY_ONE            = (uint64)1;
28 uint64 TRUELY_ZERO           = (uint64)0;
29 
30 uint64 STRING_NUM_MASK       = (TRUELY_ONE << STRING_NUM_BITS) - 1;
31 uint64 OFFSET_MASK           = (TRUELY_ONE << OFFSET_BITS) - 1;
32 
33 uint64 MAX_STRING_NUM        = STRING_NUM_MASK;
34 
35 
36 
37 int64  Bad_Short_Window_Ct = 0;
38 //  The number of overlaps rejected because of too many errors in a small window
39 
40 int64  Bad_Long_Window_Ct = 0;
41 //  The number of overlaps rejected because of too many errors in a long window
42 
43 
44 //  Stores sequence and quality data of fragments in hash table
45 char   *basesData = NULL;
46 size_t  Data_Len = 0;
47 
48 String_Ref_t  *nextRef = NULL;
49 
50 size_t  Extra_Data_Len;
51 //  Total length available for hash table string data,
52 //  including both regular strings and extra strings
53 //  added from kmer screening
54 
55 uint64         Max_Extra_Ref_Space = 0;  //  allocated amount
56 uint64         Extra_Ref_Ct = 0;         //  used amount
57 String_Ref_t  *Extra_Ref_Space = NULL;
58 uint64         Extra_String_Ct = 0;
59 //  Number of extra strings of screen kmers added to hash table
60 
61 uint64  Extra_String_Subcount = 0;
62 //  Number of kmers already added to last extra string in hash table
63 
64 Check_Vector_t  * Hash_Check_Array = NULL;
65 //  Bit vector to eliminate impossible hash matches
66 
67 uint64  Hash_String_Num_Offset = 1;
68 Hash_Bucket_t  * Hash_Table;
69 
70 uint64  Kmer_Hits_With_Olap_Ct = 0;
71 uint64  Kmer_Hits_Without_Olap_Ct = 0;
72 uint64  Kmer_Hits_Skipped_Ct = 0;
73 uint64  Multi_Overlap_Ct = 0;
74 
75 uint64  String_Ct;
76 //  Number of fragments in the hash table
77 
78 Hash_Frag_Info_t  * String_Info = NULL;
79 int64  * String_Start = NULL;
80 uint32  String_Start_Size = 0;
81 //  Number of available positions in  String_Start
82 
83 size_t  Used_Data_Len = 0;
84 //  Number of bytes of Data currently occupied, including
85 //  regular strings and extra kmer screen strings
86 
87 int32  Bit_Equivalent[256] = {0};
88 //  Table to convert characters to 2-bit integer code
89 
90 int32  Char_Is_Bad[256] = {0};
91 //  Table to check if character is not a, c, g or t.
92 
93 uint64  Hash_Entries = 0;
94 
95 uint64  Total_Overlaps = 0;
96 uint64  Contained_Overlap_Ct = 0;
97 uint64  Dovetail_Overlap_Ct = 0;
98 
99 uint64  HSF1     = 666;
100 uint64  HSF2     = 666;
101 uint64  SV1      = 666;
102 uint64  SV2      = 666;
103 uint64  SV3      = 666;
104 
105 ovFile  *Out_BOF = NULL;
106 
107 
108 
109 //  Allocate memory for  (* WA)  and set initial values.
110 //  Set  thread_id  field to  id .
111 void
Initialize_Work_Area(Work_Area_t * WA,int id,sqStore * readStore,sqCache * readCache)112 Initialize_Work_Area(Work_Area_t *WA, int id, sqStore *readStore, sqCache *readCache) {
113   uint64  allocated = 0;
114 
115   WA->String_Olap_Size  = INIT_STRING_OLAP_SIZE;
116   WA->String_Olap_Space = new String_Olap_t [WA->String_Olap_Size];
117 
118   WA->Match_Node_Size  = INIT_MATCH_NODE_SIZE;
119   WA->Match_Node_Space = new Match_Node_t [WA->Match_Node_Size];
120 
121   allocated += WA->String_Olap_Size * sizeof (String_Olap_t);
122   allocated += WA->Match_Node_Size  * sizeof (Match_Node_t);
123 
124   WA->status     = 0;
125   WA->thread_id  = id;
126 
127   WA->readStore = readStore;
128   WA->readCache = readCache;
129 
130   WA->overlapsLen = 0;
131   WA->overlapsMax = 1024 * 1024 / sizeof(ovOverlap);
132   WA->overlaps    = new ovOverlap [WA->overlapsMax];
133 
134   allocated += sizeof(ovOverlap) * WA->overlapsMax;
135 
136   WA->editDist = new prefixEditDistance(G.Doing_Partial_Overlaps,
137                                         G.maxErate,
138                                         G.maxErate * G.alignNoise);
139 
140   WA->q_diff = new char [AS_MAX_READLEN];
141   WA->distinct_olap = new Olap_Info_t [MAX_DISTINCT_OLAPS];
142 }
143 
144 
145 void
Delete_Work_Area(Work_Area_t * WA)146 Delete_Work_Area(Work_Area_t *WA) {
147   delete    WA->editDist;
148   delete [] WA->String_Olap_Space;
149   delete [] WA->Match_Node_Space;
150   delete [] WA->overlaps;
151 
152   delete [] WA->distinct_olap;
153   delete [] WA->q_diff;
154 }
155 
156 
157 
158 
159 int
OverlapDriver(void)160 OverlapDriver(void) {
161 
162   Work_Area_t    *thread_wa = new Work_Area_t [G.Num_PThreads];
163 
164   sqStore        *readStore = new sqStore(G.Frag_Store_Path);
165   sqCache        *readCache = new sqCache(readStore);
166 
167   Out_BOF = new ovFile(readStore, G.Outfile_Name, ovFileFullWrite);
168 
169   fprintf(stderr, "Initializing %u work areas.\n", G.Num_PThreads);
170 
171 #pragma omp parallel for
172   for (uint32 i=0;  i<G.Num_PThreads;  i++)
173     Initialize_Work_Area(thread_wa+i, i, readStore, readCache);
174 
175   //  Make sure both the hash and reference ranges are valid.
176 
177   if (G.bgnHashID < 1)
178     G.bgnHashID = 1;
179 
180   if (readStore->sqStore_lastReadID() < G.endHashID)
181     G.endHashID = readStore->sqStore_lastReadID();
182 
183   if (G.bgnRefID < 1)
184     G.bgnRefID = 1;
185 
186   if (G.endRefID > readStore->sqStore_lastReadID())
187     G.endRefID = readStore->sqStore_lastReadID();
188 
189   //  Load the reference range into the cache
190 
191   fprintf(stderr, "Loading reference reads %u-%u inclusive.\n", G.bgnRefID, G.endRefID);
192 
193   readCache->sqCache_loadReads(G.bgnRefID, G.endRefID, true);
194 
195   //  Note distinction between the local bgn/end and the global G.bgn/G.end.
196 
197   uint32  bgnHashID = G.bgnHashID;
198   uint32  endHashID = G.endHashID;
199 
200   //  Iterate over read blocks, build a hash table, then search in threads.
201 
202   while (bgnHashID < G.endHashID) {
203     if (endHashID > G.endHashID)
204       endHashID = G.endHashID;
205 
206     assert(0          <  bgnHashID);
207     assert(bgnHashID  <= endHashID);
208     assert(endHashID  <= readStore->sqStore_lastReadID());
209 
210     //  Load as much as we can.  If we load less than expected, the endHashID is updated to reflect
211     //  the last read loaded.
212 
213     endHashID = Build_Hash_Index(readStore, bgnHashID, endHashID);
214 
215     //  Decide the range of reads to process.  No more than what is loaded in the table.
216 
217     G.curRefID = G.bgnRefID;
218 
219     //  The old version used to further divide the ref range into blocks of at most
220     //  Max_Reads_Per_Batch so that those reads could be loaded into core.  We don't
221     //  need to do that anymore.
222 
223     G.perThread = 1 + (G.endRefID - G.bgnRefID) / G.Num_PThreads / 8;
224 
225     fprintf(stderr, "\n");
226     fprintf(stderr, "Range: %u-%u.  Store has %u reads.\n",
227             G.bgnRefID, G.endRefID, readStore->sqStore_lastReadID());
228     fprintf(stderr, "Chunk: " F_U32 " reads/thread -- (G.endRefID=" F_U32 " - G.bgnRefID=" F_U32 ") / G.Num_PThreads=" F_U32 " / 8\n",
229             G.perThread, G.endRefID, G.bgnRefID, G.Num_PThreads);
230 
231     fprintf(stderr, "\n");
232     fprintf(stderr, "Starting " F_U32 "-" F_U32 " with " F_U32 " per thread\n", G.bgnRefID, G.endRefID, G.perThread);
233     fprintf(stderr, "\n");
234 
235     //  Initialize each thread, reset the current position.  curRefID and endRefID are updated, this
236     //  cannot be done in the parallel loop!
237 
238     for (uint32 i=0; i<G.Num_PThreads; i++) {
239       thread_wa[i].bgnID = G.curRefID;
240       thread_wa[i].endID = thread_wa[i].bgnID + G.perThread - 1;
241 
242       G.curRefID = thread_wa[i].endID + 1;  //  Global value updated!
243     }
244 
245 #pragma omp parallel for
246     for (uint32 i=0; i<G.Num_PThreads; i++)
247       Process_Overlaps(thread_wa + i);
248 
249     //  Clear out the hash table.  This stuff is allocated in Build_Hash_Index
250 
251     delete [] basesData;  basesData = NULL;
252     delete [] nextRef;    nextRef   = NULL;
253 
254     //  This one could be left allocated, except for the last iteration.
255 
256     delete [] Extra_Ref_Space;  Extra_Ref_Space = NULL;  Max_Extra_Ref_Space = 0;
257 
258     //  Prepare for another hash table iteration.
259     bgnHashID = endHashID + 1;
260     endHashID = G.endHashID;
261   }
262 
263   delete Out_BOF;
264 
265   delete readCache;
266 
267   delete readStore;
268 
269   for (uint32 i=0;  i<G.Num_PThreads;  i++)
270     Delete_Work_Area(thread_wa + i);
271 
272   delete [] thread_wa;
273 
274   return  0;
275 }
276 
277 
278 
279 
280 
281 int
main(int argc,char ** argv)282 main(int argc, char **argv) {
283   int  illegal;
284 
285   argc = AS_configure(argc, argv);
286 
287   G.initialize();  //  Probably redundant with the call in the constructor, but doesn't hurt.
288 
289   int err=0;
290   int arg=1;
291   while (arg < argc) {
292     if        (strcmp(argv[arg], "-partial") == 0) {
293       G.Doing_Partial_Overlaps = true;
294 
295     } else if (strcmp(argv[arg], "-h") == 0) {
296       decodeRange(argv[++arg], G.bgnHashID, G.endHashID);
297 
298     } else if (strcmp(argv[arg], "-H") == 0) {
299       decodeRange(argv[++arg], G.minLibToHash, G.maxLibToHash);
300 
301     } else if (strcmp(argv[arg], "-r") == 0) {
302       decodeRange(argv[++arg], G.bgnRefID, G.endRefID);
303 
304     } else if (strcmp(argv[arg], "-R") == 0) {
305       decodeRange(argv[++arg], G.minLibToRef, G.maxLibToRef);
306 
307     } else if (strcmp(argv[arg], "-k") == 0) {
308       arg++;
309 
310       if (fileExists(argv[arg]) == true)
311         G.kmerSkipFileName = argv[arg];
312       else
313         G.Kmer_Len = strtouint32(argv[arg]);
314 
315     } else if (strcmp(argv[arg], "-l") == 0) {
316       G.Frag_Olap_Limit = strtol(argv[++arg], NULL, 10);
317       if  (G.Frag_Olap_Limit < 1)
318         G.Frag_Olap_Limit = UINT64_MAX;
319 
320     } else if (strcmp(argv[arg], "-m") == 0) {
321       G.Unique_Olap_Per_Pair = false;
322     } else if (strcmp(argv[arg], "-u") == 0) {
323       G.Unique_Olap_Per_Pair = true;
324 
325     } else if (strcmp(argv[arg], "--hashbits") == 0) {
326       G.Hash_Mask_Bits = strtoull(argv[++arg], NULL, 10);
327 
328     } else if (strcmp(argv[arg], "--hashdatalen") == 0) {
329       G.Max_Hash_Data_Len = strtoull(argv[++arg], NULL, 10);
330 
331     } else if (strcmp(argv[arg], "--hashload") == 0) {
332       G.Max_Hash_Load = atof(argv[++arg]);
333 
334 #if 0
335     //  This should still work, but not useful unless String_Ref_t is
336     //  changed to uint32.
337     //
338     //fprintf(stderr, "--maxreadlen n     For batches with all short reads, pack bits differently to\n");
339     //fprintf(stderr, "                   process more reads per batch.\n");
340     //fprintf(stderr, "                     all reads must be shorter than n\n");
341     //fprintf(stderr, "                     limited to 2^(30-m) reads\n");
342     //fprintf(stderr, "                   Common values:\n");
343     //fprintf(stderr, "                     maxreadlen 2048->hashstrings  524288 (default)\n");
344     //fprintf(stderr, "                     maxreadlen  512->hashstrings 2097152\n");
345     //fprintf(stderr, "                     maxreadlen  128->hashstrings 8388608\n");
346     //fprintf(stderr, "\n");
347 
348     } else if (strcmp(argv[arg], "--maxreadlen") == 0) {
349       //  Quite the gross way to do this, but simple.
350       uint32 desired = strtoul(argv[++arg], NULL, 10);
351       OFFSET_BITS = 1;
352       while (((uint32)1 << OFFSET_BITS) < desired)
353         OFFSET_BITS++;
354 
355       STRING_NUM_BITS       = 30 - OFFSET_BITS;
356 
357       STRING_NUM_MASK       = (1 << STRING_NUM_BITS) - 1;
358       OFFSET_MASK           = (1 << OFFSET_BITS) - 1;
359 
360       MAX_STRING_NUM        = STRING_NUM_MASK;
361 #endif
362 
363     } else if (strcmp(argv[arg], "-o") == 0) {
364       G.Outfile_Name = argv[++arg];
365 
366     } else if (strcmp(argv[arg], "-s") == 0) {
367       G.Outstat_Name = argv[++arg];
368 
369     } else if (strcmp(argv[arg], "-t") == 0) {
370       G.Num_PThreads = setNumThreads(argv[++arg]);
371 
372 
373     } else if (strcmp(argv[arg], "--minlength") == 0) {
374       G.Min_Olap_Len = strtol (argv[++arg], NULL, 10);
375     } else if (strcmp(argv[arg], "--minkmers") == 0) {
376       G.Filter_By_Kmer_Count = int(floor(exp(-1.0 * (double)G.Kmer_Len * G.maxErate) * (G.Min_Olap_Len - G.Kmer_Len + 1)));
377     } else if (strcmp(argv[arg], "--maxerate") == 0) {
378       G.maxErate = strtof(argv[++arg], NULL);
379     } else if (strcmp(argv[arg], "--alignnoise") == 0) {
380       G.alignNoise = strtof(argv[++arg], NULL);
381 
382     } else if (strcmp(argv[arg], "-z") == 0) {
383       G.Use_Hopeless_Check = false;
384 
385     } else {
386       if (G.Frag_Store_Path == NULL) {
387         G.Frag_Store_Path = argv[arg];
388       } else {
389         fprintf(stderr, "Unknown option '%s'\n", argv[arg]);
390         err++;
391       }
392     }
393     arg++;
394   }
395 
396   //  Fix up some flags if we're allowing high error rates.
397   //
398   if (G.maxErate > 0.06)
399     G.Use_Hopeless_Check = false;
400 
401   if (G.Kmer_Len == 0)
402     fprintf(stderr, "* No kmer length supplied; -k needed!\n"), err++;
403 
404   if (G.Outfile_Name == NULL)
405     fprintf (stderr, "ERROR:  No output file name specified\n"), err++;
406 
407   if ((err) || (G.Frag_Store_Path == NULL)) {
408     fprintf(stderr, "USAGE:  %s [options] <seqStorePath>\n", argv[0]);
409     fprintf(stderr, "\n");
410     fprintf(stderr, "-b <fn>     in contig mode, specify the output file\n");
411     fprintf(stderr, "-c          contig mode.  Use 2 frag stores.  First is\n");
412     fprintf(stderr, "            for reads; second is for contigs\n");
413     fprintf(stderr, "-partial    do partial overlaps\n");
414     fprintf(stderr, "-h <range>  to specify fragments to put in hash table\n");
415     fprintf(stderr, "            Implies LSF mode (no changes to frag store)\n");
416     fprintf(stderr, "-I          designate a file of frag iids to limit olaps to\n");
417     fprintf(stderr, "            (Contig mode only)\n");
418     fprintf(stderr, "-k          if one or two digits, the length of a kmer, otherwise\n");
419     fprintf(stderr, "            the filename containing a list of kmers to ignore in\n");
420     fprintf(stderr, "            the hash table\n");
421     fprintf(stderr, "-l          specify the maximum number of overlaps per\n");
422     fprintf(stderr, "            fragment-end per batch of fragments.\n");
423     fprintf(stderr, "-m          allow multiple overlaps per oriented fragment pair\n");
424     fprintf(stderr, "-M          specify memory size.  Valid values are '8GB', '4GB',\n");
425     fprintf(stderr, "            '2GB', '1GB', '256MB'.  (Not for Contig mode)\n");
426     fprintf(stderr, "-o          specify output file name\n");
427     fprintf(stderr, "-P          write protoIO output (if not -partial)\n");
428     fprintf(stderr, "-r <range>  specify old fragments to overlap\n");
429     fprintf(stderr, "-t <n>      use <n> parallel threads\n");
430     fprintf(stderr, "-u          allow only 1 overlap per oriented fragment pair\n");
431     fprintf(stderr, "-z          skip the hopeless check (also skipped at > 0.06)\n");
432     fprintf(stderr, "\n");
433     fprintf(stderr, "--maxerate <n>     only output overlaps with fraction <n> or less error (e.g., 0.06 == 6%%)\n");
434     fprintf(stderr, "--minlength <n>    only output overlaps of <n> or more bases\n");
435     fprintf(stderr, "\n");
436     fprintf(stderr, "--hashbits n       Use n bits for the hash mask.\n");
437     fprintf(stderr, "--hashdatalen n    Load at most n bytes into the hash table at one time.\n");
438     fprintf(stderr, "--hashload f       Load to at most 0.0 < f < 1.0 capacity (default 0.7).\n");
439     fprintf(stderr, "\n");
440     fprintf(stderr, "--readsperbatch n  Force batch size to n.\n");
441     fprintf(stderr, "--readsperthread n Force each thread to process n reads.\n");
442     fprintf(stderr, "\n");
443     exit(1);
444   }
445 
446   //  We know enough now to set the hash function variables, and some other random variables.
447 
448   HSF1 = G.Kmer_Len - (G.Hash_Mask_Bits / 2);
449   HSF2 = 2 * G.Kmer_Len - G.Hash_Mask_Bits;
450   SV1  = HSF1 + 2;
451   SV2  = (HSF1 + HSF2) / 2;
452   SV3  = HSF2 - 2;
453 
454   //  Log parameters.
455 
456 #if 0
457   fprintf(stderr, "\n");
458   fprintf(stderr, "STRING_NUM_BITS          " F_U32 "\n", STRING_NUM_BITS);
459   fprintf(stderr, "OFFSET_BITS              " F_U32 "\n", OFFSET_BITS);
460   fprintf(stderr, "STRING_NUM_MASK          " F_U64 "\n", STRING_NUM_MASK);
461   fprintf(stderr, "OFFSET_MASK              " F_U64 "\n", OFFSET_MASK);
462   fprintf(stderr, "MAX_STRING_NUM           " F_U64 "\n", MAX_STRING_NUM);
463   fprintf(stderr, "\n");
464   fprintf(stderr, "Hash_Mask_Bits           " F_U32 "\n", G.Hash_Mask_Bits);
465   fprintf(stderr, "\n");
466   fprintf(stderr, "bgnHashID                " F_U32 "\n", G.bgnHashID);
467   fprintf(stderr, "bgnHashID                " F_U32 "\n", G.endHashID);
468   fprintf(stderr, "\n");
469   fprintf(stderr, "Max_Hash_Data_Len        " F_U64 "\n", G.Max_Hash_Data_Len);
470   fprintf(stderr, "Max_Hash_Load            %f\n", G.Max_Hash_Load);
471   fprintf(stderr, "Kmer Length              " F_U64 "\n", G.Kmer_Len);
472   fprintf(stderr, "Min Overlap Length       %d\n", G.Min_Olap_Len);
473   fprintf(stderr, "Max Error Rate           %f\n", G.maxErate);
474   fprintf(stderr, "Min Kmer Matches         " F_U64 "\n", G.Filter_By_Kmer_Count);
475   fprintf(stderr, "\n");
476   fprintf(stderr, "Num_PThreads             " F_U32 "\n", G.Num_PThreads);
477   fprintf(stderr, "\n");
478   fprintf(stderr, "sizeof(Hash_Bucket_t)    " F_U64 "\n",     (uint64)sizeof(Hash_Bucket_t));
479   fprintf(stderr, "sizeof(Check_Vector_t)   " F_U64 "\n",     (uint64)sizeof(Check_Vector_t));
480   fprintf(stderr, "sizeof(Hash_Frag_Info_t) " F_U64 "\n",     (uint64)sizeof(Hash_Frag_Info_t));
481   fprintf(stderr, "\n");
482   fprintf(stderr, "HASH_TABLE_SIZE          " F_U64 "\n",     HASH_TABLE_SIZE);
483   fprintf(stderr, "\n");
484   fprintf(stderr, "hash table size:         " F_U64    " MB\n", (HASH_TABLE_SIZE * sizeof(Hash_Bucket_t)) >> 20);
485   fprintf(stderr, "hash check array         " F_U64    " MB\n", (HASH_TABLE_SIZE    * sizeof (Check_Vector_t))   >> 20);
486   fprintf(stderr, "string info              " F_SIZE_T " MB\n", ((G.endHashID - G.bgnHashID + 1) * sizeof (Hash_Frag_Info_t)) >> 20);
487   fprintf(stderr, "string start             " F_SIZE_T " MB\n", ((G.endHashID - G.bgnHashID + 1) * sizeof (int64))            >> 20);
488   fprintf(stderr, "\n");
489 #endif
490 
491   assert (8 * sizeof (uint64) > 2 * G.Kmer_Len);
492 
493   Bit_Equivalent['a'] = Bit_Equivalent['A'] = 0;
494   Bit_Equivalent['c'] = Bit_Equivalent['C'] = 1;
495   Bit_Equivalent['g'] = Bit_Equivalent['G'] = 2;
496   Bit_Equivalent['t'] = Bit_Equivalent['T'] = 3;
497 
498   for  (int i = 0;  i < 256;  i ++) {
499     char  ch = tolower ((char) i);
500 
501     if  (ch == 'a' || ch == 'c' || ch == 'g' || ch == 't')
502       Char_Is_Bad[i] = 0;
503     else
504       Char_Is_Bad[i] = 1;
505   }
506 
507   Hash_Table       = new Hash_Bucket_t    [HASH_TABLE_SIZE];
508   Hash_Check_Array = new Check_Vector_t   [HASH_TABLE_SIZE];
509   String_Info      = new Hash_Frag_Info_t [G.endHashID - G.bgnHashID + 1];
510   String_Start     = new int64            [G.endHashID - G.bgnHashID + 1];
511 
512   String_Start_Size = G.endHashID - G.bgnHashID + 1;
513 
514   memset(Hash_Check_Array, 0, sizeof(Check_Vector_t)   * HASH_TABLE_SIZE);
515   memset(String_Info,      0, sizeof(Hash_Frag_Info_t) * (G.endHashID - G.bgnHashID + 1));
516   memset(String_Start,     0, sizeof(int64)            * (G.endHashID - G.bgnHashID + 1));
517 
518 
519 
520   OverlapDriver();
521 
522 
523 
524   delete [] basesData;
525   delete [] nextRef;
526 
527   delete [] String_Start;
528   delete [] String_Info;
529   delete [] Hash_Check_Array;
530   delete [] Hash_Table;
531 
532   FILE *stats = stderr;
533 
534   if (G.Outstat_Name != NULL) {
535     errno = 0;
536     stats = fopen(G.Outstat_Name, "w");
537     if (errno) {
538       fprintf(stderr, "WARNING: failed to open '%s' for writing: %s\n", G.Outstat_Name, strerror(errno));
539       stats = stderr;
540     }
541   }
542 
543   fprintf(stats, " Kmer hits without olaps = " F_S64 "\n", Kmer_Hits_Without_Olap_Ct);
544   fprintf(stats, "    Kmer hits with olaps = " F_S64 "\n", Kmer_Hits_With_Olap_Ct);
545   //fprintf(stats, "      Kmer hits below %u = " F_S64 "\n", G.Filter_By_Kmer_Count, Kmer_Hits_Skipped_Ct);
546   fprintf(stats, "  Multiple overlaps/pair = " F_S64 "\n", Multi_Overlap_Ct);
547   fprintf(stats, " Total overlaps produced = " F_S64 "\n", Total_Overlaps);
548   fprintf(stats, "      Contained overlaps = " F_S64 "\n", Contained_Overlap_Ct);
549   fprintf(stats, "       Dovetail overlaps = " F_S64 "\n", Dovetail_Overlap_Ct);
550   fprintf(stats, "Rejected by short window = " F_S64 "\n", Bad_Short_Window_Ct);
551   fprintf(stats, " Rejected by long window = " F_S64 "\n", Bad_Long_Window_Ct);
552 
553   AS_UTL_closeFile(stats, G.Outstat_Name);
554 
555   fprintf(stderr, "Bye.\n");
556 
557   return(0);
558 }
559