1
2 /******************************************************************************
3 *
4 * This file is part of canu, a software program that assembles whole-genome
5 * sequencing reads into contigs.
6 *
7 * This software is based on:
8 * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9 * the 'kmer package' r1994 (http://kmer.sourceforge.net)
10 *
11 * Except as indicated otherwise, this is a 'United States Government Work',
12 * and is released in the public domain.
13 *
14 * File 'README.licenses' in the root directory of this distribution
15 * contains full conditions and disclaimers.
16 */
17
18 #include "overlapInCore.H"
19 #include "strings.H"
20
21 oicParameters G;
22
23
24 uint32 STRING_NUM_BITS = 31; // MUST BE EXACTLY THIS
25 uint32 OFFSET_BITS = 31;
26
27 uint64 TRUELY_ONE = (uint64)1;
28 uint64 TRUELY_ZERO = (uint64)0;
29
30 uint64 STRING_NUM_MASK = (TRUELY_ONE << STRING_NUM_BITS) - 1;
31 uint64 OFFSET_MASK = (TRUELY_ONE << OFFSET_BITS) - 1;
32
33 uint64 MAX_STRING_NUM = STRING_NUM_MASK;
34
35
36
37 int64 Bad_Short_Window_Ct = 0;
38 // The number of overlaps rejected because of too many errors in a small window
39
40 int64 Bad_Long_Window_Ct = 0;
41 // The number of overlaps rejected because of too many errors in a long window
42
43
44 // Stores sequence and quality data of fragments in hash table
45 char *basesData = NULL;
46 size_t Data_Len = 0;
47
48 String_Ref_t *nextRef = NULL;
49
50 size_t Extra_Data_Len;
51 // Total length available for hash table string data,
52 // including both regular strings and extra strings
53 // added from kmer screening
54
55 uint64 Max_Extra_Ref_Space = 0; // allocated amount
56 uint64 Extra_Ref_Ct = 0; // used amount
57 String_Ref_t *Extra_Ref_Space = NULL;
58 uint64 Extra_String_Ct = 0;
59 // Number of extra strings of screen kmers added to hash table
60
61 uint64 Extra_String_Subcount = 0;
62 // Number of kmers already added to last extra string in hash table
63
64 Check_Vector_t * Hash_Check_Array = NULL;
65 // Bit vector to eliminate impossible hash matches
66
67 uint64 Hash_String_Num_Offset = 1;
68 Hash_Bucket_t * Hash_Table;
69
70 uint64 Kmer_Hits_With_Olap_Ct = 0;
71 uint64 Kmer_Hits_Without_Olap_Ct = 0;
72 uint64 Kmer_Hits_Skipped_Ct = 0;
73 uint64 Multi_Overlap_Ct = 0;
74
75 uint64 String_Ct;
76 // Number of fragments in the hash table
77
78 Hash_Frag_Info_t * String_Info = NULL;
79 int64 * String_Start = NULL;
80 uint32 String_Start_Size = 0;
81 // Number of available positions in String_Start
82
83 size_t Used_Data_Len = 0;
84 // Number of bytes of Data currently occupied, including
85 // regular strings and extra kmer screen strings
86
87 int32 Bit_Equivalent[256] = {0};
88 // Table to convert characters to 2-bit integer code
89
90 int32 Char_Is_Bad[256] = {0};
91 // Table to check if character is not a, c, g or t.
92
93 uint64 Hash_Entries = 0;
94
95 uint64 Total_Overlaps = 0;
96 uint64 Contained_Overlap_Ct = 0;
97 uint64 Dovetail_Overlap_Ct = 0;
98
99 uint64 HSF1 = 666;
100 uint64 HSF2 = 666;
101 uint64 SV1 = 666;
102 uint64 SV2 = 666;
103 uint64 SV3 = 666;
104
105 ovFile *Out_BOF = NULL;
106
107
108
109 // Allocate memory for (* WA) and set initial values.
110 // Set thread_id field to id .
111 void
Initialize_Work_Area(Work_Area_t * WA,int id,sqStore * readStore,sqCache * readCache)112 Initialize_Work_Area(Work_Area_t *WA, int id, sqStore *readStore, sqCache *readCache) {
113 uint64 allocated = 0;
114
115 WA->String_Olap_Size = INIT_STRING_OLAP_SIZE;
116 WA->String_Olap_Space = new String_Olap_t [WA->String_Olap_Size];
117
118 WA->Match_Node_Size = INIT_MATCH_NODE_SIZE;
119 WA->Match_Node_Space = new Match_Node_t [WA->Match_Node_Size];
120
121 allocated += WA->String_Olap_Size * sizeof (String_Olap_t);
122 allocated += WA->Match_Node_Size * sizeof (Match_Node_t);
123
124 WA->status = 0;
125 WA->thread_id = id;
126
127 WA->readStore = readStore;
128 WA->readCache = readCache;
129
130 WA->overlapsLen = 0;
131 WA->overlapsMax = 1024 * 1024 / sizeof(ovOverlap);
132 WA->overlaps = new ovOverlap [WA->overlapsMax];
133
134 allocated += sizeof(ovOverlap) * WA->overlapsMax;
135
136 WA->editDist = new prefixEditDistance(G.Doing_Partial_Overlaps,
137 G.maxErate,
138 G.maxErate * G.alignNoise);
139
140 WA->q_diff = new char [AS_MAX_READLEN];
141 WA->distinct_olap = new Olap_Info_t [MAX_DISTINCT_OLAPS];
142 }
143
144
145 void
Delete_Work_Area(Work_Area_t * WA)146 Delete_Work_Area(Work_Area_t *WA) {
147 delete WA->editDist;
148 delete [] WA->String_Olap_Space;
149 delete [] WA->Match_Node_Space;
150 delete [] WA->overlaps;
151
152 delete [] WA->distinct_olap;
153 delete [] WA->q_diff;
154 }
155
156
157
158
159 int
OverlapDriver(void)160 OverlapDriver(void) {
161
162 Work_Area_t *thread_wa = new Work_Area_t [G.Num_PThreads];
163
164 sqStore *readStore = new sqStore(G.Frag_Store_Path);
165 sqCache *readCache = new sqCache(readStore);
166
167 Out_BOF = new ovFile(readStore, G.Outfile_Name, ovFileFullWrite);
168
169 fprintf(stderr, "Initializing %u work areas.\n", G.Num_PThreads);
170
171 #pragma omp parallel for
172 for (uint32 i=0; i<G.Num_PThreads; i++)
173 Initialize_Work_Area(thread_wa+i, i, readStore, readCache);
174
175 // Make sure both the hash and reference ranges are valid.
176
177 if (G.bgnHashID < 1)
178 G.bgnHashID = 1;
179
180 if (readStore->sqStore_lastReadID() < G.endHashID)
181 G.endHashID = readStore->sqStore_lastReadID();
182
183 if (G.bgnRefID < 1)
184 G.bgnRefID = 1;
185
186 if (G.endRefID > readStore->sqStore_lastReadID())
187 G.endRefID = readStore->sqStore_lastReadID();
188
189 // Load the reference range into the cache
190
191 fprintf(stderr, "Loading reference reads %u-%u inclusive.\n", G.bgnRefID, G.endRefID);
192
193 readCache->sqCache_loadReads(G.bgnRefID, G.endRefID, true);
194
195 // Note distinction between the local bgn/end and the global G.bgn/G.end.
196
197 uint32 bgnHashID = G.bgnHashID;
198 uint32 endHashID = G.endHashID;
199
200 // Iterate over read blocks, build a hash table, then search in threads.
201
202 while (bgnHashID < G.endHashID) {
203 if (endHashID > G.endHashID)
204 endHashID = G.endHashID;
205
206 assert(0 < bgnHashID);
207 assert(bgnHashID <= endHashID);
208 assert(endHashID <= readStore->sqStore_lastReadID());
209
210 // Load as much as we can. If we load less than expected, the endHashID is updated to reflect
211 // the last read loaded.
212
213 endHashID = Build_Hash_Index(readStore, bgnHashID, endHashID);
214
215 // Decide the range of reads to process. No more than what is loaded in the table.
216
217 G.curRefID = G.bgnRefID;
218
219 // The old version used to further divide the ref range into blocks of at most
220 // Max_Reads_Per_Batch so that those reads could be loaded into core. We don't
221 // need to do that anymore.
222
223 G.perThread = 1 + (G.endRefID - G.bgnRefID) / G.Num_PThreads / 8;
224
225 fprintf(stderr, "\n");
226 fprintf(stderr, "Range: %u-%u. Store has %u reads.\n",
227 G.bgnRefID, G.endRefID, readStore->sqStore_lastReadID());
228 fprintf(stderr, "Chunk: " F_U32 " reads/thread -- (G.endRefID=" F_U32 " - G.bgnRefID=" F_U32 ") / G.Num_PThreads=" F_U32 " / 8\n",
229 G.perThread, G.endRefID, G.bgnRefID, G.Num_PThreads);
230
231 fprintf(stderr, "\n");
232 fprintf(stderr, "Starting " F_U32 "-" F_U32 " with " F_U32 " per thread\n", G.bgnRefID, G.endRefID, G.perThread);
233 fprintf(stderr, "\n");
234
235 // Initialize each thread, reset the current position. curRefID and endRefID are updated, this
236 // cannot be done in the parallel loop!
237
238 for (uint32 i=0; i<G.Num_PThreads; i++) {
239 thread_wa[i].bgnID = G.curRefID;
240 thread_wa[i].endID = thread_wa[i].bgnID + G.perThread - 1;
241
242 G.curRefID = thread_wa[i].endID + 1; // Global value updated!
243 }
244
245 #pragma omp parallel for
246 for (uint32 i=0; i<G.Num_PThreads; i++)
247 Process_Overlaps(thread_wa + i);
248
249 // Clear out the hash table. This stuff is allocated in Build_Hash_Index
250
251 delete [] basesData; basesData = NULL;
252 delete [] nextRef; nextRef = NULL;
253
254 // This one could be left allocated, except for the last iteration.
255
256 delete [] Extra_Ref_Space; Extra_Ref_Space = NULL; Max_Extra_Ref_Space = 0;
257
258 // Prepare for another hash table iteration.
259 bgnHashID = endHashID + 1;
260 endHashID = G.endHashID;
261 }
262
263 delete Out_BOF;
264
265 delete readCache;
266
267 delete readStore;
268
269 for (uint32 i=0; i<G.Num_PThreads; i++)
270 Delete_Work_Area(thread_wa + i);
271
272 delete [] thread_wa;
273
274 return 0;
275 }
276
277
278
279
280
281 int
main(int argc,char ** argv)282 main(int argc, char **argv) {
283 int illegal;
284
285 argc = AS_configure(argc, argv);
286
287 G.initialize(); // Probably redundant with the call in the constructor, but doesn't hurt.
288
289 int err=0;
290 int arg=1;
291 while (arg < argc) {
292 if (strcmp(argv[arg], "-partial") == 0) {
293 G.Doing_Partial_Overlaps = true;
294
295 } else if (strcmp(argv[arg], "-h") == 0) {
296 decodeRange(argv[++arg], G.bgnHashID, G.endHashID);
297
298 } else if (strcmp(argv[arg], "-H") == 0) {
299 decodeRange(argv[++arg], G.minLibToHash, G.maxLibToHash);
300
301 } else if (strcmp(argv[arg], "-r") == 0) {
302 decodeRange(argv[++arg], G.bgnRefID, G.endRefID);
303
304 } else if (strcmp(argv[arg], "-R") == 0) {
305 decodeRange(argv[++arg], G.minLibToRef, G.maxLibToRef);
306
307 } else if (strcmp(argv[arg], "-k") == 0) {
308 arg++;
309
310 if (fileExists(argv[arg]) == true)
311 G.kmerSkipFileName = argv[arg];
312 else
313 G.Kmer_Len = strtouint32(argv[arg]);
314
315 } else if (strcmp(argv[arg], "-l") == 0) {
316 G.Frag_Olap_Limit = strtol(argv[++arg], NULL, 10);
317 if (G.Frag_Olap_Limit < 1)
318 G.Frag_Olap_Limit = UINT64_MAX;
319
320 } else if (strcmp(argv[arg], "-m") == 0) {
321 G.Unique_Olap_Per_Pair = false;
322 } else if (strcmp(argv[arg], "-u") == 0) {
323 G.Unique_Olap_Per_Pair = true;
324
325 } else if (strcmp(argv[arg], "--hashbits") == 0) {
326 G.Hash_Mask_Bits = strtoull(argv[++arg], NULL, 10);
327
328 } else if (strcmp(argv[arg], "--hashdatalen") == 0) {
329 G.Max_Hash_Data_Len = strtoull(argv[++arg], NULL, 10);
330
331 } else if (strcmp(argv[arg], "--hashload") == 0) {
332 G.Max_Hash_Load = atof(argv[++arg]);
333
334 #if 0
335 // This should still work, but not useful unless String_Ref_t is
336 // changed to uint32.
337 //
338 //fprintf(stderr, "--maxreadlen n For batches with all short reads, pack bits differently to\n");
339 //fprintf(stderr, " process more reads per batch.\n");
340 //fprintf(stderr, " all reads must be shorter than n\n");
341 //fprintf(stderr, " limited to 2^(30-m) reads\n");
342 //fprintf(stderr, " Common values:\n");
343 //fprintf(stderr, " maxreadlen 2048->hashstrings 524288 (default)\n");
344 //fprintf(stderr, " maxreadlen 512->hashstrings 2097152\n");
345 //fprintf(stderr, " maxreadlen 128->hashstrings 8388608\n");
346 //fprintf(stderr, "\n");
347
348 } else if (strcmp(argv[arg], "--maxreadlen") == 0) {
349 // Quite the gross way to do this, but simple.
350 uint32 desired = strtoul(argv[++arg], NULL, 10);
351 OFFSET_BITS = 1;
352 while (((uint32)1 << OFFSET_BITS) < desired)
353 OFFSET_BITS++;
354
355 STRING_NUM_BITS = 30 - OFFSET_BITS;
356
357 STRING_NUM_MASK = (1 << STRING_NUM_BITS) - 1;
358 OFFSET_MASK = (1 << OFFSET_BITS) - 1;
359
360 MAX_STRING_NUM = STRING_NUM_MASK;
361 #endif
362
363 } else if (strcmp(argv[arg], "-o") == 0) {
364 G.Outfile_Name = argv[++arg];
365
366 } else if (strcmp(argv[arg], "-s") == 0) {
367 G.Outstat_Name = argv[++arg];
368
369 } else if (strcmp(argv[arg], "-t") == 0) {
370 G.Num_PThreads = setNumThreads(argv[++arg]);
371
372
373 } else if (strcmp(argv[arg], "--minlength") == 0) {
374 G.Min_Olap_Len = strtol (argv[++arg], NULL, 10);
375 } else if (strcmp(argv[arg], "--minkmers") == 0) {
376 G.Filter_By_Kmer_Count = int(floor(exp(-1.0 * (double)G.Kmer_Len * G.maxErate) * (G.Min_Olap_Len - G.Kmer_Len + 1)));
377 } else if (strcmp(argv[arg], "--maxerate") == 0) {
378 G.maxErate = strtof(argv[++arg], NULL);
379 } else if (strcmp(argv[arg], "--alignnoise") == 0) {
380 G.alignNoise = strtof(argv[++arg], NULL);
381
382 } else if (strcmp(argv[arg], "-z") == 0) {
383 G.Use_Hopeless_Check = false;
384
385 } else {
386 if (G.Frag_Store_Path == NULL) {
387 G.Frag_Store_Path = argv[arg];
388 } else {
389 fprintf(stderr, "Unknown option '%s'\n", argv[arg]);
390 err++;
391 }
392 }
393 arg++;
394 }
395
396 // Fix up some flags if we're allowing high error rates.
397 //
398 if (G.maxErate > 0.06)
399 G.Use_Hopeless_Check = false;
400
401 if (G.Kmer_Len == 0)
402 fprintf(stderr, "* No kmer length supplied; -k needed!\n"), err++;
403
404 if (G.Outfile_Name == NULL)
405 fprintf (stderr, "ERROR: No output file name specified\n"), err++;
406
407 if ((err) || (G.Frag_Store_Path == NULL)) {
408 fprintf(stderr, "USAGE: %s [options] <seqStorePath>\n", argv[0]);
409 fprintf(stderr, "\n");
410 fprintf(stderr, "-b <fn> in contig mode, specify the output file\n");
411 fprintf(stderr, "-c contig mode. Use 2 frag stores. First is\n");
412 fprintf(stderr, " for reads; second is for contigs\n");
413 fprintf(stderr, "-partial do partial overlaps\n");
414 fprintf(stderr, "-h <range> to specify fragments to put in hash table\n");
415 fprintf(stderr, " Implies LSF mode (no changes to frag store)\n");
416 fprintf(stderr, "-I designate a file of frag iids to limit olaps to\n");
417 fprintf(stderr, " (Contig mode only)\n");
418 fprintf(stderr, "-k if one or two digits, the length of a kmer, otherwise\n");
419 fprintf(stderr, " the filename containing a list of kmers to ignore in\n");
420 fprintf(stderr, " the hash table\n");
421 fprintf(stderr, "-l specify the maximum number of overlaps per\n");
422 fprintf(stderr, " fragment-end per batch of fragments.\n");
423 fprintf(stderr, "-m allow multiple overlaps per oriented fragment pair\n");
424 fprintf(stderr, "-M specify memory size. Valid values are '8GB', '4GB',\n");
425 fprintf(stderr, " '2GB', '1GB', '256MB'. (Not for Contig mode)\n");
426 fprintf(stderr, "-o specify output file name\n");
427 fprintf(stderr, "-P write protoIO output (if not -partial)\n");
428 fprintf(stderr, "-r <range> specify old fragments to overlap\n");
429 fprintf(stderr, "-t <n> use <n> parallel threads\n");
430 fprintf(stderr, "-u allow only 1 overlap per oriented fragment pair\n");
431 fprintf(stderr, "-z skip the hopeless check (also skipped at > 0.06)\n");
432 fprintf(stderr, "\n");
433 fprintf(stderr, "--maxerate <n> only output overlaps with fraction <n> or less error (e.g., 0.06 == 6%%)\n");
434 fprintf(stderr, "--minlength <n> only output overlaps of <n> or more bases\n");
435 fprintf(stderr, "\n");
436 fprintf(stderr, "--hashbits n Use n bits for the hash mask.\n");
437 fprintf(stderr, "--hashdatalen n Load at most n bytes into the hash table at one time.\n");
438 fprintf(stderr, "--hashload f Load to at most 0.0 < f < 1.0 capacity (default 0.7).\n");
439 fprintf(stderr, "\n");
440 fprintf(stderr, "--readsperbatch n Force batch size to n.\n");
441 fprintf(stderr, "--readsperthread n Force each thread to process n reads.\n");
442 fprintf(stderr, "\n");
443 exit(1);
444 }
445
446 // We know enough now to set the hash function variables, and some other random variables.
447
448 HSF1 = G.Kmer_Len - (G.Hash_Mask_Bits / 2);
449 HSF2 = 2 * G.Kmer_Len - G.Hash_Mask_Bits;
450 SV1 = HSF1 + 2;
451 SV2 = (HSF1 + HSF2) / 2;
452 SV3 = HSF2 - 2;
453
454 // Log parameters.
455
456 #if 0
457 fprintf(stderr, "\n");
458 fprintf(stderr, "STRING_NUM_BITS " F_U32 "\n", STRING_NUM_BITS);
459 fprintf(stderr, "OFFSET_BITS " F_U32 "\n", OFFSET_BITS);
460 fprintf(stderr, "STRING_NUM_MASK " F_U64 "\n", STRING_NUM_MASK);
461 fprintf(stderr, "OFFSET_MASK " F_U64 "\n", OFFSET_MASK);
462 fprintf(stderr, "MAX_STRING_NUM " F_U64 "\n", MAX_STRING_NUM);
463 fprintf(stderr, "\n");
464 fprintf(stderr, "Hash_Mask_Bits " F_U32 "\n", G.Hash_Mask_Bits);
465 fprintf(stderr, "\n");
466 fprintf(stderr, "bgnHashID " F_U32 "\n", G.bgnHashID);
467 fprintf(stderr, "bgnHashID " F_U32 "\n", G.endHashID);
468 fprintf(stderr, "\n");
469 fprintf(stderr, "Max_Hash_Data_Len " F_U64 "\n", G.Max_Hash_Data_Len);
470 fprintf(stderr, "Max_Hash_Load %f\n", G.Max_Hash_Load);
471 fprintf(stderr, "Kmer Length " F_U64 "\n", G.Kmer_Len);
472 fprintf(stderr, "Min Overlap Length %d\n", G.Min_Olap_Len);
473 fprintf(stderr, "Max Error Rate %f\n", G.maxErate);
474 fprintf(stderr, "Min Kmer Matches " F_U64 "\n", G.Filter_By_Kmer_Count);
475 fprintf(stderr, "\n");
476 fprintf(stderr, "Num_PThreads " F_U32 "\n", G.Num_PThreads);
477 fprintf(stderr, "\n");
478 fprintf(stderr, "sizeof(Hash_Bucket_t) " F_U64 "\n", (uint64)sizeof(Hash_Bucket_t));
479 fprintf(stderr, "sizeof(Check_Vector_t) " F_U64 "\n", (uint64)sizeof(Check_Vector_t));
480 fprintf(stderr, "sizeof(Hash_Frag_Info_t) " F_U64 "\n", (uint64)sizeof(Hash_Frag_Info_t));
481 fprintf(stderr, "\n");
482 fprintf(stderr, "HASH_TABLE_SIZE " F_U64 "\n", HASH_TABLE_SIZE);
483 fprintf(stderr, "\n");
484 fprintf(stderr, "hash table size: " F_U64 " MB\n", (HASH_TABLE_SIZE * sizeof(Hash_Bucket_t)) >> 20);
485 fprintf(stderr, "hash check array " F_U64 " MB\n", (HASH_TABLE_SIZE * sizeof (Check_Vector_t)) >> 20);
486 fprintf(stderr, "string info " F_SIZE_T " MB\n", ((G.endHashID - G.bgnHashID + 1) * sizeof (Hash_Frag_Info_t)) >> 20);
487 fprintf(stderr, "string start " F_SIZE_T " MB\n", ((G.endHashID - G.bgnHashID + 1) * sizeof (int64)) >> 20);
488 fprintf(stderr, "\n");
489 #endif
490
491 assert (8 * sizeof (uint64) > 2 * G.Kmer_Len);
492
493 Bit_Equivalent['a'] = Bit_Equivalent['A'] = 0;
494 Bit_Equivalent['c'] = Bit_Equivalent['C'] = 1;
495 Bit_Equivalent['g'] = Bit_Equivalent['G'] = 2;
496 Bit_Equivalent['t'] = Bit_Equivalent['T'] = 3;
497
498 for (int i = 0; i < 256; i ++) {
499 char ch = tolower ((char) i);
500
501 if (ch == 'a' || ch == 'c' || ch == 'g' || ch == 't')
502 Char_Is_Bad[i] = 0;
503 else
504 Char_Is_Bad[i] = 1;
505 }
506
507 Hash_Table = new Hash_Bucket_t [HASH_TABLE_SIZE];
508 Hash_Check_Array = new Check_Vector_t [HASH_TABLE_SIZE];
509 String_Info = new Hash_Frag_Info_t [G.endHashID - G.bgnHashID + 1];
510 String_Start = new int64 [G.endHashID - G.bgnHashID + 1];
511
512 String_Start_Size = G.endHashID - G.bgnHashID + 1;
513
514 memset(Hash_Check_Array, 0, sizeof(Check_Vector_t) * HASH_TABLE_SIZE);
515 memset(String_Info, 0, sizeof(Hash_Frag_Info_t) * (G.endHashID - G.bgnHashID + 1));
516 memset(String_Start, 0, sizeof(int64) * (G.endHashID - G.bgnHashID + 1));
517
518
519
520 OverlapDriver();
521
522
523
524 delete [] basesData;
525 delete [] nextRef;
526
527 delete [] String_Start;
528 delete [] String_Info;
529 delete [] Hash_Check_Array;
530 delete [] Hash_Table;
531
532 FILE *stats = stderr;
533
534 if (G.Outstat_Name != NULL) {
535 errno = 0;
536 stats = fopen(G.Outstat_Name, "w");
537 if (errno) {
538 fprintf(stderr, "WARNING: failed to open '%s' for writing: %s\n", G.Outstat_Name, strerror(errno));
539 stats = stderr;
540 }
541 }
542
543 fprintf(stats, " Kmer hits without olaps = " F_S64 "\n", Kmer_Hits_Without_Olap_Ct);
544 fprintf(stats, " Kmer hits with olaps = " F_S64 "\n", Kmer_Hits_With_Olap_Ct);
545 //fprintf(stats, " Kmer hits below %u = " F_S64 "\n", G.Filter_By_Kmer_Count, Kmer_Hits_Skipped_Ct);
546 fprintf(stats, " Multiple overlaps/pair = " F_S64 "\n", Multi_Overlap_Ct);
547 fprintf(stats, " Total overlaps produced = " F_S64 "\n", Total_Overlaps);
548 fprintf(stats, " Contained overlaps = " F_S64 "\n", Contained_Overlap_Ct);
549 fprintf(stats, " Dovetail overlaps = " F_S64 "\n", Dovetail_Overlap_Ct);
550 fprintf(stats, "Rejected by short window = " F_S64 "\n", Bad_Short_Window_Ct);
551 fprintf(stats, " Rejected by long window = " F_S64 "\n", Bad_Long_Window_Ct);
552
553 AS_UTL_closeFile(stats, G.Outstat_Name);
554
555 fprintf(stderr, "Bye.\n");
556
557 return(0);
558 }
559