1 #include <stdio.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <math.h>
5 #include <pthread.h>
6 #include <assert.h>
7 #include "args.h"
8 #include "emp.h"
9 #include "reader.h"
10 #include "async.h"
11 #include "pear.h"
12
13 /** @file pear-pt.c
14 @brief Main file containing scoring and assembly related functions (pthreads version)
15 */
16 #define THREAD_MIN_PACKET_SIZE 500
17 #define THREAD_PACKET_SIZE_DELTA 20
18
19 #define PEAR_MATCH_SCORE 1
20 #define PEAR_MISMATCH_SCORE 1
21
22 #define NUM_OF_OUTFILES 4
23
24 #define PEAR_READ_UNASSEMBLED 3
25 #define PEAR_READ_DISCARDED 2
26 #define PEAR_READ_ASSEMBLED 1
27
28 #define PEAR_DECODE_OUT_TYPE(x) (*((x->data) - 1))
29 #define PEAR_RESET_OUT_TYPE(x) *((x->data) - 1) = 0
30 #define PEAR_DECODE_ASM_TYPE(x) (*((x->qscore) - 1))
31 #define PEAR_RESET_ASM_TYPE(x) *((x->qscore) - 1) = 0
32 #define PEAR_SET_ASM_TYPE(x,y) *((x->qscore) - 1) = y
33
34 #define PEAR_READ_OUT_SINGLE 0
35 #define PEAR_READ_OUT_BOTH 1
36 #define PEAR_SET_OUT_TYPE(x,y) *((x->data) - 1) = y
37
38 #define PEAR_MERGE_NO_TRIM 1
39 #define PEAR_MERGE_TRIM_FORWARD 2
40 #define PEAR_MERGE_TRIM_BOTH 3
41 #define PEAR_MERGE_TRIM_REVERSE 4
42
43 #define DEGENERATE(x) (((x) ^ 1) && ((x) ^ 2) && ((x) ^ 4) && ((x) ^ 8))
44
45 char map_nt[256] =
46 {
47 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
48 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
49 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, -1, -1,
50 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15,
51 -1, 1, 14, 2, 13, -1, -1, 4, 11, -1, -1, 12, -1, 3, 15, 15,
52 -1, -1, 5, 6, 8, 8, 7, 9, 15, 10, -1, -1, -1, -1, -1, -1,
53 -1, 1, 14, 2, 13, -1, -1, 4, 11, -1, -1, 12, -1, 3, 15, 15,
54 -1, -1, 5, 6, 8, 8, 7, 9, 15, 10, -1, -1, -1, -1, -1, -1,
55 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
56 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
57 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
58 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
59 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
60 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
61 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
62 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
63 };
64
65 /* reverse mapping of number to nucleotide */
66 char revmap_nt[16] =
67 {
68 '\0', 'A', 'C', 'M', 'G', 'R', 'S', 'V',
69 'T', 'W', 'Y', 'H', 'K', 'D', 'B', 'N'
70 };
71
72 /* complements of the nucleotides */
73 char cpl_nt[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
74
75 /* translate from a 4-bit code to a table of 6 entries (AC, AG, AT, CG, CT, GT) */
76 int translate[16] = {-1, 0, 1, 4, 2, 5, 7, -1, 3, 6, 8, -1, 9, -1, -1, -1};
77
78 extern void print_number (size_t x);
79
80 static int trim_cpl (fastqRead * read, struct user_args * sw, double * uncalled);
81 static int trim (fastqRead * read, struct user_args * sw, double * uncalled);
82 static void init_scores (struct emp_freq * ef);
83 static char * makefilename (const char * prefix, const char * suffix);
84 static void mstrcpl (char * s);
85 static void mstrrev (char * s);
86 static INLINE int assembly_FORWARD_LONGER (fastqRead * forward, fastqRead * reverse, struct emp_freq * ef, struct user_args * sw, int nForward, int nReverse);
87 static void * entry_point_ef (void * data);
88 static void * entry_point (void * data);
89 static void DisplayInstance (struct user_args * sw);
90 static void free_global_thread_memory (void);
91 static void close_output_files (void);
92 static void destroy_thr_global (void);
93 static void fatal(const char * msg);
94 #if 0
95 static int validate_input (int nleft, int nright);
96 #endif
97 static void * emp_entry_point (void * data);
98 static void init_thr_global (void);
99 static INLINE int assembly_ef (fastqRead * forward, fastqRead * reverse, struct emp_freq * ef, struct user_args * sw);
100 static void flip_list (void);
101 static INLINE int assign_reads (memBlockInfo * block, struct thread_local_t * thr_local);
102 static INLINE int assembly_REVERSE_LONGER (fastqRead * forward, fastqRead * reverse, struct emp_freq * ef, struct user_args * sw, int nForward, int nReverse);
103 static INLINE int assembly_READS_EQUAL (fastqRead * forward, fastqRead * reverse, struct emp_freq * ef, struct user_args * sw, int n);
104 static void write_data (fastqRead ** fwd, fastqRead ** rev, unsigned int elms, FILE ** fd, struct user_args * sw);
105 static INLINE int assembly (fastqRead * left, fastqRead * right, struct user_args * sw);
106 static INLINE void scoring_nm (char dleft, char dright, char qleft, char qright, int score_method, double * score, double * oes);
107 static INLINE void scoring_ef_nm (char dleft, char dright, char qleft, char qright, int score_method, double * score, double * oes, struct emp_freq * ef);
108
109 static char * outfile_extensions[NUM_OF_OUTFILES] = { ".assembled.fastq",
110 ".unassembled.forward.fastq",
111 ".unassembled.reverse.fastq",
112 ".discarded.fastq" };
113
114 static char * sanityCheckMessage[2] = {
115 "\n[!!] Forward reads file contains more lines. Merging is done line-by-line"
116 "on both files and remaining reads in forward file are ignored. You are"
117 "strongly advised to check that corresponding paired-end reads are located at"
118 "the same line numbers in your files.\n\n",
119 "\n[!!] Reverse reads file contains more lines. Merging is done line-by-line"
120 "on both files and remaining reads in reverse file are ignored. You are"
121 "strongly advised to check that corresponding paired-end reads are located at"
122 "the same line numbers in your files.\n\n"
123 };
124
125 static pthread_mutex_t cs_mutex_wnd = PTHREAD_MUTEX_INITIALIZER;
126 static pthread_mutex_t cs_mutex_io = PTHREAD_MUTEX_INITIALIZER;
127 static pthread_cond_t cs_mutex_cond = PTHREAD_COND_INITIALIZER;
128
129 /* used for locking the screen when debugging */
130 #ifdef __DEBUG__
131 static pthread_mutex_t cs_mutex_out = PTHREAD_MUTEX_INITIALIZER;
132 #endif
133
134 struct thread_global_t thr_global;
135
136 int inputFileSanity = 0; /* Check on whether forward/reverse files have the same number of reads */
137
138
139 static unsigned long g_count_assembled = 0;
140 static unsigned long g_count_discarded = 0;
141 static unsigned long g_count_unassembled = 0;
142 static unsigned long g_count_total = 0;
143
144
145 unsigned long total_progress = 0;
146 unsigned long current_progress = 0;
147
148
149 //int stat_test (double, double, int, double);
150 int stat_test2 (double, double, int, double);
151
152 static double assemble_overlap (fastqRead * left,
153 fastqRead * right,
154 int base_left,
155 int base_right,
156 int ol_size,
157 fastqRead * ai,
158 struct user_args * sw);
159
160 double new_eq[4096];
161 double new_neq[4096];
162
163 /*
164 double new_eqA[4096];
165 double new_eqC[4096];
166 double new_eqG[4096];
167 double new_eqT[4096];
168 double new_diff[6][4096];
169 */
170
171 double penaltyEF[10][4096];
172
173
174
fatal(const char * msg)175 static void fatal(const char * msg)
176 {
177 fprintf(stderr, "Error: %s\n", msg);
178 exit(1);
179 }
180
convert(char * s)181 static void convert(char * s)
182 {
183 char c;
184 char m;
185 char * p = s;
186 int i = 0;
187
188 while ((c = *p++))
189 {
190 if ((m = map_nt[(int)c]) >= 0)
191 *(s + i++) = m;
192 else
193 fatal("Illegal character in sequence.");
194 }
195 }
196
revconvert(char * s)197 static void revconvert(char * s)
198 {
199 char c;
200 char * p = s;
201 int i = 0;
202
203 while ((c = *p++))
204 *(s + i++) = revmap_nt[(int)c];
205 }
206
remove_base_qscores(char * s,int base)207 static void remove_base_qscores (char * s, int base)
208 {
209 while (*s)
210 {
211 *s = *s - base + 1; /* qscores must start from 1 to distinguish end-of-string */
212 ++s;
213 }
214 }
215
add_base_qscores(char * s,int base,int cap)216 static void add_base_qscores (char * s, int base, int cap)
217 {
218 if (cap)
219 {
220 cap = cap + 1;
221 while (*s)
222 {
223 if (*s > cap) *s = cap;
224 *s = *s + base - 1;
225 ++s;
226 }
227 }
228 else
229 while (*s)
230 {
231 *s = *s + base - 1;
232 ++s;
233 }
234 }
235
236
237 /** @brief Trimming of forward part of unassembled sequence
238 *
239 * Finds two adjacent quality scores that are smaller than the minimum quality score threshold \a min_quality.
240 * It then \e trims the part starting from the rightmost quality score position.
241 *
242 * @param read Forward sequence
243 * @param min_quality Minimum quality score threshold
244 * @param uncalled Variable to store the ratio of uncalled bases
245 * @return Returns the length of the trimmed sequence
246 */
trim(fastqRead * read,struct user_args * sw,double * uncalled)247 static int trim (fastqRead * read, struct user_args * sw, double * uncalled)
248 {
249 int
250 i;
251 char
252 * qscore,
253 * data;
254
255 qscore = read->qscore + 1;
256 data = read->data + 1;
257 *uncalled = 0;
258
259 i = 1;
260 if (!*data)
261 {
262 if (DEGENERATE (*(data - 1))) ++ *uncalled;
263 return (i);
264 }
265
266 while (*data)
267 {
268 if (DEGENERATE(*(data - 1))) ++ (*uncalled);
269 if (*(data - 1) <= sw->qual_thres && *data <= sw->qual_thres)
270 {
271 *data = 0;
272 *qscore = 0;
273 *uncalled = (*uncalled) / (i);
274 return (i);
275 }
276 ++ i;
277 ++ data;
278 }
279 if (DEGENERATE(*(data - 1))) ++ (*uncalled);
280 *uncalled = (*uncalled) / i;
281 return (i);
282 }
283
284 /** @brief Trimming of reverse part (reversed + complemented) of unassembled sequence
285 *
286 * Finds two adjacent quality scores that are smaller than the minimum quality score threshold \a min_quality.
287 * It then \e trims the part starting from the rightmost quality score position.
288 *
289 * @param read Forward sequence
290 * @param min_quality Minimum quality score threshold
291 * @param uncalled Variable to store the ratio of uncalled bases
292 * @return Returns the length of the trimmed sequence
293 */
trim_cpl(fastqRead * read,struct user_args * sw,double * uncalled)294 static int trim_cpl (fastqRead * read, struct user_args * sw, double * uncalled)
295 {
296 int
297 i,
298 len;
299 char
300 * qscore,
301 * data;
302
303 qscore = read->qscore;
304 data = read->data;
305 *uncalled = 0;
306 len = 0;
307
308 while (*data)
309 {
310 ++len;
311 ++data;
312 }
313
314 data = read->data;
315 for (i = len - 1; i > 0; -- i)
316 {
317 if (DEGENERATE (data[i])) ++ *uncalled;
318 if (qscore[i] <= sw->qual_thres && qscore[i - 1] <= sw->qual_thres)
319 {
320 qscore[i - 1] = 0;
321 data[i - 1] = 0;
322 memmove (data, data + i, len - i + 1);
323 memmove (qscore, qscore + i, len - i + 1);
324 *uncalled = (*uncalled) / (len - i);
325 return (len - i);
326 }
327 }
328 if (DEGENERATE (*data)) ++ *uncalled;
329 *uncalled = (*uncalled) / len;
330 return (len);
331 }
332
333 /** @brief Initialize table of precomputed scores
334
335 This function computes a table of scores for every possible combination of
336 basepair and quality scores. The element \a sc_eqX[i][j] is the score for a
337 basepair \a X with quality scores \a i and \j in the two sequences. The
338 element \a sc_neqXY[i][j] is the score of basepairs \a X and \a Y with
339 quality scores \a i and \j, respectively.
340
341 @param match The match weight to be used in scoring
342 @param mismatch The mismatch weight to be used in scoring
343 @param ef Structure containing empirical frequencies
344 */
init_scores(struct emp_freq * ef)345 static void init_scores (struct emp_freq * ef)
346 {
347 int i, j, k;
348 double
349 ex, ey,
350 pa2, pc2, pg2, pt2, pagt2, pcgt2, pact2, pacg2, pacg, pact, pagt, pcgt,
351 pac2, pag2, pat2, pcg2, pct2, pgt2,
352 p2acg, p2act, p2agt, p2cgt;
353
354 pa2 = ef->pa * ef->pa;
355 pc2 = ef->pc * ef->pc;
356 pg2 = ef->pg * ef->pg;
357 pt2 = ef->pt * ef->pt;
358
359 pacg2 = pa2 + pc2 + pg2; pacg = ef->pa + ef->pc + ef->pg; p2acg = pacg * pacg;
360 pact2 = pa2 + pc2 + pt2; pact = ef->pa + ef->pc + ef->pt; p2act = pact * pact;
361 pagt2 = pa2 + pg2 + pt2; pagt = ef->pa + ef->pg + ef->pt; p2agt = pagt * pagt;
362 pcgt2 = pc2 + pg2 + pt2; pcgt = ef->pc + ef->pg + ef->pt; p2cgt = pcgt * pcgt;
363
364 pac2 = (ef->pa + ef->pc) * (ef->pa + ef->pc);
365 pag2 = (ef->pa + ef->pg) * (ef->pa + ef->pg);
366 pat2 = (ef->pa + ef->pt) * (ef->pa + ef->pt);
367 pcg2 = (ef->pc + ef->pg) * (ef->pc + ef->pg);
368 pct2 = (ef->pc + ef->pt) * (ef->pc + ef->pt);
369 pgt2 = (ef->pg + ef->pt) * (ef->pg + ef->pt);
370
371
372 for (i = 1; i < 64; ++ i)
373 {
374 for (j = 1; j < 64; ++ j)
375 {
376 ex = pow (10.0, - (i - 1) / 10.0);
377 ey = pow (10.0, - (j - 1) / 10.0);
378
379 k = (i << 6) | j;
380
381 new_eq[k] = PEAR_MATCH_SCORE * ((1 - ex) * (1 - ey) + (ex * ey) / 3.0);
382 new_neq[k] = PEAR_MISMATCH_SCORE * (1 - (1.0 / 3.0) * (1 - ex) * ey - (1.0 / 3.0) * (1 - ey) * ex - (2.0 / 9.0) * ex * ey);
383 //qs_mul[i][j] = ex * ey;
384
385 /*
386 new_eqA[k] = PEAR_MATCH_SCORE * (1 - ex) * (1 - ey) + (ex * ey) * pcgt2 / p2cgt;
387 new_eqC[k] = PEAR_MATCH_SCORE * (1 - ex) * (1 - ey) + (ex * ey) * pagt2 / p2agt;
388 new_eqG[k] = PEAR_MATCH_SCORE * (1 - ex) * (1 - ey) + (ex * ey) * pact2 / p2act;
389 new_eqT[k] = PEAR_MATCH_SCORE * (1 - ex) * (1 - ey) + (ex * ey) * pacg2 / p2acg;
390 */
391 /* compute the penalties for empirical frequencies
392
393 0 = A
394 1 = C
395 2 = G
396 3 = T
397 4 = AC
398 5 = AG
399 6 = AT
400 7 = CG
401 8 = CT
402 9 = GT
403 */
404 penaltyEF[0][k] = PEAR_MATCH_SCORE * (1 - ex) * (1 - ey) + (ex * ey) * pcgt2 / p2cgt;
405 penaltyEF[1][k] = PEAR_MATCH_SCORE * (1 - ex) * (1 - ey) + (ex * ey) * pagt2 / p2agt;
406 penaltyEF[2][k] = PEAR_MATCH_SCORE * (1 - ex) * (1 - ey) + (ex * ey) * pact2 / p2act;
407 penaltyEF[3][k] = PEAR_MATCH_SCORE * (1 - ex) * (1 - ey) + (ex * ey) * pacg2 / p2acg;
408 //new_neqAC[k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pc / pcgt) - (1 - ex) * ey * (ef->pa / pagt) - ex * ey * (pg2 + pt2) / pgt2);
409 penaltyEF[4][k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pc / pcgt) - (1 - ex) * ey * (ef->pa / pagt) - ex * ey * (pg2 + pt2) / pgt2); // AC
410 //sc_neqCA[i][j] PEAR_MISMATCH_SCOREch * (1 - (1 - ey) * ex * (ef->pa / pagt) - (1 - ex) * ey * (ef->pc / pcgt) - ex * ey * (pg2 + pt2) / pgt2);
411
412 //new_neqAG[k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pg / pcgt) - (1 - ex) * ey * (ef->pa / pact) - ex * ey * (pc2 + pt2) / pct2);
413 penaltyEF[5][k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pg / pcgt) - (1 - ex) * ey * (ef->pa / pact) - ex * ey * (pc2 + pt2) / pct2); // AG
414 //sc_neqGA[i][j] PEAR_MISMATCH_SCOREch * (1 - (1 - ey) * ex * (ef->pa / pact) - (1 - ex) * ey * (ef->pg / pcgt) - ex * ey * (pc2 + pt2) / pct2);
415
416 //new_neqAT[k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pt / pcgt) - (1 - ex) * ey * (ef->pa / pacg) - ex * ey * (pc2 + pg2) / pcg2);
417 penaltyEF[6][k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pt / pcgt) - (1 - ex) * ey * (ef->pa / pacg) - ex * ey * (pc2 + pg2) / pcg2); // AT
418 //sc_neqTA[i][j] PEAR_MISMATCH_SCOREch * (1 - (1 - ey) * ex * (ef->pa / pacg) - (1 - ex) * ey * (ef->pt / pcgt) - ex * ey * (pc2 + pg2) / pcg2);
419
420 //new_neqCG[k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pg / pagt) - (1 - ex) * ey * (ef->pc / pact) - ex * ey * (pa2 + pt2) / pat2);
421 penaltyEF[7][k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pg / pagt) - (1 - ex) * ey * (ef->pc / pact) - ex * ey * (pa2 + pt2) / pat2); // CG
422 //sc_neqGC[i][j] PEAR_MISMATCH_SCOREch * (1 - (1 - ey) * ex * (ef->pc / pact) - (1 - ex) * ey * (ef->pg / pagt) - ex * ey * (pa2 + pt2) / pat2);
423
424 //new_neqCT[k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pt / pagt) - (1 - ex) * ey * (ef->pc / pacg) - ex * ey * (pa2 + pg2) / pag2);
425 penaltyEF[8][k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pt / pagt) - (1 - ex) * ey * (ef->pc / pacg) - ex * ey * (pa2 + pg2) / pag2); // CT
426 //sc_neqTC[i][j] PEAR_MISMATCH_SCOREch * (1 - (1 - ey) * ex * (ef->pc / pacg) - (1 - ex) * ey * (ef->pt / pagt) - ex * ey * (pa2 + pg2) / pag2);
427
428 //new_neqGT[k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pt / pact) - (1 - ex) * ey * (ef->pg / pacg) - ex * ey * (pa2 + pc2) / pac2);
429 penaltyEF[9][k] = PEAR_MISMATCH_SCORE * (1 - (1 - ey) * ex * (ef->pt / pact) - (1 - ex) * ey * (ef->pg / pacg) - ex * ey * (pa2 + pc2) / pac2); // GT
430 //sc_neqTG[i][j] = mismatch * (1 - (1 - ey) * ex * (ef->pg / pacg) - (1 - ex) * ey * (ef->pt / pact) - ex * ey * (pa2 + pc2) / pac2);
431
432
433 }
434 }
435 }
436
437 #ifdef EXPERIMENTAL
438 INLINE void
scoring_ef(char dleft,char dright,char qleft,char qright,int score_method,double * score,double * oes,int match,int mismatch,struct emp_freq * ef)439 scoring_ef (char dleft, char dright, char qleft, char qright, int score_method, double * score, double * oes, int match, int mismatch, struct emp_freq * ef)
440 {
441 double tmp;
442
443 if (DEGENERATE(dleft) || DEGENERATE(dright)) /* one of them is N */
444 {
445 switch (score_method)
446 {
447 case 1:
448 *score += (ef->q * match - (1 - ef->q) * mismatch);
449 *oes = *score;
450 break;
451 case 2:
452 tmp = (1 - ef->q) * mismatch;
453 *oes += (ef->q * match - tmp);
454 *score -= tmp;
455 break;
456 case 3:
457 *score -= mismatch;
458 *oes += (ef->q * match - (1 - ef->q) * mismatch);
459 break;
460 }
461 }
462 else if (dleft == dright) /* equal */
463 {
464 switch (score_method)
465 {
466 case 1:
467 switch (dleft)
468 {
469 case 'A':
470 *score += (sc_eqA[(int)qright][(int)qleft] - (1 - sc_eqA[(int)qright][(int)qleft] / match) * mismatch);
471 break;
472 case 'C':
473 *score += (sc_eqC[(int)qright][(int)qleft] - (1 - sc_eqC[(int)qright][(int)qleft] / match) * mismatch);
474 break;
475 case 'G':
476 *score += (sc_eqG[(int)qright][(int)qleft] - (1 - sc_eqG[(int)qright][(int)qleft] / match) * mismatch);
477 break;
478 case 'T':
479 *score += (sc_eqT[(int)qright][(int)qleft] - (1 - sc_eqT[(int)qright][(int)qleft] / match) * mismatch);
480 break;
481 }
482 *oes = *score;
483 break;
484 case 2:
485 switch (dleft)
486 {
487 case 'A':
488 *score += sc_eqA[(int)qright][(int)qleft];
489 *oes += (sc_eqA[(int)qright][(int)qleft] - (1 - sc_eqA[(int)qright][(int)qleft] / match) * mismatch);
490 break;
491 case 'C':
492 *score += sc_eqC[(int)qright][(int)qleft];
493 *oes += (sc_eqC[(int)qright][(int)qleft] - (1 - sc_eqC[(int)qright][(int)qleft] / match) * mismatch);
494 break;
495 case 'G':
496 *score += sc_eqG[(int)qright][(int)qleft];
497 *oes += (sc_eqG[(int)qright][(int)qleft] - (1 - sc_eqG[(int)qright][(int)qleft] / match) * mismatch);
498 break;
499 case 'T':
500 *score += sc_eqT[(int)qright][(int)qleft];
501 *oes += (sc_eqT[(int)qright][(int)qleft] - (1 - sc_eqT[(int)qright][(int)qleft] / match) * mismatch);
502 break;
503 }
504 break;
505 case 3:
506 switch (dleft)
507 {
508 case 'A':
509 *oes += (sc_eqA[(int)qright][(int)qleft] - (1 - sc_eqA[(int)qright][(int)qleft] / match) * mismatch);
510 break;
511 case 'C':
512 *oes += (sc_eqC[(int)qright][(int)qleft] - (1 - sc_eqC[(int)qright][(int)qleft] / match) * mismatch);
513 break;
514 case 'G':
515 *oes += (sc_eqG[(int)qright][(int)qleft] - (1 - sc_eqG[(int)qright][(int)qleft] / match) * mismatch);
516 break;
517 case 'T':
518 *oes += (sc_eqT[(int)qright][(int)qleft] - (1 - sc_eqT[(int)qright][(int)qleft] / match) * mismatch);
519 break;
520 }
521 *score += match;
522 break;
523 }
524 }
525 else /* not equal */
526 {
527 switch (score_method)
528 {
529 case 1:
530 switch (dleft)
531 {
532 case 'A':
533 switch (dright)
534 {
535 case 'C':
536 *score = *score - (sc_neqAC[(int)qleft][(int)qright] - (1 - sc_neqAC[(int)qleft][(int)qright] / mismatch) * match);
537 break;
538 case 'G':
539 *score = *score - (sc_neqAG[(int)qleft][(int)qright] - (1 - sc_neqAG[(int)qleft][(int)qright] / mismatch) * match);
540 break;
541 case 'T':
542 *score = *score - (sc_neqAT[(int)qleft][(int)qright] - (1 - sc_neqAT[(int)qleft][(int)qright] / mismatch) * match);
543 break;
544 }
545 break;
546 case 'C':
547 switch (dright)
548 {
549 case 'A':
550 *score = *score - (sc_neqCA[(int)qleft][(int)qright] - (1 - sc_neqCA[(int)qleft][(int)qright] / mismatch) * match);
551 break;
552 case 'G':
553 *score = *score - (sc_neqCG[(int)qleft][(int)qright] - (1 - sc_neqCG[(int)qleft][(int)qright] / mismatch) * match);
554 break;
555 case 'T':
556 *score = *score - (sc_neqCT[(int)qleft][(int)qright] - (1 - sc_neqCT[(int)qleft][(int)qright] / mismatch) * match);
557 break;
558 }
559 break;
560 case 'G':
561 switch (dright)
562 {
563 case 'A':
564 *score = *score - (sc_neqGA[(int)qleft][(int)qright] - (1 - sc_neqGA[(int)qleft][(int)qright] / mismatch) * match);
565 break;
566 case 'C':
567 *score = *score - (sc_neqGC[(int)qleft][(int)qright] - (1 - sc_neqGC[(int)qleft][(int)qright] / mismatch) * match);
568 break;
569 case 'T':
570 *score = *score - (sc_neqGT[(int)qleft][(int)qright] - (1 - sc_neqGT[(int)qleft][(int)qright] / mismatch) * match);
571 break;
572 }
573 break;
574 case 'T':
575 switch (dright)
576 {
577 case 'A':
578 *score = *score - (sc_neqTA[(int)qleft][(int)qright] - (1 - sc_neqTA[(int)qleft][(int)qright] / mismatch) * match);
579 break;
580 case 'C':
581 *score = *score - (sc_neqTC[(int)qleft][(int)qright] - (1 - sc_neqTC[(int)qleft][(int)qright] / mismatch) * match);
582 break;
583 case 'G':
584 *score = *score - (sc_neqTG[(int)qleft][(int)qright] - (1 - sc_neqTG[(int)qleft][(int)qright] / mismatch) * match);
585 break;
586 }
587 break;
588 }
589 *oes = *score;
590 break;
591 case 2:
592 switch (dleft)
593 {
594 case 'A':
595 switch (dright)
596 {
597 case 'C':
598 *score -= sc_neqAC[(int)qleft][(int)qright];
599 *oes = *oes - (sc_neqAC[(int)qleft][(int)qright] - (1 - sc_neqAC[(int)qleft][(int)qright] / mismatch) * match);
600 break;
601 case 'G':
602 *score -= sc_neqAG[(int)qleft][(int)qright];
603 *oes = *oes - (sc_neqAG[(int)qleft][(int)qright] - (1 - sc_neqAG[(int)qleft][(int)qright] / mismatch) * match);
604 break;
605 case 'T':
606 *score -= sc_neqAT[(int)qleft][(int)qright];
607 *oes = *oes - (sc_neqAT[(int)qleft][(int)qright] - (1 - sc_neqAT[(int)qleft][(int)qright] / mismatch) * match);
608 break;
609 }
610 break;
611 case 'C':
612 switch (dright)
613 {
614 case 'A':
615 *score -= sc_neqCA[(int)qleft][(int)qright];
616 *oes = *oes - (sc_neqCA[(int)qleft][(int)qright] - (1 - sc_neqCA[(int)qleft][(int)qright] / mismatch) * match);
617 break;
618 case 'G':
619 *score -= sc_neqCG[(int)qleft][(int)qright];
620 *oes = *oes - (sc_neqCG[(int)qleft][(int)qright] - (1 - sc_neqCG[(int)qleft][(int)qright] / mismatch) * match);
621 break;
622 case 'T':
623 *score -= sc_neqCT[(int)qleft][(int)qright];
624 *oes = *oes - (sc_neqCT[(int)qleft][(int)qright] - (1 - sc_neqCT[(int)qleft][(int)qright] / mismatch) * match);
625 break;
626 }
627 break;
628 case 'G':
629 switch (dright)
630 {
631 case 'A':
632 *score -= sc_neqGA[(int)qleft][(int)qright];
633 *oes = *oes - (sc_neqGA[(int)qleft][(int)qright] - (1 - sc_neqGA[(int)qleft][(int)qright] / mismatch) * match);
634 break;
635 case 'C':
636 *score -= sc_neqGC[(int)qleft][(int)qright];
637 *oes = *oes - (sc_neqGC[(int)qleft][(int)qright] - (1 - sc_neqGC[(int)qleft][(int)qright] / mismatch) * match);
638 break;
639 case 'T':
640 *score -= sc_neqGT[(int)qleft][(int)qright];
641 *oes = *oes - (sc_neqGT[(int)qleft][(int)qright] - (1 - sc_neqGT[(int)qleft][(int)qright] / mismatch) * match);
642 break;
643 }
644 break;
645 case 'T':
646 switch (dright)
647 {
648 case 'A':
649 *score -= sc_neqTA[(int)qleft][(int)qright];
650 *oes = *oes - (sc_neqTA[(int)qleft][(int)qright] - (1 - sc_neqTA[(int)qleft][(int)qright] / mismatch) * match);
651 break;
652 case 'C':
653 *score -= sc_neqTC[(int)qleft][(int)qright];
654 *oes = *oes - (sc_neqTC[(int)qleft][(int)qright] - (1 - sc_neqTC[(int)qleft][(int)qright] / mismatch) * match);
655 break;
656 case 'G':
657 *score -= sc_neqTG[(int)qleft][(int)qright];
658 *oes = *oes - (sc_neqTG[(int)qleft][(int)qright] - (1 - sc_neqTG[(int)qleft][(int)qright] / mismatch) * match);
659 break;
660 }
661 break;
662 }
663 break;
664 case 3:
665 *score -= mismatch;
666 switch (dleft)
667 {
668 case 'A':
669 switch (dright)
670 {
671 case 'C':
672 *oes = *oes - (sc_neqAC[(int)qleft][(int)qright] - (1 - sc_neqAC[(int)qleft][(int)qright] / mismatch) * match);
673 break;
674 case 'G':
675 *oes = *oes - (sc_neqAG[(int)qleft][(int)qright] - (1 - sc_neqAG[(int)qleft][(int)qright] / mismatch) * match);
676 break;
677 case 'T':
678 *oes = *oes - (sc_neqAT[(int)qleft][(int)qright] - (1 - sc_neqAT[(int)qleft][(int)qright] / mismatch) * match);
679 break;
680 }
681 break;
682 case 'C':
683 switch (dright)
684 {
685 case 'A':
686 *oes = *oes - (sc_neqCA[(int)qleft][(int)qright] - (1 - sc_neqCA[(int)qleft][(int)qright] / mismatch) * match);
687 break;
688 case 'G':
689 *oes = *oes - (sc_neqCG[(int)qleft][(int)qright] - (1 - sc_neqCG[(int)qleft][(int)qright] / mismatch) * match);
690 break;
691 case 'T':
692 *oes = *oes - (sc_neqCT[(int)qleft][(int)qright] - (1 - sc_neqCT[(int)qleft][(int)qright] / mismatch) * match);
693 break;
694 }
695 break;
696 case 'G':
697 switch (dright)
698 {
699 case 'A':
700 *oes = *oes - (sc_neqGA[(int)qleft][(int)qright] - (1 - sc_neqGA[(int)qleft][(int)qright] / mismatch) * match);
701 break;
702 case 'C':
703 *oes = *oes - (sc_neqGC[(int)qleft][(int)qright] - (1 - sc_neqGC[(int)qleft][(int)qright] / mismatch) * match);
704 break;
705 case 'T':
706 *oes = *oes - (sc_neqGT[(int)qleft][(int)qright] - (1 - sc_neqGT[(int)qleft][(int)qright] / mismatch) * match);
707 break;
708 }
709 break;
710 case 'T':
711 switch (dright)
712 {
713 case 'A':
714 *oes = *oes - (sc_neqTA[(int)qleft][(int)qright] - (1 - sc_neqTA[(int)qleft][(int)qright] / mismatch) * match);
715 break;
716 case 'C':
717 *oes = *oes - (sc_neqTC[(int)qleft][(int)qright] - (1 - sc_neqTC[(int)qleft][(int)qright] / mismatch) * match);
718 break;
719 case 'G':
720 *oes = *oes - (sc_neqTG[(int)qleft][(int)qright] - (1 - sc_neqTG[(int)qleft][(int)qright] / mismatch) * match);
721 break;
722 }
723 break;
724 }
725 break;
726 }
727 }
728 }
729 #endif
730
scoring_ef_nm(char dleft,char dright,char qleft,char qright,int score_method,double * score,double * oes,struct emp_freq * ef)731 static INLINE void scoring_ef_nm (char dleft, char dright, char qleft, char qright, int score_method, double * score, double * oes, struct emp_freq * ef)
732 {
733 double tmp;
734 int i, k;
735
736 if ( (DEGENERATE (dleft)) || DEGENERATE (dright)) /* one of them is N */
737 {
738 switch (score_method)
739 {
740 case 1:
741 *score += (ef->q - (1 - ef->q));
742 *oes = *score;
743 break;
744 case 2:
745 tmp = (1 - ef->q);
746 *oes += (ef->q - tmp);
747 *score -= tmp;
748 break;
749 case 3:
750 *score -= 1;
751 *oes += (ef->q - (1 - ef->q));
752 break;
753 default:
754 assert (0);
755 }
756 }
757 else if (dleft == dright) /* equal */
758 {
759 i = translate[dleft | dright];
760 assert (i != -1);
761 k = ((int)qright << 6) | (int)qleft;
762 switch (score_method)
763 {
764 case 1:
765 *score += (penaltyEF[i][k] - (1 - penaltyEF[i][k]));
766 *oes = *score;
767 break;
768
769 case 2:
770 *score += penaltyEF[i][k];
771 *oes += (penaltyEF[i][k] - (1 - penaltyEF[i][k]));
772 break;
773 case 3:
774 *oes += (penaltyEF[i][k] - (1 - penaltyEF[i][k]));
775 *score += 1;
776 break;
777 default:
778 assert (0);
779 }
780 }
781 else /* not equal */
782 {
783 if (dleft > dright)
784 {
785 /* swap the values */
786 dleft = dleft ^ dright; dright = dleft ^ dright; dleft = dleft ^ dright;
787
788 /* swap quality scores */
789 qleft = qleft ^ qright; qright = qleft ^ qright; qleft = qleft ^ qright;
790
791 }
792 i = translate[dleft | dright];
793 assert (i != -1);
794 k = ((int)qleft << 6) | (int)qright;
795 switch (score_method)
796 {
797 case 1:
798 *score = *score - (2 * penaltyEF[i][k]) + 1;
799 *oes = *score;
800 break;
801
802 case 2:
803 *score -= penaltyEF[i][k];
804 *oes = *oes - (2 * penaltyEF[i][k]) + 1;
805 break;
806
807 case 3:
808 *score -= 1;
809 *oes = *oes - (2 * penaltyEF[i][k]) + 1;
810 break;
811 default:
812 assert (0);
813 }
814 }
815 }
816
817 #if 0
818 /* TODO: Remember to speed up this function by doing something with the multiplication and division of match/mismatch */
819 INLINE void scoring (char dleft, char dright, char qleft, char qright, int score_method, double * score, double * oes, int match, int mismatch)
820 {
821 double tmp;
822
823 if (dleft > UNCALLED || dright > UNCALLED) /* one of them is N */
824 {
825 switch (score_method)
826 {
827 case 1:
828 *score += (0.25 * match - (1 - 0.25) * mismatch);
829 *oes = *score;
830 break;
831 case 2:
832 tmp = (1 - 0.25) * mismatch;
833 *oes += (0.25 * match - tmp);
834 *score -= tmp;
835 break;
836 case 3:
837 *score -= mismatch;
838 *oes += (0.25 * match - (1 - 0.25) * mismatch);
839 break;
840 }
841 }
842 else if (dleft == dright) /* equal */
843 {
844 switch (score_method)
845 {
846 case 1:
847 *score += (sc_eq[(int)qright][(int)qleft] - (1 - sc_eq[(int)qright][(int)qleft] / match) * mismatch);
848 *oes = *score;
849 break;
850 case 2:
851 tmp = sc_eq[(int)qright][(int)qleft];
852 *oes += (tmp - (1 - sc_eq[(int)qright][(int)qleft] / match) * mismatch);
853 *score += tmp;
854 break;
855 case 3:
856 *score += match;
857 *oes += (sc_eq[(int)qright][(int)qleft] - (1 - sc_eq[(int)qright][(int)qleft] / match) * mismatch);
858 break;
859 }
860 }
861 else /* not equal */
862 {
863 switch (score_method)
864 {
865 case 1:
866 *score = *score - (sc_neq[(int)qright][(int)qleft] - (1 - sc_neq[(int)qright][(int)qleft] / mismatch) * match);
867 *oes = *score;
868 break;
869 case 2:
870 tmp = sc_neq[(int)qright][(int)qleft];
871 *oes = *oes - (tmp - (1 - sc_neq[(int)qright][(int)qleft] / mismatch) * match);
872 *score -= tmp;
873 break;
874 case 3:
875 *score -= mismatch;
876 *oes = *score - (sc_neq[(int)qright][(int)qleft] - (1 - sc_neq[(int)qright][(int)qleft] / mismatch) * match);
877 break;
878 }
879 }
880 }
881 #endif
882
883 /* TODO: Remember to speed up this function by doing something with the multiplication and division of match/mismatch */
scoring_nm(char dleft,char dright,char qleft,char qright,int score_method,double * score,double * oes)884 static INLINE void scoring_nm (char dleft, char dright, char qleft, char qright, int score_method, double * score, double * oes)
885 {
886 double tmp;
887 int k;
888
889 k = ((int)qright << 6) | (int)qleft;
890 if (DEGENERATE (dleft) || DEGENERATE(dright)) /* one of them is N */
891 {
892 switch (score_method)
893 {
894 case 1:
895 *score += ( - 0.5);
896 *oes = *score;
897 break;
898 case 2:
899 tmp = 0.75;
900 *oes += ( - 0.5);
901 *score -= 0.75;
902 break;
903 case 3:
904 *score -= 1;
905 *oes += ( - 0.5 );
906 break;
907 default:
908 assert (0);
909 }
910 }
911 else if (dleft == dright) /* equal */
912 {
913 switch (score_method)
914 {
915 case 1:
916 //*score += (sc_eq[(int)qright][(int)qleft] - (1 - sc_eq[(int)qright][(int)qleft]) );
917 *score += (new_eq[k] - (1 - new_eq[k]) );
918 *oes = *score;
919 break;
920 case 2:
921 //tmp = sc_eq[(int)qright][(int)qleft];
922 tmp = new_eq[k];
923 //*oes += (tmp - (1 - sc_eq[(int)qright][(int)qleft] ));
924 *oes += (tmp - (1 - new_eq[k] ));
925 *score += tmp;
926 break;
927 case 3:
928 *score += 1;
929 //*oes += (sc_eq[(int)qright][(int)qleft] - (1 - sc_eq[(int)qright][(int)qleft] ) );
930 *oes += (new_eq[k] - (1 - new_eq[k] ) );
931 break;
932 default:
933 assert (0);
934 }
935 }
936 else /* not equal */
937 {
938 switch (score_method)
939 {
940 case 1:
941 //*score = *score - (sc_neq[(int)qright][(int)qleft] - (1 - sc_neq[(int)qright][(int)qleft] ) );
942 *score = *score - (new_neq[k] - (1 - new_neq[k] ) );
943 *oes = *score;
944 break;
945 case 2:
946 //tmp = sc_neq[(int)qright][(int)qleft];
947 tmp = new_neq[k];
948 //*oes = *oes - (tmp - (1 - sc_neq[(int)qright][(int)qleft] ) );
949 *oes = *oes - (tmp - (1 - new_neq[k] ) );
950 *score -= tmp;
951 break;
952 case 3:
953 *score -= 1;
954 //*oes = *score - (sc_neq[(int)qright][(int)qleft] - (1 - sc_neq[(int)qright][(int)qleft] ) );
955 *oes = *score - (new_neq[k] - (1 - new_neq[k] ) );
956 break;
957 default:
958 assert (0);
959 }
960 }
961 }
962
assembly_FORWARD_LONGER(fastqRead * forward,fastqRead * reverse,struct emp_freq * ef,struct user_args * sw,int nForward,int nReverse)963 static INLINE int assembly_FORWARD_LONGER (fastqRead * forward, fastqRead * reverse, struct emp_freq * ef, struct user_args * sw, int nForward, int nReverse)
964 {
965 int
966 i,
967 j;
968 double
969 score = 0,
970 oes = 0,
971 best_score = 0,
972 best_oes = 0,
973 uncalled = 0;
974 int
975 best_overlap = 0, /* overlap of the best alignment */
976 nMatch,
977 asm_len = 0,
978 st_pass,
979 bestScoreCase = 0;
980
981 /* -------------->
982 * <------
983 * ....
984 * ------------->
985 * <------
986 */
987 for (i = 1; i <= nReverse; ++ i)
988 {
989 nMatch = score = oes = 0;
990 for (j = 0; j < i; ++ j)
991 {
992 scoring_ef_nm (forward->data[nForward - i + j],
993 reverse->data[j],
994 forward->qscore[nForward - i + j],
995 reverse->qscore[j],
996 sw->score_method,
997 &score,
998 &oes,
999 ef);
1000
1001 if (forward->data[nForward - i + j] == reverse->data[j]) ++nMatch;
1002 }
1003
1004 if (score > best_score)
1005 {
1006 bestScoreCase = PEAR_MERGE_NO_TRIM;
1007 best_overlap = i;
1008 best_score = score;
1009 best_oes = oes;
1010 }
1011 }
1012 /* compute score for runthrough case */
1013 /* -------------->
1014 * <------
1015 * ....
1016 * ------------->
1017 * <------
1018 */
1019 for (i = nForward - nReverse - 1; i >= 0; -- i)
1020 {
1021 score = oes = nMatch = 0;
1022 for (j = 0; j < nReverse; ++ j)
1023 {
1024 scoring_ef_nm (forward->data[i + j],
1025 reverse->data[j],
1026 forward->qscore[i + j],
1027 reverse->qscore[j],
1028 sw->score_method,
1029 &score,
1030 &oes,
1031 ef);
1032 if (forward->data[i + j] == reverse->data[j]) ++nMatch;
1033 }
1034
1035 if (score > best_score)
1036 {
1037 bestScoreCase = PEAR_MERGE_TRIM_FORWARD;
1038 best_overlap = i;
1039 best_score = score;
1040 best_oes = oes;
1041 }
1042 }
1043 /* compute score for runthrough case */
1044 /* -------------->
1045 * <------
1046 * ....
1047 * ------------->
1048 * <------
1049 */
1050 for (i = nReverse - 1; i > 0; -- i)
1051 {
1052 score = oes = nMatch = 0;
1053 for (j = 0; j < i; ++ j)
1054 {
1055 scoring_ef_nm (forward->data[j],
1056 reverse->data[nReverse - i + j],
1057 forward->qscore[j],
1058 reverse->qscore[nReverse - i + j],
1059 sw->score_method,
1060 &score,
1061 &oes,
1062 ef);
1063 if (forward->data[j] == reverse->data[nReverse - i + j]) ++nMatch;
1064 }
1065 if (score > best_score)
1066 {
1067 bestScoreCase = PEAR_MERGE_TRIM_BOTH;
1068 best_overlap = i;
1069 best_score = score;
1070 best_oes = oes;
1071 }
1072 }
1073
1074 if (!bestScoreCase) return (0);
1075
1076 if (sw->test == 1)
1077 st_pass = stat_test2 (sw->p_value, best_oes, sw->min_overlap, ef->q);
1078 else
1079 st_pass = stat_test2 (sw->p_value, best_oes, best_overlap, ef->q);
1080
1081 if (!st_pass) return (0);
1082
1083 /* TODO PART */
1084
1085 switch (bestScoreCase)
1086 {
1087 case PEAR_MERGE_NO_TRIM:
1088 if (best_overlap == 0)
1089 {
1090 assert(0);
1091 asm_len = nForward + nReverse;
1092
1093 for (j = 0; j < nForward; ++ j)
1094 if ( DEGENERATE(forward->data[j])) ++uncalled;
1095 for (j = 0; j < nReverse; ++ j)
1096 if (DEGENERATE(reverse->data[j])) ++uncalled;
1097 uncalled /= (nForward + nReverse);
1098
1099 if (0 >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1100 {
1101 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_BOTH);
1102 }
1103 else
1104 {
1105 return (0);
1106 }
1107 }
1108 else
1109 {
1110 asm_len = nForward + nReverse - best_overlap;
1111
1112 /* count uncalled bases in the non-overlapping high-quality part of the forward read */
1113 for (j = 0; j < nForward - best_overlap; ++ j)
1114 if (DEGENERATE(forward->data[j])) ++uncalled;
1115
1116 /* count uncalled bases in the non-overlapping high-quality part of the reverse read */
1117 for (j = best_overlap; j < nReverse; ++ j)
1118 if (DEGENERATE(reverse->data[j])) ++uncalled;
1119
1120 /* count the uncalled bases in the overlapping part of the two reads */
1121 for (j = nForward - best_overlap; j < nForward; ++ j)
1122 if ((DEGENERATE(forward->data[j])) && (DEGENERATE(reverse->data[j - nForward + best_overlap]))) ++uncalled;
1123 uncalled /= asm_len;
1124
1125 if (nForward + nReverse - asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1126 {
1127 if (asm_len > nForward)
1128 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_BOTH); /* The merged read will require both memory buffers (forward + reverse) */
1129 else
1130 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_SINGLE); /* The merged read will fit inside the forward read mem buffer */
1131
1132
1133 assemble_overlap (forward, reverse, nForward - best_overlap, 0, best_overlap, forward, sw);
1134 memmove (reverse->data, reverse->data + best_overlap, nReverse - best_overlap);
1135 memmove (reverse->qscore, reverse->qscore + best_overlap, nReverse - best_overlap);
1136
1137 reverse->data[nReverse - best_overlap] = 0;
1138 reverse->qscore[nReverse - best_overlap] = 0;
1139 }
1140 else
1141 {
1142 return (0);
1143 }
1144 }
1145 break;
1146 case PEAR_MERGE_TRIM_FORWARD:
1147 /* note that here best_overlap refers to the position of the first byte of the overlapping region
1148 in the forward read */
1149 asm_len = best_overlap + nReverse;
1150
1151 /* count uncalled bases in the non-overlapping high-quality part of the forward read */
1152 for (j = 0; j < best_overlap; ++ j)
1153 if (DEGENERATE(forward->data[j])) ++uncalled;
1154
1155 /* count the uncalled bases in the overlapping part of the two reads */
1156 for (j = best_overlap; j < best_overlap + nReverse; ++ j)
1157 if ((DEGENERATE(forward->data[j])) && (DEGENERATE(reverse->data[j - nReverse - best_overlap]))) ++uncalled;
1158 uncalled /= asm_len;
1159
1160 if (nReverse >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1161 {
1162 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_SINGLE); /* The merged read will fit inside the forward read mem buffer */
1163
1164 assemble_overlap (forward, reverse, best_overlap, 0, nReverse, forward, sw);
1165
1166 forward->data[best_overlap + nReverse] = 0;
1167 forward->qscore[best_overlap + nReverse] = 0;
1168 }
1169 else
1170 {
1171 return (0);
1172 }
1173 break;
1174 case PEAR_MERGE_TRIM_BOTH:
1175 asm_len = best_overlap;
1176
1177 /* compute uncalled */
1178 for (j = 0; j < asm_len; ++ j)
1179 if ((DEGENERATE(forward->data[j])) && (DEGENERATE(reverse->data[nReverse - best_overlap + j]))) ++uncalled;
1180 uncalled /= asm_len;
1181
1182 if (asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1183 {
1184 assemble_overlap (forward, reverse, 0, nReverse - best_overlap, best_overlap, forward, sw);
1185
1186 forward->data[best_overlap] = 0;
1187 forward->qscore[best_overlap] = 0;
1188 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_SINGLE); /* flag that it's one piece */
1189 }
1190 else
1191 {
1192 return (0);
1193 }
1194 break;
1195 default:
1196 assert (0);
1197
1198 }
1199
1200 return (1);
1201
1202 }
1203
assembly_READS_EQUAL(fastqRead * forward,fastqRead * reverse,struct emp_freq * ef,struct user_args * sw,int n)1204 static INLINE int assembly_READS_EQUAL (fastqRead * forward, fastqRead * reverse, struct emp_freq * ef, struct user_args * sw, int n)
1205 {
1206 int
1207 i,
1208 j;
1209 double
1210 score = 0,
1211 oes = 0,
1212 best_score = 0,
1213 best_oes = 0,
1214 uncalled = 0;
1215 int
1216 best_overlap = 0, /* overlap of the best alignment */
1217 nMatch,
1218 asm_len = 0,
1219 st_pass,
1220 run_through = 0;
1221
1222 /* compute score for every overlap */
1223 for (i = 0; i <= n; ++ i) /* the size of the overlap */
1224 {
1225 nMatch = score = oes = 0;
1226 for (j = 0; j < i; ++ j)
1227 {
1228 scoring_ef_nm (forward->data[n - i + j],
1229 reverse->data[j],
1230 forward->qscore[n - i + j],
1231 reverse->qscore[j],
1232 sw->score_method,
1233 &score,
1234 &oes,
1235 ef);
1236 if (forward->data[n - i + j] == reverse->data[j]) ++nMatch;
1237 }
1238 if (score > best_score)
1239 {
1240 best_overlap = i;
1241 best_score = score;
1242 best_oes = oes;
1243 }
1244 }
1245
1246 /* compute for score for runthrough case */
1247 for (i = n - 1; i > 0; --i)
1248 {
1249 score = oes = nMatch = 0;
1250 for (j = 0; j < i; ++j)
1251 {
1252 scoring_ef_nm (forward->data[j],
1253 reverse->data[n - i + j],
1254 forward->qscore[j],
1255 reverse->qscore[n - i + j],
1256 sw->score_method,
1257 &score,
1258 &oes,
1259 ef);
1260 if (forward->data[n - i + j] == reverse->data[j]) ++nMatch;
1261 }
1262
1263 if (score > best_score)
1264 {
1265 run_through = 1;
1266 best_overlap = i;
1267 best_score = score;
1268 best_oes = oes;
1269 }
1270 }
1271
1272 /** NOW LET'S MERGE */
1273 if (sw->test == 1)
1274 {
1275 st_pass = stat_test2 (sw->p_value, best_oes, sw->min_overlap, ef->q);
1276 }
1277 else
1278 {
1279 st_pass = stat_test2 (sw->p_value, best_oes, best_overlap, ef->q);
1280 }
1281
1282 if (!st_pass) return (0);
1283
1284
1285 /* do the assembly!!!! */
1286 if (!run_through)
1287 {
1288 if (best_overlap == 0)
1289 {
1290 asm_len = 2 * n;
1291
1292 for (j = 0; j < n; ++ j)
1293 if (DEGENERATE(forward->data[j])) ++uncalled;
1294 for (j = 0; j < n; ++ j)
1295 if (DEGENERATE(reverse->data[j])) ++uncalled;
1296 uncalled /= asm_len;
1297
1298 if (2 * n - asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1299 {
1300 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_BOTH);
1301 }
1302 else
1303 {
1304 return (0);
1305 }
1306 }
1307 else if (best_overlap == n )
1308 {
1309 asm_len = n;
1310
1311 for (j = 0; j < asm_len; ++ j)
1312 if ((DEGENERATE(forward->data[j])) && (DEGENERATE(reverse->data[j]))) ++uncalled;
1313 uncalled /= asm_len;
1314
1315 if (2 * n - asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1316 {
1317 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_SINGLE);
1318 assemble_overlap (forward, reverse, 0, 0, n, forward, sw);
1319 forward->data[n] = 0;
1320 forward->qscore[n] = 0;
1321 }
1322 else
1323 {
1324 return (0);
1325 }
1326 }
1327 else
1328 {
1329 asm_len = 2 * n - best_overlap;
1330 for (j = 0; j < n - best_overlap; ++ j)
1331 if (DEGENERATE(forward->data[j])) ++uncalled;
1332 for (j = n - best_overlap; j < n; ++ j)
1333 if ((DEGENERATE(forward->data[j])) && (DEGENERATE(reverse->data[j - n + best_overlap]))) ++uncalled;
1334 for (j = best_overlap; j < n; ++ j)
1335 if (DEGENERATE(reverse->data[j])) ++uncalled;
1336 uncalled /= asm_len;
1337
1338 if (2 * n - asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1339 {
1340 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_BOTH);
1341
1342 assemble_overlap (forward, reverse, n - best_overlap, 0, best_overlap, forward, sw);
1343 memmove (reverse->data, reverse->data + best_overlap, n - best_overlap);
1344 memmove (reverse->qscore, reverse->qscore + best_overlap, n - best_overlap);
1345 /* THIS IS WRONG */
1346 //memcpy (reverse->data, reverse->data + best_overlap, n - best_overlap);
1347 //memcpy (reverse->qscore, reverse->qscore + best_overlap, n - best_overlap);
1348
1349 reverse->data[n - best_overlap] = 0;
1350 reverse->qscore[n - best_overlap] = 0;
1351 }
1352 else
1353 {
1354 return (0);
1355 }
1356 }
1357 } /* run-through case */
1358 else
1359 {
1360 asm_len = best_overlap;
1361
1362 /* compute uncalled */
1363 for (j = 0; j < asm_len; ++ j)
1364 if ((DEGENERATE(forward->data[j])) && (DEGENERATE(reverse->data[n - best_overlap + j]))) ++uncalled;
1365 uncalled /= asm_len;
1366
1367 if (asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1368 {
1369 assemble_overlap (forward, reverse, 0, n - best_overlap, best_overlap, forward, sw);
1370
1371 forward->data[best_overlap] = 0;
1372 forward->qscore[best_overlap] = 0;
1373 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_SINGLE); /* flag that it's one piece */
1374 }
1375 else
1376 {
1377 return (0);
1378 }
1379 }
1380
1381 /* TODO: Remember to reset *(let->data - 1) */
1382
1383 /* TODO: Optimize this in assemble_overlap? */
1384
1385 return (1);
1386
1387 }
1388
assembly_REVERSE_LONGER(fastqRead * forward,fastqRead * reverse,struct emp_freq * ef,struct user_args * sw,int nForward,int nReverse)1389 static INLINE int assembly_REVERSE_LONGER (fastqRead * forward, fastqRead * reverse, struct emp_freq * ef, struct user_args * sw, int nForward, int nReverse)
1390 {
1391 int
1392 i,
1393 j;
1394 double
1395 score = 0,
1396 oes = 0,
1397 best_score = 0,
1398 best_oes = 0,
1399 uncalled = 0;
1400 int
1401 best_overlap = 0, /* overlap of the best alignment */
1402 nMatch,
1403 asm_len = 0,
1404 st_pass,
1405 bestScoreCase = 0;
1406
1407 /* -------->
1408 * <------------
1409 * ....
1410 * ------->
1411 * <------------
1412 */
1413 for (i = 0; i <= nForward; ++ i)
1414 {
1415 nMatch = score = oes = 0;
1416 for (j = 0; j < i; ++ j)
1417 {
1418 scoring_ef_nm (forward->data[nForward - i + j],
1419 reverse->data[j],
1420 forward->qscore[nForward - i + j],
1421 reverse->qscore[j],
1422 sw->score_method,
1423 &score,
1424 &oes,
1425 ef);
1426 if (forward->data[nForward - i + j] == reverse->data[j]) ++nMatch;
1427 }
1428 if (score > best_score)
1429 {
1430 bestScoreCase = PEAR_MERGE_NO_TRIM;
1431 best_overlap = i;
1432 best_score = score;
1433 best_oes = oes;
1434 }
1435 }
1436 /* -------->
1437 * <------------
1438 * ....
1439 * ------->
1440 * <------------
1441 */
1442 for (i = nReverse - nForward - 1; i >= 0; -- i)
1443 {
1444 score = oes = nMatch = 0;
1445 for (j = 0; j < nForward; ++ j)
1446 {
1447 scoring_ef_nm (forward->data[j],
1448 reverse->data[nReverse - nForward - i + j],
1449 forward->qscore[j],
1450 reverse->qscore[nReverse - nForward - i + j],
1451 sw->score_method,
1452 &score,
1453 &oes,
1454 ef);
1455 if (forward->data[j] == reverse->data[nReverse - nForward - i + j]) ++nMatch;
1456 }
1457 if (score > best_score)
1458 {
1459 bestScoreCase = PEAR_MERGE_TRIM_REVERSE;
1460 best_overlap = i; /* now this is the length of the remaining part of the reverse read on the right of the overlapping region */
1461 best_score = score;
1462 best_oes = oes;
1463 }
1464 }
1465 /* -------->
1466 * <------------
1467 * ....
1468 * ------->
1469 * <------------
1470 */
1471 for (i = nForward - 1; i > 0; -- i)
1472 {
1473 score = oes = nMatch = 0;
1474 for (j = 0; j < i; ++ j)
1475 {
1476 scoring_ef_nm (forward->data[j],
1477 reverse->data[nReverse - i + j],
1478 forward->qscore[j],
1479 reverse->qscore[nReverse - i + j],
1480 sw->score_method,
1481 &score,
1482 &oes,
1483 ef);
1484 if (forward->data[j] == reverse->data[nReverse - i + j]) ++nMatch;
1485 }
1486 if (score > best_score)
1487 {
1488 bestScoreCase = PEAR_MERGE_TRIM_BOTH;
1489 best_overlap = i;
1490 best_score = score;
1491 best_oes = oes;
1492 }
1493 }
1494
1495 if (!bestScoreCase) return (0);
1496
1497 /* do a statistical test */
1498 if (sw->test == 1)
1499 st_pass = stat_test2 (sw->p_value, best_oes, sw->min_overlap, ef->q);
1500 else
1501 st_pass = stat_test2 (sw->p_value, best_oes, best_overlap, ef->q);
1502
1503 if (!st_pass) return (0);
1504
1505 switch (bestScoreCase)
1506 {
1507 case PEAR_MERGE_NO_TRIM:
1508 if (best_overlap == 0)
1509 {
1510 assert(0);
1511 asm_len = nForward + nReverse;
1512
1513 for (j = 0; j < nForward; ++ j)
1514 if (DEGENERATE(forward->data[j])) ++uncalled;
1515 for (j = 0; j < nReverse; ++ j)
1516 if (DEGENERATE(reverse->data[j])) ++uncalled;
1517 uncalled /= (nForward + nReverse);
1518
1519 if (0 >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1520 {
1521 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_BOTH);
1522 }
1523 else
1524 {
1525 return (0);
1526 }
1527 }
1528 else
1529 {
1530 asm_len = nForward + nReverse - best_overlap;
1531
1532 /* count uncalled bases in the non-overlapping high-quality part of the forward read */
1533 for (j = 0; j < nForward - best_overlap; ++ j)
1534 if (DEGENERATE(forward->data[j])) ++uncalled;
1535
1536 /* count uncalled bases in the non-overlapping high-quality part of the reverse read */
1537 for (j = best_overlap; j < nReverse; ++ j)
1538 if (DEGENERATE(reverse->data[j])) ++uncalled;
1539
1540 /* count the uncalled bases in the overlapping part of the two reads */
1541 for (j = nForward - best_overlap; j < nForward; ++ j)
1542 if ((DEGENERATE(forward->data[j])) && (DEGENERATE(reverse->data[j - nForward + best_overlap]))) ++uncalled;
1543 uncalled /= asm_len;
1544
1545 if (nForward + nReverse - asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1546 {
1547 if (asm_len > nForward)
1548 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_BOTH); /* The merged read will require both memory buffers (forward + reverse) */
1549 else
1550 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_SINGLE); /* The merged read will fit inside the forward read mem buffer */
1551
1552
1553 assemble_overlap (forward, reverse, nForward - best_overlap, 0, best_overlap, forward, sw);
1554 memmove (reverse->data, reverse->data + best_overlap, nReverse - best_overlap);
1555 memmove (reverse->qscore, reverse->qscore + best_overlap, nReverse - best_overlap);
1556
1557 reverse->data[nReverse - best_overlap] = 0;
1558 reverse->qscore[nReverse - best_overlap] = 0;
1559 }
1560 else
1561 {
1562 return (0);
1563 }
1564 }
1565 break;
1566 case PEAR_MERGE_TRIM_REVERSE:
1567 /* note that here best_overlap refers to the length of the remaining part right of the overlapping regionn
1568 in the reverse read */
1569 asm_len = best_overlap + nForward;
1570
1571 /* count uncalled bases in the non-overlapping high-quality part of the reverse read */
1572 for (j = nReverse - best_overlap; j < nReverse; ++ j)
1573 if (DEGENERATE(reverse->data[j])) ++uncalled;
1574
1575 /* count the uncalled bases in the overlapping part of the two reads */
1576 for (j = 0; j < nForward; ++ j)
1577 if ((DEGENERATE(forward->data[j])) && (DEGENERATE(reverse->data[nReverse - best_overlap - nForward + j]))) ++uncalled;
1578 uncalled /= asm_len;
1579
1580 if (nForward >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1581 {
1582 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_BOTH);
1583
1584 assemble_overlap (forward, reverse, 0, nReverse - nForward - best_overlap, nForward, forward, sw);
1585
1586 memmove (reverse->data, reverse->data + nReverse - best_overlap, best_overlap);
1587 memmove (reverse->qscore, reverse->qscore + nReverse - best_overlap, best_overlap);
1588
1589 reverse->data[best_overlap] = 0;
1590 reverse->qscore[best_overlap] = 0;
1591 }
1592 else
1593 {
1594 return (0);
1595 }
1596 break;
1597 case PEAR_MERGE_TRIM_BOTH:
1598 asm_len = best_overlap;
1599
1600 /* compute uncalled */
1601 for (j = 0; j < asm_len; ++ j)
1602 if ((DEGENERATE(forward->data[j])) && (DEGENERATE(reverse->data[nReverse - best_overlap + j]))) ++uncalled;
1603 uncalled /= asm_len;
1604
1605 if (asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1606 {
1607 assemble_overlap (forward, reverse, 0, nReverse - best_overlap, best_overlap, forward, sw);
1608
1609 forward->data[best_overlap] = 0;
1610 forward->qscore[best_overlap] = 0;
1611 PEAR_SET_OUT_TYPE(forward,PEAR_READ_OUT_SINGLE); /* flag that it's one piece */
1612 }
1613 else
1614 {
1615 return (0);
1616 }
1617 break;
1618 default:
1619 assert (0);
1620 }
1621
1622 return (1);
1623
1624 }
1625
1626
assembly_ef(fastqRead * forward,fastqRead * reverse,struct emp_freq * ef,struct user_args * sw)1627 static INLINE int assembly_ef (fastqRead * forward, fastqRead * reverse, struct emp_freq * ef, struct user_args * sw)
1628 {
1629 int
1630 nForward,
1631 nReverse,
1632 rc;
1633
1634 nForward = strlen (forward->data);
1635 nReverse = strlen (reverse->data);
1636
1637 /************************* |FORWARD| > |REVERSE| ********************************/
1638 if (nForward > nReverse)
1639 {
1640 rc = assembly_FORWARD_LONGER (forward, reverse, ef, sw, nForward, nReverse);
1641 }
1642
1643 /************************* |FORWARD| == |REVERSE| ********************************/
1644 else if (nForward == nReverse)
1645 {
1646 rc = assembly_READS_EQUAL (forward, reverse, ef, sw, nForward);
1647 }
1648 /************************* |FORWARD| < |REVERSE| ********************************/
1649 else
1650 {
1651 rc = assembly_REVERSE_LONGER (forward, reverse, ef, sw, nForward, nReverse);
1652 }
1653
1654 return (rc);
1655 }
1656
assembly(fastqRead * left,fastqRead * right,struct user_args * sw)1657 static INLINE int assembly (fastqRead * left, fastqRead * right, struct user_args * sw)
1658 {
1659 int i,j;
1660 int n;
1661 double score;
1662 double oes;
1663 double best_score = 0;
1664 double best_oes = 0;
1665 int best_overlap = 0; /* overlap of the best alignment */
1666 int run_through = 0;
1667 int nMatch;
1668 int asm_len = 0;
1669 int st_pass;
1670 double uncalled = 0;
1671
1672 n = strlen (left->data);
1673
1674 /* compute score for every overlap */
1675 score = 0;
1676 oes = 0;
1677 for (i = 0; i <= n; ++ i) /* the size of the overlap */
1678 {
1679 nMatch = 0;
1680 score = 0;
1681 oes = 0;
1682 for (j = 0; j < i; ++ j)
1683 {
1684 //scoring (left->data[n - i + j], right->data[j], left->qscore[n - i + j], right->qscore[j], sw->score_method, &score, &oes, match_score, mismatch_score);
1685 scoring_nm (left->data[n - i + j], right->data[j], left->qscore[n - i + j], right->qscore[j], sw->score_method, &score, &oes);
1686 if (left->data[n - i + j] == right->data[j]) ++nMatch;
1687 }
1688 if (score > best_score)
1689 {
1690 best_overlap = i;
1691 best_score = score;
1692 best_oes = oes;
1693 }
1694 }
1695
1696 /* compute for score for runthrough case */
1697 for (i = n - 1; i > 0; --i)
1698 {
1699 score = 0;
1700 oes = 0;
1701 nMatch = 0;
1702 for (j = 0; j < i; ++j)
1703 {
1704 //scoring (left->data[j], right->data[n - i + j], left->qscore[j], right->qscore[n - i + j], sw->score_method, &score, &oes, match_score, mismatch_score);
1705 scoring_nm (left->data[j], right->data[n - i + j], left->qscore[j], right->qscore[n - i + j], sw->score_method, &score, &oes);
1706 if (left->data[n - i + j] == right->data[j]) ++nMatch;
1707 }
1708
1709 if (score > best_score)
1710 {
1711 run_through = 1;
1712 best_overlap = i;
1713 best_score = score;
1714 best_oes = oes;
1715 }
1716 }
1717
1718
1719 if (sw->test == 1)
1720 {
1721 //st_pass = stat_test2 (sw->p_value, best_score, sw->min_overlap, 0.25);
1722 st_pass = stat_test2 (sw->p_value, best_oes, sw->min_overlap, 0.25);
1723 }
1724 else
1725 {
1726 //st_pass = stat_test2 (sw->p_value, best_score, best_overlap, 0.25);
1727 st_pass = stat_test2 (sw->p_value, best_oes, best_overlap, 0.25);
1728 }
1729
1730 if (!st_pass) return (0);
1731
1732
1733 /* do the assembly!!!! */
1734 if (!run_through)
1735 {
1736 if (best_overlap == 0)
1737 {
1738 asm_len = (n << 1);
1739
1740 for (j = 0; j < n; ++ j)
1741 if (DEGENERATE(left->data[j])) ++uncalled;
1742 for (j = 0; j < n; ++ j)
1743 if (DEGENERATE(right->data[j])) ++uncalled;
1744 uncalled /= asm_len;
1745
1746 if ((n << 1) - asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1747 {
1748 PEAR_SET_OUT_TYPE(left,PEAR_READ_OUT_BOTH);
1749 }
1750 else
1751 {
1752 return (0);
1753 }
1754 }
1755 else if (best_overlap == n )
1756 {
1757 asm_len = n;
1758
1759 for (j = 0; j < asm_len; ++ j)
1760 if ((DEGENERATE(left->data[j])) && (DEGENERATE(right->data[j]))) ++uncalled;
1761 uncalled /= asm_len;
1762
1763 if ((n << 1) - asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1764 {
1765 PEAR_SET_OUT_TYPE(left,PEAR_READ_OUT_SINGLE);
1766 assemble_overlap (left, right, 0, 0, n, left, sw);
1767 left->data[n] = 0;
1768 left->qscore[n] = 0;
1769 }
1770 else
1771 {
1772 return (0);
1773 }
1774 }
1775 else
1776 {
1777 asm_len = (n << 1) - best_overlap;
1778 for (j = 0; j < n - best_overlap; ++ j)
1779 if (DEGENERATE(left->data[j])) ++uncalled;
1780 for (j = n - best_overlap; j < n; ++ j)
1781 if ((DEGENERATE(left->data[j])) && (DEGENERATE(right->data[j - n + best_overlap]))) ++uncalled;
1782 for (j = best_overlap; j < n; ++ j)
1783 if (DEGENERATE(right->data[j])) ++uncalled;
1784 uncalled /= asm_len;
1785
1786 if ((n << 1) - asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1787 {
1788 PEAR_SET_OUT_TYPE(left,PEAR_READ_OUT_BOTH);
1789 assemble_overlap (left, right, n - best_overlap, 0, best_overlap, left, sw);
1790 memmove (right->data, right->data + best_overlap, n - best_overlap);
1791 memmove (right->qscore, right->qscore + best_overlap, n - best_overlap);
1792 /* THIS IS WRONG */
1793 //memcpy (right->data, right->data + best_overlap, n - best_overlap);
1794 //memcpy (right->qscore, right->qscore + best_overlap, n - best_overlap);
1795
1796 right->data[n - best_overlap] = 0;
1797 right->qscore[n - best_overlap] = 0;
1798 }
1799 else
1800 {
1801 return (0);
1802 }
1803 }
1804 } /* run-through case */
1805 else
1806 {
1807 asm_len = best_overlap;
1808
1809 /* compute uncalled */
1810 for (j = 0; j < asm_len; ++ j)
1811 if ((DEGENERATE(left->data[j])) && (DEGENERATE(right->data[n - best_overlap + j]))) ++uncalled;
1812 uncalled /= asm_len;
1813
1814 if (asm_len >= sw->min_overlap && asm_len >= sw->min_asm_len && asm_len <= sw->max_asm_len && uncalled <= sw->max_uncalled)
1815 {
1816 assemble_overlap (left, right, 0, n - best_overlap, best_overlap, left, sw);
1817
1818 left->data[best_overlap] = 0;
1819 left->qscore[best_overlap] = 0;
1820 PEAR_SET_OUT_TYPE(left,PEAR_READ_OUT_SINGLE); /* flag that it's one piece */
1821 }
1822 else
1823 {
1824 return (0);
1825 }
1826 }
1827
1828 /* TODO: Remember to reset *(let->data - 1) */
1829
1830 /* TODO: Optimize this in assemble_overlap? */
1831
1832 return (1);
1833 }
1834
1835 /** @brief Assemble the overlap
1836
1837 @param left
1838 @param right
1839 @param base_left Position in the forward read where the overlap starts
1840 @param base_right Position in the reverse read where the overlap starts
1841 @param ol_size Overlap size
1842 @param ai Buffer where to store the merge read
1843 */
1844
assemble_overlap(fastqRead * left,fastqRead * right,int base_left,int base_right,int ol_size,fastqRead * ai,struct user_args * sw)1845 static double assemble_overlap (fastqRead * left, fastqRead * right, int base_left, int base_right, int ol_size, fastqRead * ai, struct user_args * sw)
1846 {
1847 int i;
1848 char x, y;
1849 char qx, qy;
1850 //double exp_match = 0;
1851
1852 for (i = 0; i < ol_size; ++i)
1853 {
1854 x = left->data[base_left + i];
1855 y = right->data[base_right + i];
1856 qx = left->qscore[base_left + i];
1857 qy = right->qscore[base_right + i];
1858 if ((DEGENERATE(x)) && (DEGENERATE(y)))
1859 {
1860 //exp_match += 0.25; sm_len
1861 ai->data[base_left + i] = x | y;
1862 ai->qscore[base_left + i] = ( qx < qy ) ? qx : qy;
1863 }
1864 else if (DEGENERATE(x))
1865 {
1866 //exp_match += 0.25;
1867 ai->data[base_left + i] = y;
1868 ai->qscore[base_left + i] = qy;
1869 }
1870 else if (DEGENERATE(y))
1871 {
1872 //exp_match += 0.25;
1873 ai->data[base_left + i] = x;
1874 ai->qscore[base_left + i] = qx;
1875 }
1876 else
1877 {
1878 if (x == y)
1879 {
1880 //exp_match += (sc_eq[(int)qx][(int)qy] / match_score);
1881
1882 ai->data[base_left + i] = x;
1883 //ai->qscore[base_left + i] = (right->qscore[base_right + i] - sw->phred_base) + (left->qscore[base_left + i] - sw->phred_base) + sw->phred_base; //qs_mul[qx][qy];
1884 ai->qscore[base_left + i] = (right->qscore[base_right + i]) + (left->qscore[base_left + i]) - 1; //qs_mul[qx][qy];
1885 }
1886 else
1887 {
1888 //exp_match += (1 - sc_neq[(int)qx][(int)qy] / mismatch_score);
1889
1890 if (qx > qy)
1891 {
1892 if (sw->nbase) /* nbase option */
1893 {
1894 ai->data[base_left + i] = map_nt['N'];
1895 ai->qscore[base_left + i] = qx;
1896 }
1897 else
1898 {
1899 ai->data[base_left + i] = x;
1900 ai->qscore[base_left + i] = qx;
1901 }
1902 }
1903 else
1904 {
1905 if (sw->nbase) /* nbase option */
1906 {
1907 ai->data[base_left + i] = map_nt['N'];
1908 ai->qscore[base_left + i] = qy;
1909 }
1910 else
1911 {
1912 ai->data[base_left + i] = y;
1913 ai->qscore[base_left + i] = qy;
1914 }
1915 }
1916 }
1917 }
1918 }
1919 return (0);
1920 }
1921
mstrrev(char * s)1922 static void mstrrev (char * s)
1923 {
1924 char * q = s;
1925
1926 while (q && *q) ++q;
1927
1928 for (--q; s < q; ++s, --q)
1929 {
1930 *s = *s ^ *q;
1931 *q = *s ^ *q;
1932 *s = *s ^ *q;
1933 }
1934 }
1935
mstrcpl(char * s)1936 static void mstrcpl (char * s)
1937 {
1938 if (!s) return;
1939
1940 while (*s)
1941 {
1942 /* if the codes were 0,1,2,3,4 it would be
1943 that simple:
1944 *s = (*s >> 2) + (~*s & 3);
1945 */
1946
1947 /* with codes 1,2,3,4,5 it gets nasty: */
1948 *s = cpl_nt[(int)*s];
1949 /*
1950 *s = (((*s & 1) & (! (*s & 2))) << 2) |
1951 (((!((*s & 4) >> 2) ) & ((*s & 2) >> 1)) << 1 ) |
1952 (((!((*s & 4) >> 2)) & ((*s & 2) >> 1) & !(*s & 1)) |
1953 (((*s & 4) >> 2) & !((*s & 2) >> 1)));*/
1954 ++s;
1955 }
1956 }
1957
1958 #if 0
1959 static int validate_input (int nleft, int nright)
1960 {
1961 if (!nleft || !nright)
1962 {
1963 fprintf (stderr, "ERROR: At least one of the input files contains no records.\n");
1964 return (0);
1965 }
1966
1967 if (nleft != nright)
1968 {
1969 fprintf (stderr, "ERROR: Number of records in the two input files does not match.\n");
1970 return (0);
1971 }
1972
1973 return (1);
1974 }
1975 #endif
1976
1977 static char *
makefilename(const char * prefix,const char * suffix)1978 makefilename (const char * prefix, const char * suffix)
1979 {
1980 char * filename;
1981
1982 filename = (char *) malloc ((strlen(prefix) + strlen(suffix) + 1) * sizeof (char));
1983
1984 strcpy (filename, prefix);
1985 strcat (filename, suffix);
1986
1987 return (filename);
1988 }
1989
1990 /** @brief Write data to file
1991 *
1992 * Write the result of overlapping of a specific read to the corresponding output file
1993 *
1994 * @param fwd Structure containing the forward reads. Note that this structure is
1995 * mutable and has been modified to contain the new read which is the
1996 * result of the assembly.
1997 * @param rev Structure containing the reverse reads. Note that this structure is
1998 * mutable and has been modified to contain the new read which is the
1999 * result of the assembly.
2000 * @param elms Number of reads in the structure
2001 * @param fd Array of file descriptors of output files
2002 */
write_data(fastqRead ** fwd,fastqRead ** rev,unsigned int elms,FILE ** fd,struct user_args * sw)2003 static void write_data (fastqRead ** fwd, fastqRead ** rev, unsigned int elms, FILE ** fd, struct user_args * sw)
2004 {
2005 unsigned int i;
2006 char bothOut; /* this is set if both fwd[i] and rev[i] contain the resulting assembled read and qscore */
2007
2008 for ( i = 0; i < elms; ++ i)
2009 {
2010 bothOut = PEAR_DECODE_OUT_TYPE(fwd[i]);
2011 PEAR_RESET_OUT_TYPE(fwd[i]);
2012
2013 if (PEAR_DECODE_ASM_TYPE(fwd[i]) == PEAR_READ_ASSEMBLED) /* assembled */
2014 {
2015 PEAR_RESET_ASM_TYPE(fwd[i]);
2016 revconvert(fwd[i]->data);
2017 fprintf (fd[0], "%s\n", fwd[i]->header);
2018 if (!bothOut)
2019 {
2020 fprintf (fd[0], "%s\n", fwd[i]->data);
2021 }
2022 else
2023 {
2024 revconvert(rev[i]->data);
2025 fprintf (fd[0], "%s", fwd[i]->data);
2026 fprintf (fd[0], "%s\n", rev[i]->data);
2027 }
2028 fprintf (fd[0], "+\n");
2029
2030
2031 add_base_qscores(fwd[i]->qscore, sw->phred_base, sw->cap);
2032 if (!bothOut)
2033 {
2034 fprintf (fd[0], "%s\n", fwd[i]->qscore);
2035 }
2036 else
2037 {
2038 add_base_qscores(rev[i]->qscore, sw->phred_base, sw->cap);
2039 fprintf (fd[0], "%s", fwd[i]->qscore);
2040 fprintf (fd[0], "%s\n", rev[i]->qscore);
2041 }
2042
2043 ++ g_count_assembled;
2044 }
2045 else
2046 {
2047 add_base_qscores(fwd[i]->qscore, sw->phred_base, sw->cap);
2048 add_base_qscores(rev[i]->qscore, sw->phred_base, sw->cap);
2049 revconvert(fwd[i]->data);
2050 revconvert(rev[i]->data);
2051 if (PEAR_DECODE_ASM_TYPE(fwd[i]) == PEAR_READ_DISCARDED) /* discarded */
2052 {
2053 PEAR_RESET_ASM_TYPE(fwd[i]);
2054 /* discarded reads*/
2055 /* Maybe consider printing the untrimmed sequences */
2056 fprintf (fd[3], "%s\n", fwd[i]->header);
2057 fprintf (fd[3], "%s\n+\n%s\n", fwd[i]->data, fwd[i]->qscore);
2058 fprintf (fd[3], "%s\n", rev[i]->header);
2059 fprintf (fd[3], "%s\n+\n%s\n", rev[i]->data, rev[i]->qscore); /* printing the reverse compliment of the original sequence */
2060 ++ g_count_discarded;
2061 }
2062 else /* unassembled reads*/
2063 {
2064 PEAR_RESET_ASM_TYPE(fwd[i]);
2065 fprintf (fd[1], "%s\n", fwd[i]->header);
2066 fprintf (fd[2], "%s\n", rev[i]->header);
2067 fprintf (fd[1], "%s\n+\n%s\n", fwd[i]->data, fwd[i]->qscore);
2068 fprintf (fd[2], "%s\n+\n%s\n", rev[i]->data, rev[i]->qscore); /* printing the reverse compliment of the original sequence */
2069 ++ g_count_unassembled;
2070 }
2071 }
2072 }
2073 }
2074
flip_list(void)2075 static void flip_list (void)
2076 {
2077 memBlockInfo * elm1;
2078 memBlockInfo * elm2;
2079
2080 elm1 = thr_global.xblock;
2081 elm2 = thr_global.yblock;
2082
2083 thr_global.xblock = elm2;
2084 thr_global.yblock = elm1;
2085 }
2086
assign_reads(memBlockInfo * block,struct thread_local_t * thr_local)2087 static INLINE int assign_reads (memBlockInfo * block, struct thread_local_t * thr_local)
2088 {
2089 unsigned int r;
2090
2091 if (block->reads == block->processed) return (0);
2092
2093 r = THREAD_MIN_PACKET_SIZE + (rand() % THREAD_MIN_PACKET_SIZE);
2094 thr_local->block = block;
2095 thr_local->start = block->processed;
2096
2097 if (block->reads - block->processed <= r + THREAD_PACKET_SIZE_DELTA)
2098 {
2099 thr_local->end = block->reads;
2100 block->processed = block->reads;
2101 }
2102 else
2103 {
2104 thr_local->end = thr_local->start + r;
2105 block->processed = thr_local->start + r;
2106 }
2107 ++ block->threads;
2108 return (1);
2109 }
2110
entry_point_ef(void * data)2111 static void * entry_point_ef (void * data)
2112 {
2113 struct thread_local_t * thr_local;
2114 int ass, sleep, elms;
2115 unsigned int i;
2116 double uncalled_forward, uncalled_reverse;
2117
2118 /* initialization of values */
2119 thr_local = (struct thread_local_t *)data;
2120
2121
2122 while (1)
2123 {
2124 // TODO: The first condition in the next line is useless?
2125 if (thr_local->block == thr_global.xblock && thr_global.io_thread == thr_local->id)
2126 {
2127 pthread_mutex_lock (&cs_mutex_io);
2128 //fprintf (stdout, "."); fflush (stdout);
2129 fprintf (stdout,"\rAssemblying reads: %d\%%", (int) ((current_progress / (double)total_progress) * 100));
2130 fflush(stdout);
2131 write_data (thr_global.xblock->fwd->reads, thr_global.xblock->rev->reads, thr_global.xblock->reads, thr_global.fd, thr_local->sw);
2132 // TODO: read_data ();
2133 elms = db_get_next_reads (thr_global.xblock->fwd,
2134 thr_global.xblock->rev,
2135 thr_global.yblock->fwd,
2136 thr_global.yblock->rev,
2137 &inputFileSanity);
2138 //read_size = strlen (fwd_block.reads[0]->data);
2139 pthread_mutex_unlock (&cs_mutex_io);
2140
2141 pthread_mutex_lock (&cs_mutex_wnd);
2142 thr_global.xblock->reads = elms;
2143 thr_global.xblock->processed = 0;
2144 thr_global.io_thread = -1;
2145 thr_global.xblock->threads = 0;
2146 if (!elms) thr_global.finish = 1;
2147 flip_list ();
2148
2149 #ifdef __DEBUG__
2150 pthread_mutex_lock (&cs_mutex_out);
2151 printf ("READ %d elms\n", elms);
2152 printf ("WAKE_UP_ALL! (reads: %d processed: %d)\n", thr_global.xblock->reads, thr_global.xblock->processed);
2153 pthread_mutex_unlock (&cs_mutex_out);
2154 #endif
2155 // wakeup threads
2156 pthread_cond_broadcast (&cs_mutex_cond);
2157 pthread_mutex_unlock (&cs_mutex_wnd);
2158 }
2159
2160 sleep = 1;
2161
2162 pthread_mutex_lock (&cs_mutex_wnd);
2163 if (thr_global.xblock->reads == thr_global.xblock->processed)
2164 {
2165 if (thr_global.finish)
2166 {
2167 if (!thr_global.xblock->threads)
2168 {
2169 thr_global.xblock->threads = 1;
2170 pthread_mutex_unlock (&cs_mutex_wnd);
2171 // printf ("!!!!!!!!!! reads: %d processed: %d\n", thr_global.xblock->reads, thr_global.xblock->processed);
2172 // printf ("!!!!!!!!!! Writing another %d reads\n", thr_global.xblock->reads);
2173 write_data (thr_global.xblock->fwd->reads, thr_global.xblock->rev->reads, thr_global.xblock->reads, thr_global.fd, thr_local->sw);
2174 // printf ("Finsihed\n");
2175 break;
2176 }
2177 pthread_mutex_unlock (&cs_mutex_wnd);
2178 break;
2179 }
2180 /* is this the last thread using the current buffer? */
2181 if (thr_global.xblock->threads == 0 && thr_global.io_thread == -1 && thr_global.finish == 0)
2182 {
2183 #ifdef __DEBUG__
2184 pthread_mutex_lock (&cs_mutex_out);
2185 printf ("ASSIGNED IO THREAD %d\n", thr_local->id);
2186 pthread_mutex_unlock (&cs_mutex_out);
2187 #endif
2188 thr_global.io_thread = thr_local->id;
2189 thr_local->block = thr_global.xblock;
2190 sleep = 0;
2191 }
2192 else
2193 {
2194 if (assign_reads (thr_global.yblock, thr_local)) sleep = 0;
2195 #ifdef __DEBUG__
2196 if (!sleep) {
2197 pthread_mutex_lock (&cs_mutex_out);
2198 printf ("ASSIGNED y READS to %d (%d - %d) Threads: %d\n", thr_local->id, thr_local->start, thr_local->end, thr_global.yblock->threads);
2199 pthread_mutex_unlock (&cs_mutex_out);}
2200 #endif
2201 }
2202 }
2203 else
2204 {
2205 if (assign_reads (thr_global.xblock,thr_local)) sleep = 0;
2206 #ifdef __DEBUG__
2207 if (!sleep) {
2208 pthread_mutex_lock (&cs_mutex_out);
2209 printf ("ASSIGNED x READS to %d (%d - %d) Threads: %d\n", thr_local->id, thr_local->start, thr_local->end, thr_global.xblock->threads);
2210 pthread_mutex_unlock (&cs_mutex_out);}
2211 #endif
2212 }
2213 if (sleep)
2214 {
2215 #ifdef __DEBUG__
2216 pthread_mutex_lock (&cs_mutex_out);
2217 printf ("Sleeping %d\n", thr_local->id);
2218 pthread_mutex_unlock (&cs_mutex_out);
2219 #endif
2220 pthread_cond_wait (&cs_mutex_cond, &cs_mutex_wnd);
2221 #ifdef __DEBUG__
2222 pthread_mutex_lock (&cs_mutex_out);
2223 printf ("WAKING %d\n", thr_local->id);
2224 pthread_mutex_unlock (&cs_mutex_out);
2225 #endif
2226 }
2227 pthread_mutex_unlock (&cs_mutex_wnd);
2228
2229 if (!sleep && thr_global.io_thread != thr_local->id)
2230 {
2231 // TODO: Process reads and make this an inline function
2232 for (i = thr_local->start; i < thr_local->end; ++ i)
2233 {
2234 convert (thr_local->block->fwd->reads[i]->data);
2235 convert (thr_local->block->rev->reads[i]->data);
2236 remove_base_qscores (thr_local->block->fwd->reads[i]->qscore, thr_local->sw->phred_base);
2237 remove_base_qscores (thr_local->block->rev->reads[i]->qscore, thr_local->sw->phred_base);
2238
2239 mstrrev (thr_local->block->rev->reads[i]->data); /* reverse the sequence */
2240 mstrcpl (thr_local->block->rev->reads[i]->data); /* complement the sequence */
2241 mstrrev (thr_local->block->rev->reads[i]->qscore); /* reverse the quality scores */
2242
2243 //TODO Switch for empirical frequencies
2244 ass = assembly_ef (thr_local->block->fwd->reads[i], thr_local->block->rev->reads[i], thr_local->ef, thr_local->sw);
2245 PEAR_SET_ASM_TYPE(thr_local->block->fwd->reads[i],ass);
2246 if (!ass)
2247 {
2248 if (trim (thr_local->block->fwd->reads[i], thr_local->sw, &uncalled_forward) < thr_local->sw->min_trim_len ||
2249 trim_cpl (thr_local->block->rev->reads[i], thr_local->sw, &uncalled_reverse) < thr_local->sw->min_trim_len ||
2250 uncalled_forward >= thr_local->sw->max_uncalled || uncalled_reverse >= thr_local->sw->max_uncalled)
2251 {
2252 PEAR_SET_ASM_TYPE(thr_local->block->fwd->reads[i],PEAR_READ_DISCARDED);
2253 }
2254 else
2255 {
2256 PEAR_SET_ASM_TYPE(thr_local->block->fwd->reads[i],PEAR_READ_UNASSEMBLED);
2257 }
2258 }
2259 }
2260
2261 pthread_mutex_lock (&cs_mutex_wnd);
2262 -- thr_local->block->threads;
2263 #ifdef __DEBUG__
2264 pthread_mutex_lock (&cs_mutex_out);
2265 printf ("Finished %d (Remaining threads: %d)\n", thr_local->id, thr_local->block->threads);
2266 pthread_mutex_unlock (&cs_mutex_out);
2267 #endif
2268 pthread_mutex_unlock (&cs_mutex_wnd);
2269 }
2270 // pthread_mutex_lock (&cs_mutex_out);
2271 // printf ("Thread: %d\n", thr_local->id);
2272 // pthread_mutex_unlock (&cs_mutex_out);
2273 }
2274 return (NULL);
2275 }
entry_point(void * data)2276 static void * entry_point (void * data)
2277 { struct thread_local_t * thr_local;
2278 int ass, sleep, elms;
2279 unsigned int i;
2280 double uncalled_forward, uncalled_reverse;
2281
2282 /* initialization of values */
2283 thr_local = (struct thread_local_t *)data;
2284
2285
2286 while (1)
2287 {
2288 // TODO: The first condition in the next line is useless?
2289 if (thr_local->block == thr_global.xblock && thr_global.io_thread == thr_local->id)
2290 {
2291 pthread_mutex_lock (&cs_mutex_io);
2292 fprintf (stdout, "."); fflush (stdout);
2293 write_data (thr_global.xblock->fwd->reads, thr_global.xblock->rev->reads, thr_global.xblock->reads, thr_global.fd, thr_local->sw);
2294 // TODO: read_data ();
2295 elms = db_get_next_reads (thr_global.xblock->fwd,
2296 thr_global.xblock->rev,
2297 thr_global.yblock->fwd,
2298 thr_global.yblock->rev,
2299 &inputFileSanity);
2300 //read_size = strlen (fwd_block.reads[0]->data);
2301 pthread_mutex_unlock (&cs_mutex_io);
2302
2303 pthread_mutex_lock (&cs_mutex_wnd);
2304 thr_global.xblock->reads = elms;
2305 thr_global.xblock->processed = 0;
2306 thr_global.io_thread = -1;
2307 thr_global.xblock->threads = 0;
2308 if (!elms) thr_global.finish = 1;
2309 flip_list ();
2310
2311 #ifdef __DEBUG__
2312 pthread_mutex_lock (&cs_mutex_out);
2313 printf ("READ %d elms\n", elms);
2314 printf ("WAKE_UP_ALL! (reads: %d processed: %d)\n", thr_global.xblock->reads, thr_global.xblock->processed);
2315 pthread_mutex_unlock (&cs_mutex_out);
2316 #endif
2317 // wakeup threads
2318 pthread_cond_broadcast (&cs_mutex_cond);
2319 pthread_mutex_unlock (&cs_mutex_wnd);
2320 }
2321
2322 sleep = 1;
2323
2324 pthread_mutex_lock (&cs_mutex_wnd);
2325 if (thr_global.xblock->reads == thr_global.xblock->processed)
2326 {
2327 if (thr_global.finish)
2328 {
2329 if (!thr_global.xblock->threads)
2330 {
2331 thr_global.xblock->threads = 1;
2332 pthread_mutex_unlock (&cs_mutex_wnd);
2333 write_data (thr_global.xblock->fwd->reads, thr_global.xblock->rev->reads, thr_global.xblock->reads, thr_global.fd, thr_local->sw);
2334 }
2335 pthread_mutex_unlock (&cs_mutex_wnd);
2336 break;
2337 }
2338 /* is this the last thread using the current buffer? */
2339 if (thr_global.xblock->threads == 0 && thr_global.io_thread == -1)
2340 {
2341 #ifdef __DEBUG__
2342 pthread_mutex_lock (&cs_mutex_out);
2343 printf ("ASSIGNED IO THREAD %d\n", thr_local->id);
2344 pthread_mutex_unlock (&cs_mutex_out);
2345 #endif
2346 thr_global.io_thread = thr_local->id;
2347 thr_local->block = thr_global.xblock;
2348 sleep = 0;
2349 }
2350 else
2351 {
2352 if (assign_reads (thr_global.yblock, thr_local)) sleep = 0;
2353 #ifdef __DEBUG__
2354 if (!sleep) {
2355 pthread_mutex_lock (&cs_mutex_out);
2356 printf ("ASSIGNED y READS to %d (%d - %d) Threads: %d\n", thr_local->id, thr_local->start, thr_local->end, thr_global.yblock->threads);
2357 pthread_mutex_unlock (&cs_mutex_out);}
2358 #endif
2359 }
2360 }
2361 else
2362 {
2363 if (assign_reads (thr_global.xblock,thr_local)) sleep = 0;
2364 #ifdef __DEBUG__
2365 if (!sleep) {
2366 pthread_mutex_lock (&cs_mutex_out);
2367 printf ("ASSIGNED x READS to %d (%d - %d) Threads: %d\n", thr_local->id, thr_local->start, thr_local->end, thr_global.xblock->threads);
2368 pthread_mutex_unlock (&cs_mutex_out);}
2369 #endif
2370 }
2371 if (sleep)
2372 {
2373 #ifdef __DEBUG__
2374 pthread_mutex_lock (&cs_mutex_out);
2375 printf ("Sleeping %d\n", thr_local->id);
2376 pthread_mutex_unlock (&cs_mutex_out);
2377 #endif
2378 pthread_cond_wait (&cs_mutex_cond, &cs_mutex_wnd);
2379 #ifdef __DEBUG__
2380 pthread_mutex_lock (&cs_mutex_out);
2381 printf ("WAKING %d\n", thr_local->id);
2382 pthread_mutex_unlock (&cs_mutex_out);
2383 #endif
2384 }
2385 pthread_mutex_unlock (&cs_mutex_wnd);
2386
2387 if (!sleep && thr_global.io_thread != thr_local->id)
2388 {
2389 // TODO: Process reads and make this an inline function
2390 for (i = thr_local->start; i < thr_local->end; ++ i)
2391 {
2392 convert (thr_local->block->fwd->reads[i]->data);
2393 convert (thr_local->block->rev->reads[i]->data);
2394 remove_base_qscores (thr_local->block->fwd->reads[i]->qscore, thr_local->sw->phred_base);
2395 remove_base_qscores (thr_local->block->rev->reads[i]->qscore, thr_local->sw->phred_base);
2396
2397 mstrrev (thr_local->block->rev->reads[i]->data); /* reverse the sequence */
2398 mstrcpl (thr_local->block->rev->reads[i]->data); /* complement the sequence */
2399 mstrrev (thr_local->block->rev->reads[i]->qscore); /* reverse the quality scores */
2400
2401 //TODO Switch for empirical frequencies
2402 ass = assembly (thr_local->block->fwd->reads[i], thr_local->block->rev->reads[i], thr_local->sw);
2403 PEAR_SET_ASM_TYPE(thr_local->block->fwd->reads[i],ass);
2404 if (!ass)
2405 {
2406 if (trim (thr_local->block->fwd->reads[i], thr_local->sw, &uncalled_forward) < thr_local->sw->min_trim_len ||
2407 trim_cpl (thr_local->block->rev->reads[i], thr_local->sw, &uncalled_reverse) < thr_local->sw->min_trim_len ||
2408 uncalled_forward >= thr_local->sw->max_uncalled || uncalled_reverse >= thr_local->sw->max_uncalled)
2409 {
2410 PEAR_SET_ASM_TYPE(thr_local->block->fwd->reads[i],PEAR_READ_DISCARDED);
2411 }
2412 else
2413 {
2414 PEAR_SET_ASM_TYPE(thr_local->block->fwd->reads[i],PEAR_READ_UNASSEMBLED);
2415 }
2416 }
2417 }
2418
2419 pthread_mutex_lock (&cs_mutex_wnd);
2420 -- thr_local->block->threads;
2421 #ifdef __DEBUG__
2422 pthread_mutex_lock (&cs_mutex_out);
2423 printf ("Finished %d (Remaining threads: %d)\n", thr_local->id, thr_local->block->threads);
2424 pthread_mutex_unlock (&cs_mutex_out);
2425 #endif
2426 pthread_mutex_unlock (&cs_mutex_wnd);
2427 }
2428 // pthread_mutex_lock (&cs_mutex_out);
2429 // printf ("Thread: %d\n", thr_local->id);
2430 // pthread_mutex_unlock (&cs_mutex_out);
2431 }
2432 return (NULL);
2433 }
2434
init_thr_global(void)2435 static void init_thr_global (void)
2436 {
2437 thr_global.xblock = (memBlockInfo *) calloc (1,sizeof(memBlockInfo));
2438 thr_global.yblock = (memBlockInfo *) calloc (1,sizeof(memBlockInfo));
2439 thr_global.xblock->fwd = (memBlock *) calloc (1,sizeof(memBlock));
2440 thr_global.xblock->rev = (memBlock *) calloc (1,sizeof(memBlock));
2441 thr_global.yblock->fwd = (memBlock *) calloc (1,sizeof(memBlock));
2442 thr_global.yblock->rev = (memBlock *) calloc (1,sizeof(memBlock));
2443
2444 thr_global.xblock->reads = 0;
2445 thr_global.xblock->processed = 0;
2446 thr_global.xblock->threads = 0;
2447 thr_global.yblock->reads = 0;
2448 thr_global.yblock->processed = 0;
2449 thr_global.yblock->threads = 0;
2450
2451 thr_global.io_thread = -1;
2452 thr_global.finish = 0;
2453 }
2454
close_output_files(void)2455 static void close_output_files (void)
2456 {
2457 fclose (thr_global.fd[0]);
2458 fclose (thr_global.fd[1]);
2459 fclose (thr_global.fd[2]);
2460 fclose (thr_global.fd[3]);
2461 }
2462
free_global_thread_memory(void)2463 static void free_global_thread_memory (void)
2464 {
2465 free (thr_global.xblock->fwd);
2466 free (thr_global.xblock->rev);
2467 free (thr_global.yblock->fwd);
2468 free (thr_global.yblock->rev);
2469 free (thr_global.xblock);
2470 free (thr_global.yblock);
2471 }
2472
destroy_thr_global(void)2473 static void destroy_thr_global (void)
2474 {
2475 close_output_files();
2476
2477 /* free memory */
2478 free_global_thread_memory ();
2479 }
2480
2481 /** @brief Initialize output file names
2482
2483 Create output file names based on input parameters, open them for writing
2484 and store the file pointers to the global thread structures
2485
2486 @param sw
2487 Parsed command-line parameters given by the user
2488
2489 @return
2490 Returns 1 in case of success, 0 if error
2491 */
init_files(struct user_args * sw)2492 static int init_files (struct user_args * sw)
2493 {
2494 int i, j;
2495 char * out[4];
2496
2497 /* construct output file names */
2498 for (i = 0; i < NUM_OF_OUTFILES; ++ i)
2499 {
2500 out[i] = makefilename (sw->outfile, outfile_extensions[i]);
2501 thr_global.fd[i] = fopen (out[i], "w");
2502 if (!thr_global.fd[i])
2503 {
2504 fprintf (stderr, "[ERROR]: Cannot create file %s.\n "
2505 "Check whether a) directory exists and b) there is enough space\n", out[i]);
2506 free (out[i]);
2507 for (j = 0; j < i; ++ j) fclose (thr_global.fd[j]);
2508 return (0);
2509 }
2510 free (out[i]);
2511 }
2512
2513 return (1);
2514 }
2515
emp_entry_point(void * data)2516 static void * emp_entry_point (void * data)
2517 {
2518 struct thread_local_t * thr_local;
2519 int j, sleep, elms;
2520 unsigned int i;
2521 struct emp_freq * ef;
2522 fastqRead * reads[2];
2523 char * seq;
2524
2525 /* initialization of values */
2526 thr_local = (struct thread_local_t *)data;
2527
2528 ef = (struct emp_freq *) calloc (1, sizeof (struct emp_freq));
2529
2530 while (1)
2531 {
2532 // TODO: The first condition in the next line is useless?
2533 if (thr_local->block == thr_global.xblock && thr_global.io_thread == thr_local->id)
2534 {
2535 pthread_mutex_lock (&cs_mutex_io);
2536 //write_data (thr_global.xblock->fwd->reads, thr_global.xblock->rev->reads, thr_global.xblock->reads, thr_global.fd);
2537 // TODO: read_data ();
2538 elms = db_get_next_reads (thr_global.xblock->fwd,
2539 thr_global.xblock->rev,
2540 thr_global.yblock->fwd,
2541 thr_global.yblock->rev,
2542 &inputFileSanity);
2543 //read_size = strlen (fwd_block.reads[0]->data);
2544 pthread_mutex_unlock (&cs_mutex_io);
2545
2546 pthread_mutex_lock (&cs_mutex_wnd);
2547 thr_global.xblock->reads = elms;
2548 thr_global.xblock->processed = 0;
2549 thr_global.io_thread = -1;
2550 thr_global.xblock->threads = 0;
2551 if (!elms) thr_global.finish = 1;
2552 flip_list ();
2553
2554 #ifdef __DEBUG__
2555 pthread_mutex_lock (&cs_mutex_out);
2556 printf ("READ %d elms\n", elms);
2557 printf ("WAKE_UP_ALL! (reads: %d processed: %d)\n", thr_global.xblock->reads, thr_global.xblock->processed);
2558 pthread_mutex_unlock (&cs_mutex_out);
2559 #endif
2560 // wakeup threads
2561 pthread_cond_broadcast (&cs_mutex_cond);
2562 pthread_mutex_unlock (&cs_mutex_wnd);
2563 }
2564
2565 sleep = 1;
2566
2567 pthread_mutex_lock (&cs_mutex_wnd);
2568 if (thr_global.xblock->reads == thr_global.xblock->processed)
2569 {
2570 if (thr_global.finish)
2571 {
2572 pthread_mutex_unlock (&cs_mutex_wnd);
2573 break;
2574 }
2575 /*
2576 if (thr_global.finish)
2577 {
2578 pthread_mutex_unlock (&cs_mutex_wnd);
2579 if (!thr_global.xblock->threads)
2580 {
2581 write_data (thr_global.xblock->fwd->reads, thr_global.xblock->rev->reads, thr_global.xblock->reads, thr_global.fd);
2582 }
2583 break;
2584 }
2585 */
2586 /* is this the last thread using the current buffer? */
2587 if (thr_global.xblock->threads == 0 && thr_global.io_thread == -1)
2588 {
2589 #ifdef __DEBUG__
2590 pthread_mutex_lock (&cs_mutex_out);
2591 printf ("ASSIGNED IO THREAD %d\n", thr_local->id);
2592 pthread_mutex_unlock (&cs_mutex_out);
2593 #endif
2594 thr_global.io_thread = thr_local->id;
2595 thr_local->block = thr_global.xblock;
2596 sleep = 0;
2597 }
2598 else
2599 {
2600 if (assign_reads (thr_global.yblock, thr_local)) sleep = 0;
2601 #ifdef __DEBUG__
2602 if (!sleep) {
2603 pthread_mutex_lock (&cs_mutex_out);
2604 printf ("ASSIGNED y READS to %d (%d - %d) Threads: %d\n", thr_local->id, thr_local->start, thr_local->end, thr_global.yblock->threads);
2605 pthread_mutex_unlock (&cs_mutex_out);}
2606 #endif
2607 }
2608 }
2609 else
2610 {
2611 if (assign_reads (thr_global.xblock,thr_local)) sleep = 0;
2612 #ifdef __DEBUG__
2613 if (!sleep) {
2614 pthread_mutex_lock (&cs_mutex_out);
2615 printf ("ASSIGNED x READS to %d (%d - %d) Threads: %d\n", thr_local->id, thr_local->start, thr_local->end, thr_global.xblock->threads);
2616 pthread_mutex_unlock (&cs_mutex_out);}
2617 #endif
2618 }
2619 if (sleep)
2620 {
2621 #ifdef __DEBUG__
2622 pthread_mutex_lock (&cs_mutex_out);
2623 printf ("Sleeping %d\n", thr_local->id);
2624 pthread_mutex_unlock (&cs_mutex_out);
2625 #endif
2626 pthread_cond_wait (&cs_mutex_cond, &cs_mutex_wnd);
2627 #ifdef __DEBUG__
2628 pthread_mutex_lock (&cs_mutex_out);
2629 printf ("WAKING %d\n", thr_local->id);
2630 pthread_mutex_unlock (&cs_mutex_out);
2631 #endif
2632 }
2633 pthread_mutex_unlock (&cs_mutex_wnd);
2634
2635 if (!sleep && thr_global.io_thread != thr_local->id)
2636 {
2637 // TODO: Process reads and make this an inline function
2638 for (i = thr_local->start; i < thr_local->end; ++ i)
2639 {
2640 reads[0] = thr_local->block->fwd->reads[i];
2641 reads[1] = thr_local->block->rev->reads[i];
2642 for (j = 0; j < 2; ++ j)
2643 {
2644 seq = reads[j]->data;
2645 while (*seq)
2646 {
2647 switch (*seq)
2648 {
2649 case 'A':
2650 case 'a':
2651 ++ ef->freqa;
2652 break;
2653
2654 case 'C':
2655 case 'c':
2656 ++ ef->freqc;
2657 break;
2658
2659 case 'G':
2660 case 'g':
2661 ++ ef->freqg;
2662 break;
2663
2664 case 'T':
2665 case 't':
2666 ++ ef->freqt;
2667 break;
2668 default: /* uncalled bases */
2669 ++ ef->freqn;
2670 break;
2671
2672 }
2673 ++ seq;
2674 }
2675 }
2676 }
2677
2678 pthread_mutex_lock (&cs_mutex_wnd);
2679 -- thr_local->block->threads;
2680 #ifdef __DEBUG__
2681 pthread_mutex_lock (&cs_mutex_out);
2682 printf ("Finished %d (Remaining threads: %d)\n", thr_local->id, thr_local->block->threads);
2683 pthread_mutex_unlock (&cs_mutex_out);
2684 #endif
2685 pthread_mutex_unlock (&cs_mutex_wnd);
2686 }
2687 // pthread_mutex_lock (&cs_mutex_out);
2688 // printf ("Thread: %d\n", thr_local->id);
2689 // pthread_mutex_unlock (&cs_mutex_out);
2690 }
2691 // pthread_mutex_lock (&cs_mutex_out);
2692
2693 // printf ("thread: %2d A: %d C: %d G: %d T: %d\n", thr_local->id, ef->freqa, ef->freqc, ef->freqg, ef->freqt);
2694 // pthread_mutex_unlock (&cs_mutex_out);
2695 return (ef);
2696 }
2697
DisplayInstance(struct user_args * sw)2698 static void DisplayInstance (struct user_args * sw)
2699 {
2700 fprintf (stdout, " ____ _____ _ ____ \n");
2701 fprintf (stdout, "| _ \\| ____| / \\ | _ \\\n");
2702 fprintf (stdout, "| |_) | _| / _ \\ | |_) |\n");
2703 fprintf (stdout, "| __/| |___ / ___ \\| _ <\n");
2704 fprintf (stdout, "|_| |_____/_/ \\_\\_| \\_\\\n\n");
2705 fprintf (stdout, "%s v%s [%s]\n\n", PROGRAM_NAME, PROGRAM_VERSION, VERSION_DATE);
2706 fprintf (stdout, "Citation - PEAR: a fast and accurate Illumina Paired-End reAd mergeR\n");
2707 fprintf (stdout, "Zhang et al (2014) Bioinformatics 30(5): 614-620 | doi:10.1093/bioinformatics/btt593\n\n");
2708 fprintf (stdout, "Forward reads file.................: %s\n", sw->fastq_left);
2709 fprintf (stdout, "Reverse reads file.................: %s\n", sw->fastq_right);
2710 fprintf (stdout, "PHRED..............................: %d\n", sw->phred_base);
2711 fprintf (stdout, "Using empirical frequencies........: %s\n", sw->emp_freqs ? "YES" : "NO");
2712 fprintf (stdout, "Statistical method.................: %s\n", (sw->test - 1) ? "Acceptance probability" : "OES");
2713 fprintf (stdout, "Maximum assembly length............: %d\n", sw->max_asm_len);
2714 fprintf (stdout, "Minimum assembly length............: %d\n", sw->min_asm_len);
2715 fprintf (stdout, "p-value............................: %f\n", sw->p_value);
2716 fprintf (stdout, "Quality score threshold (trimming).: %d\n", sw->qual_thres);
2717 fprintf (stdout, "Minimum read size after trimming...: %d\n", sw->min_trim_len);
2718 fprintf (stdout, "Maximal ratio of uncalled bases....: %f\n", sw->max_uncalled);
2719 fprintf (stdout, "Minimum overlap....................: %d\n", sw->min_overlap);
2720 fprintf (stdout, "Scoring method.....................: ");
2721 if (sw->score_method == 1)
2722 {
2723 fprintf (stdout, "OES with match = +1 and mismatch = -1\n");
2724 }
2725 else if (sw->score_method == 2)
2726 {
2727 fprintf (stdout, "Scaled score\n");
2728 }
2729 else
2730 {
2731 fprintf (stdout, "Ignore quality scores, match = +1 and mismatch = -1\n");
2732 }
2733 fprintf (stdout, "Threads............................: %d\n\n", sw->threads);
2734
2735
2736 }
2737
main(int argc,char * argv[])2738 int main (int argc, char * argv[])
2739 {
2740 int i;
2741 struct user_args sw;
2742 struct emp_freq * ef;
2743 struct thread_local_t * thr_data;
2744 double
2745 a = 0,
2746 c = 0,
2747 g = 0,
2748 t = 0,
2749 n = 0;
2750 pthread_t * tid;
2751 unsigned int
2752 blockElements;
2753
2754 /* parse command-line arguments */
2755 if (!decode_switches (argc, argv, &sw))
2756 {
2757 /* TODO: Validate reads names */
2758 usage ();
2759 return (EXIT_FAILURE);
2760 }
2761
2762 /* Display PEAR instance settings */
2763 DisplayInstance (&sw);
2764
2765 /* check consistency of input files */
2766 if (!pear_check_files (sw.fastq_left, sw.fastq_right))
2767 {
2768 return (EXIT_FAILURE);
2769 }
2770
2771 init_thr_global ();
2772 thr_data = (struct thread_local_t *) calloc (sw.threads, sizeof (struct thread_local_t));
2773 tid = (pthread_t *) malloc (sw.threads * sizeof (pthread_t));
2774 if (!init_files (&sw))
2775 {
2776 free_global_thread_memory ();
2777 free (tid);
2778 free (thr_data);
2779 return (0);
2780 }
2781
2782 /* Initialize read buffers */
2783 init_fastq_reader_double_buffer (sw.fastq_left,
2784 sw.fastq_right,
2785 sw.memory,
2786 thr_global.xblock->fwd,
2787 thr_global.xblock->rev,
2788 thr_global.yblock->fwd,
2789 thr_global.yblock->rev);
2790
2791 if (sw.emp_freqs)
2792 {
2793 printf ("Computing empirical frequencies....: ");
2794 fflush (stdout);
2795 blockElements = db_get_next_reads (thr_global.yblock->fwd,
2796 thr_global.yblock->rev,
2797 thr_global.xblock->fwd,
2798 thr_global.xblock->rev,
2799 &inputFileSanity);
2800
2801 thr_global.yblock->reads = blockElements;
2802 thr_global.yblock->processed = 0;
2803
2804 for (i = 0; i < sw.threads; ++ i)
2805 {
2806 thr_data[i].block = thr_global.xblock;
2807 thr_data[i].id = i;
2808 thr_data[i].sw = &sw;
2809 thr_data[i].start = 0;
2810 thr_data[i].end = 0;
2811 pthread_create (&tid[i], NULL, emp_entry_point, (void *)&thr_data[i]);
2812 }
2813 for (i = 0; i < sw.threads; ++ i)
2814 {
2815 pthread_join (tid[i], (void **)&ef);
2816 a += ef->freqa; c += ef->freqc; g += ef->freqg; t += ef->freqt; n += ef->freqn;
2817 free (ef);
2818 // printf ("tid: %d ef->a: %d ef->c: %d ef->g: %d ef->t: %d\n", (int)tid[i], ef->freqa, ef->freqc, ef->freqg, ef->freqt);
2819 }
2820 ef = (struct emp_freq *)malloc (sizeof(struct emp_freq));
2821 ef->freqa = a; ef->freqc = c; ef->freqg = g; ef->freqt = t; ef->freqn = n;
2822 ef->total = a + c + g + t;
2823 ef->pa = ef->freqa / ef->total; ef->pc = ef->freqc / ef->total; ef->pg = ef->freqg / ef->total; ef->pt = ef->freqt / ef->total;
2824 ef->q = ef->pa * ef->pa + ef->pc * ef->pc + ef->pg * ef->pg + ef->pt * ef->pt;
2825 printf ("DONE\n");
2826 printf (" A: %f\n C: %f\n G: %f\n T: %f\n", ef->pa, ef->pc, ef->pg, ef->pt);
2827 printf (" %ld uncalled bases\n", ef->freqn); /* TODO: Note that only N's are reported */
2828 rewind_files (sw.fastq_left, sw.fastq_right);
2829 thr_global.xblock->fwd->unread = thr_global.xblock->rev->unread = NULL;
2830 thr_global.yblock->fwd->unread = thr_global.yblock->rev->unread = NULL;
2831 thr_global.yblock->reads = thr_global.xblock->reads = 0;
2832 thr_global.yblock->processed = thr_global.xblock->processed = 0;
2833 thr_global.finish = 0;
2834 thr_global.xblock->reads = 0;
2835 thr_global.xblock->processed = 0;
2836 thr_global.xblock->threads = 0;
2837 thr_global.yblock->reads = 0;
2838 thr_global.yblock->processed = 0;
2839 thr_global.yblock->threads = 0;
2840
2841
2842 thr_global.io_thread = -1;
2843 thr_global.finish = 0;
2844
2845 if (inputFileSanity)
2846 {
2847 fprintf (stderr, "%s", sanityCheckMessage[inputFileSanity - 1]);
2848 }
2849 inputFileSanity = 0;
2850
2851 }
2852 else
2853 {
2854 ef = (struct emp_freq *)malloc (sizeof(struct emp_freq));
2855 ef->freqa = ef->freqc = ef->freqg = ef->freqt = ef->total = ef->pa = ef->pc = ef->pg = ef->pt = ef->q = 0.25;
2856 sw.emp_freqs = 1;
2857 }
2858
2859 init_scores(ef);
2860 current_progress = 0;
2861
2862 //fprintf (stdout, "Assemblying reads..................: [");
2863 fprintf (stdout, "Assemblying reads: 0\%%");
2864 fflush (stdout);
2865
2866 blockElements = db_get_next_reads (thr_global.yblock->fwd,
2867 thr_global.yblock->rev,
2868 thr_global.xblock->fwd,
2869 thr_global.xblock->rev,
2870 &inputFileSanity);
2871
2872 thr_global.yblock->reads = blockElements;
2873 thr_global.yblock->processed = 0;
2874
2875 /* pthreads entry point */
2876 for (i = 0; i < sw.threads; ++ i)
2877 {
2878 thr_data[i].block = thr_global.xblock;
2879 thr_data[i].id = i;
2880 thr_data[i].sw = &sw;
2881 // thr_data[i].match_score = match_score;
2882 // thr_data[i].mismatch_score = mismatch_score;
2883 thr_data[i].start = 0;
2884 thr_data[i].end = 0;
2885 thr_data[i].ef = ef;
2886 if (sw.emp_freqs)
2887 pthread_create (&tid[i], NULL, entry_point_ef, (void *)&thr_data[i]);
2888 else
2889 pthread_create (&tid[i], NULL, entry_point, (void *)&thr_data[i]);
2890 }
2891
2892 for (i = 0; i < sw.threads; ++ i)
2893 {
2894 pthread_join (tid[i], NULL);
2895 }
2896 printf ("\n\n");
2897
2898 g_count_total = g_count_assembled + g_count_discarded + g_count_unassembled;
2899 printf ("Assembled reads ...................: ");
2900 print_number (g_count_assembled);
2901 printf (" / ");
2902 print_number (g_count_total);
2903 printf (" (%.3f%%)\n", ((double)g_count_assembled / g_count_total) * 100);
2904
2905 printf ("Discarded reads ...................: ");
2906 print_number (g_count_discarded);
2907 printf (" / ");
2908 print_number (g_count_total);
2909 printf (" (%.3f%%)\n", ((double)g_count_discarded / g_count_total) * 100);
2910
2911 printf ("Not assembled reads ...............: ");
2912 print_number (g_count_unassembled);
2913 printf (" / ");
2914 print_number (g_count_total);
2915 printf (" (%.3f%%)\n", ((double)g_count_unassembled / g_count_total) * 100);
2916
2917
2918
2919 printf ("Assembled reads file...............: %s%s\n", sw.outfile, ".assembled.fastq");
2920 printf ("Discarded reads file...............: %s%s\n", sw.outfile, ".discarded.fastq");
2921 printf ("Unassembled forward reads file.....: %s%s\n", sw.outfile, ".unassembled.forward.fastq");
2922 printf ("Unassembled reverse reads file.....: %s%s\n", sw.outfile, ".unassembled.reverse.fastq" );
2923
2924 if (inputFileSanity)
2925 {
2926 fprintf (stderr, "%s", sanityCheckMessage[inputFileSanity - 1]);
2927 }
2928 free (ef);
2929 free (tid);
2930
2931 destroy_reader ();
2932 destroy_thr_global ();
2933 free (thr_data);
2934
2935 return (EXIT_SUCCESS);
2936 }
2937