1 /****************************************************************\
2 *                                                                *
3 *  Library for PCR simulation                                    *
4 *                                                                *
5 *  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
6 *  Copyright (C) 2000-2009.  All Rights Reserved.                *
7 *                                                                *
8 *  This source code is distributed under the terms of the        *
9 *  GNU General Public License, version 3. See the file COPYING   *
10 *  or http://www.gnu.org/licenses/gpl.txt for details            *
11 *                                                                *
12 *  If you use this code, please keep this notice intact.         *
13 *                                                                *
14 \****************************************************************/
15 
16 #include "pcr.h"
17 #include "submat.h"
18 #include "sequence.h"
19 
20 #include <string.h> /* For strlen() */
21 
22 /**/
23 
PCR_Match_allocate(PCR * pcr)24 static PCR_Match *PCR_Match_allocate(PCR *pcr){
25     register PCR_Match *pcr_match;
26     if(pcr->match_recycle){
27         pcr_match = pcr->match_recycle;
28         /* pcr_match->pcr_probe is used as the next pointer */
29         pcr->match_recycle = (PCR_Match*)pcr_match->pcr_probe;
30     } else {
31         pcr_match = g_chunk_new(PCR_Match, pcr->match_mem_chunk);
32         }
33     return pcr_match;
34     }
35 
PCR_Match_create(PCR_Probe * pcr_probe,gint position,gint mismatch)36 static PCR_Match *PCR_Match_create(PCR_Probe *pcr_probe, gint position,
37                                    gint mismatch){
38     register PCR *pcr = pcr_probe->pcr_primer->pcr_experiment->pcr;
39     register PCR_Match *pcr_match = PCR_Match_allocate(pcr);
40     pcr_match->pcr_probe = pcr_probe;
41     pcr_match->position = position;
42     pcr_match->mismatch = mismatch;
43     return pcr_match;
44     }
45 
PCR_Match_destroy(PCR_Match * pcr_match)46 static void PCR_Match_destroy(PCR_Match *pcr_match){
47     register PCR *pcr
48         = pcr_match->pcr_probe->pcr_primer->pcr_experiment->pcr;
49     pcr_match->pcr_probe = (PCR_Probe*)pcr->match_recycle;
50     pcr->match_recycle = pcr_match;
51     return;
52     }
53 
54 /**/
55 
PCR_Probe_create(PCR_Primer * pcr_primer,Sequence_Strand strand,gchar * probe,gint mismatch)56 static PCR_Probe *PCR_Probe_create(PCR_Primer *pcr_primer,
57        Sequence_Strand strand, gchar *probe, gint mismatch){
58     register PCR *pcr = pcr_primer->pcr_experiment->pcr;
59     register PCR_Probe *pcr_probe = g_chunk_new(PCR_Probe,
60                                     pcr->probe_mem_chunk);
61     pcr_probe->pcr_primer = pcr_primer;
62     pcr_probe->strand = strand;
63     pcr_probe->mismatch = mismatch;
64     return pcr_probe;
65     }
66 
PCR_Probe_destroy(PCR_Probe * pcr_probe)67 static void PCR_Probe_destroy(PCR_Probe *pcr_probe){
68     register PCR *pcr = pcr_probe->pcr_primer->pcr_experiment->pcr;
69     g_mem_chunk_free(pcr->probe_mem_chunk, pcr_probe);
70     return;
71     }
72 
PCR_Probe_register_hit(PCR_Probe * pcr_probe,Sequence * sequence,guint seq_pos)73 static void PCR_Probe_register_hit(PCR_Probe *pcr_probe,
74                                    Sequence *sequence, guint seq_pos){
75     register PCR_Match *pcr_match, *prev_match;
76     register PCR_Primer *pcr_primer = pcr_probe->pcr_primer;
77     register PCR_Experiment *pcr_experiment
78                            = pcr_primer->pcr_experiment;
79     register PCR *pcr = pcr_experiment->pcr;
80     register gint i, product_length, mismatch = pcr_probe->mismatch;
81     register SListNode *node;
82     register gint match_start;
83     if(pcr_probe->strand == Sequence_Strand_FORWARD){
84         match_start = seq_pos - pcr_primer->probe_len + 1;
85     } else {
86         match_start = seq_pos - pcr_primer->length    + 1;
87         }
88     if(match_start < 0)
89         return; /* Too close to sequence start */
90     if((match_start + pcr_primer->length) > sequence->len)
91         return; /* Too close to sequence end */
92     /* Count number of mismatches in rest of primer */
93     g_assert(pcr_primer);
94     if(pcr_probe->strand == Sequence_Strand_FORWARD){
95         for(i = pcr_primer->probe_len; i < pcr_primer->length; i++){
96             if(pcr_primer->forward[i]
97             != Sequence_get_symbol(sequence, match_start+i)){
98                 mismatch++;
99                 if(mismatch > pcr->mismatch_threshold)
100                     return; /* Match failed to extend */
101                 }
102             }
103     } else {
104         g_assert(pcr_primer);
105         for(i = 0; i < (pcr_primer->length-pcr_primer->probe_len); i++){
106             if(pcr_primer->revcomp[i]
107             != Sequence_get_symbol(sequence, match_start+i)){
108                 mismatch++;
109                 if(mismatch > pcr->mismatch_threshold)
110                     return; /* Match failed to extend */
111                 }
112             }
113         }
114     /* Clear any matches longer in range */
115     while(!SList_isempty(pcr_experiment->match_list)){
116         prev_match = SList_first(pcr_experiment->match_list);
117         product_length = match_start
118                        - prev_match->position
119                        + pcr_probe->pcr_primer->length;
120         if(product_length <= pcr_experiment->max_product_len)
121             break;
122         prev_match = SList_pop(pcr_experiment->match_list);
123         PCR_Match_destroy(prev_match);
124         }
125     /* Create a match for this probe */
126     pcr_match = PCR_Match_create(pcr_probe, match_start, mismatch);
127     /* Report hit with any previous matches in range */
128     SList_for_each(pcr_experiment->match_list, node){
129         prev_match = node->data;
130         product_length = match_start
131                        - prev_match->position
132                        + pcr_probe->pcr_primer->length;
133         if(product_length < pcr_experiment->min_product_len)
134             break;
135         /* Primers must be on different strands */
136         if(prev_match->pcr_probe->strand
137         != pcr_match->pcr_probe->strand)
138             /* Primers must be facing each other */
139             if((prev_match->pcr_probe->strand
140                 == Sequence_Strand_FORWARD)
141             && (pcr_match->pcr_probe->strand
142                 == Sequence_Strand_REVCOMP))
143                 pcr->report_func(sequence,
144                                  prev_match, pcr_match, product_length,
145                                  pcr->user_data);
146         }
147     /* Record this match */
148     SList_queue(pcr_experiment->match_list, pcr_match);
149     return;
150     }
151 
152 /**/
153 
PCR_Sensor_create(PCR * pcr)154 static PCR_Sensor *PCR_Sensor_create(PCR *pcr){
155     register PCR_Sensor *pcr_sensor = g_chunk_new(PCR_Sensor,
156                                             pcr->sensor_mem_chunk);
157     pcr_sensor->owned_probe_list = SList_create(pcr->slist_set);
158     pcr_sensor->borrowed_sensor_list
159                                     = SList_create(pcr->slist_set);
160     pcr->sensor_count++;
161     return pcr_sensor;
162     }
163 
PCR_Sensor_destroy(PCR_Sensor * pcr_sensor,PCR * pcr)164 static void PCR_Sensor_destroy(PCR_Sensor *pcr_sensor, PCR *pcr){
165     register PCR_Probe *pcr_probe;
166     if(pcr_sensor->owned_probe_list){ /* Maybe removed in merge */
167         while(!SList_isempty(pcr_sensor->owned_probe_list)){
168             pcr_probe = SList_pop(pcr_sensor->owned_probe_list);
169             PCR_Probe_destroy(pcr_probe);
170             }
171         SList_destroy(pcr_sensor->owned_probe_list);
172         }
173     SList_destroy(pcr_sensor->borrowed_sensor_list);
174     g_mem_chunk_free(pcr->sensor_mem_chunk, pcr_sensor);
175     pcr->sensor_count--;
176     return;
177     }
178 
PCR_FSM_merge_PCR_Sensor(gpointer a,gpointer b,gpointer user_data)179 static gpointer PCR_FSM_merge_PCR_Sensor(gpointer a, gpointer b,
180                                            gpointer user_data){
181     register PCR *pcr = user_data;
182     register PCR_Sensor *ps_a = a, *ps_b = b;
183     g_assert(ps_a);
184     g_assert(ps_b);
185     g_assert(SList_isempty(ps_a->borrowed_sensor_list));
186     g_assert(SList_isempty(ps_b->borrowed_sensor_list));
187     ps_a->owned_probe_list = SList_join(ps_a->owned_probe_list,
188                                         ps_b->owned_probe_list);
189     SList_destroy(ps_b->owned_probe_list);
190     ps_b->owned_probe_list = NULL;
191     PCR_Sensor_destroy(ps_b, pcr);
192     return ps_a;
193     }
194 /* Merge two PCR_Sensors which have the same sequence */
195 
PCR_FSM_combine_PCR_Sensor(gpointer a,gpointer b,gpointer user_data)196 static gpointer PCR_FSM_combine_PCR_Sensor(gpointer a, gpointer b,
197                                              gpointer user_data){
198     register PCR_Sensor *ps_a = a, *ps_b = b;
199     g_assert(ps_a);
200     g_assert(ps_b);
201     /**/
202     SList_queue(ps_a->borrowed_sensor_list, ps_b);
203     return ps_a;
204     }
205 /* Join two PCR_Sensors when one is a subsequence of the other */
206 
207 /**/
208 
PCR_Primer_add_probe(PCR_Primer * pcr_primer,Sequence_Strand strand,gchar * primer,gint mismatch)209 static void PCR_Primer_add_probe(PCR_Primer *pcr_primer,
210         Sequence_Strand strand, gchar *primer, gint mismatch){
211     register PCR_Probe *pcr_probe = PCR_Probe_create(pcr_primer,
212               strand, primer, mismatch);
213     register PCR_Sensor *pcr_sensor = PCR_Sensor_create(
214                            pcr_primer->pcr_experiment->pcr);
215     SList_queue(pcr_sensor->owned_probe_list, pcr_probe);
216     g_ptr_array_add(pcr_primer->probe_list, pcr_probe);
217     FSM_add(pcr_primer->pcr_experiment->pcr->fsm, primer,
218             pcr_primer->probe_len, pcr_sensor);
219     return;
220     }
221 
PCR_Primer_Wordhood_traverse_func(gchar * word,gint score,gpointer user_data)222 static gboolean PCR_Primer_Wordhood_traverse_func(gchar *word,
223                     gint score, gpointer user_data){
224     register PCR_Primer *pcr_primer = user_data;
225     register gint mismatch = pcr_primer->probe_len-score;
226     PCR_Primer_add_probe(pcr_primer, Sequence_Strand_FORWARD,
227         word, mismatch);
228     Sequence_revcomp_in_place(word, pcr_primer->probe_len);
229     PCR_Primer_add_probe(pcr_primer, Sequence_Strand_REVCOMP,
230         word, mismatch);
231     Sequence_revcomp_in_place(word, pcr_primer->probe_len);
232     return FALSE;
233     }
234 
PCR_Primer_create(PCR_Experiment * pcr_experiment,gchar * primer)235 static PCR_Primer *PCR_Primer_create(PCR_Experiment *pcr_experiment,
236                                      gchar *primer){
237     register PCR_Primer *pcr_primer = g_new(PCR_Primer, 1);
238     g_assert(pcr_experiment);
239     g_assert(primer);
240     pcr_primer->pcr_experiment = pcr_experiment;
241     pcr_primer->length = strlen(primer);
242     if((pcr_experiment->pcr->seed_length)
243     && (pcr_primer->length > pcr_experiment->pcr->seed_length))
244         pcr_primer->probe_len = pcr_experiment->pcr->seed_length;
245     else
246         pcr_primer->probe_len = pcr_primer->length;
247     pcr_primer->forward = g_string_chunk_insert(
248                              pcr_experiment->pcr->string_chunk, primer);
249     pcr_primer->revcomp = g_string_chunk_insert(
250                              pcr_experiment->pcr->string_chunk, primer);
251     Sequence_revcomp_in_place(pcr_primer->revcomp, pcr_primer->length);
252     g_strup(pcr_primer->forward);
253     g_strup(pcr_primer->revcomp);
254     pcr_primer->probe_list = g_ptr_array_new();
255     WordHood_traverse(pcr_experiment->pcr->wordhood,
256                       PCR_Primer_Wordhood_traverse_func,
257                       pcr_primer->forward,
258                       pcr_primer->probe_len,
259                       pcr_primer);
260     return pcr_primer;
261     }
262 
PCR_Primer_destroy(PCR_Primer * pcr_primer)263 static void PCR_Primer_destroy(PCR_Primer *pcr_primer){
264     register gint i;
265     for(i = 0; i < pcr_primer->probe_list->len; i++)
266         PCR_Probe_destroy(pcr_primer->probe_list->pdata[i]);
267     g_ptr_array_free(pcr_primer->probe_list, TRUE);
268     g_free(pcr_primer);
269     return;
270     }
271 
PCR_Primer_memory_usage(PCR_Primer * pcr_primer)272 static gsize PCR_Primer_memory_usage(PCR_Primer *pcr_primer){
273     register gsize primer_size = sizeof(gchar) * pcr_primer->length;
274     return sizeof(PCR_Primer)
275          + (primer_size * 2)
276          + (pcr_primer->probe_list->len
277             * (sizeof(PCR_Probe) + sizeof(gpointer)));
278     }
279 
280 /**/
281 
PCR_Experiment_create(PCR * pcr,gchar * id,gchar * primer_a,gchar * primer_b,gint min_product_len,gint max_product_len)282 static PCR_Experiment *PCR_Experiment_create(PCR *pcr, gchar *id,
283                  gchar *primer_a, gchar *primer_b,
284                  gint min_product_len, gint max_product_len){
285     register PCR_Experiment *pcr_experiment = g_new(PCR_Experiment, 1);
286     /*
287     g_message("Adding [%s][%s][%s][%d][%d]",
288             id, primer_a, primer_b, min_product_len, max_product_len);
289     */
290     g_assert(id);
291     g_assert(primer_a);
292     g_assert(primer_b);
293     g_assert(min_product_len > 0);
294     g_assert(max_product_len >= min_product_len);
295     pcr_experiment->pcr = pcr;
296     pcr_experiment->id = g_string_chunk_insert(pcr->string_chunk, id);
297     pcr_experiment->primer_a = PCR_Primer_create(pcr_experiment,
298                                                  primer_a);
299     pcr_experiment->primer_b = PCR_Primer_create(pcr_experiment,
300                                                  primer_b);
301     pcr_experiment->min_product_len = min_product_len;
302     pcr_experiment->max_product_len = max_product_len;
303     pcr_experiment->match_list = SList_create(pcr->slist_set);
304     pcr_experiment->product_count = 0;
305     return pcr_experiment;
306     }
307 
PCR_Experiment_clear(PCR_Experiment * pcr_experiment)308 static void PCR_Experiment_clear(PCR_Experiment *pcr_experiment){
309     register PCR_Match *pcr_match;
310     while(!SList_isempty(pcr_experiment->match_list)){
311         pcr_match = SList_pop(pcr_experiment->match_list);
312         PCR_Match_destroy(pcr_match);
313         }
314     return;
315     }
316 
PCR_Experiment_destroy(PCR_Experiment * pcr_experiment)317 static void PCR_Experiment_destroy(PCR_Experiment *pcr_experiment){
318     g_assert(pcr_experiment);
319     PCR_Experiment_clear(pcr_experiment);
320     PCR_Primer_destroy(pcr_experiment->primer_a);
321     PCR_Primer_destroy(pcr_experiment->primer_b);
322     SList_destroy(pcr_experiment->match_list);
323     g_free(pcr_experiment);
324     return;
325     }
326 
PCR_Experiment_memory_usage(PCR_Experiment * pcr_experiment)327 static gsize PCR_Experiment_memory_usage(
328              PCR_Experiment *pcr_experiment){
329     return sizeof(PCR_Experiment)
330          + sizeof(gpointer) /* for pcr->experment_list */
331          + (sizeof(gchar) * strlen(pcr_experiment->id))
332          + PCR_Primer_memory_usage(pcr_experiment->primer_a)
333          + PCR_Primer_memory_usage(pcr_experiment->primer_b);
334     }
335 
336 /**/
337 
PCR_create(PCR_ReportFunc report_func,gpointer user_data,gint mismatch_threshold,gint seed_length)338 PCR *PCR_create(PCR_ReportFunc report_func, gpointer user_data,
339                 gint mismatch_threshold, gint seed_length){
340     register PCR *pcr = g_new(PCR, 1);
341     register Submat *submat = Submat_create("iupac-identity");
342     register WordHood_Alphabet *wha;
343     pcr->slist_set = SListSet_create();
344     pcr->fsm = FSM_create("ACGT", PCR_FSM_merge_PCR_Sensor,
345                                   PCR_FSM_combine_PCR_Sensor, pcr);
346     pcr->experiment_list = g_ptr_array_new();
347     pcr->mismatch_threshold = mismatch_threshold;
348     pcr->seed_length = seed_length;
349     pcr->report_func = report_func;
350     pcr->is_prepared = FALSE;
351     wha = WordHood_Alphabet_create_from_Submat(
352                             "GARTKWDCSMVYBHN", "ACGT", submat, FALSE);
353     pcr->wordhood = WordHood_create(wha, pcr->mismatch_threshold, TRUE);
354     WordHood_Alphabet_destroy(wha);
355     pcr->experiment_memory_usage = 0;
356     pcr->sensor_count = 0;
357     pcr->sensor_mem_chunk = g_mem_chunk_new("PCR_Sensor",
358                          sizeof(PCR_Sensor), sizeof(PCR_Sensor)*4096,
359                          G_ALLOC_AND_FREE);
360     pcr->user_data = user_data;
361     Submat_destroy(submat);
362     pcr->string_chunk = g_string_chunk_new(4096);
363     pcr->probe_mem_chunk = g_mem_chunk_new("PCR_Probe",
364                          sizeof(PCR_Probe), sizeof(PCR_Probe)*4096,
365                          G_ALLOC_AND_FREE);
366     pcr->match_mem_chunk = g_mem_chunk_new("PCR_Match",
367                          sizeof(PCR_Match), sizeof(PCR_Match)*4096,
368                          G_ALLOC_ONLY);
369     pcr->match_recycle = NULL;
370     return pcr;
371     }
372 
PCR_destroy(PCR * pcr)373 void PCR_destroy(PCR *pcr){
374     register gint i;
375     for(i = 0; i < pcr->experiment_list->len; i++)
376         PCR_Experiment_destroy(pcr->experiment_list->pdata[i]);
377     g_ptr_array_free(pcr->experiment_list, TRUE);
378     FSM_destroy(pcr->fsm);
379     WordHood_destroy(pcr->wordhood);
380     SListSet_destroy(pcr->slist_set);
381     g_mem_chunk_destroy(pcr->sensor_mem_chunk);
382     g_string_chunk_free(pcr->string_chunk);
383     g_mem_chunk_destroy(pcr->probe_mem_chunk);
384     g_mem_chunk_destroy(pcr->match_mem_chunk);
385     g_free(pcr);
386     return;
387     }
388 
389 /**/
390 
PCR_memory_usage(PCR * pcr)391 static gsize PCR_memory_usage(PCR *pcr){
392     return sizeof(PCR)
393          + pcr->experiment_memory_usage
394          + SListSet_memory_usage(pcr->slist_set)
395          + FSM_memory_usage(pcr->fsm)
396          + (sizeof(PCR_Sensor) * pcr->sensor_count);
397     }
398 
PCR_add_experiment(PCR * pcr,gchar * id,gchar * primer_a,gchar * primer_b,gint min_product_len,gint max_product_len)399 gsize PCR_add_experiment(PCR *pcr, gchar *id,
400                         gchar *primer_a, gchar *primer_b,
401                         gint min_product_len, gint max_product_len){
402     register PCR_Experiment *pcr_experiment;
403     g_assert(pcr);
404     g_assert(!pcr->is_prepared);
405     pcr_experiment = PCR_Experiment_create(pcr, id, primer_a, primer_b,
406                                    min_product_len, max_product_len);
407     g_ptr_array_add(pcr->experiment_list, pcr_experiment);
408     pcr->experiment_memory_usage
409          += PCR_Experiment_memory_usage(pcr_experiment);
410     return PCR_memory_usage(pcr);
411     }
412 
PCR_prepare(PCR * pcr)413 void PCR_prepare(PCR *pcr){
414     g_assert(pcr);
415     g_assert(!pcr->is_prepared);
416     FSM_compile(pcr->fsm);
417     pcr->is_prepared = TRUE;
418     return;
419     }
420 
PCR_Sensor_register_hit_recur(PCR_Sensor * pcr_sensor,Sequence * sequence,guint seq_pos)421 static void PCR_Sensor_register_hit_recur(PCR_Sensor *pcr_sensor,
422                                  Sequence *sequence, guint seq_pos){
423     register SListNode *node;
424     register PCR_Probe *pcr_probe;
425     register PCR_Sensor *borrowed_sensor;
426     SList_for_each(pcr_sensor->owned_probe_list, node){
427         pcr_probe = node->data;
428         PCR_Probe_register_hit(pcr_probe, sequence, seq_pos);
429         }
430     SList_for_each(pcr_sensor->borrowed_sensor_list, node){
431         borrowed_sensor = node->data;
432         PCR_Sensor_register_hit_recur(borrowed_sensor,
433                                         sequence, seq_pos);
434         }
435     return;
436     }
437 
PCR_FSM_traverse_func(guint seq_pos,gpointer node_data,gpointer user_data)438 static void PCR_FSM_traverse_func(guint seq_pos,
439                                   gpointer node_data,
440                                   gpointer user_data){
441     register PCR_Sensor *pcr_sensor = node_data;
442     register Sequence *sequence = user_data;
443     PCR_Sensor_register_hit_recur(pcr_sensor, sequence, seq_pos);
444     return;
445     }
446 
PCR_simulate(PCR * pcr,Sequence * sequence)447 void PCR_simulate(PCR *pcr, Sequence *sequence){
448     register gint i;
449     register gchar *str = Sequence_get_str(sequence);
450     g_assert(pcr->is_prepared);
451     FSM_traverse(pcr->fsm, str,
452                  PCR_FSM_traverse_func, sequence);
453     g_free(str);
454     for(i = 0; i < pcr->experiment_list->len; i++)
455         PCR_Experiment_clear(pcr->experiment_list->pdata[i]);
456     return;
457     }
458 
459 /**/
460 
461 /* FIXME: Add wordhood_abandon and match_abandon thresholds.
462  */
463 
464