1 /* Copyright 2002 by Jeffrey Chang.
2  * Copyright 2016, 2019 by Markus Piotrowski.
3  * All rights reserved.
4  *
5  * This file is part of the Biopython distribution and governed by your
6  * choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
7  * Please see the LICENSE file that should have been included as part of this
8  * package.
9  *
10  * cpairwise2module.c
11  * Created 30 Sep 2001
12  *
13  * Optimized C routines that complement pairwise2.py.
14  */
15 
16 #include "Python.h"
17 
18 
19 #define _PRECISION 1000
20 #define rint(x) (int)((x)*_PRECISION+0.5)
21 
22 /* Functions in this module. */
23 
calc_affine_penalty(int length,double open,double extend,int penalize_extend_when_opening)24 static double calc_affine_penalty(int length, double open, double extend,
25     int penalize_extend_when_opening)
26 {
27     double penalty;
28 
29     if(length <= 0)
30         return 0.0;
31     penalty = open + extend * length;
32     if(!penalize_extend_when_opening)
33         penalty -= extend;
34     return penalty;
35 }
36 
_get_match_score(PyObject * py_sequenceA,PyObject * py_sequenceB,PyObject * py_match_fn,int i,int j,char * sequenceA,char * sequenceB,int use_sequence_cstring,double match,double mismatch,int use_match_mismatch_scores)37 static double _get_match_score(PyObject *py_sequenceA, PyObject *py_sequenceB,
38                                PyObject *py_match_fn, int i, int j,
39                                char *sequenceA, char *sequenceB,
40                                int use_sequence_cstring,
41                                double match, double mismatch,
42                                int use_match_mismatch_scores)
43 {
44     PyObject *py_A=NULL, *py_B=NULL;
45     PyObject *py_arglist=NULL, *py_result=NULL;
46     double score = 0;
47 
48     if(use_sequence_cstring && use_match_mismatch_scores) {
49         score = (sequenceA[i] == sequenceB[j]) ? match : mismatch;
50         return score;
51     }
52     /* Calculate the match score. */
53     if(!(py_A = PySequence_GetItem(py_sequenceA, i)))
54         goto _get_match_score_cleanup;
55     if(!(py_B = PySequence_GetItem(py_sequenceB, j)))
56         goto _get_match_score_cleanup;
57     if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B)))
58         goto _get_match_score_cleanup;
59 
60     if(!(py_result = PyEval_CallObject(py_match_fn, py_arglist)))
61         goto _get_match_score_cleanup;
62     score = PyFloat_AsDouble(py_result);
63 
64  _get_match_score_cleanup:
65     if(py_A) {
66         Py_DECREF(py_A);
67     }
68     if(py_B) {
69         Py_DECREF(py_B);
70     }
71     if(py_arglist) {
72         Py_DECREF(py_arglist);
73     }
74     if(py_result) {
75         Py_DECREF(py_result);
76     }
77     return score;
78 }
79 
80 #if PY_MAJOR_VERSION >= 3
_create_bytes_object(PyObject * o)81 static PyObject* _create_bytes_object(PyObject* o)
82 {
83     PyObject* b;
84     if (PyBytes_Check(o)) {
85         return o;
86     }
87     if (!PyUnicode_Check(o)) {
88         return NULL;
89     }
90     b = PyUnicode_AsASCIIString(o);
91     if (!b) {
92         PyErr_Clear();
93         return NULL;
94     }
95     return b;
96 }
97 #endif
98 
99 /* This function is a more-or-less straightforward port of the
100  * equivalent function in pairwise2. Please see there for algorithm
101  * documentation.
102  */
cpairwise2__make_score_matrix_fast(PyObject * self,PyObject * args)103 static PyObject *cpairwise2__make_score_matrix_fast(PyObject *self,
104                                                     PyObject *args)
105 {
106     int i;
107     int row, col;
108     PyObject *py_sequenceA, *py_sequenceB, *py_match_fn;
109 #if PY_MAJOR_VERSION >= 3
110     PyObject *py_bytesA, *py_bytesB;
111 #endif
112     char *sequenceA=NULL, *sequenceB=NULL;
113     int use_sequence_cstring;
114     double open_A, extend_A, open_B, extend_B;
115     int penalize_extend_when_opening, penalize_end_gaps_A, penalize_end_gaps_B;
116     int align_globally, score_only;
117 
118     PyObject *py_match=NULL, *py_mismatch=NULL;
119     double first_A_gap, first_B_gap;
120     double match, mismatch;
121     double score;
122     double best_score = 0;
123     double local_max_score = 0;
124     int use_match_mismatch_scores;
125     int lenA, lenB;
126     double *score_matrix = NULL;
127     unsigned char *trace_matrix = NULL;
128     PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL;
129 
130     double *col_cache_score = NULL;
131     PyObject *py_retval = NULL;
132 
133     if(!PyArg_ParseTuple(args, "OOOddddi(ii)ii", &py_sequenceA, &py_sequenceB,
134                          &py_match_fn, &open_A, &extend_A, &open_B, &extend_B,
135                          &penalize_extend_when_opening,
136                          &penalize_end_gaps_A, &penalize_end_gaps_B,
137                          &align_globally, &score_only))
138         return NULL;
139     if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) {
140         PyErr_SetString(PyExc_TypeError,
141                         "py_sequenceA and py_sequenceB should be sequences.");
142         return NULL;
143     }
144 
145     /* Optimize for the common case. Check to see if py_sequenceA and
146        py_sequenceB are strings.  If they are, use the c string
147        representation. */
148 #if PY_MAJOR_VERSION < 3
149     use_sequence_cstring = 0;
150     if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) {
151         sequenceA = PyString_AS_STRING(py_sequenceA);
152         sequenceB = PyString_AS_STRING(py_sequenceB);
153         use_sequence_cstring = 1;
154     }
155 #else
156     py_bytesA = _create_bytes_object(py_sequenceA);
157     py_bytesB = _create_bytes_object(py_sequenceB);
158     if (py_bytesA && py_bytesB) {
159         sequenceA = PyBytes_AS_STRING(py_bytesA);
160         sequenceB = PyBytes_AS_STRING(py_bytesB);
161         use_sequence_cstring = 1;
162     }
163     else {
164         Py_XDECREF(py_bytesA);
165         Py_XDECREF(py_bytesB);
166         use_sequence_cstring = 0;
167     }
168 #endif
169 
170     if(!PyCallable_Check(py_match_fn)) {
171         PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable.");
172         return NULL;
173     }
174     /* Optimize for the common case. Check to see if py_match_fn is
175        an identity_match. If so, pull out the match and mismatch
176        member variables and calculate the scores myself. */
177     match = mismatch = 0;
178     use_match_mismatch_scores = 0;
179     if(!(py_match = PyObject_GetAttrString(py_match_fn, "match")))
180         goto cleanup_after_py_match_fn;
181     match = PyFloat_AsDouble(py_match);
182     if(match==-1.0 && PyErr_Occurred())
183         goto cleanup_after_py_match_fn;
184     if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch")))
185         goto cleanup_after_py_match_fn;
186     mismatch = PyFloat_AsDouble(py_mismatch);
187     if(mismatch==-1.0 && PyErr_Occurred())
188         goto cleanup_after_py_match_fn;
189     use_match_mismatch_scores = 1;
190 
191  cleanup_after_py_match_fn:
192     if(PyErr_Occurred())
193         PyErr_Clear();
194     if(py_match) {
195         Py_DECREF(py_match);
196     }
197     if(py_mismatch) {
198         Py_DECREF(py_mismatch);
199     }
200     /* Cache some commonly used gap penalties */
201     first_A_gap = calc_affine_penalty(1, open_A, extend_A,
202                                       penalize_extend_when_opening);
203     first_B_gap = calc_affine_penalty(1, open_B, extend_B,
204                                       penalize_extend_when_opening);
205 
206     /* Allocate matrices for storing the results and initialize first row and col. */
207     lenA = PySequence_Length(py_sequenceA);
208     lenB = PySequence_Length(py_sequenceB);
209     score_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*score_matrix));
210     if(!score_matrix) {
211         PyErr_SetString(PyExc_MemoryError, "Out of memory");
212         goto _cleanup_make_score_matrix_fast;
213     }
214     for(i=0; i<(lenB+1); i++)
215         score_matrix[i] = 0;
216     for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
217         score_matrix[i] = 0;
218     /* If we only want the score, we don't need the trace matrix. */
219     if (!score_only){
220         trace_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*trace_matrix));
221         if(!trace_matrix) {
222             PyErr_SetString(PyExc_MemoryError, "Out of memory");
223             goto _cleanup_make_score_matrix_fast;
224         }
225         for(i=0; i<(lenB+1); i++)
226             trace_matrix[i] = 0;
227         for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
228             trace_matrix[i] = 0;
229         }
230     else
231         trace_matrix = malloc(1);
232 
233     /* Initialize the first row and col of the score matrix. */
234     for(i=0; i<=lenA; i++) {
235         if(penalize_end_gaps_B)
236             score = calc_affine_penalty(i, open_B, extend_B,
237                                         penalize_extend_when_opening);
238         else
239             score = 0;
240         score_matrix[i*(lenB+1)] = score;
241     }
242     for(i=0; i<=lenB; i++) {
243         if(penalize_end_gaps_A)
244             score = calc_affine_penalty(i, open_A, extend_A,
245                                         penalize_extend_when_opening);
246         else
247             score = 0;
248         score_matrix[i] = score;
249     }
250 
251     /* Now initialize the col cache. */
252     col_cache_score = malloc((lenB+1)*sizeof(*col_cache_score));
253     memset((void *)col_cache_score, 0, (lenB+1)*sizeof(*col_cache_score));
254     for(i=0; i<=lenB; i++) {
255         col_cache_score[i] = calc_affine_penalty(i, (2*open_B), extend_B,
256                              penalize_extend_when_opening);
257     }
258 
259     /* Fill in the score matrix. The row cache is calculated on the fly.*/
260     for(row=1; row<=lenA; row++) {
261         double row_cache_score = calc_affine_penalty(row, (2*open_A), extend_A,
262                                  penalize_extend_when_opening);
263         for(col=1; col<=lenB; col++) {
264             double match_score, nogap_score;
265             double row_open, row_extend, col_open, col_extend;
266             int best_score_rint, row_score_rint, col_score_rint;
267             unsigned char row_trace_score, col_trace_score, trace_score;
268 
269             /* Calculate the best score. */
270             match_score = _get_match_score(py_sequenceA, py_sequenceB,
271                                            py_match_fn, row-1, col-1,
272                                            sequenceA, sequenceB,
273                                            use_sequence_cstring,
274                                            match, mismatch,
275                                            use_match_mismatch_scores);
276             if(match_score==-1.0 && PyErr_Occurred())
277                 goto _cleanup_make_score_matrix_fast;
278             nogap_score = score_matrix[(row-1)*(lenB+1)+col-1] + match_score;
279 
280             if (!penalize_end_gaps_A && row==lenA) {
281                 row_open = score_matrix[(row)*(lenB+1)+col-1];
282                 row_extend = row_cache_score;
283             }
284             else {
285                 row_open = score_matrix[(row)*(lenB+1)+col-1] + first_A_gap;
286                 row_extend = row_cache_score + extend_A;
287             }
288             row_cache_score = (row_open > row_extend) ? row_open : row_extend;
289 
290             if (!penalize_end_gaps_B && col==lenB){
291                 col_open = score_matrix[(row-1)*(lenB+1)+col];
292                 col_extend = col_cache_score[col];
293             }
294             else {
295                 col_open = score_matrix[(row-1)*(lenB+1)+col] + first_B_gap;
296                 col_extend = col_cache_score[col] + extend_B;
297             }
298             col_cache_score[col] = (col_open > col_extend) ? col_open : col_extend;
299 
300             best_score = (row_cache_score > col_cache_score[col]) ? row_cache_score : col_cache_score[col];
301             if(nogap_score > best_score)
302                 best_score = nogap_score;
303 
304             if (best_score > local_max_score)
305                 local_max_score = best_score;
306 
307             if(!align_globally && best_score < 0)
308                 score_matrix[row*(lenB+1)+col] = 0;
309             else
310                 score_matrix[row*(lenB+1)+col] = best_score;
311 
312             if (!score_only) {
313                 row_score_rint = rint(row_cache_score);
314                 col_score_rint = rint(col_cache_score[col]);
315                 row_trace_score = 0;
316                 col_trace_score = 0;
317                 if (rint(row_open) == row_score_rint)
318                     row_trace_score = row_trace_score|1;
319                 if (rint(row_extend) == row_score_rint)
320                     row_trace_score = row_trace_score|8;
321                 if (rint(col_open) == col_score_rint)
322                     col_trace_score = col_trace_score|4;
323                 if (rint(col_extend) == col_score_rint)
324                     col_trace_score = col_trace_score|16;
325 
326                 trace_score = 0;
327                 best_score_rint = rint(best_score);
328                 if (rint(nogap_score) == best_score_rint)
329                     trace_score = trace_score|2;
330                 if (row_score_rint == best_score_rint)
331                     trace_score += row_trace_score;
332                 if (col_score_rint == best_score_rint)
333                     trace_score += col_trace_score;
334                 trace_matrix[row*(lenB+1)+col] = trace_score;
335             }
336         }
337     }
338 
339     if (!align_globally)
340         best_score = local_max_score;
341 
342     /* Save the score and traceback matrices into real python objects. */
343 	if(!score_only) {
344 		if(!(py_score_matrix = PyList_New(lenA+1)))
345 			goto _cleanup_make_score_matrix_fast;
346 		if(!(py_trace_matrix = PyList_New(lenA+1)))
347 			goto _cleanup_make_score_matrix_fast;
348 
349 		for(row=0; row<=lenA; row++) {
350 			PyObject *py_score_row, *py_trace_row;
351 			if(!(py_score_row = PyList_New(lenB+1)))
352 				goto _cleanup_make_score_matrix_fast;
353 			PyList_SET_ITEM(py_score_matrix, row, py_score_row);
354 			if(!(py_trace_row = PyList_New(lenB+1)))
355 				goto _cleanup_make_score_matrix_fast;
356 			PyList_SET_ITEM(py_trace_matrix, row, py_trace_row);
357 
358 			for(col=0; col<=lenB; col++) {
359 				PyObject *py_score, *py_trace;
360 				int offset = row*(lenB+1) + col;
361 
362 				/* Set py_score_matrix[row][col] to the score. */
363 				if(!(py_score = PyFloat_FromDouble(score_matrix[offset])))
364 					goto _cleanup_make_score_matrix_fast;
365 				PyList_SET_ITEM(py_score_row, col, py_score);
366 
367 				/* Set py_trace_matrix[row][col] to a list of indexes.  On
368 				   the edges of the matrix (row or column is 0), the
369 				   matrix should be [None]. */
370 				if(!row || !col) {
371 					if(!(py_trace = Py_BuildValue("B", 1)))
372 						goto _cleanup_make_score_matrix_fast;
373 					Py_INCREF(Py_None);
374 					PyList_SET_ITEM(py_trace_row, col, Py_None);
375 				}
376 				else {
377 					if(!(py_trace = Py_BuildValue("B", trace_matrix[offset])))
378 						goto _cleanup_make_score_matrix_fast;
379 					PyList_SET_ITEM(py_trace_row, col, py_trace);
380 
381 				}
382 			}
383 		}
384 	}
385 	else {
386 		py_score_matrix = PyList_New(1);
387 		py_trace_matrix = PyList_New(1);
388 	}
389     py_retval = Py_BuildValue("(OOd)", py_score_matrix, py_trace_matrix, best_score);
390 
391  _cleanup_make_score_matrix_fast:
392     if(score_matrix)
393         free(score_matrix);
394     if(trace_matrix)
395         free(trace_matrix);
396     if(col_cache_score)
397         free(col_cache_score);
398     if(py_score_matrix){
399         Py_DECREF(py_score_matrix);
400     }
401     if(py_trace_matrix){
402         Py_DECREF(py_trace_matrix);
403     }
404 
405 #if PY_MAJOR_VERSION >= 3
406     if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA);
407     if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB);
408 #endif
409 
410     return py_retval;
411 }
412 
cpairwise2_rint(PyObject * self,PyObject * args,PyObject * keywds)413 static PyObject *cpairwise2_rint(PyObject *self, PyObject *args,
414                                  PyObject *keywds)
415 {
416     double x;
417     int precision = _PRECISION;
418     int rint_x;
419 
420     static char *kwlist[] = {"x", "precision", NULL};
421 
422     if(!PyArg_ParseTupleAndKeywords(args, keywds, "d|l", kwlist,
423                                     &x, &precision))
424         return NULL;
425     rint_x = (int)(x * precision + 0.5);
426 #if PY_MAJOR_VERSION >= 3
427     return PyLong_FromLong((long)rint_x);
428 #else
429     return PyInt_FromLong((long)rint_x);
430 #endif
431 }
432 
433 /* Module definition stuff */
434 
435 static PyMethodDef cpairwise2Methods[] = {
436     {"_make_score_matrix_fast",
437      (PyCFunction)cpairwise2__make_score_matrix_fast, METH_VARARGS, ""},
438     {"rint", (PyCFunction)cpairwise2_rint, METH_VARARGS|METH_KEYWORDS, ""},
439     {NULL, NULL, 0, NULL}
440 };
441 
442 static char cpairwise2__doc__[] =
443 "Optimized C routines that complement pairwise2.py. These are called from within pairwise2.py.\n\
444 \n\
445 ";
446 
447 #if PY_MAJOR_VERSION >= 3
448 
449 static struct PyModuleDef moduledef = {
450         PyModuleDef_HEAD_INIT,
451         "cpairwise2",
452         cpairwise2__doc__,
453         -1,
454         cpairwise2Methods,
455         NULL,
456         NULL,
457         NULL,
458         NULL
459 };
460 
461 PyObject *
PyInit_cpairwise2(void)462 PyInit_cpairwise2(void)
463 
464 #else
465 
466 void
467 /* for Windows: _declspec(dllexport) initcpairwise2(void) */
468 initcpairwise2(void)
469 #endif
470 
471 {
472 #if PY_MAJOR_VERSION >= 3
473     PyObject* module = PyModule_Create(&moduledef);
474     if (module==NULL) return NULL;
475     return module;
476 #else
477     (void) Py_InitModule3("cpairwise2", cpairwise2Methods, cpairwise2__doc__);
478 #endif
479 }
480