1 /* @source enssequence ********************************************************
2 **
3 ** Ensembl Sequence functions
4 **
5 ** @author Copyright (C) 1999 Ensembl Developers
6 ** @author Copyright (C) 2006 Michael K. Schuster
7 ** @version $Revision: 1.46 $
8 ** @modified 2009 by Alan Bleasby for incorporation into EMBOSS core
9 ** @modified $Date: 2013/02/17 13:02:40 $ by $Author: mks $
10 ** @@
11 **
12 ** This library is free software; you can redistribute it and/or
13 ** modify it under the terms of the GNU Lesser General Public
14 ** License as published by the Free Software Foundation; either
15 ** version 2.1 of the License, or (at your option) any later version.
16 **
17 ** This library is distributed in the hope that it will be useful,
18 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 ** Lesser General Public License for more details.
21 **
22 ** You should have received a copy of the GNU Lesser General Public
23 ** License along with this library; if not, write to the Free Software
24 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
25 ** MA  02110-1301,  USA.
26 **
27 ******************************************************************************/
28 
29 /* ========================================================================= */
30 /* ============================= include files ============================= */
31 /* ========================================================================= */
32 
33 #include "enscache.h"
34 #include "ensprojectionsegment.h"
35 #include "enssequence.h"
36 #include "enssequenceedit.h"
37 
38 
39 
40 
41 /* ========================================================================= */
42 /* =============================== constants =============================== */
43 /* ========================================================================= */
44 
45 
46 
47 
48 /* ========================================================================= */
49 /* =========================== global variables ============================ */
50 /* ========================================================================= */
51 
52 
53 
54 
55 /* ========================================================================= */
56 /* ============================= private data ============================== */
57 /* ========================================================================= */
58 
59 
60 
61 
62 /* ========================================================================= */
63 /* =========================== private constants =========================== */
64 /* ========================================================================= */
65 
66 /* @conststatic sequenceKChunkPower *******************************************
67 **
68 ** The Ensembl Sequence Adaptor handles sequences in larger chunks internally.
69 ** Both, memory allocation and SQL database operations become more efficient,
70 ** since requests for neighbouring regions are likely to be returned from the
71 ** same block.
72 **
73 ** 1 << 18 = 256 KiB, about the size of a BAC clone
74 **
75 ******************************************************************************/
76 
77 static const ajuint sequenceKChunkPower = 18U;
78 
79 
80 
81 
82 /* @conststatic sequenceadaptorKCacheMaxBytes *********************************
83 **
84 ** The maximum memory size in bytes the Ensembl Sequence Adaptor-internal
85 ** Ensembl Cache can use.
86 **
87 ** 1 << 26 = 64 MiB
88 **
89 ******************************************************************************/
90 
91 static const size_t sequenceadaptorKCacheMaxBytes = 1U << 26U;
92 
93 
94 
95 
96 /* @conststatic sequenceadaptorKCacheMaxCount *********************************
97 **
98 ** The mamximum number of sequence chunks the Ensembl Sequence Adaptor-internal
99 ** Ensembl Cache can hold.
100 **
101 ** 1 << 16 = 64 Ki
102 **
103 ******************************************************************************/
104 
105 static const ajuint sequenceadaptorKCacheMaxCount = 1U << 16U;
106 
107 
108 
109 
110 /* @conststatic sequenceadaptorKCacheMaxSize **********************************
111 **
112 ** Maximum size of a sequence chunk to be allowed into the
113 ** Ensembl Sequence Adaptor-internal Ensembl Cache.
114 **
115 ******************************************************************************/
116 
117 static const size_t sequenceadaptorKCacheMaxSize = 0U;
118 
119 
120 
121 
122 /* @conststatic sequenceadaptorKCacheMaxLength ********************************
123 **
124 ** Maximum length of a sequence request up to which it gets chunked and cached.
125 ** Larger request are returned directly.
126 **
127 ** 1 << 21 = 2 Mi, or about 8 BAC clones
128 **
129 ******************************************************************************/
130 
131 static const ajuint sequenceadaptorKCacheMaxLength = 1U << 21U;
132 
133 
134 
135 
136 /* ========================================================================= */
137 /* =========================== private variables =========================== */
138 /* ========================================================================= */
139 
140 
141 
142 
143 /* ========================================================================= */
144 /* =========================== private functions =========================== */
145 /* ========================================================================= */
146 
147 static size_t sequenceadaptorCacheSize(const void *value);
148 
149 static AjBool sequenceadaptorFetchCircularsliceSubStr(
150     EnsPSequenceadaptor sqa,
151     EnsPSlice slice,
152     ajint start,
153     ajint end,
154     ajint strand,
155     AjPStr *Psequence);
156 
157 
158 
159 
160 /* ========================================================================= */
161 /* ======================= All functions by section ======================== */
162 /* ========================================================================= */
163 
164 
165 
166 
167 /* @filesection enssequence ***************************************************
168 **
169 ** @nam1rule ens Function belongs to the Ensembl library
170 **
171 ******************************************************************************/
172 
173 
174 
175 
176 /* @datasection [EnsPSequenceadaptor] Ensembl Sequence Adaptor ****************
177 **
178 ** @nam2rule Sequenceadaptor Functions for manipulating
179 ** Ensembl Sequence Adaptor objects
180 **
181 ** @cc Bio::EnsEMBL::DBSQL::Sequenceadaptor
182 ** @cc CVS Revision: 1.54
183 ** @cc CVS Tag: branch-ensembl-68
184 **
185 ******************************************************************************/
186 
187 
188 
189 
190 /* @funcstatic sequenceadaptorCacheSize ***************************************
191 **
192 ** Wrapper function to calculate the memory size of an Ensembl Sequence
193 ** (i.e. an AJAX String) from an Ensembl Cache.
194 **
195 ** @param [r] value [const void*] AJAX String
196 **
197 ** @return [size_t] Memory size in bytes or 0
198 **
199 ** @release 6.3.0
200 ** @@
201 ******************************************************************************/
202 
sequenceadaptorCacheSize(const void * value)203 static size_t sequenceadaptorCacheSize(const void *value)
204 {
205     size_t size = 0;
206 
207     if (!value)
208         return 0;
209 
210     size += sizeof (AjOStr);
211 
212     size += ajStrGetRes((const AjPStr) value);
213 
214     return size;
215 }
216 
217 
218 
219 
220 /* @section constructors ******************************************************
221 **
222 ** All constructors return a new Ensembl Sequence Adaptor by pointer.
223 ** It is the responsibility of the user to first destroy any previous
224 ** Sequence Adaptor. The target pointer does not need to be initialised to
225 ** NULL, but it is good programming practice to do so anyway.
226 **
227 ** @fdata [EnsPSequenceadaptor]
228 **
229 ** @nam3rule New Constructor
230 **
231 ** @argrule New dba [EnsPDatabaseadaptor] Ensembl Database Adaptor
232 ** @argrule Obj object [EnsPSequenceadaptor] Ensembl Sequence Adaptor
233 ** @argrule Ref object [EnsPSequenceadaptor] Ensembl Sequence Adaptor
234 **
235 ** @valrule * [EnsPSequenceadaptor] Ensembl Sequence Adaptor or NULL
236 **
237 ** @fcategory new
238 ******************************************************************************/
239 
240 
241 
242 
243 /* @func ensSequenceadaptorNew ************************************************
244 **
245 ** Default constructor for an Ensembl Sequence Adaptor.
246 **
247 ** Ensembl Object Adaptors are singleton objects in the sense that a single
248 ** instance of an Ensembl Object Adaptor connected to a particular database is
249 ** sufficient to instantiate any number of Ensembl Objects from the database.
250 ** Each Ensembl Object will have a weak reference to the Object Adaptor that
251 ** instantiated it. Therefore, Ensembl Object Adaptors should not be
252 ** instantiated directly, but rather obtained from the Ensembl Registry,
253 ** which will in turn call this function if neccessary.
254 **
255 ** @see ensRegistryGetDatabaseadaptor
256 ** @see ensRegistryGetSequenceadaptor
257 **
258 ** @cc Bio::EnsEMBL::DBSQL::Sequenceadaptor::new
259 ** @param [u] dba [EnsPDatabaseadaptor] Ensembl Database Adaptor
260 **
261 ** @return [EnsPSequenceadaptor] Ensembl Sequence Adaptor or NULL
262 **
263 ** @release 6.2.0
264 ** @@
265 ******************************************************************************/
266 
ensSequenceadaptorNew(EnsPDatabaseadaptor dba)267 EnsPSequenceadaptor ensSequenceadaptorNew(
268     EnsPDatabaseadaptor dba)
269 {
270     EnsPSequenceadaptor sqa = NULL;
271 
272     if (!dba)
273         return NULL;
274 
275     if (ajDebugTest("ensSequenceadaptorNew"))
276         ajDebug("ensSequenceadaptorNew\n"
277                 "  dba %p\n",
278                 dba);
279 
280     AJNEW0(sqa);
281 
282     sqa->Adaptor = dba;
283 
284     sqa->Cache = ensCacheNew(
285         ensECacheTypeAlphaNumeric,
286         sequenceadaptorKCacheMaxBytes,
287         sequenceadaptorKCacheMaxCount,
288         sequenceadaptorKCacheMaxSize,
289         (void *(*)(void *)) &ajStrNewRef,
290         (void (*)(void **)) &ajStrDel,
291         &sequenceadaptorCacheSize,
292         (void *(*)(const void *)) NULL,
293         (AjBool (*)(const void *)) NULL,
294         ajFalse,
295         "Sequence");
296 
297     return sqa;
298 }
299 
300 
301 
302 
303 /* @section destructors *******************************************************
304 **
305 ** Destruction destroys all internal data structures and frees the memory
306 ** allocated for an Ensembl Sequence Adaptor object.
307 **
308 ** @fdata [EnsPSequenceadaptor]
309 **
310 ** @nam3rule Del Destroy (free) an Ensembl Sequence Adaptor
311 **
312 ** @argrule * Psqa [EnsPSequenceadaptor*] Ensembl Sequence Adaptor address
313 **
314 ** @valrule * [void]
315 **
316 ** @fcategory delete
317 ******************************************************************************/
318 
319 
320 
321 
322 /* @func ensSequenceadaptorDel ************************************************
323 **
324 ** Default destructor for an Ensembl Sequence Adaptor.
325 **
326 ** Ensembl Object Adaptors are singleton objects that are registered in the
327 ** Ensembl Registry and weakly referenced by Ensembl Objects that have been
328 ** instantiated by it. Therefore, Ensembl Object Adaptors should never be
329 ** destroyed directly. Upon exit, the Ensembl Registry will call this function
330 ** if required.
331 **
332 ** @param [d] Psqa [EnsPSequenceadaptor*] Ensembl Sequence Adaptor address
333 **
334 ** @return [void]
335 **
336 ** @release 6.2.0
337 ** @@
338 ******************************************************************************/
339 
ensSequenceadaptorDel(EnsPSequenceadaptor * Psqa)340 void ensSequenceadaptorDel(EnsPSequenceadaptor *Psqa)
341 {
342     EnsPSequenceadaptor pthis = NULL;
343 
344     if (!Psqa)
345         return;
346 
347 #if defined(AJ_DEBUG) && AJ_DEBUG >= 1
348     if (ajDebugTest("ensSequenceadaptorDel"))
349         ajDebug("ensSequenceadaptorDel\n"
350                 "  *Psqa %p\n",
351                 *Psqa);
352 #endif /* defined(AJ_DEBUG) && AJ_DEBUG >= 1 */
353 
354     if (!(pthis = *Psqa))
355         return;
356 
357     ensCacheDel(&pthis->Cache);
358 
359     ajMemFree((void **) Psqa);
360 
361     return;
362 }
363 
364 
365 
366 
367 /* @section cache *************************************************************
368 **
369 ** Functions for maintaining the Ensembl Sequence Adaptor-internal cache.
370 **
371 ** @fdata [EnsPSequenceadaptor]
372 **
373 ** @nam3rule Cache
374 ** @nam4rule Clear Clear the Ensembl Sequence Adaptor-internal cache
375 **
376 ** @argrule * sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
377 **
378 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
379 **
380 ** @fcategory use
381 ******************************************************************************/
382 
383 
384 
385 
386 /* @func ensSequenceadaptorCacheClear *****************************************
387 **
388 ** Clear the internal cache of an Ensembl Sequence Adaptor.
389 **
390 ** @cc Bio::EnsEMBL::DBSQL::SequenceAdaptor::clear_cache
391 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
392 **
393 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
394 **
395 ** @release 6.5.0
396 ** @@
397 ******************************************************************************/
398 
ensSequenceadaptorCacheClear(EnsPSequenceadaptor sqa)399 AjBool ensSequenceadaptorCacheClear(EnsPSequenceadaptor sqa)
400 {
401     if (!sqa)
402         return ajFalse;
403 
404     return ensCacheClear(sqa->Cache);
405 }
406 
407 
408 
409 
410 /* @section member retrieval **************************************************
411 **
412 ** Functions for returning members of an
413 ** Ensembl Sequence Adaptor object.
414 **
415 ** @fdata [EnsPSequenceadaptor]
416 **
417 ** @nam3rule Get Return Ensembl Sequence Adaptor attribute(s)
418 ** @nam4rule Databaseadaptor Return the Ensembl Database Adaptor
419 **
420 ** @argrule * sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
421 **
422 ** @valrule Databaseadaptor [EnsPDatabaseadaptor]
423 ** Ensembl Database Adaptor or NULL
424 **
425 ** @fcategory use
426 ******************************************************************************/
427 
428 
429 
430 
431 /* @func ensSequenceadaptorGetDatabaseadaptor ***************************
432 **
433 ** Get the Ensembl Database Adaptor of an Ensembl Sequence Adaptor.
434 **
435 ** @param [u] sqa [EnsPSequenceadaptor]
436 ** Ensembl Sequence Adaptor
437 **
438 ** @return [EnsPDatabaseadaptor] Ensembl Database Adaptor or NULL
439 **
440 ** @release 6.5.0
441 ** @@
442 ******************************************************************************/
443 
ensSequenceadaptorGetDatabaseadaptor(EnsPSequenceadaptor sqa)444 EnsPDatabaseadaptor ensSequenceadaptorGetDatabaseadaptor(
445     EnsPSequenceadaptor sqa)
446 {
447     return (sqa) ? sqa->Adaptor : NULL;
448 }
449 
450 
451 
452 
453 /* @section object fetching ***************************************************
454 **
455 ** Functions for retrieving Sequence objects from an Ensembl Core database.
456 **
457 ** @fdata [EnsPSequenceadaptor]
458 **
459 ** @nam3rule Fetch Fetch Ensembl Sequence object(s)
460 ** @nam4rule Seqregion Fetch by an Ensembl Sequence Region
461 ** @nam4rule Slice Fetch by an Ensembl Slice
462 ** @nam5rule All Fetch the complete sequence
463 ** @nam5rule Sub Fetch a sub-sequence
464 ** @nam6rule Str Fetch as an AJAX String
465 ** @nam6rule Seq Fetch as an AJAX Sequence
466 **
467 ** @argrule * sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
468 ** @argrule Seqregion sr [const EnsPSeqregion] Ensembl Sequence Region
469 ** @argrule SeqregionSub start [ajuint] Start coordinate
470 ** @argrule SeqregionSub length [ajuint] Sequence length
471 ** @argrule Slice slice [EnsPSlice] Ensembl Slice
472 ** @argrule SliceSub start [ajint] Start
473 ** @argrule SliceSub end [ajint] End
474 ** @argrule SliceSub strand [ajint] Strand
475 ** @argrule Str Psequence [AjPStr*] AJAX String address
476 ** @argrule Seq Psequence [AjPSeq*] AJAX Sequence address
477 **
478 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
479 **
480 ** @fcategory use
481 ******************************************************************************/
482 
483 
484 
485 
486 /* @func ensSequenceadaptorFetchSeqregionAllSeq *******************************
487 **
488 ** Fetch the sequence of an Ensembl Sequence Region as an AJAX Sequence.
489 **
490 ** The caller is responsible for deleting the AJAX String.
491 **
492 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
493 ** @param [r] sr [const EnsPSeqregion] Ensembl Sequence Region
494 ** @param [wP] Psequence [AjPSeq*] AJAX Sequence address
495 **
496 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
497 **
498 ** @release 6.4.0
499 ** @@
500 ** This function will only return biological sequence information for
501 ** Ensembl Sequence Region objects, which are in the sequence-level
502 ** Ensembl Coordinate System. All other Ensembl Sequence Region objects do not
503 ** have sequence attached so that their sequence can only be fetched in the
504 ** context of an Ensembl Slice, which is subsequently mapped to the
505 ** sequence-level Ensembl Coordinate System. See the description of the
506 ** ensSequenceadaptorFetchSliceAllSeq function for further details.
507 ******************************************************************************/
508 
ensSequenceadaptorFetchSeqregionAllSeq(EnsPSequenceadaptor sqa,const EnsPSeqregion sr,AjPSeq * Psequence)509 AjBool ensSequenceadaptorFetchSeqregionAllSeq(EnsPSequenceadaptor sqa,
510                                               const EnsPSeqregion sr,
511                                               AjPSeq *Psequence)
512 {
513     return ensSequenceadaptorFetchSeqregionSubSeq(
514         sqa,
515         sr,
516         1,
517         (ajuint) ensSeqregionGetLength(sr),
518         Psequence);
519 }
520 
521 
522 
523 
524 /* @func ensSequenceadaptorFetchSeqregionAllStr *******************************
525 **
526 ** Fetch the sequence of an Ensembl Sequence Region as an AJAX String.
527 **
528 ** The caller is responsible for deleting the AJAX String.
529 **
530 ** @cc Bio::EnsEMBL::DBSQL::Sequenceadaptor::_fetch_seq
531 ** @see ensSequenceadaptorFetchSliceAllStr
532 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
533 ** @param [r] sr [const EnsPSeqregion] Ensembl Sequence Region
534 ** @param [u] Psequence [AjPStr*] Sequence address
535 **
536 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
537 **
538 ** @release 6.4.0
539 ** @@
540 ** This function will only return biological sequence information for
541 ** Ensembl Sequence Region objects, which are in the sequence-level
542 ** Ensembl Coordinate System. All other Ensembl Sequence Region objects do not
543 ** have sequence attached so that their sequence can only be fetched in the
544 ** context of an Ensembl Slice, which is subsequently mapped to the
545 ** sequence-level Ensembl Coordinate System. See the description of the
546 ** ensSequenceadaptorFetchSliceAllStr function for further details.
547 ******************************************************************************/
548 
ensSequenceadaptorFetchSeqregionAllStr(EnsPSequenceadaptor sqa,const EnsPSeqregion sr,AjPStr * Psequence)549 AjBool ensSequenceadaptorFetchSeqregionAllStr(EnsPSequenceadaptor sqa,
550                                               const EnsPSeqregion sr,
551                                               AjPStr *Psequence)
552 {
553     return ensSequenceadaptorFetchSeqregionSubStr(
554         sqa,
555         sr,
556         1,
557         (ajuint) ensSeqregionGetLength(sr),
558         Psequence);
559 }
560 
561 
562 
563 
564 /* @func ensSequenceadaptorFetchSeqregionSubSeq *******************************
565 **
566 ** Fetch a sub-sequence of an Ensembl Sequence Region as an AJAX Sequence.
567 ** The start coordinate is one-based, as in the SQL SUBSTRING function.
568 ** A start of 1 and a length equal the Sequence Region length covers the whole
569 ** Sequence Region.
570 **
571 ** The caller is responsible for deleting the AJAX Sequence.
572 **
573 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
574 ** @param [r] sr [const EnsPSeqregion] Ensembl Sequence Region
575 ** @param [r] start [ajuint] Start coordinate
576 ** @param [r] length [ajuint] Sequence length
577 ** @param [wP] Psequence [AjPSeq*] AJAX Sequence address
578 **
579 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
580 **
581 ** @release 6.4.0
582 ** @@
583 ** This function will only return biological sequence information for
584 ** Ensembl Sequence Region objects, which are in the sequence-level
585 ** Ensembl Coordinate System. All other Ensembl Sequence Region objects do not
586 ** have sequence attached so that their sequence can only be fetched in the
587 ** context of an Ensembl Slice, which is subsequently mapped to the
588 ** sequence-level Ensembl Coordinate System. See the description of the
589 ** ensSequenceadaptorFetchSliceAllSeq function for further details.
590 ******************************************************************************/
591 
ensSequenceadaptorFetchSeqregionSubSeq(EnsPSequenceadaptor sqa,const EnsPSeqregion sr,ajuint start,ajuint length,AjPSeq * Psequence)592 AjBool ensSequenceadaptorFetchSeqregionSubSeq(EnsPSequenceadaptor sqa,
593                                               const EnsPSeqregion sr,
594                                               ajuint start,
595                                               ajuint length,
596                                               AjPSeq *Psequence)
597 {
598     AjBool result = AJFALSE;
599 
600     AjPStr name     = NULL;
601     AjPStr sequence = NULL;
602 
603     if (ajDebugTest("ensSequenceadaptorFetchSeqregionSubSeq"))
604     {
605         ajDebug("ensSequenceadaptorFetchSeqregionSubSeq\n"
606                 "  sqa %p\n"
607                 "  sr %p\n"
608                 "  start %u\n"
609                 "  length %u\n"
610                 "  Psequence %p\n",
611                 sqa,
612                 sr,
613                 start,
614                 length,
615                 Psequence);
616 
617         ensSeqregionTrace(sr, 1);
618     }
619 
620     if (!sqa)
621         return ajFalse;
622 
623     if (!sr)
624         return ajFalse;
625 
626     if (!Psequence)
627         return ajFalse;
628 
629     /*
630     ** It is sligtly more efficient, if undefined AJAX String objects are
631     ** directly allocated by the following functions to their final size.
632     */
633 
634     name = ajFmtStr(
635         "%S:%S:%S:%u:%u:1",
636         ensCoordsystemGetName(ensSeqregionGetCoordsystem(sr)),
637         ensCoordsystemGetVersion(ensSeqregionGetCoordsystem(sr)),
638         ensSeqregionGetName(sr),
639         start,
640         start + length - 1);
641 
642     result = ensSequenceadaptorFetchSeqregionSubStr(
643         sqa,
644         sr,
645         start,
646         length,
647         &sequence);
648 
649     if (*Psequence)
650     {
651         ajSeqClear(*Psequence);
652 
653         ajSeqAssignNameS(*Psequence, name);
654         ajSeqAssignSeqS(*Psequence, sequence);
655     }
656     else
657         *Psequence = ajSeqNewNameS(sequence, name);
658 
659     ajSeqSetNuc(*Psequence);
660 
661     ajStrDel(&name);
662     ajStrDel(&sequence);
663 
664     return result;
665 }
666 
667 
668 
669 
670 /* @func ensSequenceadaptorFetchSeqregionSubStr *******************************
671 **
672 ** Fetch a sub-sequence of an Ensembl Sequence Region as an AJAX String.
673 ** The start coordinate is one-based, as in the SQL SUBSTRING function.
674 ** A start of 1 and a length equal the Sequence Region length covers the whole
675 ** Sequence Region.
676 **
677 ** The caller is responsible for deleting the AJAX String.
678 **
679 ** @cc Bio::EnsEMBL::DBSQL::Sequenceadaptor::_fetch_seq
680 ** @see ensSequenceadaptorFetchSliceSubStr
681 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
682 ** @param [r] sr [const EnsPSeqregion] Ensembl Sequence Region
683 ** @param [r] start [ajuint] Start coordinate
684 ** @param [r] length [ajuint] Sequence length
685 ** @param [u] Psequence [AjPStr*] Sequence address
686 **
687 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
688 **
689 ** @release 6.4.0
690 ** @@
691 ** This function will only return biological sequence information for
692 ** Ensembl Sequence Region objects, which are in the sequence-level
693 ** Ensembl Coordinate System. All other Ensembl Sequence Region objects do not
694 ** have sequence attached so that their sequence can only be fetched in the
695 ** context of an Ensembl Slice, which is subsequently mapped to the
696 ** sequence-level Ensembl Coordinate System. See the description of the
697 ** ensSequenceadaptorFetchSliceSubStr function for further details.
698 ******************************************************************************/
699 
ensSequenceadaptorFetchSeqregionSubStr(EnsPSequenceadaptor sqa,const EnsPSeqregion sr,ajuint start,ajuint length,AjPStr * Psequence)700 AjBool ensSequenceadaptorFetchSeqregionSubStr(EnsPSequenceadaptor sqa,
701                                               const EnsPSeqregion sr,
702                                               ajuint start,
703                                               ajuint length,
704                                               AjPStr *Psequence)
705 {
706     register ajuint i = 0U;
707 
708     ajuint chkmin = 0U;
709     ajuint chkmax = 0U;
710     ajuint posmin = 0U;
711 
712     AjPSqlstatement sqls = NULL;
713     AjISqlrow sqli       = NULL;
714     AjPSqlrow sqlr       = NULL;
715 
716     AjPStr chkstr    = NULL;
717     AjPStr tmpstr    = NULL;
718     AjPStr key       = NULL;
719     AjPStr statement = NULL;
720 
721     EnsPDatabaseadaptor dba = NULL;
722 
723     if (ajDebugTest("ensSequenceadaptorFetchSeqregionSubStr"))
724     {
725         ajDebug("ensSequenceadaptorFetchSeqregionSubStr\n"
726                 "  sqa %p\n"
727                 "  sr %p\n"
728                 "  start %d\n"
729                 "  length %d\n",
730                 sqa,
731                 sr,
732                 start,
733                 length);
734 
735         ensSeqregionTrace(sr, 1);
736     }
737 
738     if (!sqa)
739         return ajFalse;
740 
741     if (!sr)
742         return ajFalse;
743 
744     if (!Psequence)
745         return ajFalse;
746 
747     /*
748     ** Reserve sequence space in larger blocks based on the requested length
749     ** plus one position for the 'nul' string terminator.
750     */
751 
752     if (*Psequence)
753         ajStrAssignClear(Psequence);
754     else
755         *Psequence = ajStrNewRes((((length + 1) >> sequenceKChunkPower) + 1)
756                                  << sequenceKChunkPower);
757 
758     dba = ensSequenceadaptorGetDatabaseadaptor(sqa);
759 
760     if (length < sequenceadaptorKCacheMaxLength)
761     {
762         chkmin = (start - 1) >> sequenceKChunkPower;
763 
764         chkmax = (start + length - 1) >> sequenceKChunkPower;
765 
766         /*
767         ** Allocate an AJAX String and reserve space for the number of
768         ** sequence chunks plus one for the 'nul' string terminator.
769         */
770 
771         tmpstr = ajStrNewRes(((1 << sequenceKChunkPower) + 1) *
772                              (chkmax - chkmin + 1));
773 
774         /* Piece together sequence from cached sequence chunks. */
775 
776         for (i = chkmin; i <= chkmax; i++)
777         {
778             key = ajFmtStr("%u:%u", ensSeqregionGetIdentifier(sr), i);
779 
780             chkstr = NULL;
781 
782             ensCacheFetch(sqa->Cache, (void *) key, (void **) &chkstr);
783 
784             if (chkstr)
785             {
786                 ajStrAppendS(&tmpstr, chkstr);
787 
788                 ajStrDel(&chkstr);
789             }
790             else
791             {
792                 /* Fetch uncached sequence chunks. */
793 
794                 posmin = (i << sequenceKChunkPower) + 1;
795 
796                 statement = ajFmtStr(
797                     "SELECT "
798                     "SUBSTRING(dna.sequence FROM %u FOR %u) "
799                     "FROM "
800                     "dna "
801                     "WHERE "
802                     "dna.seq_region_id = %u",
803                     posmin,
804                     1 << sequenceKChunkPower,
805                     ensSeqregionGetIdentifier(sr));
806 
807                 sqls = ensDatabaseadaptorSqlstatementNew(dba, statement);
808 
809                 sqli = ajSqlrowiterNew(sqls);
810 
811                 while (!ajSqlrowiterDone(sqli))
812                 {
813                     sqlr = ajSqlrowiterGet(sqli);
814 
815                     /*
816                     ** Allocate an AJAX String and reserve space for the
817                     ** maximum sequence chunk length plus the 'nul' string
818                     ** terminator.
819                     */
820 
821                     chkstr = ajStrNewRes((1 << sequenceKChunkPower) + 1);
822 
823                     ajSqlcolumnToStr(sqlr, &chkstr);
824 
825                     /*
826                     ** Always store upper case sequence
827                     ** so that it can be properly soft-masked.
828                     */
829 
830                     ajStrFmtUpper(&chkstr);
831 
832                     ensCacheStore(sqa->Cache, (void *) key, (void **) &chkstr);
833 
834                     ajStrAppendS(&tmpstr, chkstr);
835 
836                     ajStrDel(&chkstr);
837                 }
838 
839                 ajSqlrowiterDel(&sqli);
840 
841                 ensDatabaseadaptorSqlstatementDel(dba, &sqls);
842 
843                 ajStrDel(&statement);
844             }
845 
846             ajStrDel(&key);
847         }
848 
849         /* Return only the requested portion of the entire sequence. */
850 
851         posmin = (chkmin << sequenceKChunkPower) + 1;
852 
853         ajStrAssignSubS(Psequence,
854                         tmpstr,
855                         start - posmin,
856                         start - posmin + length - 1);
857 
858         ajStrDel(&tmpstr);
859     }
860     else
861     {
862         /* Do not attempt any caching for requests of very large sequences. */
863 
864         statement = ajFmtStr(
865             "SELECT "
866             "SUBSTRING(dna.sequence FROM %u FOR %u) "
867             "FROM dna "
868             "WHERE "
869             "dna.seq_region_id = %u",
870             start,
871             length,
872             ensSeqregionGetIdentifier(sr));
873 
874         sqls = ensDatabaseadaptorSqlstatementNew(dba, statement);
875 
876         sqli = ajSqlrowiterNew(sqls);
877 
878         while (!ajSqlrowiterDone(sqli))
879         {
880             sqlr = ajSqlrowiterGet(sqli);
881 
882             /*
883             ** Allocate an AJAX String and reserve space of the length plus
884             ** the 'nul' string terminator.
885             */
886 
887             chkstr = ajStrNewRes((length + 1));
888 
889             ajSqlcolumnToStr(sqlr, &chkstr);
890 
891             /*
892             ** Always return upper case sequence
893             ** so that it can be properly soft-masked.
894             */
895 
896             ajStrFmtUpper(&chkstr);
897 
898             ajStrAssignS(Psequence, chkstr);
899 
900             ajStrDel(&chkstr);
901         }
902 
903         ajSqlrowiterDel(&sqli);
904 
905         ensDatabaseadaptorSqlstatementDel(dba, &sqls);
906 
907         ajStrDel(&statement);
908     }
909 
910     return ajTrue;
911 }
912 
913 
914 
915 
916 /* @func ensSequenceadaptorFetchSliceAllSeq ***********************************
917 **
918 ** Fetch the sequence of an Ensembl Slice as an AJAX Sequence.
919 **
920 ** The caller is responsible for deleting the AJAX Sequence.
921 **
922 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
923 ** @param [u] slice [EnsPSlice] Ensembl Slice
924 ** @param [wP] Psequence [AjPSeq*] AJAX Sequence address
925 **
926 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
927 **
928 ** @release 6.4.0
929 ** @@
930 ******************************************************************************/
931 
ensSequenceadaptorFetchSliceAllSeq(EnsPSequenceadaptor sqa,EnsPSlice slice,AjPSeq * Psequence)932 AjBool ensSequenceadaptorFetchSliceAllSeq(EnsPSequenceadaptor sqa,
933                                           EnsPSlice slice,
934                                           AjPSeq *Psequence)
935 {
936     return ensSequenceadaptorFetchSliceSubSeq(
937         sqa,
938         slice,
939         1,
940         ensSliceCalculateLength(slice),
941         1,
942         Psequence);
943 }
944 
945 
946 
947 
948 /* @func ensSequenceadaptorFetchSliceAllStr ***********************************
949 **
950 ** Fetch the sequence of an Ensembl Slice as an AJAX String.
951 **
952 ** The caller is responsible for deleting the AJAX String.
953 **
954 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
955 ** @param [u] slice [EnsPSlice] Ensembl Slice
956 ** @param [u] Psequence [AjPStr*] Sequence address
957 **
958 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
959 **
960 ** @release 6.4.0
961 ** @@
962 ******************************************************************************/
963 
ensSequenceadaptorFetchSliceAllStr(EnsPSequenceadaptor sqa,EnsPSlice slice,AjPStr * Psequence)964 AjBool ensSequenceadaptorFetchSliceAllStr(EnsPSequenceadaptor sqa,
965                                           EnsPSlice slice,
966                                           AjPStr *Psequence)
967 {
968     return ensSequenceadaptorFetchSliceSubStr(
969         sqa,
970         slice,
971         1,
972         ensSliceCalculateLength(slice),
973         1,
974         Psequence);
975 }
976 
977 
978 
979 
980 /* @func ensSequenceadaptorFetchSliceSubSeq ***********************************
981 **
982 ** Fetch a sub-sequence of an Ensembl Slice as an AJAX Sequence.
983 ** Coordinates are relative to the Slice and one-based.
984 ** A start of 1 and an end equal the Slice length covers the whole Slice.
985 **
986 ** The caller is responsible for deleting the AJAX Sequence.
987 **
988 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
989 ** @param [u] slice [EnsPSlice] Ensembl Slice
990 ** @param [r] start [ajint] Start coordinate
991 ** @param [r] end [ajint] End coordinate
992 ** @param [r] strand [ajint] Strand information
993 ** @param [wP] Psequence [AjPSeq*] AJAX Sequence address
994 **
995 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
996 **
997 ** @release 6.4.0
998 ** @@
999 ******************************************************************************/
1000 
ensSequenceadaptorFetchSliceSubSeq(EnsPSequenceadaptor sqa,EnsPSlice slice,ajint start,ajint end,ajint strand,AjPSeq * Psequence)1001 AjBool ensSequenceadaptorFetchSliceSubSeq(EnsPSequenceadaptor sqa,
1002                                           EnsPSlice slice,
1003                                           ajint start,
1004                                           ajint end,
1005                                           ajint strand,
1006                                           AjPSeq *Psequence)
1007 {
1008     ajint srstart  = 0;
1009     ajint srend    = 0;
1010     ajint srstrand = 0;
1011 
1012     AjBool result = AJFALSE;
1013 
1014     AjPStr name     = NULL;
1015     AjPStr sequence = NULL;
1016 
1017     if (!sqa)
1018         return ajFalse;
1019 
1020     if (!slice)
1021         return ajFalse;
1022 
1023     if (!strand)
1024         strand = 1;
1025 
1026     if (!Psequence)
1027         return ajFalse;
1028 
1029     /* Transform relative into absolute coordinates for the Slice name. */
1030 
1031     if (ensSliceGetStrand(slice) > 0)
1032     {
1033         srstart = ensSliceGetStart(slice) + start - 1;
1034 
1035         srend = ensSliceGetStart(slice) + end - 1;
1036 
1037         srstrand = +strand;
1038     }
1039     else
1040     {
1041         srstart = ensSliceGetEnd(slice) - end + 1;
1042 
1043         srend = ensSliceGetEnd(slice) - start + 1;
1044 
1045         srstrand = -strand;
1046     }
1047 
1048     /*
1049     ** It is sligtly more efficient, if undefined AJAX String objects are
1050     ** directly allocated by the following functions to their final size.
1051     */
1052 
1053     name = ajFmtStr(
1054         "%S:%S:%S:%d:%d:%d",
1055         ensSliceGetCoordsystemName(slice),
1056         ensSliceGetCoordsystemVersion(slice),
1057         ensSliceGetSeqregionName(slice),
1058         srstart,
1059         srend,
1060         srstrand);
1061 
1062     result = ensSequenceadaptorFetchSliceSubStr(
1063         sqa,
1064         slice,
1065         start,
1066         end,
1067         strand,
1068         &sequence);
1069 
1070     if (*Psequence)
1071     {
1072         ajSeqClear(*Psequence);
1073 
1074         ajSeqAssignNameS(*Psequence, name);
1075         ajSeqAssignSeqS(*Psequence, sequence);
1076     }
1077     else
1078         *Psequence = ajSeqNewNameS(sequence, name);
1079 
1080     ajSeqSetNuc(*Psequence);
1081 
1082     ajStrDel(&name);
1083     ajStrDel(&sequence);
1084 
1085     return result;
1086 }
1087 
1088 
1089 
1090 
1091 /* @func ensSequenceadaptorFetchSliceSubStr ***********************************
1092 **
1093 ** Fetch a sub-sequence of an Ensembl Slice as an AJAX String.
1094 ** Coordinates are relative to the Slice and one-based.
1095 ** A start of 1 and an end equal the Slice length covers the whole Slice.
1096 **
1097 ** The caller is responsible for deleting the AJAX String.
1098 **
1099 ** @cc Bio::EnsEMBL::DBSQL::Sequenceadaptor::fetch_by_Slice_start_end_strand
1100 ** @see ensSequenceadaptorFetchSliceAllStr
1101 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
1102 ** @param [u] slice [EnsPSlice] Ensembl Slice
1103 ** @param [r] start [ajint] Start coordinate
1104 ** @param [r] end [ajint] End coordinate
1105 ** @param [r] strand [ajint] Strand information
1106 ** @param [u] Psequence [AjPStr*] Sequence address
1107 **
1108 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
1109 **
1110 ** @release 6.4.0
1111 ** @@
1112 ******************************************************************************/
1113 
ensSequenceadaptorFetchSliceSubStr(EnsPSequenceadaptor sqa,EnsPSlice slice,ajint start,ajint end,ajint strand,AjPStr * Psequence)1114 AjBool ensSequenceadaptorFetchSliceSubStr(EnsPSequenceadaptor sqa,
1115                                           EnsPSlice slice,
1116                                           ajint start,
1117                                           ajint end,
1118                                           ajint strand,
1119                                           AjPStr *Psequence)
1120 {
1121     const char *Ptr = NULL;
1122 
1123     register ajuint i = 0U;
1124 
1125     ajint five   = 0;
1126     ajint three  = 0;
1127     ajint fshift = 0;
1128     ajint tshift = 0;
1129 
1130     ajuint padding = 0U;
1131     ajuint total   = 0U;
1132 
1133     AjBool circular = AJFALSE;
1134     AjBool debug    = AJFALSE;
1135 
1136     AjPList pslist = NULL;
1137     AjPList ses    = NULL;
1138 
1139     AjPStr tmpstr = NULL;
1140 
1141     EnsPCoordsystem seqlvlcs   = NULL;
1142     EnsPCoordsystemadaptor csa = NULL;
1143 
1144     EnsPDatabaseadaptor dba = NULL;
1145 
1146     EnsPProjectionsegment ps = NULL;
1147 
1148     EnsPSequenceedit se = NULL;
1149 
1150     EnsPSlice eslice     = NULL;
1151     EnsPSlice nslice     = NULL;
1152     EnsPSlice sslice     = NULL;
1153     EnsPSliceadaptor sla = NULL;
1154 
1155     debug = ajDebugTest("ensSequenceadaptorFetchSliceSubStr");
1156 
1157     if (debug)
1158     {
1159         ajDebug("ensSequenceadaptorFetchSliceSubStr\n"
1160                 "  sqa %p\n"
1161                 "  slice %p\n"
1162                 "  start %d\n"
1163                 "  end %d\n"
1164                 "  strand %d\n"
1165                 "  Psequence %p\n",
1166                 sqa,
1167                 slice,
1168                 start,
1169                 end,
1170                 strand,
1171                 Psequence);
1172 
1173         ensSliceTrace(slice, 1);
1174     }
1175 
1176     if (!sqa)
1177         return ajFalse;
1178 
1179     if (!slice)
1180         return ajFalse;
1181 
1182     if (!Psequence)
1183         return ajFalse;
1184 
1185     if (!ensSliceIsCircular(slice, &circular))
1186         return ajFalse;
1187 
1188     if (circular == ajTrue)
1189     {
1190         if (start > end)
1191             return sequenceadaptorFetchCircularsliceSubStr(
1192                 sqa,
1193                 slice,
1194                 start,
1195                 end,
1196                 strand,
1197                 Psequence);
1198 
1199         if (start < 0)
1200             start += ensSliceGetSeqregionLength(slice);
1201 
1202         if (end < 0)
1203             end += ensSliceGetSeqregionLength(slice);
1204 
1205         if (ensSliceGetStart(slice) > ensSliceGetEnd(slice))
1206             return sequenceadaptorFetchCircularsliceSubStr(
1207                 sqa,
1208                 slice,
1209                 ensSliceGetStart(slice),
1210                 ensSliceGetEnd(slice),
1211                 strand,
1212                 Psequence);
1213     }
1214 
1215     if (start > end)
1216     {
1217         ajDebug("ensSequenceadaptorFetchSliceSubStr requires the start %d "
1218                 "to be less than or equal to the end %d coordinate.\n",
1219                 start, end);
1220 
1221         return ajFalse;
1222     }
1223 
1224     if (!strand)
1225         strand = 1;
1226 
1227     /*
1228     ** Reserve sequence space in larger blocks based on the requested length
1229     ** plus one position for the 'nul' string terminator.
1230     */
1231 
1232     if (*Psequence)
1233         ajStrAssignClear(Psequence);
1234     else
1235         *Psequence = ajStrNewRes(
1236             (((ensSliceCalculateRegion(slice, start, end) + 1)
1237               >> sequenceKChunkPower) + 1) << sequenceKChunkPower);
1238 
1239     dba = ensSequenceadaptorGetDatabaseadaptor(sqa);
1240 
1241     csa = ensRegistryGetCoordsystemadaptor(dba);
1242     sla = ensRegistryGetSliceadaptor(dba);
1243 
1244     /*
1245     ** Get a new Slice that spans the exact region to retrieve DNA from.
1246     ** Only this short region of the Slice needs mapping into the
1247     ** sequence-level coordinate system.
1248     */
1249 
1250     /* Relative Slice coordinates range from 1 to length. */
1251 
1252     five = 1 - start;
1253 
1254     three = end - ensSliceCalculateLength(slice);
1255 
1256     if (five || three)
1257         ensSliceFetchSliceexpanded(slice,
1258                                    five,
1259                                    three,
1260                                    ajFalse,
1261                                    &fshift,
1262                                    &tshift,
1263                                    &eslice);
1264     else
1265         eslice = ensSliceNewRef(slice);
1266 
1267     /*
1268     ** Retrieve normalised, non-symlinked Slice objects, which allows fetching
1269     ** of haplotypes (HAPs) and pseudo-autosomal regions (PARs).
1270     */
1271 
1272     pslist = ajListNew();
1273 
1274     ensSliceadaptorRetrieveNormalisedprojection(sla, eslice, pslist);
1275 
1276     if (!ajListGetLength(pslist))
1277         ajFatal("ensSequenceadaptorFetchSliceSubStr could not "
1278                 "retrieve normalised Slices. Database contains incorrect "
1279                 "information in the 'assembly_exception' table.\n");
1280 
1281     /*
1282     ** Call this method again with any Slice that was sym-linked to by this
1283     ** Slice.
1284     */
1285 
1286     ajListPeekFirst(pslist, (void **) &ps);
1287 
1288     if ((ajListGetLength(pslist) != 1) ||
1289         (!ensSliceMatch(ensProjectionsegmentGetTargetSlice(ps), slice)))
1290     {
1291         tmpstr = ajStrNew();
1292 
1293         while (ajListPop(pslist, (void **) &ps))
1294         {
1295             nslice = ensProjectionsegmentGetTargetSlice(ps);
1296 
1297             ensSequenceadaptorFetchSliceAllStr(sqa, nslice, &tmpstr);
1298 
1299             ajStrAppendS(Psequence, tmpstr);
1300 
1301             ensProjectionsegmentDel(&ps);
1302         }
1303 
1304         ajStrDel(&tmpstr);
1305 
1306         ajListFree(&pslist);
1307 
1308         if (strand < 0)
1309             ajSeqstrReverse(Psequence);
1310 
1311         ensSliceDel(&eslice);
1312 
1313         return ajTrue;
1314     }
1315 
1316     /*
1317     ** Clear the AJAX List of Ensembl Projection Segment objects resulting
1318     ** from the projection of the expanded Slice object to normalised
1319     ** Slice objects.
1320     */
1321 
1322     while (ajListPop(pslist, (void **) &ps))
1323         ensProjectionsegmentDel(&ps);
1324 
1325     /*
1326     ** Now, this Slice needs projecting onto the sequence-level Coordinate
1327     ** System even if it is already in this Coordinate System, because
1328     ** flanking gaps need trimming out the Slice is past the edges of
1329     ** the Sequence Region.
1330     */
1331 
1332     ensCoordsystemadaptorFetchSeqlevel(csa, &seqlvlcs);
1333 
1334     ensSliceProject(slice,
1335                     ensCoordsystemGetName(seqlvlcs),
1336                     ensCoordsystemGetVersion(seqlvlcs),
1337                     pslist);
1338 
1339     ensCoordsystemDel(&seqlvlcs);
1340 
1341     /*
1342     ** Fetch the sequence for each of the Ensembl Sequence Region objects
1343     ** projected onto. Allocate space for 512 KiB (1 << 19) that should fit
1344     ** one BAC clone.
1345     */
1346 
1347     tmpstr = ajStrNewRes((1 << 19) + 1);
1348 
1349     while (ajListPop(pslist, (void **) &ps))
1350     {
1351         /*
1352         ** Check for gaps between Ensembl Projection Segment objects and
1353         ** pad them with Ns.
1354         */
1355 
1356         padding = ensProjectionsegmentGetSourceStart(ps) - total - 1;
1357 
1358         if (padding)
1359         {
1360             ajStrAppendCountK(Psequence, 'N', padding);
1361 
1362             if (debug)
1363                 ajDebug("ensSequenceadaptorFetchSliceSubStr got total %u "
1364                         "and Projection Segment source start %u, "
1365                         "therefore added %u N padding between.\n",
1366                         total,
1367                         ensProjectionsegmentGetSourceStart(ps),
1368                         padding);
1369         }
1370 
1371         sslice = ensProjectionsegmentGetTargetSlice(ps);
1372 
1373         ensSequenceadaptorFetchSeqregionSubStr(
1374             sqa,
1375             ensSliceGetSeqregion(sslice),
1376             ensSliceGetStart(sslice),
1377             ensSliceCalculateLength(sslice),
1378             &tmpstr);
1379 
1380         if (ensSliceGetStrand(sslice) < 0)
1381             ajSeqstrReverse(&tmpstr);
1382 
1383         ajStrAppendS(Psequence, tmpstr);
1384 
1385         total = ensProjectionsegmentGetSourceEnd(ps);
1386 
1387         ensProjectionsegmentDel(&ps);
1388     }
1389 
1390     ajStrDel(&tmpstr);
1391 
1392     ajListFree(&pslist);
1393 
1394     /* Check for any remaining gaps at the end. */
1395 
1396     padding = ensSliceCalculateLength(slice) - total;
1397 
1398     if (padding)
1399     {
1400         ajStrAppendCountK(Psequence, 'N', padding);
1401 
1402         if (debug)
1403             ajDebug("ensSequenceadaptorFetchSliceSubStr got total %u "
1404                     "and Ensembl Slice length %u, "
1405                     "therefore added %u N padding.\n",
1406                     total,
1407                     ensSliceCalculateLength(slice),
1408                     padding);
1409     }
1410 
1411     /*
1412     ** If the sequence is too short, because we came in with a sequence-level
1413     ** Slice that was partially off the Sequence Region, pad the end with Ns
1414     ** to make it long enough.
1415     ** NOTE: Since ajStrGetLen returns size_t, which exceeds ajint,
1416     ** the sequence length needs to be determined here.
1417     ** padding = ensSliceCalculateLength(slice) - ajStrGetLen(*Psequence);
1418     */
1419 
1420     for (i = 0U, Ptr = ajStrGetPtr(*Psequence); (Ptr && *Ptr); i++, Ptr++)
1421         if (i == UINT_MAX)
1422             ajFatal("ensSequenceadaptorFetchSliceSubStr exeeded UINT_MAX.");
1423 
1424     padding = ensSliceCalculateLength(slice) - i;
1425 
1426     if (padding)
1427     {
1428         ajStrAppendCountK(Psequence, 'N', padding);
1429 
1430         if (debug)
1431             ajDebug("ensSequenceadaptorFetchSliceSubStr got "
1432                     "sequence length %u, but Slice length %u, "
1433                     "therefore added %u N final padding.\n",
1434                     ajStrGetLen(*Psequence),
1435                     ensSliceCalculateLength(slice),
1436                     padding);
1437     }
1438 
1439     /* Apply Sequence Edits. */
1440 
1441     ses = ajListNew();
1442 
1443     ensSliceFetchAllSequenceedits(slice, ses);
1444 
1445     /*
1446     ** Sort Sequence Edits in reverse order to avoid the complication of
1447     ** adjusting down-stream Sequence Edit coordinates.
1448     */
1449 
1450     ensListSequenceeditSortStartDescending(ses);
1451 
1452     while (ajListPop(ses, (void **) &se))
1453     {
1454         /* Adjust Sequence Edit coordinates to the Sub-Slice. */
1455 
1456         ensSequenceeditApplyString(se,
1457                                    ensSliceGetStart(eslice) - 1,
1458                                    Psequence);
1459 
1460         ensSequenceeditDel(&se);
1461     }
1462 
1463     ajListFree(&ses);
1464 
1465     /* Reverse sequence if requested. */
1466 
1467     if (strand < 0)
1468         ajSeqstrReverse(Psequence);
1469 
1470     ensSliceDel(&eslice);
1471 
1472     return ajTrue;
1473 }
1474 
1475 
1476 
1477 
1478 /* @funcstatic sequenceadaptorFetchCircularsliceSubStr ************************
1479 **
1480 ** Fetch a sub-sequence of a (circular) Ensembl Slice as an AJAX String.
1481 ** Coordinates are relative to the Slice and one-based.
1482 ** A start of 1 and an end equal the Slice length covers the whole Slice.
1483 **
1484 ** The caller is responsible for deleting the AJAX String.
1485 **
1486 ** @cc Bio::EnsEMBL::DBSQL::Sequenceadaptor::
1487 ** _fetch_by_Slice_start_end_strand_circular
1488 ** @param [u] sqa [EnsPSequenceadaptor] Ensembl Sequence Adaptor
1489 ** @param [u] slice [EnsPSlice] Ensembl Slice
1490 ** @param [r] start [ajint] Start coordinate
1491 ** @param [r] end [ajint] End coordinate
1492 ** @param [r] strand [ajint] Strand information
1493 ** @param [u] Psequence [AjPStr*] Sequence address
1494 **
1495 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
1496 **
1497 ** @release 6.4.0
1498 ** @@
1499 ******************************************************************************/
1500 
sequenceadaptorFetchCircularsliceSubStr(EnsPSequenceadaptor sqa,EnsPSlice slice,ajint start,ajint end,ajint strand,AjPStr * Psequence)1501 static AjBool sequenceadaptorFetchCircularsliceSubStr(EnsPSequenceadaptor sqa,
1502                                                       EnsPSlice slice,
1503                                                       ajint start,
1504                                                       ajint end,
1505                                                       ajint strand,
1506                                                       AjPStr *Psequence)
1507 {
1508     const char *Ptr = NULL;
1509 
1510     register ajuint i = 0U;
1511 
1512     ajint mpoint = 0;
1513 
1514     ajint five   = 0;
1515     ajint three  = 0;
1516     ajint fshift = 0;
1517     ajint tshift = 0;
1518 
1519     ajuint padding = 0U;
1520     ajuint total   = 0U;
1521 
1522     AjBool circular = AJFALSE;
1523     AjBool debug    = AJFALSE;
1524 
1525     AjPList pslist = NULL;
1526     AjPList ses    = NULL;
1527 
1528     AjPStr sequence1 = NULL;
1529     AjPStr sequence2 = NULL;
1530     AjPStr tmpstr    = NULL;
1531 
1532     EnsPCoordsystem seqlvlcs   = NULL;
1533     EnsPCoordsystemadaptor csa = NULL;
1534 
1535     EnsPDatabaseadaptor dba = NULL;
1536 
1537     EnsPProjectionsegment ps = NULL;
1538 
1539     EnsPSequenceedit se = NULL;
1540 
1541     EnsPSlice eslice     = NULL;
1542     EnsPSlice nslice     = NULL;
1543     EnsPSlice sslice     = NULL;
1544     EnsPSliceadaptor sla = NULL;
1545 
1546     debug = ajDebugTest("sequenceadaptorFetchCircularsliceSubStr");
1547 
1548     if (debug)
1549     {
1550         ajDebug("sequenceadaptorFetchCircularsliceSubStr\n"
1551                 "  sqa %p\n"
1552                 "  slice %p\n"
1553                 "  start %d\n"
1554                 "  end %d\n"
1555                 "  strand %d\n"
1556                 "  Psequence %p\n",
1557                 sqa,
1558                 slice,
1559                 start,
1560                 end,
1561                 strand,
1562                 Psequence);
1563 
1564         ensSliceTrace(slice, 1);
1565     }
1566 
1567     if (!sqa)
1568         return ajFalse;
1569 
1570     if (!slice)
1571         return ajFalse;
1572 
1573     if (!Psequence)
1574         return ajFalse;
1575 
1576     if (*Psequence)
1577         ajStrAssignClear(Psequence);
1578     else
1579         *Psequence = ajStrNewRes(
1580             (((ensSliceCalculateRegion(slice, start, end) + 1)
1581               >> sequenceKChunkPower) + 1) << sequenceKChunkPower);
1582 
1583     if (!ensSliceIsCircular(slice, &circular))
1584         return ajFalse;
1585 
1586     dba = ensSequenceadaptorGetDatabaseadaptor(sqa);
1587 
1588     csa = ensRegistryGetCoordsystemadaptor(dba);
1589     sla = ensRegistryGetSliceadaptor(dba);
1590 
1591     if ((start > end) && (circular == ajTrue))
1592     {
1593         mpoint
1594             = ensSliceGetSeqregionLength(slice)
1595             - ensSliceGetStart(slice)
1596             + 1;
1597 
1598         sequence1 = ajStrNew();
1599         sequence2 = ajStrNew();
1600 
1601         sequenceadaptorFetchCircularsliceSubStr(
1602             sqa,
1603             slice,
1604             1,
1605             mpoint,
1606             1,
1607             &sequence1);
1608 
1609         sequenceadaptorFetchCircularsliceSubStr(
1610             sqa,
1611             slice,
1612             mpoint + 1,
1613             ensSliceCalculateLength(slice),
1614             1,
1615             &sequence1);
1616 
1617         if (ensSliceGetStrand(slice) >= 0)
1618         {
1619             ajStrAppendS(Psequence, sequence1);
1620             ajStrAppendS(Psequence, sequence2);
1621         }
1622         else
1623         {
1624             ajStrAppendS(Psequence, sequence2);
1625             ajStrAppendS(Psequence, sequence1);
1626 
1627             ajSeqstrReverse(Psequence);
1628         }
1629 
1630         ajStrDel(&sequence1);
1631         ajStrDel(&sequence2);
1632 
1633         return ajTrue;
1634     }
1635 
1636     /* Get a new Slice that spans the exact region to retrieve DNA from. */
1637 
1638     /* Relative Slice coordinates range from 1 to length. */
1639 
1640     five = 1 - start;
1641 
1642     three = end - ensSliceCalculateLength(slice);
1643 
1644     if (five || three)
1645     {
1646         if (ensSliceGetStrand(slice) >= 0)
1647             ensSliceFetchSliceexpanded(slice,
1648                                        five,
1649                                        three,
1650                                        ajFalse,
1651                                        &fshift,
1652                                        &tshift,
1653                                        &eslice);
1654         else
1655             ensSliceFetchSliceexpanded(slice,
1656                                        three,
1657                                        five,
1658                                        ajFalse,
1659                                        &tshift,
1660                                        &fshift,
1661                                        &eslice);
1662     }
1663 
1664     /*
1665     ** Retrieve normalized non-symlinked Slice objects, which allows fetching
1666     ** of haplotypes (HAPs) and pseudo-autosomal regions (PARs).
1667     */
1668 
1669     pslist = ajListNew();
1670 
1671     ensSliceadaptorRetrieveNormalisedprojection(sla, eslice, pslist);
1672 
1673     if (!ajListGetLength(pslist))
1674         ajFatal("sequenceadaptorFetchCircularsliceSubStr could not "
1675                 "retrieve normalised Slices. Database contains incorrect "
1676                 "information in the 'assembly_exception' table.\n");
1677 
1678 
1679     /*
1680     ** Call this method again with any Slice that was sym-linked to by this
1681     ** Slice.
1682     */
1683 
1684     ajListPeekFirst(pslist, (void **) &ps);
1685 
1686     if ((ajListGetLength(pslist) != 1) ||
1687         (!ensSliceMatch(ensProjectionsegmentGetTargetSlice(ps), slice)))
1688     {
1689         tmpstr = ajStrNew();
1690 
1691         while (ajListPop(pslist, (void **) &ps))
1692         {
1693             nslice = ensProjectionsegmentGetTargetSlice(ps);
1694 
1695             ensSequenceadaptorFetchSliceAllStr(sqa, nslice, &tmpstr);
1696 
1697             ajStrAppendS(Psequence, tmpstr);
1698 
1699             ensProjectionsegmentDel(&ps);
1700         }
1701 
1702         ajStrDel(&tmpstr);
1703 
1704         ajListFree(&pslist);
1705 
1706         if (strand < 0)
1707             ajSeqstrReverse(Psequence);
1708 
1709         ensSliceDel(&eslice);
1710 
1711         return ajTrue;
1712     }
1713 
1714     /*
1715     ** Clear the AJAX List of Ensembl Projection Segment objects resulting
1716     ** from the projection of the expanded Slice object to normalised
1717     ** Slice objects.
1718     */
1719 
1720     while (ajListPop(pslist, (void **) &ps))
1721         ensProjectionsegmentDel(&ps);
1722 
1723     /*
1724     ** Now, this Slice needs projecting onto the sequence-level Coordinate
1725     ** System even if it is already in this Coordinate System, because
1726     ** flanking gaps need trimming out the Slice is past the edges of
1727     ** the Sequence Region.
1728     */
1729 
1730     ensCoordsystemadaptorFetchSeqlevel(csa, &seqlvlcs);
1731 
1732     ensSliceProject(slice,
1733                     ensCoordsystemGetName(seqlvlcs),
1734                     ensCoordsystemGetVersion(seqlvlcs),
1735                     pslist);
1736 
1737     ensCoordsystemDel(&seqlvlcs);
1738 
1739     /*
1740     ** Fetch the sequence for each of the Ensembl Sequence Region objects
1741     ** projected onto. Allocate space for 512 KiB (1 << 19) that should fit
1742     ** one BAC clone.
1743     */
1744 
1745     tmpstr = ajStrNewRes((1 << 19) + 1);
1746 
1747     while (ajListPop(pslist, (void **) &ps))
1748     {
1749         /*
1750         ** Check for gaps between Ensembl Projection Segment objects and
1751         ** pad them with Ns.
1752         */
1753 
1754         padding = ensProjectionsegmentGetSourceStart(ps) - total - 1;
1755 
1756         if (padding)
1757         {
1758             ajStrAppendCountK(Psequence, 'N', padding);
1759 
1760             if (debug)
1761                 ajDebug("sequenceadaptorFetchCircularsliceSubStr got total %u "
1762                         "and Projection Segment source start %u, "
1763                         "therefore added %u N padding between.\n",
1764                         total,
1765                         ensProjectionsegmentGetSourceStart(ps),
1766                         padding);
1767         }
1768 
1769         sslice = ensProjectionsegmentGetTargetSlice(ps);
1770 
1771         ensSequenceadaptorFetchSeqregionSubStr(
1772             sqa,
1773             ensSliceGetSeqregion(sslice),
1774             ensSliceGetStart(sslice),
1775             ensSliceCalculateLength(sslice),
1776             &tmpstr);
1777 
1778         if (ensSliceGetStrand(sslice) < 0)
1779             ajSeqstrReverse(&tmpstr);
1780 
1781         ajStrAppendS(Psequence, tmpstr);
1782 
1783         total = ensProjectionsegmentGetSourceEnd(ps);
1784 
1785         ensProjectionsegmentDel(&ps);
1786     }
1787 
1788     /* Check for any remaining gaps at the end. */
1789 
1790     padding = ensSliceCalculateLength(slice) - total;
1791 
1792     if (padding)
1793     {
1794         ajStrAppendCountK(Psequence, 'N', padding);
1795 
1796         if (debug)
1797             ajDebug("sequenceadaptorFetchCircularsliceSubStr got total %u "
1798                     "and Ensembl Slice length %u, "
1799                     "therefore added %u N padding.\n",
1800                     total,
1801                     ensSliceCalculateLength(slice),
1802                     padding);
1803     }
1804 
1805     /*
1806     ** If the sequence is too short, because we came in with a sequence-level
1807     ** Slice that was partially off the Sequence Region, pad the end with Ns
1808     ** to make it long enough.
1809     ** NOTE: Since ajStrGetLen returns size_t, which exceeds ajint,
1810     ** the sequence length needs to be determined here.
1811     ** padding = ensSliceCalculateLength(slice) - ajStrGetLen(*Psequence);
1812     */
1813 
1814     for (i = 0U, Ptr = ajStrGetPtr(*Psequence); (Ptr && *Ptr); i++, Ptr++)
1815         if (i == UINT_MAX)
1816             ajFatal("sequenceadaptorFetchCircularsliceSubStr exeeded "
1817                     "UINT_MAX.");
1818 
1819     padding = ensSliceCalculateLength(slice) - i;
1820 
1821     if (padding)
1822     {
1823         ajStrAppendCountK(Psequence, 'N', padding);
1824 
1825         if (debug)
1826             ajDebug("sequenceadaptorFetchCircularsliceSubStr got "
1827                     "sequence length %u, but Slice length %u, "
1828                     "therefore added %u N final padding.\n",
1829                     ajStrGetLen(*Psequence),
1830                     ensSliceCalculateLength(slice),
1831                     padding);
1832     }
1833 
1834     /* Apply Sequence Edits. */
1835 
1836     ses = ajListNew();
1837 
1838     ensSliceFetchAllSequenceedits(slice, ses);
1839 
1840     /*
1841     ** Sort Sequence Edits in reverse order to avoid the complication of
1842     ** adjusting down-stream Sequence Edit coordinates.
1843     */
1844 
1845     ensListSequenceeditSortStartDescending(ses);
1846 
1847     while (ajListPop(ses, (void **) &se))
1848     {
1849         /* Adjust Sequence Edit coordinates to the Sub-Slice. */
1850 
1851         ensSequenceeditApplyString(se,
1852                                    ensSliceGetStart(eslice) - 1,
1853                                    Psequence);
1854 
1855         ensSequenceeditDel(&se);
1856     }
1857 
1858     ajListFree(&ses);
1859 
1860     /* Reverse sequence if requested. */
1861 
1862     if (strand < 0)
1863         ajSeqstrReverse(Psequence);
1864 
1865     ensSliceDel(&eslice);
1866 
1867     return ajTrue;
1868 }
1869