1 /* @source ensslice ***********************************************************
2 **
3 ** Ensembl Slice functions
4 **
5 ** @author Copyright (C) 1999 Ensembl Developers
6 ** @author Copyright (C) 2006 Michael K. Schuster
7 ** @version $Revision: 1.68 $
8 ** @modified 2009 by Alan Bleasby for incorporation into EMBOSS core
9 ** @modified $Date: 2013/02/17 13:02:40 $ by $Author: mks $
10 ** @@
11 **
12 ** This library is free software; you can redistribute it and/or
13 ** modify it under the terms of the GNU Lesser General Public
14 ** License as published by the Free Software Foundation; either
15 ** version 2.1 of the License, or (at your option) any later version.
16 **
17 ** This library is distributed in the hope that it will be useful,
18 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 ** Lesser General Public License for more details.
21 **
22 ** You should have received a copy of the GNU Lesser General Public
23 ** License along with this library; if not, write to the Free Software
24 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
25 ** MA 02110-1301, USA.
26 **
27 ******************************************************************************/
28
29 /* ========================================================================= */
30 /* ============================= include files ============================= */
31 /* ========================================================================= */
32
33 #include "enscache.h"
34 #include "ensassemblyexception.h"
35 #include "ensassemblymapper.h"
36 #include "ensattribute.h"
37 #include "ensmapper.h"
38 #include "ensprojectionsegment.h"
39 #include "enssequence.h"
40 #include "enssequenceedit.h"
41 #include "ensslice.h"
42 #include "ensrepeat.h"
43 #include "enstable.h"
44 #include "enstranslation.h"
45
46
47
48
49 /* ========================================================================= */
50 /* =============================== constants =============================== */
51 /* ========================================================================= */
52
53
54
55
56 /* ========================================================================= */
57 /* =========================== global variables ============================ */
58 /* ========================================================================= */
59
60
61
62
63 /* ========================================================================= */
64 /* ============================= private data ============================== */
65 /* ========================================================================= */
66
67
68
69
70 /* ========================================================================= */
71 /* =========================== private constants =========================== */
72 /* ========================================================================= */
73
74 /* @conststatic sliceKSequenceeditCode ****************************************
75 **
76 ** Ensembl Sequence Edit objects for Ensembl Slice objects are a sub-set of
77 ** Ensembl Attribute objects that provide information about modifications of
78 ** the Slice sequence. Ensembl Attribute objects with the following codes are
79 ** Sequence Edit objects on the Slice-level.
80 **
81 ** _rna_edit: General sequence edit
82 **
83 ******************************************************************************/
84
/*
** Both the characters and the table slots are const, matching sliceKType,
** so the lookup table cannot be modified by accident.
*/
static const char *const sliceKSequenceeditCode[] =
{
    "_rna_edit",         /* General sequence edit */
    (const char *) NULL  /* trailing NULL entry */
};
90
91
92
93
94 /* @conststatic sliceKType ****************************************************
95 **
96 ** The Ensembl Slice Type member is enumerated in both, the SQL table
97 ** definition and the data structure. The following strings are used for
98 ** conversion in database operations and correspond to EnsESliceType.
99 **
100 ******************************************************************************/
101
/* String table indexed by position; the final slot is a NULL entry. */
static const char *const sliceKType[] =
{
    /* 0 */ "",
    /* 1 */ "Linear",
    /* 2 */ "Circular",
    /* 3 */ "LocusReferenceGenome",
    /* 4 */ NULL
};
110
111
112
113
114
115 /* @conststatic sliceadaptorKCacheMaxBytes ************************************
116 **
117 ** Maximum memory size in bytes the Ensembl Slice Adaptor-internal
118 ** Ensembl Cache can use.
119 **
120 ** 1 << 26 = 64 MiB
121 **
122 ******************************************************************************/
123
/* 2^26 bytes (64 MiB). */
static const size_t sliceadaptorKCacheMaxBytes = ((size_t) 1U) << 26U;
125
126
127
128
129 /* @conststatic sliceadaptorKCacheMaxCount ************************************
130 **
131 ** Maximum number of Ensembl Slice objects the Ensembl Slice Adaptor-internal
132 ** Ensembl Cache can hold.
133 **
134 ** 1 << 16 = 64 Ki
135 **
136 ******************************************************************************/
137
138 static const ajuint sliceadaptorKCacheMaxCount = 1U << 16U;
139
140
141
142
143 /* @conststatic sliceadaptorKCacheMaxSize *************************************
144 **
145 ** Maximum memory size in bytes of an Ensembl Slice to be allowed into the
146 ** Ensembl Slice Adaptor-internal Ensembl Cache.
147 **
148 ******************************************************************************/
149
/* Per-object size bound handed to the cache; the value configured is zero. */
static const size_t sliceadaptorKCacheMaxSize = (size_t) 0U;
151
152
153
154
155 /* ========================================================================= */
156 /* =========================== private variables =========================== */
157 /* ========================================================================= */
158
159
160
161
162 /* ========================================================================= */
163 /* =========================== private functions =========================== */
164 /* ========================================================================= */
165
166 static EnsPProjectionsegment sliceConstrain(EnsPSlice slice);
167
168 static AjBool sliceProject(EnsPSlice slice,
169 EnsPCoordsystem trgcs,
170 AjPList pss);
171
172 static int listSliceCompareIdentifierAscending(
173 const void *item1,
174 const void *item2);
175
176 static int listSliceCompareIdentifierDescending(
177 const void *item1,
178 const void *item2);
179
180 static int listSliceCompareNameAscending(
181 const void *item1,
182 const void *item2);
183
184 static int listSliceCompareNameDescending(
185 const void *item1,
186 const void *item2);
187
188 static void sliceadaptorCacheDelete(void **Pvalue);
189
190
191
192
193 /* ========================================================================= */
194 /* ======================= All functions by section ======================== */
195 /* ========================================================================= */
196
197
198
199
200 /* @filesection ensslice ******************************************************
201 **
202 ** @nam1rule ens Function belongs to the Ensembl library
203 **
204 ******************************************************************************/
205
206
207
208
209 /* @datasection [EnsPSlice] Ensembl Slice *************************************
210 **
211 ** @nam2rule Slice Functions for manipulating Ensembl Slice objects
212 **
213 ** @cc Bio::EnsEMBL::Slice
214 ** @cc CVS Revision: 1.301
215 ** @cc CVS Tag: branch-ensembl-68
216 **
217 ** @cc Bio::EnsEMBL::CircularSlice
218 ** @cc CVS Revision: 1.12
219 ** @cc CVS Tag: branch-ensembl-68
220 **
221 ******************************************************************************/
222
223
224
225
226 /* @section constructors ******************************************************
227 **
228 ** All constructors return a new Ensembl Slice by pointer.
229 ** It is the responsibility of the user to first destroy any previous
230 ** Slice. The target pointer does not need to be initialised to
231 ** NULL, but it is good programming practice to do so anyway.
232 **
233 ** @fdata [EnsPSlice]
234 **
235 ** @nam3rule New Constructor
236 ** @nam4rule Cpy Constructor with existing object
237 ** @nam4rule Ini Constructor with initial values
238 ** @nam4rule Ref Constructor by incrementing the reference counter
239 ** @nam4rule Seq Constructor with a sequence
240 **
241 ** @argrule Cpy slice [const EnsPSlice] Ensembl Slice
242 ** @argrule Ini sla [EnsPSliceadaptor] Ensembl Slice Adaptor
243 ** @argrule Ini sr [EnsPSeqregion] Ensembl Sequence Region
244 ** @argrule Ini start [ajint] Ensembl Sequence Region start
245 ** @argrule Ini end [ajint] Ensembl Sequence Region end
246 ** @argrule Ini strand [ajint] Ensembl Sequence Region strand
247 ** @argrule Ref slice [EnsPSlice] Ensembl Slice
248 ** @argrule Seq sla [EnsPSliceadaptor] Ensembl Slice Adaptor
249 ** @argrule Seq sr [EnsPSeqregion] Ensembl Sequence Region
250 ** @argrule Seq start [ajint] Ensembl Sequence Region start
251 ** @argrule Seq end [ajint] Ensembl Sequence Region end
252 ** @argrule Seq strand [ajint] Ensembl Sequence Region strand
253 ** @argrule Seq sequence [AjPStr] Sequence (optional)
254 **
255 ** @valrule * [EnsPSlice] Ensembl Slice or NULL
256 **
257 ** @fcategory new
258 ******************************************************************************/
259
260
261
262
263 /* @func ensSliceNewCpy *******************************************************
264 **
265 ** Object-based constructor function, which returns an independent object.
266 **
267 ** @param [r] slice [const EnsPSlice] Ensembl Slice
268 **
269 ** @return [EnsPSlice] Ensembl Slice or NULL
270 **
271 ** @release 6.4.0
272 ** @@
273 ******************************************************************************/
274
ensSliceNewCpy(const EnsPSlice slice)275 EnsPSlice ensSliceNewCpy(const EnsPSlice slice)
276 {
277 EnsPSlice pthis = NULL;
278
279 if (!slice)
280 return NULL;
281
282 AJNEW0(pthis);
283
284 pthis->Adaptor = slice->Adaptor;
285 pthis->Seqregion = ensSeqregionNewRef(slice->Seqregion);
286
287 if (slice->Sequence)
288 pthis->Sequence = ajStrNewRef(slice->Sequence);
289
290 pthis->Topology = slice->Topology;
291 pthis->Type = slice->Type;
292 pthis->Start = slice->Start;
293 pthis->End = slice->End;
294 pthis->Strand = slice->Strand;
295 pthis->Use = 1U;
296
297 return pthis;
298 }
299
300
301
302
303 /* @func ensSliceNewIni *******************************************************
304 **
305 ** Constructor for an Ensembl Slice with initial values.
306 **
307 ** @cc Bio::EnsEMBL::Slice::new
308 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
309 ** @param [u] sr [EnsPSeqregion] Ensembl Sequence Region
310 ** @param [r] start [ajint] Ensembl Sequence Region start
311 ** @param [r] end [ajint] Ensembl Sequence Region end
312 ** @param [r] strand [ajint] Ensembl Sequence Region strand
313 **
314 ** @return [EnsPSlice] Ensembl Slice or NULL
315 **
316 ** @release 6.4.0
317 ** @@
318 ******************************************************************************/
319
ensSliceNewIni(EnsPSliceadaptor sla,EnsPSeqregion sr,ajint start,ajint end,ajint strand)320 EnsPSlice ensSliceNewIni(EnsPSliceadaptor sla,
321 EnsPSeqregion sr,
322 ajint start,
323 ajint end,
324 ajint strand)
325 {
326 EnsPSlice slice = NULL;
327
328 if (!sr)
329 {
330 ajDebug("ensSliceNewIni requires an Ensembl Sequence Region.\n");
331
332 return NULL;
333 }
334
335 if (ensCoordsystemGetToplevel(ensSeqregionGetCoordsystem(sr)))
336 {
337 ajDebug("ensSliceNewIni cannot create a Slice on a Sequence Region "
338 "with a top-level Coordinate System.\n");
339
340 return NULL;
341 }
342
343 if ((strand != 1) && (strand != -1))
344 {
345 ajDebug("ensSliceNewIni requires a strand of either 1 or -1 "
346 "not %d.\n", strand);
347
348 return NULL;
349 }
350
351 AJNEW0(slice);
352
353 slice->Adaptor = sla;
354 slice->Seqregion = ensSeqregionNewRef(sr);
355 slice->Sequence = NULL;
356 slice->Topology = ensESliceTopologyNULL;
357 slice->Type = ensSliceTypeFromSeqregion(sr);
358 slice->Start = start;
359 slice->End = end;
360 slice->Strand = strand;
361 slice->Use = 1U;
362
363 return slice;
364 }
365
366
367
368
369 /* @func ensSliceNewRef *******************************************************
370 **
371 ** Ensembl Object referencing function, which returns a pointer to the
372 ** Ensembl Object passed in and increases its reference count.
373 **
374 ** @param [u] slice [EnsPSlice] Ensembl Slice
375 **
376 ** @return [EnsPSlice] Ensembl Slice or NULL
377 **
378 ** @release 6.2.0
379 ** @@
380 ******************************************************************************/
381
EnsPSlice ensSliceNewRef(EnsPSlice slice)
{
    /* Only an existing Slice gains an additional reference. */
    if (slice != NULL)
        slice->Use++;

    return slice;
}
391
392
393
394
395 /* @func ensSliceNewSeq *******************************************************
396 **
397 ** Construct a new Ensembl Slice with sequence information.
398 **
399 ** @cc Bio::EnsEMBL::Slice::new
400 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
401 ** @param [u] sr [EnsPSeqregion] Ensembl Sequence Region
402 ** @param [r] start [ajint] Ensembl Sequence Region start
403 ** @param [r] end [ajint] Ensembl Sequence Region end
404 ** @param [r] strand [ajint] Ensembl Sequence Region strand
405 ** @param [u] sequence [AjPStr] Sequence (optional)
406 **
407 ** @return [EnsPSlice] Ensembl Slice or NULL
408 **
409 ** @release 6.4.0
410 ** @@
411 ******************************************************************************/
412
ensSliceNewSeq(EnsPSliceadaptor sla,EnsPSeqregion sr,ajint start,ajint end,ajint strand,AjPStr sequence)413 EnsPSlice ensSliceNewSeq(EnsPSliceadaptor sla,
414 EnsPSeqregion sr,
415 ajint start,
416 ajint end,
417 ajint strand,
418 AjPStr sequence)
419 {
420 EnsPSlice slice = NULL;
421
422 if (!sr)
423 {
424 ajDebug("ensSliceNewSeq requires an Ensembl Sequence Region.\n");
425
426 return NULL;
427 }
428
429 if (ensCoordsystemGetToplevel(ensSeqregionGetCoordsystem(sr)))
430 {
431 ajDebug("ensSliceNewSeq cannot create a Slice on a Sequence Region "
432 "with a top-level Coordinate System.\n");
433
434 return NULL;
435 }
436
437 if (!start)
438 start = 1;
439
440 if (!end)
441 end = ensSeqregionGetLength(sr);
442
443 if (start > (end + 1))
444 {
445 ajDebug("ensSliceNewSeq requires that the start coordinate %d is "
446 "less than or equal to the end coordinate %d + 1.\n",
447 start, end);
448
449 return NULL;
450 }
451
452 if (!strand)
453 strand = 1;
454
455 if ((strand != 1) && (strand != -1))
456 {
457 ajDebug("ensSliceNewSeq requires a strand of either 1 or -1 "
458 "not %d.\n", strand);
459
460 return NULL;
461 }
462
463 if (sequence &&
464 (ajStrGetLen(sequence) != (ajuint) ensSeqregionGetLength(sr)))
465 {
466 ajDebug("ensSliceNewSeq requires that the Sequence Region length %d "
467 "matches the length of the Sequence string %u.\n",
468 ensSeqregionGetLength(sr), ajStrGetLen(sequence));
469
470 return NULL;
471 }
472
473 AJNEW0(slice);
474
475 slice->Adaptor = sla;
476
477 slice->Seqregion = ensSeqregionNewRef(sr);
478
479 if (sequence)
480 slice->Sequence = ajStrNewRef(sequence);
481
482 slice->Topology = ensESliceTopologyNULL;
483 slice->Type = ensSliceTypeFromSeqregion(sr);
484 slice->Start = start;
485 slice->End = end;
486 slice->Strand = strand;
487 slice->Use = 1U;
488
489 return slice;
490 }
491
492
493
494
495 /* @section destructors *******************************************************
496 **
497 ** Destruction destroys all internal data structures and frees the memory
498 ** allocated for an Ensembl Slice object.
499 **
500 ** @fdata [EnsPSlice]
501 **
502 ** @nam3rule Del Destroy (free) an Ensembl Slice
503 **
504 ** @argrule * Pslice [EnsPSlice*] Ensembl Slice address
505 **
506 ** @valrule * [void]
507 **
508 ** @fcategory delete
509 ******************************************************************************/
510
511
512
513
514 /* @func ensSliceDel **********************************************************
515 **
516 ** Default destructor for an Ensembl Slice.
517 **
518 ** @param [d] Pslice [EnsPSlice*] Ensembl Slice address
519 **
520 ** @return [void]
521 **
522 ** @release 6.2.0
523 ** @@
524 ******************************************************************************/
525
void ensSliceDel(EnsPSlice *Pslice)
{
    EnsPSlice slice = NULL;

    if (Pslice == NULL)
        return;

#if defined(AJ_DEBUG) && AJ_DEBUG >= 1
    if (ajDebugTest("ensSliceDel"))
    {
        ajDebug("ensSliceDel\n"
                " *Pslice %p\n",
                *Pslice);

        ensSliceTrace(*Pslice, 1);
    }
#endif /* defined(AJ_DEBUG) && AJ_DEBUG >= 1 */

    slice = *Pslice;

    if (slice != NULL)
    {
        /* Drop this reference; only the last owner tears the object down. */

        slice->Use--;

        if (!slice->Use)
        {
            ensSeqregionDel(&slice->Seqregion);

            ajStrDel(&slice->Sequence);

            ajMemFree((void **) Pslice);

            return;
        }
    }

    /* Nothing to free, or other references remain: clear the caller's pointer. */

    *Pslice = NULL;

    return;
}
559
560
561
562
563 /* @section member retrieval **************************************************
564 **
565 ** Functions for returning members of an Ensembl Slice object.
566 **
567 ** @fdata [EnsPSlice]
568 **
569 ** @nam3rule Get Return Slice attribute(s)
570 ** @nam4rule Adaptor Return the Ensembl Slice Adaptor
571 ** @nam4rule End Return the end
572 ** @nam4rule Seqregion Return the Ensembl Sequence Region
573 ** @nam4rule Sequence Return the sequence
574 ** @nam4rule Start Return the start
575 ** @nam4rule Strand Return the strand
576 **
577 ** @argrule * slice [const EnsPSlice] Slice
578 **
579 ** @valrule Adaptor [EnsPSliceadaptor] Ensembl Slice Adaptor or NULL
580 ** @valrule End [ajint] End or 0
581 ** @valrule Seqregion [EnsPSeqregion] Ensembl Sequence Region or NULL
582 ** @valrule Sequence [const AjPStr] Sequence or NULL
583 ** @valrule Start [ajint] Start or 0
584 ** @valrule Strand [ajint] Strand or 0
585 **
586 ** @fcategory use
587 ******************************************************************************/
588
589
590
591
592 /* @func ensSliceGetAdaptor ***************************************************
593 **
** Get the Ensembl Slice Adaptor member of an Ensembl Slice.
595 **
596 ** @cc Bio::EnsEMBL::Slice::adaptor
597 ** @param [r] slice [const EnsPSlice] Ensembl Slice
598 **
599 ** @return [EnsPSliceadaptor] Ensembl Slice Adaptor or NULL
600 **
601 ** @release 6.2.0
602 ** @@
603 ******************************************************************************/
604
ensSliceGetAdaptor(const EnsPSlice slice)605 EnsPSliceadaptor ensSliceGetAdaptor(const EnsPSlice slice)
606 {
607 return (slice) ? slice->Adaptor : NULL;
608 }
609
610
611
612
613 /* @func ensSliceGetEnd *******************************************************
614 **
615 ** Get the end coordinate member of an Ensembl Slice.
616 **
617 ** @cc Bio::EnsEMBL::Slice::end
618 ** @param [r] slice [const EnsPSlice] Ensembl Slice
619 **
620 ** @return [ajint] End coordinate or 0
621 **
622 ** @release 6.2.0
623 ** @@
624 ******************************************************************************/
625
ensSliceGetEnd(const EnsPSlice slice)626 ajint ensSliceGetEnd(const EnsPSlice slice)
627 {
628 return (slice) ? slice->End : 0;
629 }
630
631
632
633
634 /* @func ensSliceGetSeqregion *************************************************
635 **
636 ** Get the Ensembl Sequence Region member of an Ensembl Slice.
637 **
638 ** @param [r] slice [const EnsPSlice] Ensembl Slice
639 **
640 ** @return [EnsPSeqregion] Ensembl Sequence Region or NULL
641 **
642 ** @release 6.2.0
643 ** @@
644 ******************************************************************************/
645
ensSliceGetSeqregion(const EnsPSlice slice)646 EnsPSeqregion ensSliceGetSeqregion(const EnsPSlice slice)
647 {
648 return (slice) ? slice->Seqregion : NULL;
649 }
650
651
652
653
654 /* @func ensSliceGetSequence **************************************************
655 **
656 ** Get the sequence member of an Ensembl Slice.
657 **
658 ** NOTE: This function returns only the sequence member that has been set for
659 ** an Ensembl Slice object that is not based on an Ensembl Sequence Region.
660 ** For Ensembl Slice objects based on Ensembl Sequence Region objects the
661 ** sequence is fetched dynamically via ensSliceFetchSequence functions.
662 **
663 ** @param [r] slice [const EnsPSlice] Ensembl Slice
664 **
665 ** @return [const AjPStr] Sequence or NULL
666 **
667 ** @release 6.2.0
668 ** @@
669 ******************************************************************************/
670
ensSliceGetSequence(const EnsPSlice slice)671 const AjPStr ensSliceGetSequence(const EnsPSlice slice)
672 {
673 return (slice) ? slice->Sequence : NULL;
674 }
675
676
677
678
679 /* @func ensSliceGetStart *****************************************************
680 **
681 ** Get the start coordinate member of an Ensembl Slice.
682 **
683 ** @cc Bio::EnsEMBL::Slice::start
684 ** @param [r] slice [const EnsPSlice] Ensembl Slice
685 **
686 ** @return [ajint] Start coordinate or 0
687 **
688 ** @release 6.2.0
689 ** @@
690 ******************************************************************************/
691
ensSliceGetStart(const EnsPSlice slice)692 ajint ensSliceGetStart(const EnsPSlice slice)
693 {
694 return (slice) ? slice->Start : 0;
695 }
696
697
698
699
700 /* @func ensSliceGetStrand ****************************************************
701 **
702 ** Get the strand member of an Ensembl Slice.
703 **
704 ** @cc Bio::EnsEMBL::Slice::strand
705 ** @param [r] slice [const EnsPSlice] Ensembl Slice
706 **
707 ** @return [ajint] Strand information or 0
708 **
709 ** @release 6.2.0
710 ** @@
711 ******************************************************************************/
712
ensSliceGetStrand(const EnsPSlice slice)713 ajint ensSliceGetStrand(const EnsPSlice slice)
714 {
715 return (slice) ? slice->Strand : 0;
716 }
717
718
719
720
721 /* @section load on demand ****************************************************
722 **
723 ** Functions for returning members of an Ensembl Slice object,
724 ** which may need loading from an Ensembl SQL database on demand.
725 **
726 ** @fdata [EnsPSlice]
727 **
728 ** @nam3rule Load Return Ensembl Slice attribute(s) loaded on demand
729 ** @nam4rule Topology Return the topology
730 **
731 ** @argrule * slice [EnsPSlice] Ensembl Slice
732 **
733 ** @valrule Topology [EnsESliceTopology] Topology or ensESliceTopologyNULL
734 **
735 ** @fcategory use
736 ******************************************************************************/
737
738
739
740
741 /* @func ensSliceLoadTopology *************************************************
742 **
743 ** Load the topology member of an Ensembl Slice.
744 **
745 ** This is not a simple accessor function, since it will attempt fetching
746 ** Ensembl Attribute objects from the Ensembl database associated with the
747 ** Ensembl Sequence Region.
748 **
749 ** @cc Bio::EnsEMBL::Slice::is_circular
750 ** @param [u] slice [EnsPSlice] Ensembl Slice
751 **
752 ** @return [EnsESliceTopology] Topology or ensESliceTopologyNULL
753 **
754 ** @release 6.4.0
755 ** @@
756 ******************************************************************************/
757
ensSliceLoadTopology(EnsPSlice slice)758 EnsESliceTopology ensSliceLoadTopology(EnsPSlice slice)
759 {
760 AjPList attributes = NULL;
761
762 AjPStr code = NULL;
763
764 EnsPAttribute attribute = NULL;
765
766 if (!slice)
767 return ensESliceTopologyNULL;
768
769 if (slice->Topology == ensESliceTopologyNULL)
770 {
771 if (slice->Seqregion == NULL)
772 return ensESliceTopologyNULL;
773
774 code = ajStrNewC("circular_seq");
775
776 attributes = ajListNew();
777
778 ensSliceFetchAllAttributes(slice, code, attributes);
779
780 if (ajListGetLength(attributes) > 0)
781 slice->Topology = ensESliceTopologyCircular;
782 else
783 slice->Topology = ensESliceTopologyLinear;
784
785 while (ajListPop(attributes, (void **) &attribute))
786 ensAttributeDel(&attribute);
787
788 ajListFree(&attributes);
789
790 ajStrDel(&code);
791 }
792
793 return slice->Topology;
794 }
795
796
797
798
799 /* @section member assignment *************************************************
800 **
801 ** Functions for assigning members of an Ensembl Slice object.
802 **
803 ** @fdata [EnsPSlice]
804 **
805 ** @nam3rule Set Set one member of a Slice
806 ** @nam4rule Adaptor Set the Ensembl Slice Adaptor
807 ** @nam4rule Sequence Set the sequence
808 ** @nam4rule Topology Set the topology
809 **
810 ** @argrule * slice [EnsPSlice] Ensembl Slice object
811 ** @argrule Adaptor sla [EnsPSliceadaptor] Ensembl Slice Adaptor
812 ** @argrule Sequence sequence [AjPStr] Sequence
813 ** @argrule Topology sltp [EnsESliceTopology] Ensembl Slice Topology
814 **
815 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
816 **
817 ** @fcategory modify
818 ******************************************************************************/
819
820
821
822
823 /* @func ensSliceSetAdaptor ***************************************************
824 **
825 ** Set the Ensembl Slice Adaptor member of an Ensembl Slice.
826 **
827 ** @cc Bio::EnsEMBL::Slice::adaptor
828 ** @param [u] slice [EnsPSlice] Ensembl Slice
829 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
830 **
831 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
832 **
833 ** @release 6.2.0
834 ** @@
835 ******************************************************************************/
836
ensSliceSetAdaptor(EnsPSlice slice,EnsPSliceadaptor sla)837 AjBool ensSliceSetAdaptor(EnsPSlice slice, EnsPSliceadaptor sla)
838 {
839 if (!slice)
840 return ajFalse;
841
842 slice->Adaptor = sla;
843
844 return ajTrue;
845 }
846
847
848
849
850 /* @func ensSliceSetSequence **************************************************
851 **
852 ** Set the sequence member of an Ensembl Slice.
853 **
854 ** @param [u] slice [EnsPSlice] Ensembl Slice
855 ** @param [u] sequence [AjPStr] Sequence
856 **
857 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
858 **
859 ** @release 6.2.0
860 ** @@
861 ******************************************************************************/
862
ensSliceSetSequence(EnsPSlice slice,AjPStr sequence)863 AjBool ensSliceSetSequence(EnsPSlice slice, AjPStr sequence)
864 {
865 if (!slice)
866 return ajFalse;
867
868 ajStrDel(&slice->Sequence);
869
870 if (sequence)
871 {
872 if (ajStrGetLen(sequence) == ensSliceCalculateLength(slice))
873 slice->Sequence = ajStrNewRef(sequence);
874 else
875 ajFatal("ensSliceSetSequence got sequence of length %u, "
876 "which does not match the length of the Slice %u.\n",
877 ajStrGetLen(sequence),
878 ensSliceCalculateLength(slice));
879 }
880
881 return ajTrue;
882 }
883
884
885
886
887 /* @func ensSliceSetTopology **************************************************
888 **
889 ** Set the Ensembl Slice Topology member of an Ensembl Slice.
890 **
891 ** @cc Bio::EnsEMBL::Slice::is_circular
892 ** @param [u] slice [EnsPSlice] Ensembl Slice
893 ** @param [u] sltp [EnsESliceTopology] Ensembl Slice Topology
894 **
895 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
896 **
897 ** @release 6.4.0
898 ** @@
899 ******************************************************************************/
900
ensSliceSetTopology(EnsPSlice slice,EnsESliceTopology sltp)901 AjBool ensSliceSetTopology(EnsPSlice slice, EnsESliceTopology sltp)
902 {
903 if (!slice)
904 return ajFalse;
905
906 slice->Topology = sltp;
907
908 return ajTrue;
909 }
910
911
912
913
914 /* @section debugging *********************************************************
915 **
916 ** Functions for reporting of an Ensembl Slice object.
917 **
918 ** @fdata [EnsPSlice]
919 **
920 ** @nam3rule Trace Report Ensembl Slice members to debug file
921 **
922 ** @argrule Trace slice [const EnsPSlice] Ensembl Slice
923 ** @argrule Trace level [ajuint] Indentation level
924 **
925 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
926 **
927 ** @fcategory misc
928 ******************************************************************************/
929
930
931
932
933 /* @func ensSliceTrace ********************************************************
934 **
935 ** Trace an Ensembl Slice.
936 **
937 ** @param [r] slice [const EnsPSlice] Ensembl Slice
938 ** @param [r] level [ajuint] Indentation level
939 **
940 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
941 **
942 ** @release 6.2.0
943 ** @@
944 ******************************************************************************/
945
ensSliceTrace(const EnsPSlice slice,ajuint level)946 AjBool ensSliceTrace(const EnsPSlice slice, ajuint level)
947 {
948 AjPStr indent = NULL;
949
950 if (!slice)
951 return ajFalse;
952
953 indent = ajStrNew();
954
955 ajStrAppendCountK(&indent, ' ', level * 2);
956
957 ajDebug("%SensSliceTrace %p\n"
958 "%S Adaptor %p\n"
959 "%S Seqregion %p\n"
960 "%S Sequence %p\n"
961 "%S Topology %d\n"
962 "%S Start %d\n"
963 "%S End %d\n"
964 "%S Strand %d\n"
965 "%S Use %u\n",
966 indent, slice,
967 indent, slice->Adaptor,
968 indent, slice->Seqregion,
969 indent, slice->Sequence,
970 indent, slice->Topology,
971 indent, slice->Start,
972 indent, slice->End,
973 indent, slice->Strand,
974 indent, slice->Use);
975
976 ensSeqregionTrace(slice->Seqregion, level + 1);
977
978 ajStrDel(&indent);
979
980 return ajTrue;
981 }
982
983
984
985
986 /* @section convenience functions *********************************************
987 **
988 ** Ensembl Slice convenience functions
989 **
990 ** @fdata [EnsPSlice]
991 **
992 ** @nam3rule Get Get member(s) of associated objects
993 ** @nam4rule Coordsystem Get Ensembl Coordinate System member(s)
994 ** @nam5rule Name Get the name
995 ** @nam5rule Object Get the Ensembl Coordinate System object
996 ** @nam5rule Version Get the version
997 ** @nam4rule Seqregion Get Ensembl Sequence Region member(s)
998 ** @nam5rule Identifier Get the SQL database-internal identifier
999 ** @nam5rule Length Get the length
1000 ** @nam5rule Name Get the name
1001 ** @nam4rule Translation Get an AJAX Translation
1002 **
1003 ** @argrule Coordsystem slice [const EnsPSlice] Ensembl Slice
1004 ** @argrule Seqregion slice [const EnsPSlice] Ensembl Slice
1005 ** @argrule Translation slice [EnsPSlice] Ensembl Slice
1006 **
1007 ** @valrule CoordsystemObject [EnsPCoordsystem] Ensembl Coordinate System or
1008 ** NULL
1009 ** @valrule Identifier [ajuint] SQL database-internal identifier or 0U
1010 ** @valrule Name [const AjPStr] Name or NULL
1011 ** @valrule Length [ajint] Length or 0
1012 ** @valrule Version [const AjPStr] Version or NULL
1013 ** @valrule Translation [const AjPTrn] AJAX Translation or NULL
1014 **
1015 ** @fcategory use
1016 ******************************************************************************/
1017
1018
1019
1020
1021 /* @func ensSliceGetCoordsystemName *******************************************
1022 **
1023 ** Get the name member of the Ensembl Coordinate System member of the
1024 ** Ensembl Sequence Region member of an Ensembl Slice.
1025 **
1026 ** @cc Bio::EnsEMBL::Slice::coord_system_name
1027 ** @param [r] slice [const EnsPSlice] Ensembl Slice
1028 **
** @return [const AjPStr] Ensembl Coordinate System name or NULL
1030 **
1031 ** @release 6.2.0
1032 ** @@
1033 ******************************************************************************/
1034
ensSliceGetCoordsystemName(const EnsPSlice slice)1035 const AjPStr ensSliceGetCoordsystemName(const EnsPSlice slice)
1036 {
1037 return (slice) ?
1038 ensCoordsystemGetName(
1039 ensSeqregionGetCoordsystem(slice->Seqregion)) : NULL;
1040 }
1041
1042
1043
1044
1045 /* @func ensSliceGetCoordsystemObject *****************************************
1046 **
1047 ** Get the Ensembl Coordinate System member of the
1048 ** Ensembl Sequence Region member of an Ensembl Slice.
1049 **
1050 ** @cc Bio::EnsEMBL::Slice::coord_system
1051 ** @param [r] slice [const EnsPSlice] Ensembl Slice
1052 **
** @return [EnsPCoordsystem] Ensembl Coordinate System or NULL
1054 **
1055 ** @release 6.4.0
1056 ** @@
1057 ******************************************************************************/
1058
ensSliceGetCoordsystemObject(const EnsPSlice slice)1059 EnsPCoordsystem ensSliceGetCoordsystemObject(const EnsPSlice slice)
1060 {
1061 return (slice) ? ensSeqregionGetCoordsystem(slice->Seqregion) : NULL;
1062 }
1063
1064
1065
1066
1067 /* @func ensSliceGetCoordsystemVersion ****************************************
1068 **
1069 ** Get the version member of the Ensembl Coordinate System member of the
1070 ** Ensembl Sequence Region member of an Ensembl Slice.
1071 **
1072 ** @param [r] slice [const EnsPSlice] Ensembl Slice
1073 **
1074 ** @return [const AjPStr] Ensembl Coordinate System version or NULL
1075 **
1076 ** @release 6.2.0
1077 ** @@
1078 ******************************************************************************/
1079
ensSliceGetCoordsystemVersion(const EnsPSlice slice)1080 const AjPStr ensSliceGetCoordsystemVersion(const EnsPSlice slice)
1081 {
1082 return (slice) ?
1083 ensCoordsystemGetVersion(
1084 ensSeqregionGetCoordsystem(slice->Seqregion)) : NULL;
1085 }
1086
1087
1088
1089
1090 /* @func ensSliceGetSeqregionIdentifier ***************************************
1091 **
1092 ** Get the identifier member of the Ensembl Sequence Region member of an
1093 ** Ensembl Slice.
1094 **
1095 ** @param [r] slice [const EnsPSlice] Ensembl Slice
1096 **
1097 ** @return [ajuint] Ensembl Sequence Region identifier or 0U
1098 **
1099 ** @release 6.2.0
1100 ** @@
1101 ******************************************************************************/
1102
ensSliceGetSeqregionIdentifier(const EnsPSlice slice)1103 ajuint ensSliceGetSeqregionIdentifier(const EnsPSlice slice)
1104 {
1105 return (slice) ? ensSeqregionGetIdentifier(slice->Seqregion) : 0U;
1106 }
1107
1108
1109
1110
1111 /* @func ensSliceGetSeqregionLength *******************************************
1112 **
1113 ** Calculate the length of the Ensembl Sequence Region member of an
1114 ** Ensembl Slice.
1115 **
1116 ** @param [r] slice [const EnsPSlice] Ensembl Slice
1117 **
1118 ** @return [ajint] Ensembl Sequence Region length or 0
1119 **
1120 ** @release 6.2.0
1121 ** @@
1122 ******************************************************************************/
1123
ensSliceGetSeqregionLength(const EnsPSlice slice)1124 ajint ensSliceGetSeqregionLength(const EnsPSlice slice)
1125 {
1126 return (slice) ? ensSeqregionGetLength(slice->Seqregion) : 0;
1127 }
1128
1129
1130
1131
1132 /* @func ensSliceGetSeqregionName *********************************************
1133 **
1134 ** Get the name member of the Ensembl Sequence Region member of an
1135 ** Ensembl Slice.
1136 **
1137 ** @param [r] slice [const EnsPSlice] Ensembl Slice
1138 **
1139 ** @return [const AjPStr] Ensembl Sequence Region name or NULL
1140 **
1141 ** @release 6.2.0
1142 ** @@
1143 ******************************************************************************/
1144
ensSliceGetSeqregionName(const EnsPSlice slice)1145 const AjPStr ensSliceGetSeqregionName(const EnsPSlice slice)
1146 {
1147 return (slice) ? ensSeqregionGetName(slice->Seqregion) : NULL;
1148 }
1149
1150
1151
1152
1153 /* @func ensSliceGetTranslation ***********************************************
1154 **
1155 ** Get an AJAX Translation for an Ensembl Slice.
1156 **
1157 ** The AJAX Translation will match the codon table defined as an
1158 ** Ensembl Attribute of code "codon_table" associated with an Ensembl Slice.
1159 ** If no Attribute is associated with this Slice, an AJAX Translation based on
1160 ** codon table 0, the standard code with translation start at AUG only,
1161 ** will be returned.
1162 **
1163 ** @param [u] slice [EnsPSlice] Ensembl Slice
1164 **
1165 ** @return [const AjPTrn] AJAX Translation or NULL
1166 **
1167 ** @release 6.3.0
1168 ** @@
1169 ******************************************************************************/
1170
ensSliceGetTranslation(EnsPSlice slice)1171 const AjPTrn ensSliceGetTranslation(EnsPSlice slice)
1172 {
1173 ajuint codontable = 0U;
1174
1175 AjPList attributes = NULL;
1176
1177 AjPStr code = NULL;
1178 AjPStr value = NULL;
1179
1180 EnsPAttribute attribute = NULL;
1181
1182 if (!slice)
1183 return NULL;
1184
1185 code = ajStrNewC("codon_table");
1186
1187 attributes = ajListNew();
1188
1189 ensSliceFetchAllAttributes(slice, code, attributes);
1190
1191 ajStrDel(&code);
1192
1193 while (ajListPop(attributes, (void **) &attribute))
1194 {
1195 value = ensAttributeGetValue(attribute);
1196
1197 if (value && ajStrGetLen(value))
1198 {
1199 if (!ajStrToUint(value, &codontable))
1200 ajWarn("ensSliceGetTranslation Could not parse "
1201 "Ensembl Attribute value '%S' into an "
1202 "unsigned integer value.",
1203 value);
1204 }
1205 else
1206 {
1207 ajDebug("ensSliceGetTranslation got Ensembl Attribute %p with an "
1208 "empty value.",
1209 attribute);
1210
1211 ensAttributeTrace(attribute, 1);
1212 }
1213
1214 ensAttributeDel(&attribute);
1215 }
1216
1217 ajListFree(&attributes);
1218
1219 return ensTranslationCacheGet(codontable);
1220 }
1221
1222
1223
1224
1225 /* @section calculate *********************************************************
1226 **
1227 ** Functions for calculating information from an Ensembl Slice object.
1228 **
1229 ** @fdata [EnsPSlice]
1230 **
1231 ** @nam3rule Calculate Calculate Ensembl Slice information
1232 ** @nam4rule Centrepoint Calculate the centre point
1233 ** @nam4rule Length Calculate the length
1234 ** @nam4rule Memsize Calculate the memory size in bytes
1235 ** @nam4rule Region Calculate the length of a region
1236 **
1237 ** @argrule Centrepoint slice [EnsPSlice] Ensembl Slice
1238 ** @argrule Length slice [EnsPSlice] Ensembl Slice
1239 ** @argrule Memsize slice [const EnsPSlice] Ensembl Slice
1240 ** @argrule Region slice [EnsPSlice] Ensembl Slice
1241 ** @argrule Region start [ajint] Start
1242 ** @argrule Region end [ajint] End
1243 **
1244 ** @valrule Centrepoint [ajint] Centre point or 0
1245 ** @valrule Length [ajuint] Length or 0U
1246 ** @valrule Memsize [size_t] Memory size in bytes or 0
1247 ** @valrule Region [ajuint] Length or 0U
1248 **
1249 ** @fcategory misc
1250 ******************************************************************************/
1251
1252
1253
1254
1255 /* @func ensSliceCalculateCentrepoint *****************************************
1256 **
1257 ** Calculate the centre point coordinate of an Ensembl Slice.
1258 **
1259 ** @cc Bio::EnsEMBL::Slice::centrepoint
1260 ** @cc Bio::EnsEMBL::CircularSlice::centrepoint
1261 ** @param [u] slice [EnsPSlice] Ensembl Slice
1262 **
1263 ** @return [ajint] Centre point coordinate or 0
1264 **
1265 ** @release 6.4.0
1266 ** @@
1267 ******************************************************************************/
1268
ensSliceCalculateCentrepoint(EnsPSlice slice)1269 ajint ensSliceCalculateCentrepoint(EnsPSlice slice)
1270 {
1271 ajint mpoint = 0;
1272
1273 if (!slice)
1274 return 0;
1275
1276 mpoint = slice->Start + ensSliceCalculateLength(slice) / 2;
1277
1278 if (mpoint > ensSeqregionGetLength(slice->Seqregion))
1279 mpoint -= ensSeqregionGetLength(slice->Seqregion);
1280
1281 return mpoint;
1282 }
1283
1284
1285
1286
1287 /* @func ensSliceCalculateLength **********************************************
1288 **
1289 ** Calculate the length of an Ensembl Slice.
1290 **
1291 ** @cc Bio::EnsEMBL::Slice::length
1292 ** @cc Bio::EnsEMBL::CircularSlice::length
1293 ** @param [u] slice [EnsPSlice] Ensembl Slice
1294 **
1295 ** @return [ajuint] Ensembl Slice length or 0U
1296 **
1297 ** @release 6.4.0
1298 ** @@
1299 ******************************************************************************/
1300
ensSliceCalculateLength(EnsPSlice slice)1301 ajuint ensSliceCalculateLength(EnsPSlice slice)
1302 {
1303 return (slice) ?
1304 ensSliceCalculateRegion(slice, slice->Start, slice->End) : 0U;
1305 }
1306
1307
1308
1309
1310 /* @func ensSliceCalculateMemsize *********************************************
1311 **
1312 ** Calculate the memory size in bytes of an Ensembl Slice.
1313 **
1314 ** @param [r] slice [const EnsPSlice] Ensembl Slice
1315 **
1316 ** @return [size_t] Memory size in bytes or 0
1317 **
1318 ** @release 6.4.0
1319 ** @@
1320 ******************************************************************************/
1321
ensSliceCalculateMemsize(const EnsPSlice slice)1322 size_t ensSliceCalculateMemsize(const EnsPSlice slice)
1323 {
1324 size_t size = 0;
1325
1326 if (!slice)
1327 return 0;
1328
1329 size += sizeof (EnsOSlice);
1330
1331 size += ensSeqregionCalculateMemsize(slice->Seqregion);
1332
1333 if (slice->Sequence)
1334 {
1335 size += sizeof (AjOStr);
1336
1337 size += ajStrGetRes(slice->Sequence);
1338 }
1339
1340 return size;
1341 }
1342
1343
1344
1345
1346 /* @func ensSliceCalculateRegion **********************************************
1347 **
1348 ** Calculate the length of a region spanning a (circular) Ensembl Slice.
1349 **
1350 ** @param [u] slice [EnsPSlice] Ensembl Slice
1351 ** @param [r] start [ajint] Start
1352 ** @param [r] end [ajint] End
1353 **
1354 ** @return [ajuint] Length or 0U
1355 **
1356 ** @release 6.4.0
1357 ** @@
1358 ** Linear Slice:
1359 ** length = start - end + 1
1360 ** Circular Slice (start > end): in two sections
1361 ** length = (Sequence Region Length - start + 1) + (end - 1 + 1)
1362 ** length = Sequence Region Length - start + 1 + end
1363 ******************************************************************************/
1364
ensSliceCalculateRegion(EnsPSlice slice,ajint start,ajint end)1365 ajuint ensSliceCalculateRegion(EnsPSlice slice, ajint start, ajint end)
1366 {
1367 ajuint length = 0U;
1368
1369 AjBool circular = AJFALSE;
1370
1371 if (!slice)
1372 return 0U;
1373
1374 if (start > end)
1375 {
1376 if (ensSliceIsCircular(slice, &circular))
1377 {
1378 if (circular == ajTrue)
1379 length = ensSliceGetSeqregionLength(slice) - start + 1U + end;
1380 else
1381 ajFatal("ensSliceCalculateRegion cannot calculate the length "
1382 "of a region on a linear Ensembl Slice, where the "
1383 "start coordinate (%d) is greater than the "
1384 "end coordinate (%d).", start, end);
1385 }
1386 else
1387 ajFatal("ensSliceCalculateSpan cannot determine the "
1388 "Ensembl Slice topology.");
1389 }
1390 else
1391 length = end - start + 1U;
1392
1393 /*
1394 ** NOTE: This could be rewritten to:
1395 **
1396 ** length = end - start + 1U;
1397 **
1398 ** if ((start > end) && (circular == ajTrue))
1399 ** length += ensSliceGetSeqregionLength(slice);
1400 */
1401
1402 return length;
1403 }
1404
1405
1406
1407
1408 /* @section fetch *************************************************************
1409 **
1410 ** Functions for fetching objects of an Ensembl Slice object.
1411 **
1412 ** @fdata [EnsPSlice]
1413 **
1414 ** @nam3rule Fetch Fetch Ensembl Slice objects
1415 ** @nam4rule All Fetch all objects
1416 ** @nam5rule Attributes Fetch all Ensembl Attribute objects
1417 ** @nam5rule Repeatfeatures Fetch all Repeat Feature objects
1418 ** @nam5rule Sequenceedits Fetch all Sequence Edit objects
1419 ** @nam4rule Name Fetch the name
1420 ** @nam4rule Sequence Fetch the sequence
1421 ** @nam5rule All Fetch the complete sequence
1422 ** @nam5rule Sub Fetch a sub-sequence
1423 ** @nam6rule Seq Fetch as AJAX Sequence object
1424 ** @nam6rule Str Fetch as AJAX String object
1425 ** @nam4rule Sliceexpanded Fetch an expanded Ensembl Slice
1426 ** @nam4rule Sliceinverted Fetch an inverted Ensembl Slice
1427 ** @nam4rule Slicesub Fetch a sub-Slice
1428 **
1429 ** @argrule AllAttributes slice [EnsPSlice] Ensembl Slice
1430 ** @argrule AllAttributes code [const AjPStr] Ensembl Attribute code
1431 ** @argrule AllAttributes attributes [AjPList]
1432 ** AJAX List of Ensembl Attribute objects
1433 ** @argrule AllRepeatfeatures slice [EnsPSlice] Ensembl Slice
1434 ** @argrule AllRepeatfeatures anname [const AjPStr] Ensembl Analysis name
1435 ** @argrule AllRepeatfeatures rctype [const AjPStr]
1436 ** Ensembl Repeat Consensus type
1437 ** @argrule AllRepeatfeatures rcclass [const AjPStr]
1438 ** Ensembl Repeat Consensus class
1439 ** @argrule AllRepeatfeatures rcname [const AjPStr]
1440 ** Ensembl Repeat Consensus name
1441 ** @argrule AllRepeatfeatures rfs [AjPList]
1442 ** AJAX List of Ensembl Repeat Feature objects
1443 ** @argrule AllSequenceedits slice [EnsPSlice] Ensembl Slice
1444 ** @argrule AllSequenceedits ses [AjPList]
1445 ** AJAX List of Ensembl Sequence Edit objects
1446 ** @argrule Name slice [const EnsPSlice] Ensembl Slice
1447 ** @argrule Name Pname [AjPStr*] Name
1448 ** @argrule Sequence slice [EnsPSlice] Ensembl Slice
1449 ** @argrule Sub start [ajint] Start coordinate
1450 ** @argrule Sub end [ajint] End coordinate
1451 ** @argrule Sub strand [ajint] Strand information
1452 ** @argrule Seq Psequence [AjPSeq*] AJAX Sequence address
1453 ** @argrule Str Psequence [AjPStr*] AJAX String address
1454 ** @argrule Sliceinverted slice [EnsPSlice] Ensembl Slice
1455 ** @argrule Sliceinverted Pslice [EnsPSlice*] Sliceinverted Ensembl Slice
1456 ** @argrule Sliceexpanded slice [EnsPSlice] Ensembl Slice
1457 ** @argrule Sliceexpanded five [ajint]
1458 ** Number of bases to expand the 5' region
1459 ** @argrule Sliceexpanded three [ajint]
1460 ** Number of bases to expand the 3' region
1461 ** @argrule Sliceexpanded force [AjBool] Force Slice contraction
1462 ** @argrule Sliceexpanded Pfshift [ajint*]
1463 ** Maximum possible 5' shift when "force" is set
1464 ** @argrule Sliceexpanded Ptshift [ajint*]
1465 ** Maximum possible 3' shift when "force" is set
1466 ** @argrule Sliceexpanded Pslice [EnsPSlice*] Ensembl Slice address
1467 ** @argrule Slicesub slice [EnsPSlice] Ensembl Slice
1468 ** @argrule Slicesub start [ajint] Start coordinate
1469 ** @argrule Slicesub end [ajint] End coordinate
1470 ** @argrule Slicesub strand [ajint] Strand information
1471 ** @argrule Slicesub Pslice [EnsPSlice*] Ensembl Slice address
1472 **
1473 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
1474 **
1475 ** @fcategory misc
1476 ******************************************************************************/
1477
1478
1479
1480
1481 /* @func ensSliceFetchAllAttributes *******************************************
1482 **
1483 ** Fetch all Ensembl Attribute objects for an Ensembl Slice.
1484 ** The caller is responsible for deleting the Ensembl Attribute objects before
1485 ** deleting the AJAX List.
1486 **
1487 ** @cc Bio::EnsEMBL::Slice::get_all_Attributes
1488 ** @cc Bio::EnsEMBL::CircularSlice::get_all_Attributes
1489 ** @param [u] slice [EnsPSlice] Ensembl Slice
1490 ** @param [rN] code [const AjPStr] Ensembl Attribute code
1491 ** @param [u] attributes [AjPList] AJAX List of Ensembl Attribute objects
1492 **
1493 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
1494 **
1495 ** @release 6.2.0
1496 ** @@
1497 ******************************************************************************/
1498
ensSliceFetchAllAttributes(EnsPSlice slice,const AjPStr code,AjPList attributes)1499 AjBool ensSliceFetchAllAttributes(EnsPSlice slice,
1500 const AjPStr code,
1501 AjPList attributes)
1502 {
1503 if (!slice)
1504 return ajFalse;
1505
1506 if (!attributes)
1507 return ajFalse;
1508
1509 if (!slice->Seqregion)
1510 {
1511 ajDebug("ensSliceFetchAllAttributes cannot fetch "
1512 "Ensembl Attribute objects for an "
1513 "Ensembl Slice without an "
1514 "Ensembl Sequence Region.\n");
1515
1516 return ajFalse;
1517 }
1518
1519 ensSeqregionFetchAllAttributes(slice->Seqregion, code, attributes);
1520
1521 return ajTrue;
1522 }
1523
1524
1525
1526
1527 /* @func ensSliceFetchAllRepeatfeatures ***************************************
1528 **
1529 ** Fetch all Ensembl Repeat Feature objects on an Ensembl Slice.
1530 **
** The caller is responsible for deleting the Ensembl Repeat Feature objects
1532 ** before deleting the AJAX List.
1533 **
1534 ** @param [u] slice [EnsPSlice] Ensembl Slice
1535 ** @param [r] anname [const AjPStr] Ensembl Analysis name
1536 ** @param [r] rctype [const AjPStr] Ensembl Repeat Consensus type
1537 ** @param [r] rcclass [const AjPStr] Ensembl Repeat Consensus class
1538 ** @param [r] rcname [const AjPStr] Ensembl Repeat Consensus name
1539 ** @param [u] rfs [AjPList] AJAX List of Ensembl Repeat Feature objects
1540 **
1541 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
1542 **
1543 ** @release 6.2.0
1544 ** @@
1545 ******************************************************************************/
1546
ensSliceFetchAllRepeatfeatures(EnsPSlice slice,const AjPStr anname,const AjPStr rctype,const AjPStr rcclass,const AjPStr rcname,AjPList rfs)1547 AjBool ensSliceFetchAllRepeatfeatures(EnsPSlice slice,
1548 const AjPStr anname,
1549 const AjPStr rctype,
1550 const AjPStr rcclass,
1551 const AjPStr rcname,
1552 AjPList rfs)
1553 {
1554 if (ajDebugTest("ensSliceFetchAllRepeatfeatures"))
1555 ajDebug("ensSliceFetchAllRepeatfeatures\n"
1556 " slice %p\n"
1557 " anname '%S'\n"
1558 " rctype '%S'\n"
1559 " rcclass '%S'\n"
1560 " rcname '%S'\n"
1561 " rfs %p\n",
1562 slice,
1563 anname,
1564 rctype,
1565 rcclass,
1566 rcname,
1567 rfs);
1568
1569 if (!slice)
1570 return ajFalse;
1571
1572 if (!slice->Adaptor)
1573 {
1574 ajDebug("ensSliceFetchAllRepeatfeatures cannot get Repeat Feature "
1575 "objects without a Slice Adaptor attached to the Slice.\n");
1576
1577 return ajFalse;
1578 }
1579
1580 return ensRepeatfeatureadaptorFetchAllbySlice(
1581 ensRegistryGetRepeatfeatureadaptor(
1582 ensSliceadaptorGetDatabaseadaptor(slice->Adaptor)),
1583 slice,
1584 anname,
1585 rctype,
1586 rcclass,
1587 rcname,
1588 rfs);
1589 }
1590
1591
1592
1593
1594 /* @func ensSliceFetchAllSequenceedits ****************************************
1595 **
1596 ** Fetch all Ensembl Sequence Edit objects of an Ensembl Slice.
1597 **
1598 ** The caller is responsible for deleting the Ensembl Sequence Edit objects
1599 ** before deleting the AJAX List.
1600 **
1601 ** @cc Bio::EnsEMBL::DBSQL::SequenceAdaptor::_rna_edit
1602 ** @param [u] slice [EnsPSlice] Ensembl Slice
1603 ** @param [u] ses [AjPList] AJAX List of Ensembl Sequence Edit objects
1604 **
1605 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
1606 **
1607 ** @release 6.4.0
1608 ** @@
1609 ** Ensembl Sequence Edit objects are Ensembl Attribute objects, which codes
1610 ** have to be defined in the static const char *sliceKSequenceeditCode array.
1611 ******************************************************************************/
1612
ensSliceFetchAllSequenceedits(EnsPSlice slice,AjPList ses)1613 AjBool ensSliceFetchAllSequenceedits(EnsPSlice slice,
1614 AjPList ses)
1615 {
1616 register ajuint i = 0U;
1617
1618 AjPList attributes = NULL;
1619
1620 AjPStr code = NULL;
1621
1622 EnsPAttribute at = NULL;
1623
1624 EnsPSequenceedit se = NULL;
1625
1626 if (!slice)
1627 return ajFalse;
1628
1629 if (!ses)
1630 return ajFalse;
1631
1632 code = ajStrNew();
1633
1634 attributes = ajListNew();
1635
1636 for (i = 0U; sliceKSequenceeditCode[i]; i++)
1637 {
1638 ajStrAssignC(&code, sliceKSequenceeditCode[i]);
1639
1640 ensSliceFetchAllAttributes(slice, code, attributes);
1641 }
1642
1643 while (ajListPop(attributes, (void **) &at))
1644 {
1645 se = ensSequenceeditNewAttribute(at);
1646
1647 ajListPushAppend(ses, (void *) se);
1648
1649 ensAttributeDel(&at);
1650 }
1651
1652 ajListFree(&attributes);
1653
1654 ajStrDel(&code);
1655
1656 return ajTrue;
1657 }
1658
1659
1660
1661
1662 /* @func ensSliceFetchName ****************************************************
1663 **
1664 ** Fetch the name of an Ensembl Slice.
1665 **
1666 ** @cc Bio::EnsEMBL::Slice::name
1667 ** @param [r] slice [const EnsPSlice] Ensembl Slice
1668 ** @param [wP] Pname [AjPStr*] Name String address
1669 **
1670 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
1671 **
1672 ** @release 6.2.0
1673 ** @@
1674 ******************************************************************************/
1675
ensSliceFetchName(const EnsPSlice slice,AjPStr * Pname)1676 AjBool ensSliceFetchName(const EnsPSlice slice, AjPStr *Pname)
1677 {
1678 EnsPCoordsystem cs = NULL;
1679
1680 if (!slice)
1681 return ajFalse;
1682
1683 if (!Pname)
1684 return ajFalse;
1685
1686 cs = ensSeqregionGetCoordsystem(slice->Seqregion);
1687
1688 if (*Pname)
1689 *Pname = ajFmtPrintS(
1690 Pname,
1691 "%S:%S:%S:%d:%d:%d",
1692 ensCoordsystemGetName(cs),
1693 ensCoordsystemGetVersion(cs),
1694 ensSeqregionGetName(slice->Seqregion),
1695 slice->Start,
1696 slice->End,
1697 slice->Strand);
1698 else
1699 *Pname = ajFmtStr(
1700 "%S:%S:%S:%d:%d:%d",
1701 ensCoordsystemGetName(cs),
1702 ensCoordsystemGetVersion(cs),
1703 ensSeqregionGetName(slice->Seqregion),
1704 slice->Start,
1705 slice->End,
1706 slice->Strand);
1707
1708 return ajTrue;
1709 }
1710
1711
1712
1713
1714 /* @func ensSliceFetchSequenceAllSeq ******************************************
1715 **
1716 ** Fetch the sequence of an Ensembl Slice as AJAX Sequence.
1717 **
1718 ** @param [u] slice [EnsPSlice] Ensembl Slice
1719 ** @param [wP] Psequence [AjPSeq*] AJAX Sequence address
1720 **
1721 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
1722 **
1723 ** @release 6.4.0
1724 ** @@
1725 ******************************************************************************/
1726
ensSliceFetchSequenceAllSeq(EnsPSlice slice,AjPSeq * Psequence)1727 AjBool ensSliceFetchSequenceAllSeq(EnsPSlice slice, AjPSeq *Psequence)
1728 {
1729 AjPStr name = NULL;
1730 AjPStr sequence = NULL;
1731
1732 if (!slice)
1733 return ajFalse;
1734
1735 if (!Psequence)
1736 return ajFalse;
1737
1738 /*
1739 ** It is sligtly more efficient, if undefined AJAX String objects are
1740 ** directly allocated by the following functions to their final size.
1741 */
1742
1743 ensSliceFetchName(slice, &name);
1744 ensSliceFetchSequenceAllStr(slice, &sequence);
1745
1746 if (*Psequence)
1747 {
1748 ajSeqClear(*Psequence);
1749
1750 ajSeqAssignNameS(*Psequence, name);
1751 ajSeqAssignSeqS(*Psequence, sequence);
1752 }
1753 else
1754 *Psequence = ajSeqNewNameS(sequence, name);
1755
1756 ajSeqSetNuc(*Psequence);
1757
1758 ajStrDel(&sequence);
1759 ajStrDel(&name);
1760
1761 return ajTrue;
1762 }
1763
1764
1765
1766
1767 /* @func ensSliceFetchSequenceAllStr ******************************************
1768 **
1769 ** Fetch the sequence of an Ensembl Slice as AJAX String.
1770 **
1771 ** @cc Bio::EnsEMBL::Slice::seq
1772 ** @cc Bio::EnsEMBL::CircularSlice::seq
1773 ** @param [u] slice [EnsPSlice] Ensembl Slice
1774 ** @param [wP] Psequence [AjPStr*] Sequence String address
1775 **
1776 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
1777 **
1778 ** @release 6.4.0
1779 ** @@
1780 ******************************************************************************/
1781
ensSliceFetchSequenceAllStr(EnsPSlice slice,AjPStr * Psequence)1782 AjBool ensSliceFetchSequenceAllStr(EnsPSlice slice, AjPStr *Psequence)
1783 {
1784 AjBool circular = AJFALSE;
1785
1786 AjPStr sequence1 = NULL;
1787 AjPStr sequence2 = NULL;
1788
1789 EnsPDatabaseadaptor dba = NULL;
1790
1791 EnsPSequenceadaptor sqa = NULL;
1792
1793 EnsPSlice slice1 = NULL;
1794 EnsPSlice slice2 = NULL;
1795
1796 if (!slice)
1797 return ajFalse;
1798
1799 if (!Psequence)
1800 return ajFalse;
1801
1802 if (*Psequence)
1803 ajStrAssignClear(Psequence);
1804 else
1805 *Psequence = ajStrNewRes(ensSliceCalculateLength(slice) + 1);
1806
1807 /* Special case for "in-between" (insertion) coordinates. */
1808
1809 if (slice->Start == (slice->End + 1))
1810 return ajTrue;
1811
1812 if (slice->Sequence)
1813 {
1814 /*
1815 ** Since the Slice has sequence attached, check whether its
1816 ** Slice length matches its sequence length.
1817 */
1818
1819 if (ajStrGetLen(slice->Sequence) != ensSliceCalculateLength(slice))
1820 ajFatal("ensSliceFetchSequenceAllStr got a Slice, "
1821 "which sequence length %u does not match its length %u.\n",
1822 ajStrGetLen(slice->Sequence),
1823 ensSliceCalculateLength(slice));
1824
1825 ajStrAssignS(Psequence, slice->Sequence);
1826 }
1827 else if (slice->Adaptor)
1828 {
1829 /*
1830 ** Since the Slice has a Slice Adaptor attached, it is possible to
1831 ** retrieve the sequence from the database.
1832 */
1833
1834 dba = ensSliceadaptorGetDatabaseadaptor(slice->Adaptor);
1835
1836 sqa = ensRegistryGetSequenceadaptor(dba);
1837
1838 ensSliceIsCircular(slice, &circular);
1839
1840 if ((circular == ajTrue) && (slice->Start > slice->End))
1841 {
1842 slice1 = ensSliceNewCpy(slice);
1843 slice2 = ensSliceNewCpy(slice);
1844
1845 slice1->End = ensSeqregionGetLength(slice1->Seqregion);
1846 slice2->Start = 1;
1847
1848 /*
1849 ** FIXME: The Perl API uses two linear Slice objects here
1850 ** and passes in undef for the end. The above copies may
1851 ** therefore not work.
1852 */
1853
1854 /*
1855 ** It is sligtly more efficient, if undefined AJAX String objects
1856 ** are directly allocated by the following functions to their
1857 ** final size.
1858 */
1859
1860 ensSequenceadaptorFetchSliceAllStr(sqa, slice1, &sequence1);
1861 ensSequenceadaptorFetchSliceAllStr(sqa, slice2, &sequence2);
1862
1863 ajStrAssignS(Psequence, sequence1);
1864 ajStrAppendS(Psequence, sequence2);
1865
1866 ajStrDel(&sequence1);
1867 ajStrDel(&sequence2);
1868
1869 ensSliceDel(&slice1);
1870 ensSliceDel(&slice2);
1871 }
1872 else
1873 ensSequenceadaptorFetchSliceAllStr(sqa, slice, Psequence);
1874 }
1875 else
1876 {
1877 /*
1878 ** The Slice has no sequence and no Slice Adaptor attached,
1879 ** so just return Ns.
1880 */
1881
1882 ajStrAppendCountK(Psequence, 'N', ensSliceCalculateLength(slice));
1883 }
1884
1885 if (ajStrGetLen(*Psequence) != ensSliceCalculateLength(slice))
1886 ajWarn("ensSliceFetchSequenceAllStr got sequence of length %u "
1887 "for Ensembl Slice of length %u.\n",
1888 ajStrGetLen(*Psequence),
1889 ensSliceCalculateLength(slice));
1890
1891 return ajTrue;
1892 }
1893
1894
1895
1896
1897 /* @func ensSliceFetchSequenceSubSeq ******************************************
1898 **
1899 ** Fetch a sub-sequence of an Ensembl Slice as AJAX Sequence.
1900 **
1901 ** @param [u] slice [EnsPSlice] Ensembl Slice
1902 ** @param [r] start [ajint] Start coordinate
1903 ** @param [r] end [ajint] End coordinate
1904 ** @param [r] strand [ajint] Strand information
1905 ** @param [wP] Psequence [AjPSeq*] Sequence address
1906 **
1907 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
1908 **
1909 ** @release 6.4.0
1910 ** @@
1911 ******************************************************************************/
1912
ensSliceFetchSequenceSubSeq(EnsPSlice slice,ajint start,ajint end,ajint strand,AjPSeq * Psequence)1913 AjBool ensSliceFetchSequenceSubSeq(EnsPSlice slice,
1914 ajint start,
1915 ajint end,
1916 ajint strand,
1917 AjPSeq *Psequence)
1918 {
1919 ajint srstart = 0;
1920 ajint srend = 0;
1921 ajint srstrand = 0;
1922
1923 AjPStr name = NULL;
1924 AjPStr sequence = NULL;
1925
1926 if (!slice)
1927 return ajFalse;
1928
1929 if (!strand)
1930 strand = 1;
1931
1932 if (!Psequence)
1933 return ajFalse;
1934
1935 /*
1936 ** Construct the Slice name, but convert relative Slice coordinates into
1937 ** absolute Sequence Region coordinates.
1938 */
1939
1940 if (slice->Strand >= 0)
1941 {
1942 srstart = slice->Start + start - 1;
1943 srend = slice->Start + end - 1;
1944 }
1945 else
1946 {
1947 srstart = slice->End - end + 1;
1948 srend = slice->End - start + 1;
1949 }
1950
1951 srstrand = slice->Strand * strand;
1952
1953 name = ajFmtStr("%S:%S:%S:%d:%d:%d",
1954 ensSliceGetCoordsystemName(slice),
1955 ensSliceGetCoordsystemVersion(slice),
1956 ensSliceGetSeqregionName(slice),
1957 srstart,
1958 srend,
1959 srstrand);
1960
1961 /*
1962 ** Fetch the Slice sequence.
1963 **
1964 ** It is sligtly more efficient, if undefined AJAX String objects are
1965 ** directly allocated by the following functions to their final size.
1966 */
1967
1968 ensSliceFetchSequenceSubStr(slice, start, end, strand, &sequence);
1969
1970 if (*Psequence)
1971 {
1972 ajSeqClear(*Psequence);
1973
1974 ajSeqAssignNameS(*Psequence, name);
1975 ajSeqAssignSeqS(*Psequence, sequence);
1976 }
1977 else
1978 *Psequence = ajSeqNewNameS(sequence, name);
1979
1980 ajSeqSetNuc(*Psequence);
1981
1982 ajStrDel(&sequence);
1983 ajStrDel(&name);
1984
1985 return ajTrue;
1986 }
1987
1988
1989
1990
1991 /* @func ensSliceFetchSequenceSubStr ******************************************
1992 **
1993 ** Fetch a sub-sequence of an Ensembl Slice as AJAX String
1994 ** in releative coordinates.
1995 **
1996 ** @cc Bio::EnsEMBL::Slice::subseq
1997 ** @cc Bio::EnsEMBL::CircularSlice::subseq
1998 ** @param [u] slice [EnsPSlice] Ensembl Slice
1999 ** @param [r] start [ajint] Start coordinate
2000 ** @param [r] end [ajint] End coordinate
2001 ** @param [r] strand [ajint] Strand information
2002 ** @param [wP] Psequence [AjPStr*] Sequence address
2003 **
2004 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
2005 **
2006 ** @release 6.4.0
2007 ** @@
2008 ******************************************************************************/
2009
ensSliceFetchSequenceSubStr(EnsPSlice slice,ajint start,ajint end,ajint strand,AjPStr * Psequence)2010 AjBool ensSliceFetchSequenceSubStr(EnsPSlice slice,
2011 ajint start,
2012 ajint end,
2013 ajint strand,
2014 AjPStr *Psequence)
2015 {
2016 ajint region1 = 0;
2017 ajint region2 = 0;
2018 ajint slength = 0;
2019
2020 ajuint rlength = 0U;
2021
2022 AjBool circular = AJFALSE;
2023
2024 AjPStr sequence1 = NULL;
2025 AjPStr sequence2 = NULL;
2026
2027 EnsPDatabaseadaptor dba = NULL;
2028
2029 EnsPSequenceadaptor sqa = NULL;
2030
2031 if (ajDebugTest("ensSliceFetchSequenceSubStr"))
2032 ajDebug("ensSliceFetchSequenceSubStr\n"
2033 " slice %p\n"
2034 " start %d\n"
2035 " end %d\n"
2036 " strand %d\n"
2037 " Psequence %p\n",
2038 slice,
2039 start,
2040 end,
2041 strand,
2042 Psequence);
2043
2044 if (!slice)
2045 return ajFalse;
2046
2047 if (!Psequence)
2048 return ajFalse;
2049
2050 /* For "in-between" (insertion) coordinates return an empty string. */
2051
2052 if (start == (end + 1))
2053 return ajTrue;
2054
2055 ensSliceIsCircular(slice, &circular);
2056
2057 if ((circular == ajFalse) && (start > (end + 1)))
2058 {
2059 ajDebug("ensSliceFetchSequenceSubStr requires for linear Slices that "
2060 "the start coordinate %d is less than the "
2061 "end coordinate %d + 1.\n",
2062 start, end);
2063
2064 return ajFalse;
2065 }
2066
2067 if (!strand)
2068 strand = 1;
2069
2070 if ((circular == ajTrue) && (start > end))
2071 {
2072 region1 = ensSeqregionGetLength(slice->Seqregion) - start;
2073
2074 region2 = end;
2075
2076 rlength = region1 + region2 + 1;
2077 }
2078 else
2079 rlength = end - start + 1;
2080
2081 if (*Psequence)
2082 ajStrAssignClear(Psequence);
2083 else
2084 *Psequence = ajStrNewRes(rlength + 1);
2085
2086 if (slice->Sequence)
2087 {
2088 /*
2089 ** Since the Slice has sequence attached, check whether
2090 ** Slice length and sequence length match.
2091 */
2092
2093 if (ajStrGetLen(slice->Sequence) != ensSliceCalculateLength(slice))
2094 ajFatal("ensSliceFetchSequenceSubStr got a Slice, "
2095 "which sequence length (%Lu) does not match its "
2096 "length (%u).\n",
2097 ajStrGetLen(slice->Sequence),
2098 ensSliceCalculateLength(slice));
2099 /* FIXME: size_t can be shorter than ajulong */
2100
2101 /* Relative Slice coordinates range from 1 to length. */
2102
2103 /* Check for a gap at the beginning and pad it with Ns. */
2104
2105 if (start < 1)
2106 {
2107 ajStrAppendCountK(Psequence, 'N', 1 - start);
2108
2109 start = 1;
2110 }
2111
2112 ajStrAppendSubS(Psequence,
2113 slice->Sequence,
2114 start - 1,
2115 end - start + 1);
2116
2117 /* Check that the Slice is within signed integer range. */
2118
2119 if (ensSliceCalculateLength(slice) <= INT_MAX)
2120 slength = (ajint) ensSliceCalculateLength(slice);
2121 else
2122 ajFatal("ensSliceFetchSequenceSubStr got an "
2123 "Ensembl Slice, which length (%u) exceeds the "
2124 "maximum integer limit (%d).\n",
2125 ensSliceCalculateLength(slice), INT_MAX);
2126
2127 /* Check for a gap at the end and pad it again with Ns. */
2128
2129 if (end > slength)
2130 ajStrAppendCountK(Psequence, 'N', (ajuint) (end - slength));
2131
2132 if (strand < 0)
2133 ajSeqstrReverse(Psequence);
2134 }
2135 else if (slice->Adaptor)
2136 {
2137 /*
2138 ** Since the Slice has a Slice Adaptor attached, it is possible to
2139 ** retrieve the sequence from the database.
2140 */
2141
2142 dba = ensSliceadaptorGetDatabaseadaptor(slice->Adaptor);
2143
2144 sqa = ensRegistryGetSequenceadaptor(dba);
2145
2146 if ((circular == ajTrue) && (start > end))
2147 {
2148 sequence1 = ajStrNewRes(rlength);
2149 sequence2 = ajStrNewRes(rlength);
2150
2151 ensSequenceadaptorFetchSliceSubStr(
2152 sqa,
2153 slice,
2154 start,
2155 ensSeqregionGetLength(slice->Seqregion),
2156 strand,
2157 &sequence1);
2158
2159 ensSequenceadaptorFetchSliceSubStr(
2160 sqa,
2161 slice,
2162 1,
2163 end,
2164 strand,
2165 &sequence2);
2166
2167 ajStrAppendS(Psequence, sequence1);
2168 ajStrAppendS(Psequence, sequence2);
2169
2170 ajStrDel(&sequence1);
2171 ajStrDel(&sequence2);
2172 }
2173 else
2174 ensSequenceadaptorFetchSliceSubStr(sqa,
2175 slice,
2176 start,
2177 end,
2178 strand,
2179 Psequence);
2180 }
2181 else
2182 {
2183 /*
2184 ** The Slice has no sequence and no Slice Adaptor attached,
2185 ** so just return Ns.
2186 */
2187
2188 ajStrAppendCountK(Psequence, 'N', rlength);
2189 }
2190
2191 if (ajStrGetLen(*Psequence) != rlength)
2192 ajWarn("ensSliceFetchSequenceSubStr got sequence of length %Lu "
2193 "for region of length %u.\n",
2194 ajStrGetLen(*Psequence),
2195 rlength);
2196 /* FIXME: size_t can be shorter than ajulong */
2197
2198 return ajTrue;
2199 }
2200
2201
2202
2203
2204 /* @func ensSliceFetchSliceexpanded *******************************************
2205 **
2206 ** Fetch an expanded copy of an Ensembl Slice, which remains unchanged.
2207 ** The start and end are moved outwards from the centre of the Slice if
2208 ** positive values are provided and moved inwards if negative values are
2209 ** provided. A Slice may not be contracted below 1 base pair but may grow
2210 ** to be arbitrarily large.
2211 **
2212 ** The caller is responsible for deleting the Ensembl Slice.
2213 **
2214 ** @cc Bio::EnsEMBL::Slice::expand
2215 ** @cc Bio::EnsEMBL::CircularSlice::expand
2216 ** @param [u] slice [EnsPSlice] Ensembl Slice
2217 ** @param [r] five [ajint] Number of bases to expand the 5' region
2218 ** Positive values expand the Slice, negative values contract it
2219 ** @param [r] three [ajint] Number of bases to expand the 3' region
2220 ** Positive values expand the Slice, negative values contract it
2221 ** @param [r] force [AjBool] Force Slice contraction, even when shifts for
2222 ** "five" and "three" overlap. In that case "five" and
2223 ** "three" will be set to a maximum possible number
2224 ** resulting in a Slice, which would have only 2 base
2225 ** pairs.
2226 ** @param [w] Pfshift [ajint*] Maximum possible 5' shift when "force" is set
2227 ** @param [w] Ptshift [ajint*] Maximum possible 3' shift when "force" is set
2228 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
2229 **
2230 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
2231 **
2232 ** @release 6.4.0
2233 ** @@
2234 ** NOTE: Currently, this function fetches the expanded Slice via a
2235 ** Slice Adaptor, if one is set for this Slice. The Slice Adaptor registers
2236 ** Slice objects in the internal cache, which minimises memory requirements.
2237 ** However, registering many Slice objects, the LRU cache may drop important
2238 ** Slice objects.
2239 ******************************************************************************/
2240
ensSliceFetchSliceexpanded(EnsPSlice slice,ajint five,ajint three,AjBool force,ajint * Pfshift,ajint * Ptshift,EnsPSlice * Pslice)2241 AjBool ensSliceFetchSliceexpanded(EnsPSlice slice,
2242 ajint five,
2243 ajint three,
2244 AjBool force,
2245 ajint *Pfshift,
2246 ajint *Ptshift,
2247 EnsPSlice *Pslice)
2248 {
2249 ajint sshift = 0;
2250 ajint eshift = 0;
2251
2252 ajint srstart = 0;
2253 ajint srend = 0;
2254
2255 AjBool circular = AJFALSE;
2256
2257 if (ajDebugTest("ensSliceFetchSliceexpanded"))
2258 ajDebug("ensSliceFetchSliceexpanded\n"
2259 " slice %p\n"
2260 " five %d\n"
2261 " three %d\n"
2262 " force '%B'\n"
2263 " Pfshift %p\n"
2264 " Ptshift %p\n"
2265 " Pslice %p\n",
2266 slice,
2267 five,
2268 three,
2269 force,
2270 Pfshift,
2271 Ptshift,
2272 Pslice);
2273
2274 if (!slice)
2275 return ajFalse;
2276
2277 if (!Pfshift)
2278 return ajFalse;
2279
2280 if (!Ptshift)
2281 return ajFalse;
2282
2283 if (!Pslice)
2284 return ajFalse;
2285
2286 *Pslice = NULL;
2287
2288 if (slice->Sequence)
2289 {
2290 ajDebug("ensSliceFetchSliceexpanded cannot expand an Ensembl Slice "
2291 "with an attached sequence.\n");
2292
2293 return ajFalse;
2294 }
2295
2296 if (slice->Strand > 0)
2297 {
2298 sshift = five;
2299 eshift = three;
2300 }
2301 else
2302 {
2303 sshift = three;
2304 eshift = five;
2305 }
2306
2307 srstart = slice->Start - sshift;
2308 srend = slice->End + eshift;
2309
2310 ensSliceIsCircular(slice, &circular);
2311
2312 if (
2313 (
2314 (srstart <= 0)
2315 ||
2316 (srstart > ensSliceGetSeqregionLength(slice))
2317 ||
2318 (srend <= 0)
2319 ||
2320 (srend > ensSliceGetSeqregionLength(slice))
2321 )
2322 &&
2323 (circular == ajTrue)
2324 )
2325 {
2326 if (srstart <= 0)
2327 srstart += ensSliceGetSeqregionLength(slice);
2328
2329 if (srstart > ensSliceGetSeqregionLength(slice))
2330 srstart -= ensSliceGetSeqregionLength(slice);
2331
2332 if (srend <= 0)
2333 srend += ensSliceGetSeqregionLength(slice);
2334
2335 if (srend > ensSliceGetSeqregionLength(slice))
2336 srend -= ensSliceGetSeqregionLength(slice);
2337 }
2338
2339 if ((srstart > srend) && (circular == ajFalse))
2340 {
2341 if (force)
2342 {
2343 /* Apply the maximal possible shift, if force is set. */
2344
2345 if (sshift < 0)
2346 {
2347 /*
2348 ** If we are contracting the Slice from the start,
2349 ** move the start just before the end.
2350 */
2351
2352 srstart = srend - 1;
2353
2354 sshift = slice->Start - srstart;
2355 }
2356 else
2357 {
2358 /*
2359 ** If the Slice still has a negative length,
2360 ** try to move the end.
2361 */
2362
2363 if (eshift < 0)
2364 {
2365 srend = srstart + 1;
2366
2367 eshift = srend - slice->End;
2368 }
2369
2370 *Pfshift = (slice->Strand >= 0) ? eshift : sshift;
2371 *Ptshift = (slice->Strand >= 0) ? sshift : eshift;
2372 }
2373 }
2374
2375 if (srstart > srend)
2376 {
2377 /* If the Slice still has a negative length, return NULL. */
2378
2379 ajDebug("ensSliceFetchSliceexpanded requires the Slice "
2380 "start %d to be less than the Slice end %d "
2381 "coordinate.\n",
2382 srstart, srend);
2383
2384 return ajFalse;
2385 }
2386 }
2387
2388 if (slice->Adaptor)
2389 ensSliceadaptorFetchBySeqregionIdentifier(
2390 slice->Adaptor,
2391 ensSeqregionGetIdentifier(slice->Seqregion),
2392 srstart,
2393 srend,
2394 slice->Strand,
2395 Pslice);
2396 else
2397 *Pslice = ensSliceNewIni(slice->Adaptor,
2398 slice->Seqregion,
2399 srstart,
2400 srend,
2401 slice->Strand);
2402
2403 return ajTrue;
2404 }
2405
2406
2407
2408
2409 /* @func ensSliceFetchSliceinverted *******************************************
2410 **
2411 ** Fetch an inverted Slice from an Ensembl Slice.
2412 **
2413 ** The caller is responsible for deleting the Ensembl Slice.
2414 **
2415 ** @cc Bio::EnsEMBL::Slice::invert
2416 ** @cc Bio::EnsEMBL::CircularSlice::invert
2417 ** @param [u] slice [EnsPSlice] Ensembl Slice
2418 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
2419 ** @see ensSliceadaptorFetchBySlice
2420 **
2421 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
2422 **
2423 ** @release 6.4.0
2424 ** @@
2425 ** NOTE: Currently, this function fetches the expanded Slice via a
2426 ** Slice Adaptor, if one is set for this Slice. The Slice Adaptor registers
2427 ** Slice objects in the internal cache, which minimises memory requirements.
2428 ** However, registering many Slice objects, the LRU cache may drop important
2429 ** Slice objects.
2430 ******************************************************************************/
2431
ensSliceFetchSliceinverted(EnsPSlice slice,EnsPSlice * Pslice)2432 AjBool ensSliceFetchSliceinverted(EnsPSlice slice, EnsPSlice *Pslice)
2433 {
2434 AjPStr sequence = NULL;
2435
2436 if (!slice)
2437 return ajFalse;
2438
2439 if (!Pslice)
2440 return ajFalse;
2441
2442 *Pslice = NULL;
2443
2444 /*
2445 ** Retrieve the inverted Slice via the Slice Adaptor if one is attached.
2446 ** This will automatically register the new Slice in the
2447 ** Slice Adaptor-internal cache and will also take care of
2448 ** an eventual Slice-internal sequence.
2449 ** Use relative Slice coordinates, which range from 1 to length.
2450 */
2451
2452 if (slice->Adaptor)
2453 {
2454 ensSliceadaptorFetchBySlice(slice->Adaptor,
2455 slice,
2456 1,
2457 (ajint) ensSliceCalculateLength(slice),
2458 -1,
2459 Pslice);
2460 return ajTrue;
2461 }
2462
2463 if (slice->Sequence)
2464 {
2465 /* Reverse (and complement) the sequence if one has been defined. */
2466
2467 sequence = ajStrNewS(slice->Sequence);
2468
2469 ajSeqstrReverse(&sequence);
2470
2471 *Pslice = ensSliceNewSeq(slice->Adaptor,
2472 slice->Seqregion,
2473 slice->Start,
2474 slice->End,
2475 slice->Strand * -1,
2476 sequence);
2477
2478 ajStrDel(&sequence);
2479 }
2480 else
2481 *Pslice = ensSliceNewIni(slice->Adaptor,
2482 slice->Seqregion,
2483 slice->Start,
2484 slice->End,
2485 slice->Strand * -1);
2486
2487 return ajTrue;
2488 }
2489
2490
2491
2492
2493 /* @func ensSliceFetchSlicesub ************************************************
2494 **
2495 ** Fetch a Sub-Slice from an Ensembl Slice.
2496 **
2497 ** If a Slice is requested, which lies outside the boundaries of this Slice,
2498 ** this function will return NULL. This means that the behaviour will be
2499 ** consistent whether or not the Slice is attached to the database
2500 ** (i.e. if there is attached sequence to the Slice).
2501 ** Alternatively, the ensSliceFetchSliceexpanded or
2502 ** ensSliceAdaptorFetchByRegion functions could be used instead.
2503 **
2504 ** The caller is responsible for deleting the Ensembl Slice.
2505 **
2506 ** @cc Bio::EnsEMBL::Slice::sub_Slice
2507 ** @param [u] slice [EnsPSlice] Ensembl Slice
2508 ** @param [r] start [ajint] Start coordinate
2509 ** @param [r] end [ajint] End coordinate
2510 ** @param [r] strand [ajint] Strand information
2511 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
2512 ** @see ensSliceadaptorFetchBySlice
2513 **
2514 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
2515 **
2516 ** @release 6.4.0
2517 ** @@
2518 ** NOTE: Currently, this function fetches the expanded Slice via a
2519 ** Slice Adaptor, if one is set for this Slice. The Slice Adaptor registers
2520 ** Slice objects in the internal cache, which minimises memory requirements.
2521 ** However, registering many Slice objects, the LRU cache may drop important
2522 ** Slice objects.
2523 ******************************************************************************/
2524
ensSliceFetchSlicesub(EnsPSlice slice,ajint start,ajint end,ajint strand,EnsPSlice * Pslice)2525 AjBool ensSliceFetchSlicesub(EnsPSlice slice,
2526 ajint start,
2527 ajint end,
2528 ajint strand,
2529 EnsPSlice *Pslice)
2530 {
2531 ajint srstart = 0;
2532 ajint srend = 0;
2533 ajint srstrand = 0;
2534
2535 AjPStr sequence = NULL;
2536
2537 if (!slice)
2538 {
2539 ajDebug("ensSliceGetSubSlice requires an Ensembl Slice.\n");
2540
2541 return ajFalse;
2542 }
2543
2544 if (!Pslice)
2545 return ajFalse;
2546
2547 *Pslice = NULL;
2548
2549 if ((start < 1) || (start > slice->End))
2550 {
2551 ajDebug("ensSliceGetSubSlice requires the start coordinate %d "
2552 "to be greater than or equal to 1 and less than or equal to "
2553 "the end coordinate %d of the Ensembl Slice.\n",
2554 start, slice->End);
2555
2556 return ajFalse;
2557 }
2558
2559 if ((end < start) || (end > slice->End))
2560 {
2561 ajDebug("ensSliceGetSubSlice requires the end coordinate %d "
2562 "to be greater than or equal to the start coordinate %d "
2563 "and to be less than or equal to the "
2564 "end coordinate %d of the Ensembl Slice.\n",
2565 end, start, slice->End);
2566
2567 return ajFalse;
2568 }
2569
2570 if (!strand)
2571 strand = 1;
2572
2573 /*
2574 ** Retrieve the Sub-Slice via the Slice Adaptor if one is attached.
2575 ** This will automatically register the new Slice in the
2576 ** Slice Adaptor-internal cache and will also take care of
2577 ** an eventual Slice-internal sequence.
2578 */
2579
2580 if (slice->Adaptor)
2581 {
2582 ensSliceadaptorFetchBySlice(slice->Adaptor,
2583 slice,
2584 start,
2585 end,
2586 strand,
2587 Pslice);
2588
2589 return ajTrue;
2590 }
2591
2592 /* Transform relative into absolute Slice coordinates. */
2593
2594 if (slice->Strand >= 0)
2595 {
2596 srstart = slice->Start + start - 1;
2597 srend = slice->Start + end - 1;
2598 }
2599 else
2600 {
2601 srstart = slice->End - end + 1;
2602 srend = slice->End - start + 1;
2603 }
2604
2605 srstrand = slice->Strand * strand;
2606
2607 if (slice->Sequence)
2608 {
2609 sequence = ajStrNewRes(end - start + 1);
2610
2611 ensSliceFetchSequenceSubStr(slice,
2612 start,
2613 end,
2614 strand,
2615 &sequence);
2616
2617 *Pslice = ensSliceNewSeq(slice->Adaptor,
2618 slice->Seqregion,
2619 srstart,
2620 srend,
2621 srstrand,
2622 sequence);
2623
2624 ajStrDel(&sequence);
2625 }
2626 else
2627 *Pslice = ensSliceNewIni(slice->Adaptor,
2628 slice->Seqregion,
2629 srstart,
2630 srend,
2631 srstrand);
2632
2633 return ajTrue;
2634 }
2635
2636
2637
2638
2639 /* @section comparison ********************************************************
2640 **
2641 ** Functions for comparing Ensembl Slice objects.
2642 **
2643 ** @fdata [EnsPSlice]
2644 **
2645 ** @nam3rule Compare Functions for comparing Ensembl Slice objects
2646 ** @nam4rule Identifier Compare the SQL database-internal identifier
2647 ** @nam5rule Ascending Sort in ascending order
2648 ** @nam5rule Descending Sort in descending order
2649 **
2650 ** @argrule * slice1 [const EnsPSlice] Ensembl Slice
2651 ** @argrule * slice2 [const EnsPSlice] Ensembl Slice
2652 **
2653 ** @valrule * [int] Integer less than, equal or greater than 0
2654 **
2655 ** @fcategory misc
2656 ******************************************************************************/
2657
2658
2659
2660
2661 /* @func ensSliceCompareIdentifierAscending ***********************************
2662 **
2663 ** Comparison function to sort Ensembl Slice objects by their
2664 ** Ensembl Sequence Region identifier in ascending order.
2665 **
2666 ** Ensembl Slice objects without Ensembl Sequence Region objects sort towards
2667 ** the end of the AJAX List.
2668 **
2669 ** @param [r] slice1 [const EnsPSlice] Ensembl Slice 1
2670 ** @param [r] slice2 [const EnsPSlice] Ensembl Slice 2
2671 ** @see ajListSort
2672 **
2673 ** @return [int] The comparison function returns an integer less than,
2674 ** equal to, or greater than zero if the first argument is
2675 ** considered to be respectively less than, equal to, or
2676 ** greater than the second.
2677 **
2678 ** @release 6.3.0
2679 ** @@
2680 ******************************************************************************/
2681
ensSliceCompareIdentifierAscending(const EnsPSlice slice1,const EnsPSlice slice2)2682 int ensSliceCompareIdentifierAscending(const EnsPSlice slice1,
2683 const EnsPSlice slice2)
2684 {
2685 int result = 0;
2686
2687 ajuint srid1 = 0U;
2688 ajuint srid2 = 0U;
2689
2690 /* Sort empty values towards the end of the AJAX List. */
2691
2692 if (slice1 && (!slice2))
2693 return -1;
2694
2695 if ((!slice1) && (!slice2))
2696 return 0;
2697
2698 if ((!slice1) && slice2)
2699 return +1;
2700
2701 srid1 = ensSliceGetSeqregionIdentifier(slice1);
2702 srid2 = ensSliceGetSeqregionIdentifier(slice2);
2703
2704 if (srid1 < srid2)
2705 result = -1;
2706
2707 if (srid1 > srid2)
2708 result = +1;
2709
2710 return result;
2711 }
2712
2713
2714
2715
2716 /* @section comparison ********************************************************
2717 **
2718 ** Functions for matching Ensembl Slice objects.
2719 **
2720 ** @fdata [EnsPSlice]
2721 **
2722 ** @nam3rule Match Functions for matching Ensembl Slice objects
2723 ** @nam3rule Similarity Functions for matching Ensembl Slice objects
2724 **
2725 ** @argrule * slice1 [const EnsPSlice] Ensembl Slice
2726 ** @argrule * slice2 [const EnsPSlice] Ensembl Slice
2727 **
2728 ** @valrule * [AjBool] ajTrue if the Slice objects match
2729 **
2730 ** @fcategory misc
2731 ******************************************************************************/
2732
2733
2734
2735
2736 /* @func ensSliceMatch ********************************************************
2737 **
2738 ** Test for matching two Ensembl Slice objects.
2739 **
2740 ** @param [r] slice1 [const EnsPSlice] First Ensembl Slice
2741 ** @param [r] slice2 [const EnsPSlice] Second Ensembl Slice
2742 **
2743 ** @return [AjBool] ajTrue if the Slice objects are equal
2744 **
2745 ** @release 6.2.0
2746 ** @@
2747 ** The comparison is based on an initial pointer equality test and if that
2748 ** fails, the Coordinate System, the Sequence Region, as well as the Slice
2749 ** start, end and strand members are compared. In case the Slice has a
2750 ** sequence String attached, it is compared as well.
2751 ******************************************************************************/
2752
ensSliceMatch(const EnsPSlice slice1,const EnsPSlice slice2)2753 AjBool ensSliceMatch(const EnsPSlice slice1, const EnsPSlice slice2)
2754 {
2755 if (ajDebugTest("ensSliceMatch"))
2756 {
2757 ajDebug("ensSliceMatch\n"
2758 " slice1 %p\n"
2759 " slice2 %p\n",
2760 slice1,
2761 slice2);
2762
2763 ensSliceTrace(slice1, 1);
2764 ensSliceTrace(slice2, 1);
2765 }
2766
2767 if (!slice1)
2768 return ajFalse;
2769
2770 if (!slice2)
2771 return ajFalse;
2772
2773 /* Try a direct pointer comparison first. */
2774
2775 if (slice1 == slice2)
2776 return ajTrue;
2777
2778 if (!ensSeqregionMatch(slice1->Seqregion, slice2->Seqregion))
2779 return ajFalse;
2780
2781 if (slice1->Start != slice2->Start)
2782 return ajFalse;
2783
2784 if (slice1->End != slice2->End)
2785 return ajFalse;
2786
2787 if (slice1->Strand != slice2->Strand)
2788 return ajFalse;
2789
2790 if ((slice1->Sequence || slice2->Sequence) &&
2791 (!ajStrMatchS(slice1->Sequence, slice2->Sequence)))
2792 return ajFalse;
2793
2794 return ajTrue;
2795 }
2796
2797
2798
2799
2800 /* @func ensSliceSimilarity ***************************************************
2801 **
2802 ** Test two Ensembl Slice objects for similarity.
2803 **
2804 ** For similarity Ensembl Slice objects have to be defined on the same
2805 ** Ensembl Sequence Region, but can have different start end and strand
2806 ** coordinates. If a sequence has been set, it has to match perfectly.
2807 **
2808 ** @param [r] slice1 [const EnsPSlice] First Ensembl Slice
2809 ** @param [r] slice2 [const EnsPSlice] Second Ensembl Slice
2810 **
2811 ** @return [AjBool] ajTrue if the Slice objects are equal
2812 **
2813 ** @release 6.4.0
2814 ** @@
2815 ** The comparison is based on an initial pointer equality test and if that
2816 ** fails, the Ensembl Sequence Region objects are compared. In case one of the
2817 ** Ensembl Slice objects has a sequence attached, it is compared as well.
2818 ******************************************************************************/
2819
ensSliceSimilarity(const EnsPSlice slice1,const EnsPSlice slice2)2820 AjBool ensSliceSimilarity(const EnsPSlice slice1, const EnsPSlice slice2)
2821 {
2822 if (ajDebugTest("ensSliceSimilarity"))
2823 {
2824 ajDebug("ensSliceSimilarity\n"
2825 " slice1 %p\n"
2826 " slice2 %p\n",
2827 slice1,
2828 slice2);
2829
2830 ensSliceTrace(slice1, 1);
2831 ensSliceTrace(slice2, 1);
2832 }
2833
2834 if (!slice1)
2835 return ajFalse;
2836
2837 if (!slice2)
2838 return ajFalse;
2839
2840 /* Try a direct pointer comparison first. */
2841
2842 if (slice1 == slice2)
2843 return ajTrue;
2844
2845 if (!ensSeqregionMatch(slice1->Seqregion, slice2->Seqregion))
2846 return ajFalse;
2847
2848 /* Sequence members are optional. */
2849
2850 if (((slice1->Sequence != NULL) || (slice2->Sequence != NULL)) &&
2851 (ajStrMatchS(slice1->Sequence, slice2->Sequence) == ajFalse))
2852 return ajFalse;
2853
2854 return ajTrue;
2855 }
2856
2857
2858
2859
2860 /* @section query *************************************************************
2861 **
2862 ** Functions for querying the properties of an Ensembl Slice.
2863 **
2864 ** @fdata [EnsPSlice]
2865 **
2866 ** @nam3rule Is Check whether an Ensembl Slice represents a certain property
2867 ** @nam4rule Circular Check for a circular Ensembl Slice
2868 ** @nam4rule Nonreference Check for a non-reference Ensembl Slice
2869 ** @nam4rule Toplevel Check for a top-level Ensembl Slice
2870 **
2871 ** @argrule * slice [EnsPSlice] Ensembl Slice
2872 ** @argrule * Presult [AjBool*] Boolean result
2873 **
2874 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
2875 **
2876 ** @fcategory use
2877 ******************************************************************************/
2878
2879
2880
2881
2882 /* @func ensSliceIsCircular ***************************************************
2883 **
2884 ** Check whether an Ensembl Slice is circular based on an Ensembl Sequence
2885 ** Region, which has an Ensembl Attribute of code "circular_seq" set.
2886 **
2887 ** @cc Bio::EnsEMBL::Slice::is_circular
2888 ** @param [u] slice [EnsPSlice] Ensembl Slice
2889 ** @param [u] Presult [AjBool*] ajTrue if the Ensembl Sequence Region has an
2890 ** Ensembl Attribute of code "circular_seq" set
2891 **
2892 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
2893 **
2894 ** @release 6.4.0
2895 ** @@
2896 ** NOTE: In this implementation, the "circular" property relies on
2897 ** Ensembl Attribute objects are linked to Ensembl Sequence Region objects
2898 ** that are at the base of Ensembl Slice objects. Although Ensembl Sequence
2899 ** Region objects cache all Ensembl Attribute objects, one SQL query is still
2900 ** needed for each new Ensembl Sequence Region object.
2901 ** The Perl API caches the information for all Ensembl Sequence Region objects
2902 ** at once, but does not formalise the information within
2903 ** Ensembl Attribute objects.
2904 ******************************************************************************/
2905
ensSliceIsCircular(EnsPSlice slice,AjBool * Presult)2906 AjBool ensSliceIsCircular(EnsPSlice slice, AjBool *Presult)
2907 {
2908 EnsESliceTopology sltp = ensESliceTopologyNULL;
2909
2910 if (!slice)
2911 return ajFalse;
2912
2913 if (!Presult)
2914 return ajFalse;
2915
2916 sltp = ensSliceLoadTopology(slice);
2917
2918 if (sltp == ensESliceTopologyCircular)
2919 *Presult = ajTrue;
2920 else
2921 *Presult = ajFalse;
2922
2923 return ajTrue;
2924 }
2925
2926
2927
2928
2929 /* @func ensSliceIsNonreference ***********************************************
2930 **
2931 ** Check whether an Ensembl Slice is based on an Ensembl Sequence Region, which
2932 ** has an Ensembl Attribute of code "non_ref" set.
2933 **
2934 ** @cc Bio::EnsEMBL::Slice::is_reference
2935 ** @param [u] slice [EnsPSlice] Ensembl Slice
2936 ** @param [u] Presult [AjBool*] ajTrue if the Ensembl Sequence Region has an
2937 ** Ensembl Attribute of code "non_ref" set
2938 **
2939 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
2940 **
2941 ** @release 6.4.0
2942 ** @@
2943 ******************************************************************************/
2944
ensSliceIsNonreference(EnsPSlice slice,AjBool * Presult)2945 AjBool ensSliceIsNonreference(EnsPSlice slice, AjBool *Presult)
2946 {
2947 if (!slice)
2948 return ajFalse;
2949
2950 if (!Presult)
2951 return ajFalse;
2952
2953 if (!slice->Seqregion)
2954 {
2955 ajDebug("ensSliceIsNonreference requires the Ensembl Slice to have an "
2956 "Ensembl Sequence region attached.\n");
2957
2958 return ajFalse;
2959 }
2960
2961 return ensSeqregionIsNonreference(slice->Seqregion, Presult);
2962 }
2963
2964
2965
2966
2967 /* @func ensSliceIsToplevel ***************************************************
2968 **
2969 ** Check whether an Ensembl Slice is based on an Ensembl Sequence Region, which
2970 ** has an Ensembl Attribute of code "toplevel" set.
2971 **
2972 ** @cc Bio::EnsEMBL::Slice::is_toplevel
2973 ** @param [u] slice [EnsPSlice] Ensembl Slice
2974 ** @param [u] Presult [AjBool*] ajTrue if the Ensembl Sequence Region has an
2975 ** Ensembl Attribute of code "toplevel" set
2976 **
2977 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
2978 **
2979 ** @release 6.4.0
2980 ** @@
2981 ******************************************************************************/
2982
ensSliceIsToplevel(EnsPSlice slice,AjBool * Presult)2983 AjBool ensSliceIsToplevel(EnsPSlice slice, AjBool *Presult)
2984 {
2985 if (!slice)
2986 return ajFalse;
2987
2988 if (!Presult)
2989 return ajFalse;
2990
2991 if (!slice->Seqregion)
2992 {
2993 ajDebug("ensSliceIsToplevel requires the Ensembl Slice to have an "
2994 "Ensembl Sequence region attached.\n");
2995
2996 return ajFalse;
2997 }
2998
2999 return ensSeqregionIsToplevel(slice->Seqregion, Presult);
3000 }
3001
3002
3003
3004
3005 /* @section map ***************************************************************
3006 **
3007 ** Functions for mapping Ensembl Slice objects.
3008 **
3009 ** @fdata [EnsPSlice]
3010 **
3011 ** @nam3rule Project Project an Ensembl Slice onto another
3012 ** Ensembl Coordinate System
3013 ** @nam3rule Projectslice Project an Ensembl Slice onto another
3014 ** Ensembl Slice
3015 **
3016 ** @argrule Project slice [EnsPSlice] Ensembl Slice
3017 ** @argrule Project csname [const AjPStr] Ensembl Coordinate System name
3018 ** @argrule Project csversion [const AjPStr] Ensembl Coordinate System version
3019 ** @argrule Project pss [AjPList] AJAX List of
3020 ** Ensembl Projection Segment objects
3021 ** @argrule Projectslice srcslice [EnsPSlice] Source Ensembl Slice
3022 ** @argrule Projectslice trgslice [EnsPSlice] Target Ensembl Slice
3023 ** @argrule Projectslice pss [AjPList] AJAX List of
3024 ** Ensembl Projection Segment objects
3025 **
3026 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
3027 **
3028 ** @fcategory misc
3029 ******************************************************************************/
3030
3031
3032
3033
3034 /* @funcstatic sliceConstrain *************************************************
3035 **
3036 ** Constrain an Ensembl Slice to its Ensembl Sequence Region member.
3037 **
3038 ** The caller is responsible for deleting the Ensembl Projection Segment.
3039 **
3040 ** @cc Bio::EnsEMBL::Slice::_constrain_to_region
3041 ** @cc Bio::EnsEMBL::CircularSlice::_constrain_to_region
3042 ** @param [u] slice [EnsPSlice] Ensembl Slice
3043 **
3044 ** @return [EnsPProjectionsegment] Ensembl Projection Segment or NULL
3045 **
3046 ** @release 6.4.0
3047 ** @@
3048 ******************************************************************************/
3049
sliceConstrain(EnsPSlice slice)3050 static EnsPProjectionsegment sliceConstrain(EnsPSlice slice)
3051 {
3052 ajint five = 0;
3053 ajint three = 0;
3054 ajint fshift = 0;
3055 ajint tshift = 0;
3056
3057 EnsPProjectionsegment ps = NULL;
3058
3059 EnsPSlice nslice = NULL;
3060
3061 if (!slice)
3062 {
3063 ajDebug("sliceConstrain requires an Ensembl Slice.\n");
3064
3065 return NULL;
3066 }
3067
3068 /* Return NULL, if this Slice does not overlap with its Sequence Region. */
3069
3070 if ((slice->Start > ensSeqregionGetLength(slice->Seqregion)) ||
3071 (slice->End < 1))
3072 return NULL;
3073
3074 /*
3075 ** If the Slice has negative coordinates or coordinates exceeding the
3076 ** length of the Sequence Region the Slice needs shrinking to the
3077 ** defined Sequence Region.
3078 */
3079
3080 if (slice->Start < 1)
3081 five = slice->Start - 1;
3082
3083 if (slice->End > ensSeqregionGetLength(slice->Seqregion))
3084 three = ensSeqregionGetLength(slice->Seqregion) - slice->End;
3085
3086 if (five || three)
3087 {
3088 if (slice->Strand >= 0)
3089 ensSliceFetchSliceexpanded(slice,
3090 five,
3091 three,
3092 ajFalse,
3093 &fshift,
3094 &tshift,
3095 &nslice);
3096 else
3097 ensSliceFetchSliceexpanded(slice,
3098 three,
3099 five,
3100 ajFalse,
3101 &fshift,
3102 &tshift,
3103 &nslice);
3104 }
3105 else
3106 nslice = ensSliceNewRef(slice);
3107
3108 ps = ensProjectionsegmentNewIni(1 - five,
3109 ensSliceCalculateLength(slice) + three,
3110 nslice);
3111
3112 ensSliceDel(&nslice);
3113
3114 return ps;
3115 }
3116
3117
3118
3119
3120 /* @funcstatic sliceProject ***************************************************
3121 **
3122 ** Ensembl Slice project helper function.
3123 **
3124 ** The caller is responsible for deleting the Ensembl Projection Segment
3125 ** objects before deleting the AJAX List.
3126 **
3127 ** @cc Bio::EnsEMBL::Slice::project
3128 ** @cc Bio::EnsEMBL::CircularSlice::project
3129 ** @param [u] slice [EnsPSlice] Ensembl Slice
3130 ** @param [u] trgcs [EnsPCoordsystem] Ensembl Coordinate System
3131 ** @param [u] pss [AjPList] AJAX List of Ensembl Projection Segment objects
3132 **
3133 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
3134 **
3135 ** @release 6.4.0
3136 ** @@
3137 ******************************************************************************/
3138
sliceProject(EnsPSlice slice,EnsPCoordsystem trgcs,AjPList pss)3139 static AjBool sliceProject(EnsPSlice slice,
3140 EnsPCoordsystem trgcs,
3141 AjPList pss)
3142 {
3143 ajint end = 0;
3144 ajint start = 1;
3145 ajint length = 0;
3146
3147 AjBool debug = AJFALSE;
3148 AjBool circular = AJFALSE;
3149
3150 AjPList nrmpss = NULL;
3151 AjPList mrs = NULL;
3152
3153 EnsPAssemblymapper am = NULL;
3154 EnsPAssemblymapperadaptor ama = NULL;
3155
3156 EnsPCoordsystem srccs = NULL;
3157 EnsPCoordsystem nrmcs = NULL;
3158 EnsPCoordsystem mrcs = NULL;
3159
3160 EnsPDatabaseadaptor dba = NULL;
3161
3162 EnsPProjectionsegment nrmps = NULL;
3163 EnsPProjectionsegment ps = NULL;
3164
3165 EnsPMapperresult mr = NULL;
3166
3167 const EnsPSlice nrmslice = NULL;
3168 EnsPSlice newslice = NULL;
3169
3170 /* Deliberately connect debug requests to ensSliceProject. */
3171
3172 debug = ajDebugTest("ensSliceProject");
3173
3174 if (debug)
3175 {
3176 ajDebug("sliceProject\n"
3177 " slice %p\n"
3178 " trgcs %p\n"
3179 " pss %p\n",
3180 slice,
3181 trgcs,
3182 pss);
3183
3184 ensSliceTrace(slice, 1);
3185
3186 ensCoordsystemTrace(trgcs, 1);
3187 }
3188
3189 if (!slice)
3190 return ajFalse;
3191
3192 if (!trgcs)
3193 return ajFalse;
3194
3195 if (!pss)
3196 return ajFalse;
3197
3198 if (!slice->Adaptor)
3199 {
3200 ajDebug("sliceProject requires that an Ensembl Slice Adaptor has "
3201 "been set in the Ensembl Slice.\n");
3202
3203 return ajFalse;
3204 }
3205
3206 dba = ensSliceadaptorGetDatabaseadaptor(slice->Adaptor);
3207
3208 /*
3209 ** Get the source Coordinate System, which is the
3210 ** Ensembl Coordinate System member of the
3211 ** Ensembl Sequence Region member of this Ensembl Slice.
3212 */
3213
3214 if (!slice->Seqregion)
3215 {
3216 ajDebug("sliceProject requires that an Ensembl Sequence Region "
3217 "has been set in the Ensembl Slice.\n");
3218
3219 return ajFalse;
3220 }
3221
3222 srccs = ensSeqregionGetCoordsystem(slice->Seqregion);
3223
3224 if (!srccs)
3225 {
3226 ajDebug("sliceProject requires that an Ensembl Coordinate System "
3227 "member has been set in the Ensembl Sequence Region member "
3228 "of the Ensembl Slice.\n");
3229
3230 return ajFalse;
3231 }
3232
3233 /*
3234 ** No mapping is needed if the requested Ensembl Coordinate System is the
3235 ** one this Ensembl Slice is based upon, but we do need to check if some
3236 ** of the Slice is outside of defined regions.
3237 */
3238
3239 if (ensCoordsystemMatch(srccs, trgcs))
3240 {
3241 ajListPushAppend(pss, (void *) sliceConstrain(slice));
3242
3243 return ajTrue;
3244 }
3245
3246 /*
3247 ** Decompose this Slice into its symlinked components, which allows
3248 ** handling of haplotypes (HAPs) and pseudo-autosomal region (PARs).
3249 */
3250
3251 ama = ensRegistryGetAssemblymapperadaptor(dba);
3252
3253 nrmpss = ajListNew();
3254
3255 ensSliceadaptorRetrieveNormalisedprojection(slice->Adaptor,
3256 slice,
3257 nrmpss);
3258
3259 while (ajListPop(nrmpss, (void **) &nrmps))
3260 {
3261 nrmslice = ensProjectionsegmentGetTargetSlice(nrmps);
3262
3263 nrmcs = ensSeqregionGetCoordsystem(nrmslice->Seqregion);
3264
3265 ensAssemblymapperadaptorFetchByCoordsystems(ama, nrmcs, trgcs, &am);
3266
3267 mrs = ajListNew();
3268
3269 if (am)
3270 ensAssemblymapperMapSlice(am, nrmslice, ajFalse, mrs);
3271 else
3272 {
3273 if (debug)
3274 ajDebug("sliceProject could not fetch an "
3275 "Ensembl Assembly Mapper between "
3276 "Ensembl Coordinate System objects "
3277 "'%S:%S' and '%S:%S', which implies an "
3278 "Ensembl Mapper Result of type "
3279 "ensEMapperresultTypeGap.\n",
3280 ensCoordsystemGetName(nrmcs),
3281 ensCoordsystemGetVersion(nrmcs),
3282 ensCoordsystemGetName(trgcs),
3283 ensCoordsystemGetVersion(trgcs));
3284
3285 mr = ensMapperresultNewGap(nrmslice->Start, nrmslice->End, 0);
3286
3287 ajListPushAppend(mrs, (void *) mr);
3288 }
3289
3290 ensAssemblymapperDel(&am);
3291
3292 /* Construct a projection from the mapping results and return it. */
3293
3294 while (ajListPop(mrs, (void **) &mr))
3295 {
3296 switch (ensMapperresultGetType(mr))
3297 {
3298 case ensEMapperresultTypeCoordinate:
3299
3300 /*
3301 ** Calculate the Ensembl Mapper Result length for circular
3302 ** or linear Ensembl Slice objects.
3303 */
3304
3305 if (ensMapperresultGetCoordinateStart(mr) >
3306 ensMapperresultGetCoordinateEnd(mr))
3307 length
3308 = ensSliceGetSeqregionLength(nrmslice)
3309 + ensMapperresultGetCoordinateStart(mr)
3310 + ensMapperresultGetCoordinateEnd(mr)
3311 + 1;
3312 else
3313 length
3314 = ensMapperresultGetCoordinateEnd(mr)
3315 - ensMapperresultGetCoordinateStart(mr)
3316 + 1;
3317
3318 mrcs = ensMapperresultGetCoordsystem(mr);
3319
3320 /*
3321 ** If the normalised projection just ended up mapping to
3322 ** the same Coordinate System we were already in then we
3323 ** should just return the original region. This can happen
3324 ** for example, if we were on a PAR region on Y, which
3325 ** referred to X and a projection to "toplevel" was
3326 ** requested.
3327 */
3328
3329 if (ensCoordsystemMatch(mrcs, nrmcs))
3330 {
3331 /* Trim off regions, which are not defined. */
3332
3333 ajListPushAppend(pss, (void *) sliceConstrain(slice));
3334
3335 /*
3336 ** Delete this Ensembl Mapper Result and the rest of
3337 ** the Ensembl Mapper Results including the AJAX List.
3338 */
3339
3340 ensMapperresultDel(&mr);
3341
3342 while (ajListPop(mrs, (void **) &mr))
3343 ensMapperresultDel(&mr);
3344
3345 ajListFree(&mrs);
3346
3347 /*
3348 ** Delete this normalised Projection Segment and the
3349 ** rest of the normalised Projection Segment objects
3350 ** including the AJAX List.
3351 */
3352
3353 ensProjectionsegmentDel(&nrmps);
3354
3355 while (ajListPop(nrmpss, (void **) &nrmps))
3356 ensProjectionsegmentDel(&nrmps);
3357
3358 ajListFree(&nrmpss);
3359
3360 return ajTrue;
3361 }
3362 else
3363 {
3364 /* Create a Slice in the target Coordinate System. */
3365
3366 ensSliceadaptorFetchByMapperresult(slice->Adaptor,
3367 mr,
3368 &newslice);
3369
3370 end = start + length - 1;
3371
3372 ensSliceIsCircular(newslice, &circular);
3373
3374 if ((circular == ajTrue) &&
3375 (end > ensSliceGetSeqregionLength(newslice)))
3376 end -= ensSliceGetSeqregionLength(newslice);
3377
3378 ps = ensProjectionsegmentNewIni(start, end, newslice);
3379
3380 ajListPushAppend(pss, (void *) ps);
3381
3382 ensSliceDel(&newslice);
3383 }
3384
3385 break;
3386
3387 case ensEMapperresultTypeGap:
3388
3389 /*
3390 ** Skip gaps, but calculate the Ensembl Mapper Result
3391 ** length for circular or linear Ensembl Slice objects.
3392 */
3393
3394 if (ensMapperresultGetGapStart(mr) >
3395 ensMapperresultGetGapEnd(mr))
3396 length
3397 = ensSliceGetSeqregionLength(nrmslice)
3398 + ensMapperresultGetGapStart(mr)
3399 + ensMapperresultGetGapEnd(mr)
3400 + 1;
3401 else
3402 length
3403 = ensMapperresultGetGapEnd(mr)
3404 - ensMapperresultGetGapStart(mr)
3405 +1;
3406
3407 break;
3408
3409 case ensEMapperresultTypeInDel:
3410
3411 /*
3412 ** Calculate the Ensembl Mapper Result length for circular
3413 ** or linear Ensembl Slice objects.
3414 ** NOTE: The Ensembl Mapper Result Coordinate member is
3415 ** most likely the one to be used here. The Perl API just
3416 ** specifies code for Bio::EnsEMBL::Mapper::Coordinate
3417 ** objects.
3418 */
3419
3420 if (ensMapperresultGetCoordinateStart(mr) >
3421 ensMapperresultGetCoordinateEnd(mr))
3422 length
3423 = ensSliceGetSeqregionLength(nrmslice)
3424 + ensMapperresultGetCoordinateStart(mr)
3425 + ensMapperresultGetCoordinateEnd(mr)
3426 + 1;
3427 else
3428 length
3429 = ensMapperresultGetCoordinateEnd(mr)
3430 - ensMapperresultGetCoordinateStart(mr)
3431 + 1;
3432
3433 ajWarn("sliceProject got an unexpected "
3434 "Ensembl Mapper Result of type "
3435 "ensEMapperresultTypeInDel.");
3436
3437 break;
3438
3439 default:
3440
3441 ajWarn("sliceProject got an unexpected "
3442 "Ensembl Mapper Result of type %d.",
3443 ensMapperresultGetType(mr));
3444 }
3445
3446 start += length;
3447
3448 ensMapperresultDel(&mr);
3449 }
3450
3451 ajListFree(&mrs);
3452
3453 ensProjectionsegmentDel(&nrmps);
3454 }
3455
3456 ajListFree(&nrmpss);
3457
3458 return ajTrue;
3459 }
3460
3461
3462
3463
3464 /* @func ensSliceProject ******************************************************
3465 **
3466 ** Project an Ensembl Slice onto another Ensembl Coordinate System.
3467 **
3468 ** Projecting an Ensembl Slice onto an Ensembl Coordinate System that the
3469 ** Slice is assembled from is analogous to retrieving a tiling path.
3470 ** This method may also be used to project up-wards to a higher-level
3471 ** Ensembl Coordinate System.
3472 **
3473 ** The caller is responsible for deleting the Ensembl Projection Segment
3474 ** objects before deleting the AJAX List.
3475 **
3476 ** @cc Bio::EnsEMBL::Slice::project
3477 ** @param [u] slice [EnsPSlice] Ensembl Slice
3478 ** @param [r] csname [const AjPStr] Ensembl Coordinate System name
3479 ** @param [rN] csversion [const AjPStr] Ensembl Coordinate System version
3480 ** @param [u] pss [AjPList] AJAX List of Ensembl Projection Segment objects
3481 **
3482 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
3483 **
3484 ** @release 6.2.0
3485 ** @@
3486 ******************************************************************************/
3487
ensSliceProject(EnsPSlice slice,const AjPStr csname,const AjPStr csversion,AjPList pss)3488 AjBool ensSliceProject(EnsPSlice slice,
3489 const AjPStr csname,
3490 const AjPStr csversion,
3491 AjPList pss)
3492 {
3493 AjBool circular = AJFALSE;
3494 AjBool result = AJFALSE;
3495
3496 EnsPCoordsystem cs = NULL;
3497 EnsPCoordsystemadaptor csa = NULL;
3498
3499 EnsPDatabaseadaptor dba = NULL;
3500
3501 EnsPSlice slice1 = NULL;
3502 EnsPSlice slice2 = NULL;
3503
3504 if (ajDebugTest("ensSliceProject"))
3505 {
3506 ajDebug("ensSliceProject\n"
3507 " slice %p\n"
3508 " csname '%S'\n"
3509 " cvsversion '%S'\n"
3510 " pss %p\n",
3511 slice,
3512 csname,
3513 csversion,
3514 pss);
3515
3516 ensSliceTrace(slice, 1);
3517 }
3518
3519 if (!slice)
3520 {
3521 ajDebug("ensSliceProject requires an Ensembl Slice.\n");
3522
3523 return ajFalse;
3524 }
3525
3526 if (!csname)
3527 {
3528 ajDebug("ensSliceProject requires an "
3529 "Ensembl Coordinate System name.\n");
3530
3531 return ajFalse;
3532 }
3533
3534 /*
3535 ** A Coordinate System version is not strictly required,
3536 ** since ensCoordsystemadaptorFetchByName does not require one.
3537 */
3538
3539 if (!pss)
3540 {
3541 ajDebug("ensSliceProject requires an AJAX List of "
3542 "Ensembl Projection Segment objects.\n");
3543
3544 return ajFalse;
3545 }
3546
3547 if (!slice->Adaptor)
3548 {
3549 ajDebug("ensSliceProject requires that an Ensembl Slice Adaptor has "
3550 "been set in the Ensembl Slice.\n");
3551
3552 return ajFalse;
3553 }
3554
3555 if (!slice->Seqregion)
3556 {
3557 ajDebug("ensSliceProject requires that an Ensembl Sequence Region "
3558 "has been set in the Ensembl Slice.\n");
3559
3560 return ajFalse;
3561 }
3562
3563 if (!ensSliceIsCircular(slice, &circular))
3564 return ajFalse;
3565
3566 /* Fetch the target Coordinate System. */
3567
3568 dba = ensSliceadaptorGetDatabaseadaptor(slice->Adaptor);
3569
3570 csa = ensRegistryGetCoordsystemadaptor(dba);
3571
3572 ensCoordsystemadaptorFetchByName(csa, csname, csversion, &cs);
3573
3574 if (!cs)
3575 {
3576 ajDebug("ensSliceProject cannot project to an unknown "
3577 "Ensembl Coordinate System '%S:%S'.\n", csname, csversion);
3578
3579 return ajFalse;
3580 }
3581
3582 if ((circular == ajTrue) && (slice->Start > slice->End))
3583 {
3584 slice1 = ensSliceNewCpy(slice);
3585 slice2 = ensSliceNewCpy(slice);
3586
3587 slice1->End = ensSeqregionGetLength(slice1->Seqregion);
3588 slice2->Start = 1;
3589
3590 if (!sliceProject(slice1, cs, pss))
3591 result = ajFalse;
3592
3593 if (!sliceProject(slice2, cs, pss))
3594 result = ajFalse;
3595
3596 ensSliceDel(&slice1);
3597 ensSliceDel(&slice2);
3598 }
3599 else
3600 {
3601 if (!sliceProject(slice, cs, pss))
3602 result = ajFalse;
3603 }
3604
3605 ensCoordsystemDel(&cs);
3606
3607 return result;
3608 }
3609
3610
3611
3612
3613 /* @func ensSliceProjectslice *************************************************
3614 **
3615 ** Project an Ensembl Slice onto another Ensembl Slice.
3616 **
3617 ** Needed for cases where multiple assembly mappings exist and a specific
3618 ** mapping is specified.
3619 **
3620 ** The caller is responsible for deleting the Ensembl Projection Segment
3621 ** objects before deleting the AJAX List.
3622 **
3623 ** @cc Bio::EnsEMBL::Slice::project_to_slice
3624 ** @param [u] srcslice [EnsPSlice] Ensembl Slice
3625 ** @param [u] trgslice [EnsPSlice] Target Ensembl Slice
3626 ** @param [u] pss [AjPList] AJAX List of Ensembl Projection Segment objects
3627 **
3628 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
3629 **
3630 ** @release 6.4.0
3631 ** @@
3632 ******************************************************************************/
3633
ensSliceProjectslice(EnsPSlice srcslice,EnsPSlice trgslice,AjPList pss)3634 AjBool ensSliceProjectslice(EnsPSlice srcslice,
3635 EnsPSlice trgslice,
3636 AjPList pss)
3637 {
3638 ajint start = 0;
3639 ajint length = 0;
3640
3641 ajuint last = 0U;
3642
3643 AjPList nrmpss = NULL;
3644 AjPList mrs = NULL;
3645
3646 EnsPAssemblymapper am = NULL;
3647 EnsPAssemblymapperadaptor ama = NULL;
3648
3649 EnsPCoordsystem srccs = NULL;
3650 EnsPCoordsystem trgcs = NULL;
3651 EnsPCoordsystem nrmcs = NULL;
3652
3653 EnsPDatabaseadaptor dba = NULL;
3654
3655 EnsPProjectionsegment nrmps = NULL;
3656 EnsPProjectionsegment ps = NULL;
3657
3658 EnsPMapperresult mr = NULL;
3659
3660 const EnsPSlice nrmslice = NULL;
3661 EnsPSlice newslice = NULL;
3662
3663 if (!srcslice)
3664 return ajFalse;
3665
3666 if (!trgslice)
3667 return ajFalse;
3668
3669 dba = ensSliceadaptorGetDatabaseadaptor(srcslice->Adaptor);
3670
3671 ama = ensRegistryGetAssemblymapperadaptor(dba);
3672
3673 /*
3674 ** Get the source Ensembl Coordinate System, which is the
3675 ** Ensembl Coordinate System member of the
3676 ** Ensembl Sequence Region member of this Ensembl Slice.
3677 */
3678
3679 srccs = ensSeqregionGetCoordsystem(srcslice->Seqregion);
3680
3681 if (!srccs)
3682 {
3683 ajDebug("ensSliceProjectslice requires that an "
3684 "Ensembl Coordinate System member has been set in the "
3685 "Ensembl Sequence Region member of the source "
3686 "Ensembl Slice.\n");
3687
3688 return ajFalse;
3689 }
3690
3691 /*
3692 ** Get the target Coordinate System, which is the
3693 ** Ensembl Coordinate System member of the
3694 ** Ensembl Sequence Region member of this Ensembl Slice.
3695 */
3696
3697 trgcs = ensSeqregionGetCoordsystem(trgslice->Seqregion);
3698
3699 if (!trgcs)
3700 {
3701 ajDebug("ensSliceProjectslice requires that an "
3702 "Ensembl Coordinate System member has been set in the "
3703 "Ensembl Sequence Region member of the target "
3704 "Ensembl Slice.\n");
3705
3706 return ajFalse;
3707 }
3708
3709 /*
3710 ** Decompose this Slice into its symlinked components, which allows
3711 ** handling of haplotypes (HAPs) and pseudo-autosomal region (PARs).
3712 */
3713
3714 nrmpss = ajListNew();
3715
3716 ensSliceadaptorRetrieveNormalisedprojection(srcslice->Adaptor,
3717 srcslice,
3718 nrmpss);
3719
3720 while (ajListPop(nrmpss, (void **) &nrmps))
3721 {
3722 nrmslice = ensProjectionsegmentGetTargetSlice(nrmps);
3723
3724 nrmcs = ensSeqregionGetCoordsystem(nrmslice->Seqregion);
3725
3726 ensAssemblymapperadaptorFetchByCoordsystems(ama, nrmcs, trgcs, &am);
3727
3728 mrs = ajListNew();
3729
3730 if (am)
3731 ensAssemblymapperMapToSlice(am, nrmslice, trgslice, ajFalse, mrs);
3732 else
3733 {
3734 ajDebug("ensSliceProjectslice got no Ensembl Assemblymapper -> "
3735 "Ensembl Mapper Result of type gap\n");
3736
3737 mr = ensMapperresultNewGap(nrmslice->Start, nrmslice->End, 0);
3738
3739 ajListPushAppend(mrs, (void *) mr);
3740 }
3741
3742 ensAssemblymapperDel(&am);
3743
3744 /* Construct a projection from the mapping results and return it. */
3745
3746 while (ajListPop(mrs, (void **) &mr))
3747 {
3748 length = ensMapperresultCalculateLengthResult(mr);
3749
3750 /*
3751 ** Reset the start coordinate, if a new target Sequence Region
3752 ** was encountered.
3753 */
3754
3755 if (last != ensMapperresultGetRank(mr))
3756 start = 1;
3757
3758 last = ensMapperresultGetRank(mr);
3759
3760 /* Skip gaps. */
3761
3762 if (ensMapperresultGetType(mr) == ensEMapperresultTypeCoordinate)
3763 {
3764 /* For multiple mappings only get the correct one. */
3765
3766 if (ensMapperresultGetObjectidentifier(mr) ==
3767 ensSliceGetSeqregionIdentifier(trgslice))
3768 {
3769 /* Create a Slice in the target Coordinate System. */
3770
3771 ensSliceadaptorFetchByMapperresult(srcslice->Adaptor,
3772 mr,
3773 &newslice);
3774
3775 ps = ensProjectionsegmentNewIni(start,
3776 start + length - 1,
3777 newslice);
3778
3779 ajListPushAppend(pss, (void *) ps);
3780
3781 ensSliceDel(&newslice);
3782 }
3783 }
3784
3785 start += length;
3786
3787 ensMapperresultDel(&mr);
3788 }
3789
3790 ajListFree(&mrs);
3791
3792 ensProjectionsegmentDel(&nrmps);
3793 }
3794
3795 ajListFree(&nrmpss);
3796
3797 ensCoordsystemDel(&trgcs);
3798
3799 /*
3800 ** Delete the Ensembl Assembly Mapper Adaptor cache,
3801 ** as the next mapping may include a different set.
3802 */
3803
3804 ensAssemblymapperadaptorClear(ama);
3805
3806 return ajTrue;
3807 }
3808
3809
3810
3811
3812 /* @datasection [EnsESliceType] Ensembl Slice Type ****************************
3813 **
3814 ** @nam2rule Slice Functions for manipulating
3815 ** Ensembl Slice objects
3816 ** @nam3rule SliceType Functions for manipulating
3817 ** Ensembl Slice Type enumerations
3818 **
3819 ******************************************************************************/
3820
3821
3822
3823
3824 /* @section Misc **************************************************************
3825 **
3826 ** Functions for returning an Ensembl Slice Type enumeration.
3827 **
3828 ** @fdata [EnsESliceType]
3829 **
3830 ** @nam4rule From Ensembl Slice Type query
3831 ** @nam5rule Seqregion Ensembl Sequence Region
3832 ** @nam5rule Str AJAX String object query
3833 **
3834 ** @argrule Seqregion sr [EnsPSeqregion] Ensembl Sequence Region
3835 ** @argrule Str type [const AjPStr] Type string
3836 **
3837 ** @valrule * [EnsESliceType] Ensembl Slice Type enumeration or
3838 ** ensESliceTypeNULL
3839 **
3840 ** @fcategory misc
3841 ******************************************************************************/
3842
3843
3844
3845
3846 /* @func ensSliceTypeFromSeqregion ********************************************
3847 **
3848 ** Convert an Ensembl Sequence Region into an Ensembl Slice Type enumeration.
3849 **
3850 ** @param [u] sr [EnsPSeqregion] Ensembl Sequence Region
3851 **
3852 ** @return [EnsESliceType] Ensembl Slice Type enumeration or
3853 ** ensESliceTypeNULL
3854 **
3855 ** @release 6.4.0
3856 ** @@
3857 ******************************************************************************/
3858
ensSliceTypeFromSeqregion(EnsPSeqregion sr)3859 EnsESliceType ensSliceTypeFromSeqregion(EnsPSeqregion sr)
3860 {
3861 AjBool result = AJFALSE;
3862
3863 if (!sr)
3864 return ensESliceTypeNULL;
3865
3866 ensSeqregionIsLocusreferencegenomic(sr, &result);
3867
3868 if (result == ajTrue)
3869 return ensESliceTypeLrg;
3870 else
3871 return ensESliceTypeLinear;
3872 }
3873
3874
3875
3876
3877 /* @func ensSliceTypeFromStr **************************************************
3878 **
3879 ** Convert an AJAX String into an Ensembl Slice Type enumeration.
3880 **
3881 ** @param [r] type [const AjPStr] Type string
3882 **
3883 ** @return [EnsESliceType] Ensembl Slice Type enumeration or
3884 ** ensESliceTypeNULL
3885 **
3886 ** @release 6.4.0
3887 ** @@
3888 ******************************************************************************/
3889
ensSliceTypeFromStr(const AjPStr type)3890 EnsESliceType ensSliceTypeFromStr(const AjPStr type)
3891 {
3892 register EnsESliceType i = ensESliceTypeNULL;
3893
3894 EnsESliceType ste = ensESliceTypeNULL;
3895
3896 for (i = ensESliceTypeNULL;
3897 sliceKType[i];
3898 i++)
3899 if (ajStrMatchC(type, sliceKType[i]))
3900 ste = i;
3901
3902 if (!ste)
3903 ajDebug("ensSliceTypeFromStr encountered "
3904 "unexpected string '%S'.\n", type);
3905
3906 return ste;
3907 }
3908
3909
3910
3911
3912 /* @section Cast **************************************************************
3913 **
3914 ** Functions for returning attributes of an
3915 ** Ensembl Slice Type enumeration.
3916 **
3917 ** @fdata [EnsESliceType]
3918 **
3919 ** @nam4rule To Return Ensembl Slice Type enumeration
3920 ** @nam5rule Char Return C character string value
3921 **
3922 ** @argrule To ste [EnsESliceType] Ensembl Slice Type enumeration
3923 **
3924 ** @valrule Char [const char*] Ensembl Slice Type C-type (char *) string
3925 **
3926 ** @fcategory cast
3927 ******************************************************************************/
3928
3929
3930
3931
3932 /* @func ensSliceTypeToChar ***************************************************
3933 **
3934 ** Convert an Ensembl Slice Type enumeration into a C-type (char *) string.
3935 **
3936 ** @param [u] ste [EnsESliceType] Ensembl Slice Type enumeration
3937 **
3938 ** @return [const char*] Ensembl Slice Type C-type (char *) string
3939 **
3940 ** @release 6.4.0
3941 ** @@
3942 ******************************************************************************/
3943
const char* ensSliceTypeToChar(EnsESliceType ste)
{
    register EnsESliceType idx = ensESliceTypeNULL;

    /*
    ** Step through the sliceKType table until either the requested
    ** enumeration value or the terminating null entry is reached.
    */

    while (sliceKType[idx] && (idx < ste))
        idx++;

    /* Stopping on the null entry means the value lies beyond the table. */

    if (!sliceKType[idx])
        ajDebug("ensSliceTypeToChar "
                "encountered an out of boundary error on "
                "Ensembl Slice Type "
                "enumeration %d.\n",
                ste);

    return sliceKType[idx];
}
3961
3962
3963
3964
3965 /* @datasection [AjPList] AJAX List *******************************************
3966 **
3967 ** @nam2rule List Functions for manipulating AJAX List objects
3968 **
3969 ******************************************************************************/
3970
3971
3972
3973
3974 /* @section list **************************************************************
3975 **
3976 ** Functions for manipulating AJAX List objects.
3977 **
3978 ** @fdata [AjPList]
3979 **
3980 ** @nam3rule Slice Functions for manipulating AJAX List objects of
3981 ** Ensembl Slice objects
3982 ** @nam4rule Remove Remove functions
3983 ** @nam5rule Duplications Remove duplications
3984 ** @nam4rule Sort Sort functions
3985 ** @nam5rule Identifier Sort by Ensembl Sequence Region identifier member
3986 ** @nam5rule Name Sort by Ensembl Sequence Region name member
3987 ** @nam6rule Ascending Sort in ascending order
3988 ** @nam6rule Descending Sort in descending order
3989 **
3990 ** @argrule * slices [AjPList] AJAX List of Ensembl Slice objects
3991 **
3992 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
3993 **
3994 ** @fcategory misc
3995 ******************************************************************************/
3996
3997
3998
3999
4000 /* @func ensListSliceRemoveDuplications ***************************************
4001 **
4002 ** Process an AJAX List of Ensembl Slice objects and remove duplicate regions.
4003 **
4004 ** This function checks each Ensembl Slice object on the AJAX List for
4005 ** duplicate regions and if positive, replaces the Ensembl Slice object by one
4006 ** or more Ensembl Slice objects representing the unique sequence.
4007 **
4008 ** Ensembl Slice objects of complete mammalian Y chromosomes contain duplicated
4009 ** regions where the pseudo autosomal regions (PARs) on both chromosome ends
4010 ** represent unique sequence, while the bulk of the chromosome is copied
4011 ** (duplicated) from the X chromosome. Therefore, this function would remove an
4012 ** Ensembl Slice spanning the Y chromosome and replace it with two Ensembl
4013 ** Slice objects representing just the PAR regions.
4014 **
4015 ** The caller is responsible for deleting the Ensembl Slice objects before
4016 ** deleting the AJAX List.
4017 **
4018 ** @param [u] slices [AjPList] AJAX List of Ensembl Slice objects
4019 **
4020 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
4021 **
4022 ** @release 6.4.0
4023 ** @@
4024 ******************************************************************************/
4025
/*
** Walk the AJAX List of Ensembl Slice objects. For every Slice whose
** Sequence Region identifier yields at least one Ensembl Assembly Exception,
** the Slice is removed from the List and replaced in place by new references
** to those target Slice objects of its normalised projection that lie on the
** same Ensembl Sequence Region. Returns ajFalse only if no List was passed.
*/
AjBool ensListSliceRemoveDuplications(AjPList slices)
{
    AjIList iter = NULL;

    /* Scratch lists, reused for every Slice and emptied after each use. */
    AjPList aes = NULL;
    AjPList pss = NULL;

    EnsPAssemblyexception ae         = NULL;
    EnsPAssemblyexceptionadaptor aea = NULL;

    EnsPDatabaseadaptor dba = NULL;

    EnsPProjectionsegment ps = NULL;

    EnsPSlice psslice = NULL;
    EnsPSlice srslice = NULL;

    if (!slices)
        return ajFalse;

    aes = ajListNew();
    pss = ajListNew();

    iter = ajListIterNew(slices);

    while (!ajListIterDone(iter))
    {
        srslice = (EnsPSlice) ajListIterGet(iter);

        /* The adaptors are looked up per Slice, via the Slice's own Adaptor. */

        dba = ensSliceadaptorGetDatabaseadaptor(srslice->Adaptor);

        aea = ensRegistryGetAssemblyexceptionadaptor(dba);

        ensAssemblyexceptionadaptorFetchAllbyReferenceSeqregion(
            aea,
            ensSliceGetSeqregionIdentifier(srslice),
            aes);

        /* Only Slice objects with Assembly Exceptions are rewritten. */

        if (ajListGetLength(aes))
        {
            /*
            ** This Ensembl Slice may have duplicate regions, so de-reference
            ** symlinked assembly regions and remove any Ensembl Slice objects,
            ** which have a symlink, because these are duplicates.
            ** Replace them with any symlinked Ensembl Slice objects based on
            ** the same Ensembl Sequence Region and Ensembl Coordinate System
            ** as the original Ensembl Slice.
            */

            /* Take the current Slice off the List; it is deleted below. */

            ajListIterRemove(iter);

            ensSliceadaptorRetrieveNormalisedprojection(srslice->Adaptor,
                                                        srslice,
                                                        pss);

            /* Popping drains pss, so it is empty again for the next Slice. */

            while (ajListPop(pss, (void **) &ps))
            {
                psslice = ensProjectionsegmentGetTargetSlice(ps);

                /* Keep only segments on the original Sequence Region. */

                if (ensSeqregionMatch(psslice->Seqregion,
                                      srslice->Seqregion))
                {
                    /*
                    ** The List receives its own reference, because the
                    ** Projection Segment is deleted right afterwards.
                    */

                    ajListIterInsert(iter, (void *) ensSliceNewRef(psslice));

                    /* Advance the AJAX List Iterator after the insert. */

                    (void) ajListIterGet(iter);
                }

                ensProjectionsegmentDel(&ps);
            }

            /* The removed Slice is no longer on the List; release it. */

            ensSliceDel(&srslice);
        }

        /*
        ** Empty the Assembly Exception list so that the length test in the
        ** next iteration only reflects the next Slice.
        */

        while (ajListPop(aes, (void **) &ae))
            ensAssemblyexceptionDel(&ae);
    }

    ajListIterDel(&iter);

    ajListFree(&aes);
    ajListFree(&pss);

    return ajTrue;
}
4112
4113
4114
4115
4116 /* @funcstatic listSliceCompareIdentifierAscending ****************************
4117 **
4118 ** AJAX List of Ensembl Slice objects comparison function to sort by
4119 ** Ensembl Sequence Region identifier in ascending order.
4120 **
4121 ** @param [r] item1 [const void*] Ensembl Slice address 1
4122 ** @param [r] item2 [const void*] Ensembl Slice address 2
4123 ** @see ajListSort
4124 **
4125 ** @return [int] The comparison function returns an integer less than,
4126 ** equal to, or greater than zero if the first argument is
4127 ** considered to be respectively less than, equal to, or
4128 ** greater than the second.
4129 **
4130 ** @release 6.4.0
4131 ** @@
4132 ******************************************************************************/
4133
listSliceCompareIdentifierAscending(const void * item1,const void * item2)4134 static int listSliceCompareIdentifierAscending(
4135 const void *item1,
4136 const void *item2)
4137 {
4138 ajuint srid1 = 0U;
4139 ajuint srid2 = 0U;
4140
4141 EnsPSlice slice1 = *(EnsOSlice *const *) item1;
4142 EnsPSlice slice2 = *(EnsOSlice *const *) item2;
4143
4144 #if defined(AJ_DEBUG) && AJ_DEBUG >= 2
4145 if (ajDebugTest("listSliceCompareIdentifierAscending"))
4146 ajDebug("listSliceCompareIdentifierAscending\n"
4147 " slice1 %p\n"
4148 " slice2 %p\n",
4149 slice1,
4150 slice2);
4151 #endif /* defined(AJ_DEBUG) && AJ_DEBUG >= 2 */
4152
4153 /* Sort empty values towards the end of the AJAX List. */
4154
4155 if (slice1 && (!slice2))
4156 return -1;
4157
4158 if ((!slice1) && (!slice2))
4159 return 0;
4160
4161 if ((!slice1) && slice2)
4162 return +1;
4163
4164 srid1 = ensSliceGetSeqregionIdentifier(slice1);
4165 srid2 = ensSliceGetSeqregionIdentifier(slice2);
4166
4167 if (srid1 && (!srid2))
4168 return -1;
4169
4170 if ((!srid1) && (!srid2))
4171 return 0;
4172
4173 if ((!srid1) && srid2)
4174 return +1;
4175
4176 if (srid1 < srid2)
4177 return -1;
4178
4179 if (srid1 > srid2)
4180 return +1;
4181
4182 return 0;
4183 }
4184
4185
4186
4187
4188 /* @func ensListSliceSortIdentifierAscending **********************************
4189 **
4190 ** Sort an AJAX List of Ensembl Slice objects by their
4191 ** Ensembl Sequence Region identifier in ascending order.
4192 **
4193 ** @param [u] slices [AjPList] AJAX List of Ensembl Slice objects
4194 **
4195 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
4196 **
4197 ** @release 6.4.0
4198 ** @@
4199 ******************************************************************************/
4200
ensListSliceSortIdentifierAscending(AjPList slices)4201 AjBool ensListSliceSortIdentifierAscending(AjPList slices)
4202 {
4203 if (!slices)
4204 return ajFalse;
4205
4206 ajListSort(slices, &listSliceCompareIdentifierAscending);
4207
4208 return ajTrue;
4209 }
4210
4211
4212
4213
4214 /* @funcstatic listSliceCompareIdentifierDescending ***************************
4215 **
4216 ** AJAX List of Ensembl Slice objects comparison function to sort by
4217 ** Ensembl Sequence Region identifier in descending order.
4218 **
4219 ** @param [r] item1 [const void*] Ensembl Slice address 1
4220 ** @param [r] item2 [const void*] Ensembl Slice address 2
4221 ** @see ajListSort
4222 **
4223 ** @return [int] The comparison function returns an integer less than,
4224 ** equal to, or greater than zero if the first argument is
4225 ** considered to be respectively less than, equal to, or
4226 ** greater than the second.
4227 **
4228 ** @release 6.4.0
4229 ** @@
4230 ******************************************************************************/
4231
listSliceCompareIdentifierDescending(const void * item1,const void * item2)4232 static int listSliceCompareIdentifierDescending(
4233 const void *item1,
4234 const void *item2)
4235 {
4236 ajuint srid1 = 0U;
4237 ajuint srid2 = 0U;
4238
4239 EnsPSlice slice1 = *(EnsOSlice *const *) item1;
4240 EnsPSlice slice2 = *(EnsOSlice *const *) item2;
4241
4242 #if defined(AJ_DEBUG) && AJ_DEBUG >= 2
4243 if (ajDebugTest("listSliceCompareIdentifierDescending"))
4244 ajDebug("listSliceCompareIdentifierDescending\n"
4245 " slice1 %p\n"
4246 " slice2 %p\n",
4247 slice1,
4248 slice2);
4249 #endif /* defined(AJ_DEBUG) && AJ_DEBUG >= 2 */
4250
4251 /* Sort empty values towards the end of the AJAX List. */
4252
4253 if (slice1 && (!slice2))
4254 return -1;
4255
4256 if ((!slice1) && (!slice2))
4257 return 0;
4258
4259 if ((!slice1) && slice2)
4260 return +1;
4261
4262 srid1 = ensSliceGetSeqregionIdentifier(slice1);
4263 srid2 = ensSliceGetSeqregionIdentifier(slice2);
4264
4265 if (srid1 && (!srid2))
4266 return -1;
4267
4268 if ((!srid1) && (!srid2))
4269 return 0;
4270
4271 if ((!srid1) && srid2)
4272 return +1;
4273
4274 if (srid1 > srid2)
4275 return -1;
4276
4277 if (srid1 < srid2)
4278 return +1;
4279
4280 return 0;
4281 }
4282
4283
4284
4285
4286 /* @func ensListSliceSortIdentifierDescending *********************************
4287 **
4288 ** Sort an AJAX List of Ensembl Slice objects by their
4289 ** Ensembl Sequence Region identifier in descending order.
4290 **
4291 ** @param [u] slices [AjPList] AJAX List of Ensembl Slice objects
4292 **
4293 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
4294 **
4295 ** @release 6.4.0
4296 ** @@
4297 ******************************************************************************/
4298
ensListSliceSortIdentifierDescending(AjPList slices)4299 AjBool ensListSliceSortIdentifierDescending(AjPList slices)
4300 {
4301 if (!slices)
4302 return ajFalse;
4303
4304 ajListSort(slices, &listSliceCompareIdentifierDescending);
4305
4306 return ajTrue;
4307 }
4308
4309
4310
4311
4312 /* @funcstatic listSliceCompareNameAscending **********************************
4313 **
4314 ** AJAX List of Ensembl Slice objects comparison function to sort by
4315 ** Ensembl Sequence Region name in ascending order.
4316 **
4317 ** @param [r] item1 [const void*] Ensembl Slice address 1
4318 ** @param [r] item2 [const void*] Ensembl Slice address 2
4319 ** @see ajListSort
4320 **
4321 ** @return [int] The comparison function returns an integer less than,
4322 ** equal to, or greater than zero if the first argument is
4323 ** considered to be respectively less than, equal to, or
4324 ** greater than the second.
4325 **
4326 ** @release 6.4.0
4327 ** @@
4328 ******************************************************************************/
4329
listSliceCompareNameAscending(const void * item1,const void * item2)4330 static int listSliceCompareNameAscending(
4331 const void *item1,
4332 const void *item2)
4333 {
4334 const AjPStr srname1 = NULL;
4335 const AjPStr srname2 = NULL;
4336
4337 EnsPSlice slice1 = *(EnsOSlice *const *) item1;
4338 EnsPSlice slice2 = *(EnsOSlice *const *) item2;
4339
4340 #if defined(AJ_DEBUG) && AJ_DEBUG >= 2
4341 if (ajDebugTest("listSliceCompareNameAscending"))
4342 ajDebug("listSliceCompareNameAscending\n"
4343 " slice1 %p\n"
4344 " slice2 %p\n",
4345 slice1,
4346 slice2);
4347 #endif /* defined(AJ_DEBUG) && AJ_DEBUG >= 2 */
4348
4349 /* Sort empty values towards the end of the AJAX List. */
4350
4351 if (slice1 && (!slice2))
4352 return -1;
4353
4354 if ((!slice1) && (!slice2))
4355 return 0;
4356
4357 if ((!slice1) && slice2)
4358 return +1;
4359
4360 srname1 = ensSliceGetSeqregionName(slice1);
4361 srname2 = ensSliceGetSeqregionName(slice2);
4362
4363 if (srname1 && (!srname2))
4364 return -1;
4365
4366 if ((!srname1) && (!srname2))
4367 return 0;
4368
4369 if ((!srname1) && srname2)
4370 return +1;
4371
4372 return ajStrCmpS(srname1, srname2);
4373 }
4374
4375
4376
4377
4378 /* @func ensListSliceSortNameAscending ****************************************
4379 **
4380 ** Sort an AJAX List of Ensembl Slice objects by their
4381 ** Ensembl Sequence Region name in ascending order.
4382 **
4383 ** @param [u] slices [AjPList] AJAX List of Ensembl Slice objects
4384 **
4385 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
4386 **
4387 ** @release 6.4.0
4388 ** @@
4389 ******************************************************************************/
4390
ensListSliceSortNameAscending(AjPList slices)4391 AjBool ensListSliceSortNameAscending(AjPList slices)
4392 {
4393 if (!slices)
4394 return ajFalse;
4395
4396 ajListSort(slices, &listSliceCompareNameAscending);
4397
4398 return ajTrue;
4399 }
4400
4401
4402
4403
4404 /* @funcstatic listSliceCompareNameDescending *********************************
4405 **
4406 ** AJAX List of Ensembl Slice objects comparison function to sort by
4407 ** Ensembl Sequence Region name in descending order.
4408 **
4409 ** @param [r] item1 [const void*] Ensembl Slice address 1
4410 ** @param [r] item2 [const void*] Ensembl Slice address 2
4411 ** @see ajListSort
4412 **
4413 ** @return [int] The comparison function returns an integer less than,
4414 ** equal to, or greater than zero if the first argument is
4415 ** considered to be respectively less than, equal to, or
4416 ** greater than the second.
4417 **
4418 ** @release 6.4.0
4419 ** @@
4420 ******************************************************************************/
4421
listSliceCompareNameDescending(const void * item1,const void * item2)4422 static int listSliceCompareNameDescending(
4423 const void *item1,
4424 const void *item2)
4425 {
4426 const AjPStr srname1 = NULL;
4427 const AjPStr srname2 = NULL;
4428
4429 EnsPSlice slice1 = *(EnsOSlice *const *) item1;
4430 EnsPSlice slice2 = *(EnsOSlice *const *) item2;
4431
4432 #if defined(AJ_DEBUG) && AJ_DEBUG >= 2
4433 if (ajDebugTest("listSliceCompareNameDescending"))
4434 ajDebug("listSliceCompareNameDescending\n"
4435 " slice1 %p\n"
4436 " slice2 %p\n",
4437 slice1,
4438 slice2);
4439 #endif /* defined(AJ_DEBUG) && AJ_DEBUG >= 2 */
4440
4441 /* Sort empty values towards the end of the AJAX List. */
4442
4443 if (slice1 && (!slice2))
4444 return -1;
4445
4446 if ((!slice1) && (!slice2))
4447 return 0;
4448
4449 if ((!slice1) && slice2)
4450 return +1;
4451
4452 srname1 = ensSliceGetSeqregionName(slice1);
4453 srname2 = ensSliceGetSeqregionName(slice2);
4454
4455 if (srname1 && (!srname2))
4456 return -1;
4457
4458 if ((!srname1) && (!srname2))
4459 return 0;
4460
4461 if ((!srname1) && srname2)
4462 return +1;
4463
4464 return -1 * ajStrCmpS(srname1, srname2);
4465 }
4466
4467
4468
4469
4470 /* @func ensListSliceSortNameDescending ***************************************
4471 **
4472 ** Sort an AJAX List of Ensembl Slice objects by their
4473 ** Ensembl Sequence Region name in descending order.
4474 **
4475 ** @param [u] slices [AjPList] AJAX List of Ensembl Slice objects
4476 **
4477 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
4478 **
4479 ** @release 6.4.0
4480 ** @@
4481 ******************************************************************************/
4482
ensListSliceSortNameDescending(AjPList slices)4483 AjBool ensListSliceSortNameDescending(AjPList slices)
4484 {
4485 if (!slices)
4486 return ajFalse;
4487
4488 ajListSort(slices, &listSliceCompareNameDescending);
4489
4490 return ajTrue;
4491 }
4492
4493
4494
4495
4496 /* @datasection [EnsPSliceadaptor] Ensembl Slice Adaptor **********************
4497 **
4498 ** @nam2rule Sliceadaptor Functions for manipulating
4499 ** Ensembl Slice Adaptor objects
4500 **
4501 ** @cc Bio::EnsEMBL::DBSQL::SliceAdaptor
4502 ** @cc CVS Revision: 1.134
4503 ** @cc CVS Tag: branch-ensembl-68
4504 **
4505 ******************************************************************************/
4506
4507
4508
4509
4510 /* @funcstatic sliceadaptorCacheDelete ****************************************
4511 **
4512 ** Wrapper function to delete an Ensembl Slice from an Ensembl Cache.
4513 **
4514 ** @param [d] Pvalue [void**] Ensembl Slice address
4515 ** @see ensCacheNew
4516 **
4517 ** @return [void]
4518 **
4519 ** @release 6.3.0
4520 ** @@
4521 ** When deleting from the Cache, this function also removes and deletes the
4522 ** Ensembl Slice from the name cache, which is based on a
4523 ** conventional AJAX Table.
4524 ******************************************************************************/
4525
sliceadaptorCacheDelete(void ** Pvalue)4526 static void sliceadaptorCacheDelete(void **Pvalue)
4527 {
4528 ajint start = 0;
4529 ajint end = 0;
4530 ajint strand = 0;
4531
4532 AjPStr key = NULL;
4533
4534 EnsPCoordsystem cs = NULL;
4535
4536 EnsPSlice oldslice = NULL;
4537 EnsPSlice newslice = NULL;
4538
4539 if (!Pvalue)
4540 return;
4541
4542 if (!*Pvalue)
4543 return;
4544
4545 /*
4546 ** Synchronise the deletion of this Sequence Region from the
4547 ** identifier cache, which is based on an Ensembl (LRU) Cache,
4548 ** with the name cache, based on a conventional AJAX Table,
4549 ** both in the Sequence Adaptor.
4550 */
4551
4552 newslice = *((EnsPSlice *) Pvalue);
4553
4554 if (newslice->Adaptor && newslice->Adaptor->CacheByName)
4555 {
4556 cs = ensSeqregionGetCoordsystem(newslice->Seqregion);
4557
4558 start = newslice->Start;
4559 end = newslice->End;
4560 strand = newslice->Strand;
4561
4562 if ((start == 1) &&
4563 (end == ensSeqregionGetLength(newslice->Seqregion)) &&
4564 (strand == 1))
4565 {
4566 start = 0;
4567 end = 0;
4568 strand = 0;
4569 }
4570
4571 /* Remove from the name cache. */
4572
4573 key = ajFmtStr("%S:%S:%S:%d:%d:%d",
4574 ensCoordsystemGetName(cs),
4575 ensCoordsystemGetVersion(cs),
4576 ensSeqregionGetName(newslice->Seqregion),
4577 start, end, strand);
4578
4579 oldslice = (EnsPSlice) ajTableRemove(
4580 newslice->Adaptor->CacheByName,
4581 (const void *) key);
4582
4583 ensSliceDel(&oldslice);
4584
4585 ajStrDel(&key);
4586 }
4587
4588 ensSliceDel((EnsPSlice *) Pvalue);
4589
4590 return;
4591 }
4592
4593
4594
4595
4596 /* @section constructors ******************************************************
4597 **
4598 ** All constructors return a new Ensembl Slice Adaptor by pointer.
4599 ** It is the responsibility of the user to first destroy any previous
4600 ** Slice Adaptor. The target pointer does not need to be initialised to
4601 ** NULL, but it is good programming practice to do so anyway.
4602 **
4603 ** @fdata [EnsPSliceadaptor]
4604 **
4605 ** @nam3rule New Constructor
4606 **
4607 ** @argrule New dba [EnsPDatabaseadaptor] Ensembl Database Adaptor
4608 **
4609 ** @valrule * [EnsPSliceadaptor] Ensembl Slice Adaptor or NULL
4610 **
4611 ** @fcategory new
4612 ******************************************************************************/
4613
4614
4615
4616
4617 /* @func ensSliceadaptorNew ***************************************************
4618 **
4619 ** Default constructor for an Ensembl Slice Adaptor.
4620 **
4621 ** Ensembl Object Adaptors are singleton objects in the sense that a single
4622 ** instance of an Ensembl Object Adaptor connected to a particular database is
4623 ** sufficient to instantiate any number of Ensembl Objects from the database.
4624 ** Each Ensembl Object will have a weak reference to the Object Adaptor that
4625 ** instantiated it. Therefore, Ensembl Object Adaptors should not be
4626 ** instantiated directly, but rather obtained from the Ensembl Registry,
4627 ** which will in turn call this function if necessary.
4628 **
4629 ** @see ensRegistryGetDatabaseadaptor
4630 ** @see ensRegistryGetSliceadaptor
4631 **
4632 ** @param [u] dba [EnsPDatabaseadaptor] Ensembl Database Adaptor
4633 **
4634 ** @return [EnsPSliceadaptor] Ensembl Slice Adaptor or NULL
4635 **
4636 ** @release 6.2.0
4637 ** @@
4638 ******************************************************************************/
4639
ensSliceadaptorNew(EnsPDatabaseadaptor dba)4640 EnsPSliceadaptor ensSliceadaptorNew(
4641 EnsPDatabaseadaptor dba)
4642 {
4643 EnsPSliceadaptor sla = NULL;
4644
4645 if (!dba)
4646 return NULL;
4647
4648 AJNEW0(sla);
4649
4650 sla->Adaptor = dba;
4651
4652 sla->CacheByIdentifier = ensCacheNew(
4653 ensECacheTypeAlphaNumeric,
4654 sliceadaptorKCacheMaxBytes,
4655 sliceadaptorKCacheMaxCount,
4656 sliceadaptorKCacheMaxSize,
4657 (void *(*)(void *)) &ensSliceNewRef,
4658 &sliceadaptorCacheDelete,
4659 (size_t (*)(const void *)) &ensSliceCalculateMemsize,
4660 (void *(*)(const void *key)) NULL,
4661 (AjBool (*)(const void *value)) NULL,
4662 ajFalse,
4663 "Slice");
4664
4665 sla->CacheByName = ajTablestrNew(0U);
4666
4667 ajTableSetDestroyvalue(sla->CacheByName, (void (*)(void **)) &ensSliceDel);
4668
4669 return sla;
4670 }
4671
4672
4673
4674
4675 /* @section destructors *******************************************************
4676 **
4677 ** Destruction destroys all internal data structures and frees the memory
4678 ** allocated for an Ensembl Slice Adaptor object.
4679 **
4680 ** @fdata [EnsPSliceadaptor]
4681 **
4682 ** @nam3rule Del Destroy (free) an Ensembl Slice Adaptor
4683 **
4684 ** @argrule * Psla [EnsPSliceadaptor*] Ensembl Slice Adaptor address
4685 **
4686 ** @valrule * [void]
4687 **
4688 ** @fcategory delete
4689 ******************************************************************************/
4690
4691
4692
4693
4694 /* @func ensSliceadaptorDel ***************************************************
4695 **
4696 ** Default destructor for an Ensembl Slice Adaptor.
4697 **
4698 ** This function also clears the internal Ensembl Slice caches.
4699 **
4700 ** Ensembl Object Adaptors are singleton objects that are registered in the
4701 ** Ensembl Registry and weakly referenced by Ensembl Objects that have been
4702 ** instantiated by it. Therefore, Ensembl Object Adaptors should never be
4703 ** destroyed directly. Upon exit, the Ensembl Registry will call this function
4704 ** if required.
4705 **
4706 ** @param [d] Psla [EnsPSliceadaptor*] Ensembl Slice Adaptor address
4707 **
4708 ** @return [void]
4709 **
4710 ** @release 6.2.0
4711 ** @@
4712 ******************************************************************************/
4713
void ensSliceadaptorDel(EnsPSliceadaptor *Psla)
{
    EnsPSliceadaptor pthis = NULL;

    if (!Psla)
        return;

#if defined(AJ_DEBUG) && AJ_DEBUG >= 1
    if (ajDebugTest("ensSliceadaptorDel"))
        ajDebug("ensSliceadaptorDel\n"
                " *Psla %p\n",
                *Psla);
#endif /* defined(AJ_DEBUG) && AJ_DEBUG >= 1 */

    pthis = *Psla;

    if (!pthis)
        return;

    /*
    ** Delete the identifier cache first. It was created with
    ** sliceadaptorCacheDelete as its delete function, which also removes
    ** the corresponding entries from the name cache.
    */

    ensCacheDel(&pthis->CacheByIdentifier);

    /* Delete the AJAX Table backing the name cache. */

    ajTableDel(&pthis->CacheByName);

    /* Free the Ensembl Slice Adaptor itself via the caller's address. */

    ajMemFree((void **) Psla);

    return;
}
4745
4746
4747
4748
4749 /* @section member retrieval **************************************************
4750 **
4751 ** Functions for returning members of an Ensembl Slice Adaptor object.
4752 **
4753 ** @fdata [EnsPSliceadaptor]
4754 **
4755 ** @nam3rule Get Return Ensembl Slice Adaptor attribute(s)
4756 ** @nam4rule Databaseadaptor Return the Ensembl Database Adaptor
4757 **
4758 ** @argrule * sla [EnsPSliceadaptor] Ensembl Slice Adaptor
4759 **
4760 ** @valrule Databaseadaptor [EnsPDatabaseadaptor] Ensembl Database Adaptor
4761 ** or NULL
4762 **
4763 ** @fcategory use
4764 ******************************************************************************/
4765
4766
4767
4768
4769 /* @func ensSliceadaptorGetDatabaseadaptor ************************************
4770 **
4771 ** Get the Ensembl Database Adaptor member of an Ensembl Slice Adaptor.
4772 **
4773 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
4774 **
4775 ** @return [EnsPDatabaseadaptor] Ensembl Database Adaptor
4776 **
4777 ** @release 6.2.0
4778 ** @@
4779 ******************************************************************************/
4780
ensSliceadaptorGetDatabaseadaptor(EnsPSliceadaptor sla)4781 EnsPDatabaseadaptor ensSliceadaptorGetDatabaseadaptor(
4782 EnsPSliceadaptor sla)
4783 {
4784 return (sla) ? sla->Adaptor : NULL;
4785 }
4786
4787
4788
4789
4790 /* @section member retrieval **************************************************
4791 **
4792 ** Functions for manipulating an Ensembl Slice Adaptor cache.
4793 **
4794 ** @fdata [EnsPSliceadaptor]
4795 **
4796 ** @nam3rule Cache Manipulate an Ensembl Slice Adaptor cache
4797 ** @nam4rule Insert Insert an Ensembl Slice
4798 **
4799 ** @argrule * sla [EnsPSliceadaptor] Ensembl Slice Adaptor
4800 ** @argrule Insert Pslice [EnsPSlice*] Ensembl Slice
4801 ** address
4802 **
4803 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
4804 **
4805 ** @fcategory use
4806 ******************************************************************************/
4807
4808
4809
4810
4811 /* @func ensSliceadaptorCacheInsert *******************************************
4812 **
4813 ** Insert an Ensembl Slice into the Slice Adaptor-internal cache.
4814 **
4815 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
4816 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
4817 **
4818 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
4819 **
4820 ** @release 6.2.0
4821 ** @@
4822 ******************************************************************************/
4823
ensSliceadaptorCacheInsert(EnsPSliceadaptor sla,EnsPSlice * Pslice)4824 AjBool ensSliceadaptorCacheInsert(EnsPSliceadaptor sla, EnsPSlice *Pslice)
4825 {
4826 ajint start = 0;
4827 ajint end = 0;
4828 ajint strand = 0;
4829
4830 AjPStr ikey = NULL;
4831 AjPStr nkey = NULL;
4832
4833 EnsPCoordsystem cs = NULL;
4834
4835 EnsPSlice slice1 = NULL;
4836 EnsPSlice slice2 = NULL;
4837
4838 if (!sla)
4839 return ajFalse;
4840
4841 if (!Pslice)
4842 return ajFalse;
4843
4844 if (ajDebugTest("ensSliceadaptorCacheInsert"))
4845 {
4846 ajDebug("ensSliceadaptorCacheInsert\n"
4847 " sla %p\n"
4848 " *Pslice %p\n",
4849 sla,
4850 *Pslice);
4851
4852 ensSliceTrace(*Pslice, 1);
4853 }
4854
4855 if (!*Pslice)
4856 return ajFalse;
4857
4858 cs = ensSeqregionGetCoordsystem((*Pslice)->Seqregion);
4859
4860 start = (*Pslice)->Start;
4861 end = (*Pslice)->End;
4862 strand = (*Pslice)->Strand;
4863
4864 /*
4865 ** For Ensembl Slice objects that cover an entire Ensembl Sequence Region,
4866 ** zero all coordinates.
4867 */
4868
4869 if ((start == 1) &&
4870 (end == ensSeqregionGetLength((*Pslice)->Seqregion)) &&
4871 (strand == 1))
4872 {
4873 start = 0;
4874 end = 0;
4875 strand = 0;
4876 }
4877
4878 /* Search the identifier cache. */
4879
4880 ikey = ajFmtStr("%u:%d:%d:%d",
4881 ensSeqregionGetIdentifier((*Pslice)->Seqregion),
4882 start, end, strand);
4883
4884 ensCacheFetch(sla->CacheByIdentifier,
4885 (void *) ikey,
4886 (void **) &slice1);
4887
4888 /* Search the name cache. */
4889
4890 nkey = ajFmtStr("%S:%S:%S:%d:%d:%d",
4891 ensCoordsystemGetName(cs),
4892 ensCoordsystemGetVersion(cs),
4893 ensSeqregionGetName((*Pslice)->Seqregion),
4894 start, end, strand);
4895
4896 slice2 = (EnsPSlice) ajTableFetchmodS(sla->CacheByName, nkey);
4897
4898 if ((!slice1) && (!slice2))
4899 {
4900 /*
4901 ** None of the caches returned an identical Ensembl Slice so add this
4902 ** one to both caches. The Ensembl LRU Cache automatically references
4903 ** the Ensembl Slice via the sliceadaptorCacheReference function,
4904 ** while the AJAX Table-based cache needs manual referencing.
4905 */
4906
4907 ensCacheStore(sla->CacheByIdentifier,
4908 (void *) ikey,
4909 (void **) Pslice);
4910
4911 ajTablePut(sla->CacheByName,
4912 (void *) ajStrNewS(nkey),
4913 (void *) ensSliceNewRef(*Pslice));
4914 }
4915
4916 if (slice1 && slice2 && (slice1 == slice2))
4917 {
4918 /*
4919 ** Both caches returned the same Ensembl Slice so delete it and
4920 ** return a pointer to the one already in the cache.
4921 */
4922
4923 ensSliceDel(Pslice);
4924
4925 *Pslice = ensSliceNewRef(slice2);
4926 }
4927
4928 if (slice1 && slice2 && (slice1 != slice2))
4929 ajDebug("ensSliceadaptorCacheInsert detected Slice objects in the "
4930 "identifier '%S' and name '%S' cache with "
4931 "different addresses (%p and %p).\n",
4932 ikey, nkey, slice1, slice2);
4933
4934 if (slice1 && (!slice2))
4935 ajDebug("ensSliceadaptorCacheInsert detected a Slice in "
4936 "the identifier, but not in the name cache.\n");
4937
4938 if ((!slice1) && slice2)
4939 ajDebug("ensSliceadaptorCacheInsert detected a Slice in "
4940 "the name, but not in the identifier cache.\n");
4941
4942 ensSliceDel(&slice1);
4943
4944 ajStrDel(&ikey);
4945 ajStrDel(&nkey);
4946
4947 return ajTrue;
4948 }
4949
4950
4951
4952
4953 /* @section canonical object retrieval ****************************************
4954 **
4955 ** Functions for fetching Ensembl Slice objects from an
4956 ** Ensembl SQL database.
4957 **
4958 ** @fdata [EnsPSliceadaptor]
4959 **
4960 ** @nam3rule Fetch Fetch Ensembl Slice object(s)
4961 ** @nam4rule All Fetch all Ensembl Slice objects
4962 ** @nam4rule Allby Fetch all Ensembl Slice objects matching a criterion
4963 ** @nam5rule Regionunique Fetch all Ensembl Slice objects representing the
4964 ** unique parts of an Ensembl Sequence Region
4965 ** @nam4rule By Fetch one Ensembl Slice object matching a criterion
4966 ** @nam5rule Feature Fetch by an Ensembl Feature
4967 ** @nam5rule Location Fetch by a top-level location
4968 ** @nam5rule Mapperresult Fetch by an Ensembl Mapper Result
4969 ** @nam5rule Name Fetch by a name
4970 ** @nam5rule Region Fetch by a region
4971 ** @nam5rule Seqregion Fetch by an Ensembl Sequence Region member
4972 ** @nam6rule Identifier Fetch by an SQL database-internal identifier
4973 ** @nam6rule Name Fetch by a name
4974 ** @nam5rule Slice Fetch by an Ensembl Slice
4975 **
4976 ** @argrule * sla [EnsPSliceadaptor] Ensembl Slice Adaptor
4977 ** @argrule All csname [const AjPStr] Ensembl Coordinate System name
4978 ** @argrule All csversion [const AjPStr] Ensembl Coordinate System version
4979 ** @argrule All nonreference [AjBool]
4980 ** Include non-reference Ensembl Sequence Region objects
4981 ** @argrule All duplicates [AjBool]
4982 ** Include duplicate Ensembl Sequence Region objects
4983 ** @argrule All lrg [AjBool] Include Locus Reference Genomic
4984 ** @argrule All slices [AjPList] AJAX List of Ensembl Slice objects
4985 ** @argrule AllbyRegionunique csname [const AjPStr]
4986 ** Ensembl Coordinate System name
4987 ** @argrule AllbyRegionunique csversion [const AjPStr]
4988 ** Ensembl Coordinate System version
4989 ** @argrule AllbyRegionunique srname [const AjPStr]
4990 ** Ensembl Sequence Region name
4991 ** @argrule AllbyRegionunique srstart [ajint] Start coordinate
4992 ** @argrule AllbyRegionunique srend [ajint] End coordinate
4993 ** @argrule AllbyRegionunique srstrand [ajint] Strand information
4994 ** @argrule Allby slices [AjPList] AJAX List of Ensembl Slice objects
4995 ** @argrule ByLocation location [const AjPStr] Top-level location
4996 ** @argrule ByMapperresult mr [const EnsPMapperresult] Ensembl Mapper Result
4997 ** @argrule ByName name [const AjPStr] Ensembl Slice name
4998 ** @argrule ByRegion csname [const AjPStr] Ensembl Coordinate System name
4999 ** @argrule ByRegion csversion [const AjPStr] Ensembl Coordinate System version
5000 ** @argrule ByRegion srname [const AjPStr] Ensembl Sequence Region name
5001 ** @argrule ByRegion srstart [ajint] Start coordinate
5002 ** @argrule ByRegion srend [ajint] End coordinate
5003 ** @argrule ByRegion srstrand [ajint] Strand information
5004 ** @argrule BySeqregionIdentifier srid [ajuint]
5005 ** Ensembl Sequence Region identifier
5006 ** @argrule BySeqregionIdentifier srstart [ajint] Start coordinate
5007 ** @argrule BySeqregionIdentifier srend [ajint] End coordinate
5008 ** @argrule BySeqregionIdentifier srstrand [ajint] Strand information
5009 ** @argrule BySeqregionName csname [const AjPStr]
5010 ** Ensembl Coordinate System name
5011 ** @argrule BySeqregionName csversion [const AjPStr]
5012 ** Ensembl Coordinate System version
5013 ** @argrule BySeqregionName srname [const AjPStr] Ensembl Sequence Region name
5014 ** @argrule BySlice slice [EnsPSlice] Ensembl Slice
5015 ** @argrule BySlice start [ajint] Start coordinate
5016 ** @argrule BySlice end [ajint] End coordinate
5017 ** @argrule BySlice strand [ajint] Strand information
5018 ** @argrule ByFeature feature [const EnsPFeature] Ensembl Feature
5019 ** @argrule ByFeature flank [ajint] Flanking region in base pair coordinates
5020 ** @argrule By Pslice [EnsPSlice*] Ensembl Slice address
5021 **
5022 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
5023 **
5024 ** @fcategory use
5025 ******************************************************************************/
5026
5027
5028
5029
5030 /* @func ensSliceadaptorFetchAll **********************************************
5031 **
5032 ** Fetch all Ensembl Slice objects representing Ensembl Sequence Region objects
5033 ** of a given Ensembl Coordinate System.
5034 **
5035 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
5036 ** @param [r] csname [const AjPStr] Ensembl Coordinate System name
5037 ** @param [rN] csversion [const AjPStr] Ensembl Coordinate System version
5038 ** @param [r] nonreference [AjBool]
5039 ** Include non-reference Ensembl Sequence Region objects
5040 ** @param [r] duplicates [AjBool]
5041 ** Include duplicate Ensembl Sequence Region objects
5042 ** @param [r] lrg [AjBool] Include Locus Reference Genomic
5043 ** @param [u] slices [AjPList] An AJAX List of Ensembl Slice objects
5044 **
5045 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
5046 **
5047 ** @release 6.2.0
5048 ** @@
5049 ** The Coordinate System name may be the name of an actual Coordinate System
5050 ** or an alias like "seqlevel" or "toplevel".
5051 ** If the Coordinate System name provided is "toplevel", all non-redundant,
5052 ** top-level Slice objects are returned. The Coordinate System version will be
5053 ** ignored in that case.
5054 ** Non-reference Slice objects reflect alternative haplotype assemblies.
5055 ** Examples for human NCBI36 would be c5_H2, c6_COX, c6_QBL and c22_H2.
5056 ** If the duplicate option is not set and a pseudo-autosomal region (PAR) is
5057 ** at the beginning of the Sequence Region, then the resulting Slice will not
5058 ** start at position 1, so that coordinates retrieved from this Slice might
5059 ** not be what was expected.
5060 ******************************************************************************/
5061
AjBool ensSliceadaptorFetchAll(EnsPSliceadaptor sla,
                               const AjPStr csname,
                               const AjPStr csversion,
                               AjBool nonreference,
                               AjBool duplicates,
                               AjBool lrg,
                               AjPList slices)
{
    AjBool debug = AJFALSE;
    AjBool flagged = AJFALSE;
    AjBool keep = AJFALSE;

    AjPList seqregions = NULL;

    EnsPCoordsystem cs = NULL;
    EnsPCoordsystemadaptor csa = NULL;

    EnsPDatabaseadaptor dba = NULL;

    EnsPSeqregion sr = NULL;
    EnsPSeqregionadaptor sra = NULL;

    EnsPSlice slice = NULL;

    debug = ajDebugTest("ensSliceadaptorFetchAll");

    if (debug)
        ajDebug("ensSliceadaptorFetchAll\n"
                " sla %p\n"
                " csname '%S'\n"
                " csversion '%S'\n"
                " nonreference '%B'\n"
                " duplicates '%B'\n"
                " lrg '%B'\n"
                " slices %p\n",
                sla,
                csname,
                csversion,
                nonreference,
                duplicates,
                lrg,
                slices);

    if ((!sla) || (!csname) || (!slices))
        return ajFalse;

    dba = ensSliceadaptorGetDatabaseadaptor(sla);

    csa = ensRegistryGetCoordsystemadaptor(dba);
    sra = ensRegistryGetSeqregionadaptor(dba);

    ensCoordsystemadaptorFetchByName(csa, csname, csversion, &cs);

    if (!cs)
    {
        /* An unknown Coordinate System is warned about, but not an error. */

        ajWarn("ensSliceadaptorFetchAll could not retrieve Coordinate System "
               "for name '%S' and version '%S'.\n", csname, csversion);

        return ajTrue;
    }

    /* Retrieve all Ensembl Sequence Region objects of the Coordinate System. */

    seqregions = ajListNew();

    ensSeqregionadaptorFetchAllbyCoordsystem(sra, cs, seqregions);

    while (ajListPop(seqregions, (void **) &sr))
    {
        keep = ajTrue;

        /* Unless requested, filter out non-reference Sequence Regions. */

        if (nonreference == ajFalse)
        {
            if (!ensSeqregionIsNonreference(sr, &flagged))
            {
                ajDebug("ensSliceadaptorFetchAll could not call "
                        "ensSeqregionIsNonreference successfully.\n");

                ensSeqregionTrace(sr, 1);

                keep = ajFalse;
            }
            else if (flagged == ajTrue)
            {
                if (debug)
                {
                    ajDebug("ensSliceadaptorFetchAll removed non-reference "
                            "Ensembl Sequence Region %p.\n", sr);

                    ensSeqregionTrace(sr, 1);
                }

                keep = ajFalse;
            }
        }

        /*
        ** Unless requested, filter out Locus Reference Genomic (LRG)
        ** Sequence Regions. Skipped if the region was rejected already.
        */

        if (keep && (lrg == ajFalse))
        {
            if (!ensSeqregionIsLocusreferencegenomic(sr, &flagged))
            {
                ajDebug("ensSliceadaptorFetchAll could not call "
                        "ensSeqregionIsLocusreferencegenomic successfully.\n");

                ensSeqregionTrace(sr, 1);

                keep = ajFalse;
            }
            else if (flagged == ajTrue)
            {
                if (debug)
                {
                    ajDebug("ensSliceadaptorFetchAll removed LRG "
                            "Ensembl Sequence Region %p.\n", sr);

                    ensSeqregionTrace(sr, 1);
                }

                keep = ajFalse;
            }
        }

        /*
        ** Each surviving Sequence Region yields one forward-strand Slice
        ** from position 1 to the Sequence Region length.
        */

        if (keep)
        {
            slice = ensSliceNewIni(sla, sr, 1, ensSeqregionGetLength(sr), 1);

            ajListPushAppend(slices, (void *) slice);
        }

        /* The popped Sequence Region is released on every path. */

        ensSeqregionDel(&sr);
    }

    if (duplicates == ajFalse)
        ensListSliceRemoveDuplications(slices);

    ajListFree(&seqregions);

    ensCoordsystemDel(&cs);

    return ajTrue;
}
5221
5222
5223
5224
5225 /* @func ensSliceadaptorFetchAllbyRegionunique ********************************
5226 **
5227 ** Fetch Ensembl Slice objects representing the unique parts of an
5228 ** Ensembl Sequence Region. At a minimum the name of an
5229 ** Ensembl Sequence Region must be provided. If no Coordinate System name is
5230 ** provided then a Slice of the highest ranked Coordinate System with a
5231 ** matching Sequence Region name will be returned.
5232 **
5233 ** The caller is responsible for deleting the Ensembl Slice objects before
5234 ** deleting the AJAX List.
5235 **
5236 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
5237 ** @param [rN] csname [const AjPStr] Ensembl Coordinate System name
5238 ** @param [rN] csversion [const AjPStr] Ensembl Coordinate System version
5239 ** @param [r] srname [const AjPStr] Ensembl Sequence Region name
5240 ** @param [r] srstart [ajint] Start coordinate
5241 ** @param [r] srend [ajint] End coordinate
5242 ** @param [r] srstrand [ajint] Strand information
5243 ** @param [u] slices [AjPList] AJAX List of Ensembl Slice objects
5244 **
5245 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
5246 **
5247 ** @release 6.4.0
5248 ** @@
5249 **
5250 ** NOTE: The Ensembl Core Perl API performs some fuzzy matching, if no exact
5251 ** match for the provided Sequence Region name can be found. This allows clones
5252 ** to be fetched even when their sequence version is not known.
5253 ** For example ensSliceadaptorFetchByRegion(..., "clone", "AC008066", ...)
5254 ** will retrieve the Sequence Region with name "AC008066.4".
5255 **
5256 ** The fuzzy matching can be turned off by setting the $no_fuzz argument to a
5257 ** true value.
5258 **
5259 ** This has not been implemented here.
5260 ******************************************************************************/
5261
ensSliceadaptorFetchAllbyRegionunique(EnsPSliceadaptor sla,const AjPStr csname,const AjPStr csversion,const AjPStr srname,ajint srstart,ajint srend,ajint srstrand,AjPList slices)5262 AjBool ensSliceadaptorFetchAllbyRegionunique(EnsPSliceadaptor sla,
5263 const AjPStr csname,
5264 const AjPStr csversion,
5265 const AjPStr srname,
5266 ajint srstart,
5267 ajint srend,
5268 ajint srstrand,
5269 AjPList slices)
5270 {
5271 AjBool result = AJFALSE;
5272
5273 EnsPSlice slice = NULL;
5274
5275 if (!sla)
5276 return ajFalse;
5277
5278 if (!(srname && ajStrGetLen(srname)))
5279 return ajFalse;
5280
5281 if (!slices)
5282 return ajFalse;
5283
5284 result = ensSliceadaptorFetchByRegion(
5285 sla,
5286 csname,
5287 csversion,
5288 srname,
5289 srstart,
5290 srend,
5291 srstrand,
5292 &slice);
5293
5294 ajListPushAppend(slices, (void *) slice);
5295
5296 ensListSliceRemoveDuplications(slices);
5297
5298 return result;
5299 }
5300
5301
5302
5303
5304 /* @func ensSliceadaptorFetchByFeature ****************************************
5305 **
5306 ** Fetch an Ensembl Slice around an Ensembl Feature.
5307 **
5308 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
5309 ** @param [r] feature [const EnsPFeature] Ensembl Feature
5310 ** @param [r] flank [ajint] Flanking region in base pair coordinates
5311 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
5312 **
5313 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
5314 **
5315 ** @release 6.2.0
5316 ** @@
5317 ** All this really does is return a resized version of the Slice that the
5318 ** Feature is already on. Note that Slice objects returned from this function
5319 ** are always on the forward strand of the Sequence Region regardless of the
5320 ** strandedness of the Feature passed in.
5321 ******************************************************************************/
5322
ensSliceadaptorFetchByFeature(EnsPSliceadaptor sla,const EnsPFeature feature,ajint flank,EnsPSlice * Pslice)5323 AjBool ensSliceadaptorFetchByFeature(EnsPSliceadaptor sla,
5324 const EnsPFeature feature,
5325 ajint flank,
5326 EnsPSlice *Pslice)
5327 {
5328 ajuint srid = 0U;
5329 ajint srstart = 0;
5330 ajint srend = 0;
5331
5332 AjBool result = AJFALSE;
5333
5334 EnsPSlice slice = NULL;
5335
5336 if (ajDebugTest("ensSliceadaptorFetchByFeature"))
5337 {
5338 ajDebug("ensSliceadaptorFetchByFeature\n"
5339 " sla %p\n"
5340 " feature %p\n"
5341 " flank %d\n"
5342 " Pslice %p\n",
5343 sla,
5344 feature,
5345 flank,
5346 Pslice);
5347
5348 ensFeatureTrace(feature, 1);
5349 }
5350
5351 if (!sla)
5352 {
5353 ajDebug("ensSliceadaptorFetchByFeature requires an "
5354 "Ensembl Slice Adaptor.\n");
5355
5356 return ajFalse;
5357 }
5358
5359 if (!feature)
5360 {
5361 ajDebug("ensSliceadaptorFetchByFeature requires an "
5362 "Ensembl Feature.\n");
5363
5364 return ajFalse;
5365 }
5366
5367 if (!Pslice)
5368 return ajFalse;
5369
5370 *Pslice = NULL;
5371
5372 slice = ensFeatureGetSlice(feature);
5373
5374 if (!slice)
5375 {
5376 ajDebug("ensSliceadaptorFetchByFeature requires an "
5377 "Ensembl Slice attached to the Ensembl Feature.\n");
5378
5379 return ajFalse;
5380 }
5381
5382 srid = ensSliceGetSeqregionIdentifier(slice);
5383
5384 /* Convert the Feature Slice coordinates to Sequence Region coordinates. */
5385
5386 if (slice->Strand >= 0)
5387 {
5388 srstart = slice->Start + ensFeatureGetStart(feature) - 1;
5389 srend = slice->Start + ensFeatureGetEnd(feature) - 1;
5390 }
5391 else
5392 {
5393 srstart = slice->End - ensFeatureGetEnd(feature) + 1;
5394 srend = slice->End - ensFeatureGetStart(feature) + 1;
5395 }
5396
5397 result = ensSliceadaptorFetchBySeqregionIdentifier(
5398 sla,
5399 srid,
5400 srstart - flank,
5401 srend + flank,
5402 1,
5403 Pslice);
5404
5405 return result;
5406 }
5407
5408
5409
5410
5411 /* @func ensSliceadaptorFetchByLocation ***************************************
5412 **
5413 ** Fetch an Ensembl Slice based on a top-level location.
5414 **
5415 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
5416 ** @param [r] location [const AjPStr] Top-level location
5417 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
5418 **
5419 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
5420 **
5421 ** @release 6.5.0
5422 ** @@
5423 ******************************************************************************/
5424
ensSliceadaptorFetchByLocation(EnsPSliceadaptor sla,const AjPStr location,EnsPSlice * Pslice)5425 AjBool ensSliceadaptorFetchByLocation(EnsPSliceadaptor sla,
5426 const AjPStr location,
5427 EnsPSlice *Pslice)
5428 {
5429 ajint srstart = 0;
5430 ajint srend = 0;
5431
5432 AjBool result = AJFALSE;
5433
5434 AjPRegexp re = NULL;
5435
5436 AjPStr csname = NULL;
5437 AjPStr csversion = NULL;
5438 AjPStr localstr = NULL;
5439 AjPStr srname = NULL;
5440 AjPStr number = NULL;
5441
5442 if (!sla)
5443 return ajFalse;
5444
5445 if ((!location) && (!ajStrGetLen(location)))
5446 return ajFalse;
5447
5448 if (!Pslice)
5449 return ajFalse;
5450
5451 localstr = ajStrNewS(location);
5452
5453 /* Cleanup any nomenclature like 1_000 or 1 000 or 1,000 */
5454
5455 ajStrRemoveSetC(&localstr, " _,");
5456
5457 /* NOTE: This needs adjusting to more complex patterns. */
5458
5459 re = ajRegCompC("^(\\w+):?(\\d+)?(?:-|[.]{2})?(\\d+)?$");
5460
5461 if (ajRegExec(re, localstr))
5462 {
5463 srname = ajStrNew();
5464
5465 ajRegSubI(re, 1, &srname);
5466
5467 if (ajRegLenI(re, 2))
5468 {
5469 number = ajStrNew();
5470 ajRegSubI(re, 2, &number);
5471 ajStrToInt(number, &srstart);
5472 ajStrDel(&number);
5473 }
5474
5475 if (ajRegLenI(re, 3))
5476 {
5477 number = ajStrNew();
5478 ajRegSubI(re, 3, &number);
5479 ajStrToInt(number, &srend);
5480 ajStrDel(&number);
5481 }
5482
5483 csname = ajStrNewC("toplevel");
5484 csversion = ajStrNew();
5485
5486 result = ensSliceadaptorFetchByRegion(
5487 sla,
5488 csname,
5489 csversion,
5490 srname,
5491 srstart,
5492 srend,
5493 1,
5494 Pslice);
5495
5496 ajStrDel(&csname);
5497 ajStrDel(&csversion);
5498 ajStrDel(&srname);
5499 }
5500
5501 ajRegFree(&re);
5502
5503 ajStrDel(&localstr);
5504
5505 return result;
5506 }
5507
5508
5509
5510
5511 /* @func ensSliceadaptorFetchByMapperresult ***********************************
5512 **
5513 ** Fetch an Ensembl Slice based on an Ensembl Mapper Result.
5514 **
5515 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
5516 ** @param [r] mr [const EnsPMapperresult] Ensembl Mapper Result
5517 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
5518 **
5519 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
5520 **
5521 ** @release 6.4.0
5522 ** @@
5523 ******************************************************************************/
5524
ensSliceadaptorFetchByMapperresult(EnsPSliceadaptor sla,const EnsPMapperresult mr,EnsPSlice * Pslice)5525 AjBool ensSliceadaptorFetchByMapperresult(EnsPSliceadaptor sla,
5526 const EnsPMapperresult mr,
5527 EnsPSlice *Pslice)
5528 {
5529 if (!sla)
5530 return ajFalse;
5531
5532 if (!mr)
5533 return ajFalse;
5534
5535 if (!Pslice)
5536 return ajFalse;
5537
5538 return ensSliceadaptorFetchBySeqregionIdentifier(
5539 sla,
5540 ensMapperresultGetObjectidentifier(mr),
5541 ensMapperresultGetCoordinateStart(mr),
5542 ensMapperresultGetCoordinateEnd(mr),
5543 ensMapperresultGetCoordinateStrand(mr),
5544 Pslice);
5545 }
5546
5547
5548
5549
5550 /* @func ensSliceadaptorFetchByName *******************************************
5551 **
5552 ** Fetch an Ensembl Slice by name, which consists of the following
5553 ** colon-separated fields:
5554 ** Coordinate System name,
5555 ** Coordinate System version,
5556 ** Sequence Region start,
5557 ** Sequence Region end and
5558 ** Sequence Region strand
5559 **
5560 ** csname:csversion:srname:srstart:srend:srstrand
5561 **
5562 ** The caller is responsible for deleting the Ensembl Slice.
5563 **
5564 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
5565 ** @param [r] name [const AjPStr] Ensembl Slice name
5566 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
5567 **
5568 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
5569 **
5570 ** @release 6.2.0
5571 ** @@
5572 ******************************************************************************/
5573
ensSliceadaptorFetchByName(EnsPSliceadaptor sla,const AjPStr name,EnsPSlice * Pslice)5574 AjBool ensSliceadaptorFetchByName(EnsPSliceadaptor sla,
5575 const AjPStr name,
5576 EnsPSlice *Pslice)
5577 {
5578 ajint start = 0;
5579 ajint end = 0;
5580 ajint strand = 0;
5581
5582 AjBool debug = AJFALSE;
5583 AjBool error = AJFALSE;
5584 AjBool result = AJFALSE;
5585
5586 AjPStr csname = NULL;
5587 AjPStr csversion = NULL;
5588 AjPStr srname = NULL;
5589 AjPStr srstart = NULL;
5590 AjPStr srend = NULL;
5591 AjPStr srstrand = NULL;
5592
5593 AjPStrTok token = NULL;
5594
5595 debug = ajDebugTest("ensSliceadaptorFetchByName");
5596
5597 if (debug)
5598 ajDebug("ensSliceadaptorFetchByName\n"
5599 " sla %p\n"
5600 " name '%S'\n"
5601 " Pslice %p\n",
5602 sla,
5603 name,
5604 Pslice);
5605
5606 if (!sla)
5607 return ajFalse;
5608
5609 if (!(name && ajStrGetLen(name)))
5610 return ajFalse;
5611
5612 if (!Pslice)
5613 return ajFalse;
5614
5615 /* Check the Slice Adaptor-internal cache for the name, first. */
5616
5617 *Pslice = (EnsPSlice) ajTableFetchmodS(sla->CacheByName, name);
5618
5619 if (*Pslice)
5620 {
5621 /*
5622 ** For any object returned by the AJAX Table the reference counter
5623 ** needs to be incremented manually.
5624 */
5625
5626 ensSliceNewRef(*Pslice);
5627
5628 return ajTrue;
5629 }
5630
5631 token = ajStrTokenNewC(name, ":");
5632
5633 csname = ajStrNew();
5634 csversion = ajStrNew();
5635 srname = ajStrNew();
5636 srstart = ajStrNew();
5637 srend = ajStrNew();
5638 srstrand = ajStrNew();
5639
5640 if (ajStrTokenNextParseNoskip(token, &csname) &&
5641 ajStrTokenNextParseNoskip(token, &csversion) &&
5642 ajStrTokenNextParseNoskip(token, &srname) &&
5643 ajStrTokenNextParseNoskip(token, &srstart) &&
5644 ajStrTokenNextParseNoskip(token, &srend) &&
5645 ajStrTokenNextParseNoskip(token, &srstrand))
5646 {
5647 /*
5648 ** Convert AJAX String objects into AJAX integers.
5649 ** Empty AJAX String objects are not converted and default to 0, to
5650 ** discriminate them from out of range and other errors reported by
5651 ** ajStrToInt.
5652 */
5653
5654 if (ajStrGetLen(srstart))
5655 if (!ajStrToInt(srstart, &start))
5656 {
5657 ajWarn("ensSliceadaptorFetchByName could not convert '%S' "
5658 "into an integer.",
5659 srstart);
5660
5661 error = ajTrue;
5662 }
5663
5664 if (ajStrGetLen(srend))
5665 if (!ajStrToInt(srend, &end))
5666 {
5667 ajWarn("ensSliceadaptorFetchByName could not convert '%S' "
5668 "into an integer.",
5669 srend);
5670
5671 error = ajTrue;
5672 }
5673
5674 if (ajStrGetLen(srstrand))
5675 if (!ajStrToInt(srstrand, &strand))
5676 {
5677 ajWarn("ensSliceadaptorFetchByName could not convert '%S' "
5678 "into an integer.",
5679 srstrand);
5680
5681 error = ajTrue;
5682 }
5683
5684 if (debug)
5685 ajDebug("ensSliceadaptorFetchByName parsed name '%S' into:\n"
5686 " csname '%S'\n"
5687 " csversion '%S'\n"
5688 " srname '%S'\n"
5689 " srstart '%S' %d\n"
5690 " srend '%S' %d\n"
5691 " srstrand '%S' %d\n",
5692 name,
5693 csname,
5694 csversion,
5695 srname,
5696 srstart, start,
5697 srend, end,
5698 srstrand, strand);
5699
5700 if (error == ajFalse)
5701 {
5702 if (start == 0 && end == 0)
5703 result = ensSliceadaptorFetchBySeqregionName(
5704 sla,
5705 csname,
5706 csversion,
5707 srname,
5708 Pslice);
5709 else
5710 result = ensSliceadaptorFetchByRegion(
5711 sla,
5712 csname,
5713 csversion,
5714 srname,
5715 start,
5716 end,
5717 strand,
5718 Pslice);
5719 }
5720 }
5721 else
5722 ajDebug("ensSliceadaptorFetchByName got a malformed Ensembl Slice "
5723 "name '%S', should be something like "
5724 "chromosome:NCBI36:X:1000000:2000000:1\n"
5725 " csname '%S'\n"
5726 " csversion '%S'\n"
5727 " srname '%S'\n"
5728 " srstart '%S'\n"
5729 " srend '%S'\n"
5730 " srstrand '%S'\n",
5731 name,
5732 csname,
5733 csversion,
5734 srname,
5735 srstart,
5736 srend,
5737 srstrand);
5738
5739 ajStrDel(&csname);
5740 ajStrDel(&csversion);
5741 ajStrDel(&srname);
5742 ajStrDel(&srstart);
5743 ajStrDel(&srend);
5744 ajStrDel(&srstrand);
5745
5746 ajStrTokenDel(&token);
5747
5748 return result;
5749 }
5750
5751
5752
5753
5754 /* @func ensSliceadaptorFetchByRegion *****************************************
5755 **
5756 ** Fetch an Ensembl Slice by region information. At a minimum the name of an
5757 ** Ensembl Sequence Region must be provided. If no Coordinate System name is
5758 ** provided then a Slice of the highest ranked Coordinate System with a
5759 ** matching Sequence Region name will be returned.
5760 **
5761 ** The caller is responsible for deleting the Ensembl Slice.
5762 **
5763 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
5764 ** @param [rN] csname [const AjPStr] Ensembl Coordinate System name
5765 ** @param [rN] csversion [const AjPStr] Ensembl Coordinate System version
5766 ** @param [r] srname [const AjPStr] Ensembl Sequence Region name
5767 ** @param [r] srstart [ajint] Start coordinate
5768 ** @param [r] srend [ajint] End coordinate
5769 ** @param [r] srstrand [ajint] Strand information
5770 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
5771 **
5772 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
5773 **
5774 ** @release 6.2.0
5775 ** @@
5776 **
5777 ** NOTE: The Ensembl Core Perl API performs some fuzzy matching, if no exact
5778 ** match for the provided Sequence Region name can be found. This allows clones
5779 ** to be fetched even when their sequence version is not known.
5780 ** For example ensSliceadaptorFetchByRegion(..., "clone", "AC008066", ...)
5781 ** will retrieve the Sequence Region with name "AC008066.4".
5782 **
5783 ** The fuzzy matching can be turned off by setting the $no_fuzz argument to a
5784 ** true value.
5785 **
5786 ** This has not been implemented here.
5787 ******************************************************************************/
5788
ensSliceadaptorFetchByRegion(EnsPSliceadaptor sla,const AjPStr csname,const AjPStr csversion,const AjPStr srname,ajint srstart,ajint srend,ajint srstrand,EnsPSlice * Pslice)5789 AjBool ensSliceadaptorFetchByRegion(EnsPSliceadaptor sla,
5790 const AjPStr csname,
5791 const AjPStr csversion,
5792 const AjPStr srname,
5793 ajint srstart,
5794 ajint srend,
5795 ajint srstrand,
5796 EnsPSlice *Pslice)
5797 {
5798 AjPStr key = NULL;
5799
5800 EnsPCoordsystem cs = NULL;
5801 EnsPCoordsystemadaptor csa = NULL;
5802
5803 EnsPDatabaseadaptor dba = NULL;
5804
5805 EnsPSeqregion sr = NULL;
5806 EnsPSeqregionadaptor sra = NULL;
5807
5808 if (ajDebugTest("ensSliceadaptorFetchByRegion"))
5809 ajDebug("ensSliceadaptorFetchByRegion\n"
5810 " sla %p\n"
5811 " csname '%S'\n"
5812 " csversion '%S'\n"
5813 " srname '%S'\n"
5814 " srstart %d\n"
5815 " srend %d\n"
5816 " srstrand %d\n"
5817 " Pslice %p\n",
5818 sla,
5819 csname,
5820 csversion,
5821 srname,
5822 srstart,
5823 srend,
5824 srstrand,
5825 Pslice);
5826
5827 if (!sla)
5828 {
5829 ajDebug("ensSliceadaptorFetchByRegion requires an "
5830 "Ensembl Slice Adaptor.\n");
5831
5832 return ajFalse;
5833 }
5834
5835 if ((srname == NULL) || (ajStrGetLen(srname) == 0))
5836 {
5837 ajDebug("ensSliceadaptorFetchByRegion requires an "
5838 "Ensembl Sequence Region name.\n");
5839
5840 return ajFalse;
5841 }
5842
5843 if (!Pslice)
5844 return ajFalse;
5845
5846 *Pslice = NULL;
5847
5848 /*
5849 ** Initially, search the cache, which can only return an Ensembl Slice,
5850 ** which is associated with a name and version of a regular Ensembl
5851 ** Coordinate System. For requests specifying the top-level Coordinate
5852 ** System or no Coordinate System at all the database needs to be queried
5853 ** for the Sequence Region associated with the Coordinate System of the
5854 ** highest rank. However, all Slice objects will be inserted into the cache
5855 ** with their true Coordinate System, keeping at least the memory
5856 ** requirements minimal.
5857 */
5858
5859 key = ajFmtStr("%S:%S:%S:%d:%d:%d",
5860 csname,
5861 csversion,
5862 srname,
5863 srstart,
5864 srend,
5865 srstrand);
5866
5867 *Pslice = (EnsPSlice) ajTableFetchmodS(sla->CacheByName, key);
5868
5869 ajStrDel(&key);
5870
5871 if (*Pslice)
5872 {
5873 /*
5874 ** For any object returned by the AJAX Table the reference counter
5875 ** needs to be incremented manually.
5876 */
5877
5878 ensSliceNewRef(*Pslice);
5879
5880 return ajTrue;
5881 }
5882
5883 dba = ensSliceadaptorGetDatabaseadaptor(sla);
5884
5885 /* Load the Ensembl Coordinate System if a name has been provided. */
5886
5887 if (csname && ajStrGetLen(csname))
5888 {
5889 csa = ensRegistryGetCoordsystemadaptor(dba);
5890
5891 ensCoordsystemadaptorFetchByName(csa, csname, csversion, &cs);
5892
5893 if (!cs)
5894 {
5895 ajDebug("ensSliceadaptorFetchByRegion could not load an Ensembl "
5896 "Coordinate System for name '%S' and version '%S'.\n",
5897 csname, csversion);
5898
5899 return ajTrue;
5900 }
5901 }
5902
5903 /* Load the Ensembl Sequence Region by name. */
5904
5905 sra = ensRegistryGetSeqregionadaptor(dba);
5906
5907 ensSeqregionadaptorFetchByName(sra, cs, srname, &sr);
5908
5909 if (!sr)
5910 {
5911 ajDebug("ensSliceadaptorFetchByRegion could not load an Ensembl "
5912 "Sequence Region for name '%S' and Coordinate System "
5913 "identifier %d.\n", srname, ensCoordsystemGetIdentifier(cs));
5914
5915 ensCoordsystemTrace(cs, 1);
5916
5917 /* NOTE: The Perl implementation also performs a fuzzy search. */
5918
5919 ensSeqregionadaptorFetchBySynonym(sra, srname, &sr);
5920 }
5921
5922 if (!sr)
5923 {
5924 ajDebug("ensSliceadaptorFetchByRegion could not load an Ensembl "
5925 "Sequence Region for synonym '%S'.\n",
5926 srname);
5927 }
5928
5929 if (srstrand == 0)
5930 srstrand = 1;
5931
5932 /*
5933 ** NOTE: The Perl API tests the Coordinate System name whether it matches
5934 ** a meta table entry with meta_key = 'LRG' to create a Bio::EnsEMBL::Slice
5935 ** or Bio::EnsEMBL::LRGSlice object. This implementation leaves this to the
5936 ** ensSliceNewIni function.
5937 */
5938
5939 if (sr)
5940 {
5941 *Pslice = ensSliceNewIni(sla, sr, srstart, srend, srstrand);
5942
5943 /*
5944 ** This implentation does not have a
5945 ** Bio::EnsEMBL::CircularSlice object.
5946 */
5947
5948 if (srstart > (srend + 1))
5949 ensSliceSetTopology(*Pslice, ensESliceTopologyCircular);
5950
5951 ensSliceadaptorCacheInsert(sla, Pslice);
5952 }
5953
5954 ensCoordsystemDel(&cs);
5955
5956 ensSeqregionDel(&sr);
5957
5958 return ajTrue;
5959 }
5960
5961
5962
5963
5964 /* @func ensSliceadaptorFetchBySeqregionIdentifier ****************************
5965 **
5966 ** Fetch an Ensembl Slice by an Ensembl Sequence Region identifier.
5967 ** The caller is responsible for deleting the Ensembl Slice.
5968 **
5969 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
5970 ** @param [r] srid [ajuint] Ensembl Sequence Region identifier
5971 ** @param [r] srstart [ajint] Start coordinate
5972 ** @param [r] srend [ajint] End coordinate
5973 ** @param [r] srstrand [ajint] Strand information
5974 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
5975 **
5976 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
5977 **
5978 ** @release 6.2.0
5979 ** @@
5980 ******************************************************************************/
5981
ensSliceadaptorFetchBySeqregionIdentifier(EnsPSliceadaptor sla,ajuint srid,ajint srstart,ajint srend,ajint srstrand,EnsPSlice * Pslice)5982 AjBool ensSliceadaptorFetchBySeqregionIdentifier(EnsPSliceadaptor sla,
5983 ajuint srid,
5984 ajint srstart,
5985 ajint srend,
5986 ajint srstrand,
5987 EnsPSlice *Pslice)
5988 {
5989 AjPStr key = NULL;
5990
5991 EnsPSeqregion sr = NULL;
5992
5993 if (ajDebugTest("ensSliceadaptorFetchBySeqregionIdentifier"))
5994 ajDebug("ensSliceadaptorFetchBySeqregionIdentifier\n"
5995 " sla %p\n"
5996 " srid %u\n"
5997 " srstart %d\n"
5998 " srend %d\n"
5999 " srstrand %d\n"
6000 " Pslice %p\n",
6001 sla,
6002 srid,
6003 srstart,
6004 srend,
6005 srstrand,
6006 Pslice);
6007
6008 if (!sla)
6009 {
6010 ajDebug("ensSliceadaptorFetchBySeqregionIdentifier requires an "
6011 "Ensembl Slice Adaptor.\n");
6012
6013 return ajFalse;
6014 }
6015
6016 if (!srid)
6017 {
6018 ajDebug("ensSliceadaptorFetchBySeqregionIdentifier requires an "
6019 "Ensembl Sequence Region identifier.\n");
6020
6021 return ajFalse;
6022 }
6023
6024 if (!Pslice)
6025 return ajFalse;
6026
6027 *Pslice = NULL;
6028
6029 /* Check the cache first. */
6030
6031 key = ajFmtStr("%u:%d:%d:%d", srid, srstart, srend, srstrand);
6032
6033 ensCacheFetch(sla->CacheByIdentifier, (void *) key, (void **) Pslice);
6034
6035 ajStrDel(&key);
6036
6037 if (*Pslice)
6038 return ajTrue;
6039
6040 ensSeqregionadaptorFetchByIdentifier(
6041 ensRegistryGetSeqregionadaptor(
6042 ensSliceadaptorGetDatabaseadaptor(sla)),
6043 srid,
6044 &sr);
6045
6046 if (!sr)
6047 {
6048 ajDebug("ensSliceadaptorFetchBySeqregionIdentifier could not load an "
6049 "Ensembl Sequence Region for identifier %d.\n", srid);
6050
6051 return ajTrue;
6052 }
6053
6054 if (!srstart)
6055 srstart = 1;
6056
6057 if (!srend)
6058 srend = ensSeqregionGetLength(sr);
6059
6060 if (srstart > (srend + 1))
6061 ajFatal("ensSliceadaptorFetchBySeqregionIdentifier requires the Slice "
6062 "start %d to be less than or equal to the end %d + 1 for "
6063 "Ensembl Sequence Region identifier %d.",
6064 srstart, srend, srid);
6065
6066 if (!srstrand)
6067 srstrand = 1;
6068
6069 *Pslice = ensSliceNewIni(sla, sr, srstart, srend, srstrand);
6070
6071 ensSliceadaptorCacheInsert(sla, Pslice);
6072
6073 ensSeqregionDel(&sr);
6074
6075 return ajTrue;
6076 }
6077
6078
6079
6080
6081 /* @func ensSliceadaptorFetchBySeqregionName **********************************
6082 **
6083 ** Fetch an Ensembl Slice by an Ensembl Sequence Region name, which must be
6084 ** provided at a minimum. If no Coordinate System name has been specified,
6085 ** an Ensembl Slice of the highest ranked Ensembl Coordinate System with a
6086 ** matching Sequence Region name will be returned.
6087 **
6088 ** The caller is responsible for deleting the Ensembl Slice.
6089 **
6090 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
6091 ** @param [rN] csname [const AjPStr] Ensembl Coordinate System name
6092 ** @param [rN] csversion [const AjPStr] Ensembl Coordinate System version
6093 ** @param [r] srname [const AjPStr] Ensembl Sequence Region name
6094 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
6095 **
6096 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
6097 **
6098 ** @release 6.4.0
6099 ** @@
6100 **
6101 ** NOTE: The Ensembl Core Perl API performs some fuzzy matching, if no exact
6102 ** match for the provided Sequence Region name can be found. This allows clones
6103 ** to be fetched even when their sequence version is not known.
6104 ** For example ensSliceadaptorFetchByRegion(..., "clone", "AC008066", ...)
6105 ** will retrieve the Sequence Region with name "AC008066.4".
6106 **
6107 ** The fuzzy matching can be turned off by setting the $no_fuzz argument to a
6108 ** true value.
6109 **
6110 ** This has not been implemented here.
6111 ******************************************************************************/
6112
ensSliceadaptorFetchBySeqregionName(EnsPSliceadaptor sla,const AjPStr csname,const AjPStr csversion,const AjPStr srname,EnsPSlice * Pslice)6113 AjBool ensSliceadaptorFetchBySeqregionName(EnsPSliceadaptor sla,
6114 const AjPStr csname,
6115 const AjPStr csversion,
6116 const AjPStr srname,
6117 EnsPSlice *Pslice)
6118 {
6119 AjPStr key = NULL;
6120
6121 EnsPCoordsystem cs = NULL;
6122
6123 EnsPDatabaseadaptor dba = NULL;
6124
6125 EnsPSeqregion sr = NULL;
6126
6127 if (ajDebugTest("ensSliceadaptorFetchBySeqregionName"))
6128 ajDebug("ensSliceadaptorFetchBySeqregionName\n"
6129 " sla %p\n"
6130 " csname '%S'\n"
6131 " csversion '%S'\n"
6132 " srname '%S'\n"
6133 " Pslice %p\n",
6134 sla,
6135 csname,
6136 csversion,
6137 srname,
6138 Pslice);
6139
6140 if (!sla)
6141 return ajFalse;
6142
6143 if (!(srname && ajStrGetLen(srname)))
6144 return ajFalse;
6145
6146 if (!Pslice)
6147 return ajFalse;
6148
6149 *Pslice = NULL;
6150
6151 /*
6152 ** Initially, search the cache, which can only return an Ensembl Slice,
6153 ** which is associated with a name and version of a regular Ensembl
6154 ** Coordinate System. For requests specifying the top-level Coordinate
6155 ** System or no Coordinate System at all the database needs to be queried
6156 ** for the Sequence Region associated with the Coordinate System of the
6157 ** highest rank. However, all Slice objects will be inserted into the cache
6158 ** with their true Coordinate System, keeping at least the memory
6159 ** requirements minimal.
6160 */
6161
6162 key = ajFmtStr("%S:%S:%S:0:0:0",
6163 csname,
6164 csversion,
6165 srname);
6166
6167 *Pslice = (EnsPSlice) ajTableFetchmodS(sla->CacheByName, key);
6168
6169 ajStrDel(&key);
6170
6171 if (*Pslice)
6172 {
6173 /*
6174 ** For any object returned by the AJAX Table the reference counter
6175 ** needs to be incremented manually.
6176 */
6177
6178 ensSliceNewRef(*Pslice);
6179
6180 return ajTrue;
6181 }
6182
6183 dba = ensSliceadaptorGetDatabaseadaptor(sla);
6184
6185 /* Load the Ensembl Coordinate System if a name has been provided. */
6186
6187 if (csname && ajStrGetLen(csname))
6188 {
6189 ensCoordsystemadaptorFetchByName(
6190 ensRegistryGetCoordsystemadaptor(dba),
6191 csname,
6192 csversion,
6193 &cs);
6194
6195 if (!cs)
6196 {
6197 ajDebug("ensSliceadaptorFetchBySeqregionName could not load an "
6198 "Ensembl Coordinate System for name '%S' and "
6199 "version '%S'.\n",
6200 csname, csversion);
6201
6202 return ajTrue;
6203 }
6204 }
6205
6206 /* Load the Ensembl Sequence Region. */
6207
6208 ensSeqregionadaptorFetchByName(
6209 ensRegistryGetSeqregionadaptor(dba),
6210 cs,
6211 srname,
6212 &sr);
6213
6214 if (!sr)
6215 {
6216 ajDebug("ensSliceadaptorFetchBySeqregionName could not load an "
6217 "Ensembl Sequence Region for name '%S' and Coordinate System "
6218 "identifier %d.\n", srname, ensCoordsystemGetIdentifier(cs));
6219
6220 ensCoordsystemTrace(cs, 1);
6221
6222 ensCoordsystemDel(&cs);
6223
6224 return ajTrue;
6225 }
6226
6227 /*
6228 ** NOTE: The Perl API tests the Coordinate System name whether it matches
6229 ** a meta table entry with meta_key = 'LRG' to create a Bio::EnsEMBL::Slice
6230 ** or Bio::EnsEMBL::LRGSlice object. This implementation leaves this to the
6231 ** ensSliceNewIni function.
6232 */
6233
6234 *Pslice = ensSliceNewIni(sla, sr, 1, ensSeqregionGetLength(sr), 1);
6235
6236 ensSliceadaptorCacheInsert(sla, Pslice);
6237
6238 ensCoordsystemDel(&cs);
6239
6240 ensSeqregionDel(&sr);
6241
6242 return ajTrue;
6243 }
6244
6245
6246
6247
6248 /* @func ensSliceadaptorFetchBySlice ******************************************
6249 **
6250 ** Fetch an Ensembl Slice by an Ensembl Slice in relative coordinates.
6251 **
6252 ** This function is experimental, but the advantage of going through the
6253 ** Slice Adaptor would be that a (Sub-)Slice would be registered in the
6254 ** Slice Adaptor-internal cache, which in turn reduces memory consumption.
6255 ** The caller is responsible for deleting the Ensembl Slice.
6256 **
6257 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
6258 ** @param [u] slice [EnsPSlice] Ensembl Slice
6259 ** @param [r] start [ajint] Start coordinate
6260 ** @param [r] end [ajint] End coordinate
6261 ** @param [r] strand [ajint] Strand information
6262 ** @param [wP] Pslice [EnsPSlice*] Ensembl Slice address
6263 **
6264 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
6265 **
6266 ** @release 6.2.0
6267 ** @@
6268 ******************************************************************************/
6269
ensSliceadaptorFetchBySlice(EnsPSliceadaptor sla,EnsPSlice slice,ajint start,ajint end,ajint strand,EnsPSlice * Pslice)6270 AjBool ensSliceadaptorFetchBySlice(EnsPSliceadaptor sla,
6271 EnsPSlice slice,
6272 ajint start,
6273 ajint end,
6274 ajint strand,
6275 EnsPSlice *Pslice)
6276 {
6277 ajuint srid = 0U;
6278 ajint srstart = 0;
6279 ajint srend = 0;
6280 ajint srstrand = 0;
6281
6282 if (!sla)
6283 return ajFalse;
6284
6285 if (!slice)
6286 return ajFalse;
6287
6288 if (start > (end + 1))
6289 {
6290 ajDebug("ensSliceadaptorFetchBySlice requires the start coordinate %d "
6291 "to be less than the end coordinate %d + 1.\n",
6292 start, end);
6293
6294 return ajFalse;
6295 }
6296
6297 if (!strand)
6298 strand = 1;
6299
6300 if (!Pslice)
6301 return ajFalse;
6302
6303 *Pslice = NULL;
6304
6305 /* Transform relative into absolute Sequence Region coordinates. */
6306
6307 /*
6308 **
6309 ** s e
6310 ** SS \ +1 / SE
6311 ** 1 \ |-------| / length
6312 ** \ |------Slice------| /
6313 ** |--------Seqregion--------|
6314 ** |------Slice------|
6315 ** / |-------| \
6316 ** SS / -1 \ SE
6317 ** e s
6318 **
6319 */
6320
6321 srid = ensSeqregionGetIdentifier(slice->Seqregion);
6322
6323 if (slice->Strand >= 0)
6324 {
6325 srstart = slice->Start + start - 1;
6326 srend = slice->Start + end - 1;
6327 }
6328 else
6329 {
6330 srstart = slice->End - end + 1;
6331 srend = slice->End - start + 1;
6332 }
6333
6334 srstrand = slice->Strand * strand;
6335
6336 ensSliceadaptorFetchBySeqregionIdentifier(sla,
6337 srid,
6338 srstart,
6339 srend,
6340 srstrand,
6341 Pslice);
6342
6343 if (!*Pslice)
6344 return ajFalse;
6345
6346 /* Adjust the sequence in case the Slice has one associated. */
6347
6348 if (slice->Sequence)
6349 ensSliceFetchSequenceSubStr(slice,
6350 start,
6351 end,
6352 strand,
6353 &((*Pslice)->Sequence));
6354
6355 return ajTrue;
6356 }
6357
6358
6359
6360
6361 /* @section accessory object retrieval ****************************************
6362 **
** Functions for retrieving objects related to Ensembl Slice objects
6364 ** from an Ensembl SQL database.
6365 **
6366 ** @fdata [EnsPSliceadaptor]
6367 **
** @nam3rule Retrieve Retrieve Ensembl Slice-related object(s)
6369 ** @nam4rule Normalisedprojection Retrieve all Ensembl Projection Segment
6370 ** objects for a normalised projection
6371 **
6372 ** @argrule * sla [EnsPSliceadaptor] Ensembl Slice Adaptor
6373 ** @argrule Normalisedprojection slice [EnsPSlice] Ensembl Slice
6374 ** @argrule Normalisedprojection pss [AjPList] AJAX List of
6375 ** Ensembl Projection Segment objects
6376 **
6377 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
6378 **
6379 ** @fcategory use
6380 ******************************************************************************/
6381
6382
6383
6384
6385 /* @func ensSliceadaptorRetrieveNormalisedprojection **************************
6386 **
6387 ** Retrieve all Ensembl Projection Segment objects required for a normalised
6388 ** projection of an Ensembl Slice.
6389 **
6390 ** This function returns an AJAX List of Ensembl Projection Segment objects
6391 ** where symbolic links exist for the given Ensembl Slice.
6392 **
** The caller is responsible for deleting the Ensembl Projection Segment
** objects before deleting the AJAX List.
6395 **
6396 ** @param [u] sla [EnsPSliceadaptor] Ensembl Slice Adaptor
6397 ** @param [u] slice [EnsPSlice] Ensembl Slice
6398 ** @param [u] pss [AjPList] AJAX List of Ensembl Projection Segment objects
6399 **
6400 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
6401 **
6402 ** @release 6.4.0
6403 ** @@
6404 ******************************************************************************/
6405
ensSliceadaptorRetrieveNormalisedprojection(EnsPSliceadaptor sla,EnsPSlice slice,AjPList pss)6406 AjBool ensSliceadaptorRetrieveNormalisedprojection(EnsPSliceadaptor sla,
6407 EnsPSlice slice,
6408 AjPList pss)
6409 {
6410 ajint difference = 0;
6411
6412 ajuint excsrid = 0U;
6413 ajuint excstart = 1U;
6414 ajuint excend = 0U;
6415 ajuint exclength = 0U;
6416
6417 ajuint refsrid = 0U;
6418 ajuint refstart = 1U;
6419 ajuint refend = 0U;
6420 ajuint reflength = 0U;
6421
6422 ajuint start = 1U;
6423 /* ajuint srid = 0U; */
6424
6425 AjBool debug = AJFALSE;
6426
6427 AjPList aes = NULL;
6428 AjPList haps = NULL;
6429 AjPList pars = NULL;
6430 AjPList syms = NULL;
6431 AjPList mrs = NULL;
6432
6433 AjPStr source = NULL;
6434 AjPStr target = NULL;
6435
6436 EnsPAssemblyexception ae = NULL;
6437 EnsPAssemblyexceptionadaptor aea = NULL;
6438
6439 EnsPMapper mapper = NULL;
6440
6441 EnsPMapperresult mr = NULL;
6442
6443 EnsPSlice excslice = NULL;
6444 EnsPSlice prjslice = NULL;
6445 EnsPSlice refslice = NULL;
6446
6447 debug = ajDebugTest("ensSliceadaptorRetrieveNormalisedprojection");
6448
6449 if (debug)
6450 {
6451 ajDebug("ensSliceadaptorRetrieveNormalisedprojection\n"
6452 " sla %p\n"
6453 " slice %p\n"
6454 " pss %p\n",
6455 sla,
6456 slice,
6457 pss);
6458
6459 ensSliceTrace(slice, 1);
6460 }
6461
6462 if (!sla)
6463 {
6464 ajDebug("ensSliceadaptorRetrieveNormalisedprojection requires an "
6465 "Ensembl Slice Adaptor.\n");
6466
6467 return ajFalse;
6468 }
6469
6470 if (!slice)
6471 {
6472 ajDebug("ensSliceadaptorRetrieveNormalisedprojection requires an "
6473 "Ensembl Slice.\n");
6474
6475 return ajFalse;
6476 }
6477
6478 if (!pss)
6479 {
6480 ajDebug("ensSliceadaptorRetrieveNormalisedprojection requires an "
6481 "AJAX List of Ensembl Projection Segment objects.\n");
6482
6483 return ajFalse;
6484 }
6485
6486 aea = ensRegistryGetAssemblyexceptionadaptor(
6487 ensSliceadaptorGetDatabaseadaptor(sla));
6488
6489 refsrid = ensSliceGetSeqregionIdentifier(slice);
6490
6491 haps = ajListNew();
6492 pars = ajListNew();
6493 aes = ajListNew();
6494
6495 ensAssemblyexceptionadaptorFetchAllbyReferenceSeqregion(aea, refsrid, aes);
6496
6497 while (ajListPop(aes, (void **) &ae))
6498 {
6499 /*
6500 ** Select all overlapping pseudo-autosomal regions (PARs) and all
6501 ** haplotype regions (HAPs) if any.
6502 */
6503
6504 switch (ensAssemblyexceptionGetType(ae))
6505 {
6506 case ensEAssemblyexceptionTypePAR:
6507
6508 if (((ajint) ensAssemblyexceptionGetReferenceStart(ae)
6509 <= slice->End) &&
6510 ((ajint) ensAssemblyexceptionGetReferenceEnd(ae)
6511 >= slice->Start))
6512 ajListPushAppend(pars, (void *) ae);
6513 else
6514 ensAssemblyexceptionDel(&ae);
6515
6516 break;
6517
6518 default:
6519
6520 ajListPushAppend(haps, (void *) ae);
6521 }
6522 }
6523
6524 ajListFree(&aes);
6525
6526 if ((!ajListGetLength(haps)) && (!ajListGetLength(pars)))
6527 {
6528 /*
6529 ** There are no haplotypes and no pseudo-autosomal regions,
6530 ** so return just a Projection Segment for this Slice.
6531 */
6532
6533 if (debug)
6534 ajDebug("ensSliceadaptorRetrieveNormalisedprojection "
6535 "found no Ensembl Assembly Exception objects overlapping "
6536 "this Ensembl Slice.\n");
6537
6538 ajListFree(&haps);
6539 ajListFree(&pars);
6540
6541 ajListPushAppend(
6542 pss,
6543 (void *) ensProjectionsegmentNewIni(
6544 1,
6545 ensSliceCalculateLength(slice),
6546 slice));
6547
6548 return ajTrue;
6549 }
6550
6551 syms = ajListNew();
6552
6553 if (ajListGetLength(haps) > 0)
6554 {
6555 /*
6556 ** We want the region of the haplotype inverted, which means that we
6557 ** want the two regions of the Slice that are not covered by the
6558 ** haplotype as Projection Segment objects.
6559 **
6560 ** Haplotype: *******
6561 ** Reference: -------------------
6562 ** Projection Segment objects: ++++++ ++++++
6563 */
6564
6565 ensListAssemblyexceptionSortReferenceEndAscending(haps);
6566
6567 ajListPeekFirst(haps, (void **) &ae);
6568
6569 ensSliceadaptorFetchBySeqregionIdentifier(
6570 sla,
6571 ensAssemblyexceptionGetReferenceSeqregion(ae),
6572 0,
6573 0,
6574 0,
6575 &refslice);
6576
6577 ensSliceadaptorFetchBySeqregionIdentifier(
6578 sla,
6579 ensAssemblyexceptionGetExceptionSeqregion(ae),
6580 0,
6581 0,
6582 0,
6583 &excslice);
6584
6585 excsrid = ensAssemblyexceptionGetExceptionSeqregion(ae);
6586 excstart = 1;
6587 excend = 0;
6588 exclength = ensSliceCalculateLength(excslice);
6589
6590 refstart = 1;
6591 refend = 0;
6592 reflength = ensSliceCalculateLength(refslice);
6593
6594 ensSliceDel(&refslice);
6595 ensSliceDel(&excslice);
6596
6597 while (ajListPop(haps, (void **) &ae))
6598 {
6599 excend = ensAssemblyexceptionGetReferenceStart(ae) - 1;
6600 refend = ensAssemblyexceptionGetExceptionStart(ae) - 1;
6601
6602 /*
6603 ** Add an Ensembl Assembly Exception for a haplotype (HAP) at
6604 ** the start or the end of the reference Slice.
6605 */
6606
6607 if ((excend > 0) && (excstart < reflength))
6608 ajListPushAppend(
6609 syms,
6610 (void *) ensAssemblyexceptionNewIni(
6611 (EnsPAssemblyexceptionadaptor) NULL,
6612 0,
6613 ensAssemblyexceptionGetReferenceSeqregion(ae),
6614 excstart,
6615 excend,
6616 ensAssemblyexceptionGetExceptionSeqregion(ae),
6617 refstart,
6618 refend,
6619 1,
6620 ensEAssemblyexceptionTypeNULL));
6621
6622 refstart
6623 = refend
6624 + ensAssemblyexceptionCalculateExceptionLength(ae)
6625 + 1;
6626
6627 excstart
6628 = excend
6629 + ensAssemblyexceptionCalculateReferenceLength(ae)
6630 + 1;
6631
6632 ensAssemblyexceptionDel(&ae);
6633 }
6634
6635 /*
6636 ** For the last segment from the end of the
6637 ** Ensembl Assembly Exception to the end of the Ensembl Slice objects.
6638 */
6639
6640 excend = reflength;
6641 refend = exclength;
6642
6643 difference = (excend - excstart) - (refend - refstart);
6644
6645 if (difference > 0)
6646 ajListPushAppend(
6647 syms,
6648 ensAssemblyexceptionNewIni(
6649 (EnsPAssemblyexceptionadaptor) NULL,
6650 0,
6651 refsrid,
6652 excstart,
6653 excend,
6654 excsrid,
6655 refstart,
6656 refend + difference,
6657 1,
6658 ensEAssemblyexceptionTypeNULL));
6659 else if (difference < 0)
6660 ajListPushAppend(
6661 syms,
6662 ensAssemblyexceptionNewIni(
6663 (EnsPAssemblyexceptionadaptor) NULL,
6664 0,
6665 refsrid,
6666 excstart,
6667 excend - difference,
6668 excsrid,
6669 refstart,
6670 refend,
6671 1,
6672 ensEAssemblyexceptionTypeNULL));
6673 else
6674 ajListPushAppend(
6675 syms,
6676 ensAssemblyexceptionNewIni(
6677 (EnsPAssemblyexceptionadaptor) NULL,
6678 0,
6679 refsrid,
6680 excstart,
6681 excend,
6682 excsrid,
6683 refstart,
6684 refend,
6685 1,
6686 ensEAssemblyexceptionTypeNULL));
6687 }
6688
6689 ajListFree(&haps);
6690
6691 /*
6692 ** The ajListPushlist function adds all nodes from the second AJAX List to
6693 ** the first and deletes the second.
6694 */
6695
6696 ajListPushlist(syms, &pars);
6697
6698 source = ajStrNewC("sym");
6699 target = ajStrNewC("org");
6700
6701 mapper = ensMapperNewIni(source,
6702 target,
6703 ensSliceGetCoordsystemObject(slice),
6704 ensSliceGetCoordsystemObject(slice));
6705
6706 while (ajListPop(syms, (void **) &ae))
6707 {
6708 ensMapperAddCoordinates(mapper,
6709 ensAssemblyexceptionGetReferenceSeqregion(ae),
6710 ensAssemblyexceptionGetReferenceStart(ae),
6711 ensAssemblyexceptionGetReferenceEnd(ae),
6712 1,
6713 ensAssemblyexceptionGetExceptionSeqregion(ae),
6714 ensAssemblyexceptionGetExceptionStart(ae),
6715 ensAssemblyexceptionGetExceptionEnd(ae));
6716
6717 ensAssemblyexceptionDel(&ae);
6718 }
6719
6720 ajListFree(&syms);
6721
6722 mrs = ajListNew();
6723
6724 ensMapperMap(mapper,
6725 refsrid,
6726 slice->Start,
6727 slice->End,
6728 slice->Strand,
6729 source,
6730 mrs);
6731
6732 ajStrDel(&source);
6733 ajStrDel(&target);
6734
6735 while (ajListPop(mrs, (void **) &mr))
6736 {
6737 switch (ensMapperresultGetType(mr))
6738 {
6739 case ensEMapperresultTypeCoordinate:
6740
6741 if (debug)
6742 {
6743 ajDebug("ensSliceadaptorRetrieveNormalisedprojection "
6744 " got Ensembl Mapper Result %p.\n", mr);
6745
6746 ensMapperresultTrace(mr, 1);
6747 }
6748
6749 ensSliceadaptorFetchByMapperresult(sla, mr, &prjslice);
6750
6751 ajListPushAppend(
6752 pss,
6753 (void *) ensProjectionsegmentNewIni(
6754 start,
6755 start + ensMapperresultCalculateLengthResult(mr) - 1,
6756 prjslice));
6757
6758 ensSliceDel(&prjslice);
6759
6760 break;
6761
6762 case ensEMapperresultTypeGap:
6763
6764 if (debug)
6765 ajDebug("ensSliceadaptorRetrieveNormalisedprojection\n"
6766 " Ensembl Mapper Result gap %d:%d\n",
6767 ensMapperresultGetGapStart(mr),
6768 ensMapperresultGetGapEnd(mr));
6769
6770 prjslice = ensSliceNewIni(sla,
6771 slice->Seqregion,
6772 ensMapperresultGetGapStart(mr),
6773 ensMapperresultGetGapEnd(mr),
6774 slice->Strand);
6775
6776 ajListPushAppend(
6777 pss,
6778 (void *) ensProjectionsegmentNewIni(
6779 start,
6780 start + ensMapperresultCalculateLengthGap(mr) - 1,
6781 prjslice));
6782
6783 ensSliceDel(&prjslice);
6784
6785 break;
6786
6787 default:
6788
6789 ajWarn("ensSliceadaptorRetrieveNormalisedprojection got "
6790 "an unexpected Ensembl Mapper Result type %d.\n",
6791 ensMapperresultGetType(mr));
6792 }
6793
6794 start += ensMapperresultCalculateLengthResult(mr);
6795
6796 ensMapperresultDel(&mr);
6797 }
6798
6799 ajListFree(&mrs);
6800
6801 ensMapperDel(&mapper);
6802
6803 return ajTrue;
6804 }
6805
6806
6807
6808
6809 /* @datasection [EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice ***********
6810 **
6811 ** @nam2rule Repeatmaskedslice Functions for manipulating
6812 ** Ensembl Repeat-Masked Slice objects
6813 **
6814 ** @cc Bio::EnsEMBL::RepeatMaskedSlice
6815 ** @cc CVS Revision: 1.21
6816 ** @cc CVS Tag: branch-ensembl-68
6817 **
6818 ******************************************************************************/
6819
6820
6821
6822
6823 /* @section constructors ******************************************************
6824 **
6825 ** All constructors return a new Ensembl Repeat-Masked Slice by pointer.
6826 ** It is the responsibility of the user to first destroy any previous
6827 ** Repeat-Masked Slice. The target pointer does not need to be initialised to
6828 ** NULL, but it is good programming practice to do so anyway.
6829 **
6830 ** @fdata [EnsPRepeatmaskedslice]
6831 **
6832 ** @nam3rule New Constructor
6833 ** @nam4rule Cpy Constructor with existing object
6834 ** @nam4rule Ini Constructor with initial values
6835 ** @nam4rule Ref Constructor by incrementing the reference counter
6836 **
6837 ** @argrule Cpy rmslice [const EnsPRepeatmaskedslice] Ensembl Repeat-Masked
6838 ** Slice
6839 ** @argrule Ini slice [EnsPSlice] Ensembl Slice
6840 ** @argrule Ini annames [AjPList]
6841 ** AJAX List of AJAX String (Ensembl Analysis name) objects
6842 ** @argrule Ini masking [AjPTable]
6843 ** AJAX Table of AJAX String key data and
6844 ** Ensembl Repeat Mask Type enumeration value data
6845 ** to override masking types
6846 ** @argrule Ref rmslice [EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice
6847 **
6848 ** @valrule * [EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice or NULL
6849 **
6850 ** @fcategory new
6851 ******************************************************************************/
6852
6853
6854
6855
6856 /* @func ensRepeatmaskedsliceNewCpy *******************************************
6857 **
6858 ** Object-based constructor function, which returns an independent object.
6859 **
6860 ** @param [r] rmslice [const EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice
6861 **
6862 ** @return [EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice or NULL
6863 **
6864 ** @release 6.4.0
6865 ** @@
6866 ******************************************************************************/
6867
ensRepeatmaskedsliceNewCpy(const EnsPRepeatmaskedslice rmslice)6868 EnsPRepeatmaskedslice ensRepeatmaskedsliceNewCpy(
6869 const EnsPRepeatmaskedslice rmslice)
6870 {
6871 void **keyarray = NULL;
6872 void **valarray = NULL;
6873
6874 register ajuint i = 0U;
6875
6876 EnsERepeatMaskType *Pmsktyp = NULL;
6877
6878 AjIList iter = NULL;
6879
6880 AjPStr key = NULL;
6881
6882 EnsPRepeatmaskedslice pthis = NULL;
6883
6884 if (!rmslice)
6885 return NULL;
6886
6887 AJNEW0(pthis);
6888
6889 pthis->Slice = ensSliceNewRef(rmslice->Slice);
6890
6891 /* Copy the AJAX List of (Ensembl Analysis name) AJAX String objects. */
6892
6893 if (rmslice->Analysisnames)
6894 {
6895 pthis->Analysisnames = ajListstrNew();
6896
6897 iter = ajListIterNew(rmslice->Analysisnames);
6898
6899 while (!ajListIterDone(iter))
6900 {
6901 key = (AjPStr) ajListIterGet(iter);
6902
6903 if (key && ajStrGetLen(key))
6904 ajListPushAppend(pthis->Analysisnames,
6905 (void *) ajStrNewS(key));
6906 }
6907
6908 ajListIterDel(&iter);
6909 }
6910
6911 /* Copy the AJAX Table of AJAX String key and AJAX Enum masking types. */
6912
6913 if (rmslice->Masking)
6914 {
6915 pthis->Masking = ajTablestrNew(0U);
6916
6917 ajTableSetDestroyvalue(pthis->Masking, &ajMemFree);
6918
6919 ajTableToarrayKeysValues(rmslice->Masking, &keyarray, &valarray);
6920
6921 for (i = 0U; keyarray[i]; i++)
6922 {
6923 key = ajStrNewS((AjPStr) keyarray[i]);
6924
6925 AJNEW0(Pmsktyp);
6926
6927 *Pmsktyp = *((EnsERepeatMaskType *) valarray[i]);
6928
6929 ajTablePut(pthis->Masking, (void *) key, (void *) Pmsktyp);
6930 }
6931
6932 AJFREE(keyarray);
6933 AJFREE(valarray);
6934 }
6935
6936 pthis->Use = 1U;
6937
6938 return pthis;
6939 }
6940
6941
6942
6943
6944 /* @func ensRepeatmaskedsliceNewIni *******************************************
6945 **
6946 ** Constructor for an Ensembl Repeat-Masked Slice with initial values.
6947 **
6948 ** @param [u] slice [EnsPSlice] Ensembl Slice
6949 ** @param [uN] annames [AjPList]
6950 ** AJAX List of AJAX String (Ensembl Analysis name) objects
6951 ** @param [uN] masking [AjPTable]
6952 ** AJAX Table of AJAX String key data and
6953 ** Ensembl Repeat Mask Type enumeration value data
6954 ** to override masking types
6955 **
6956 ** @return [EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice or NULL
6957 **
6958 ** @release 6.4.0
6959 ** @@
6960 ******************************************************************************/
6961
ensRepeatmaskedsliceNewIni(EnsPSlice slice,AjPList annames,AjPTable masking)6962 EnsPRepeatmaskedslice ensRepeatmaskedsliceNewIni(EnsPSlice slice,
6963 AjPList annames,
6964 AjPTable masking)
6965 {
6966 void **keyarray = NULL;
6967 void **valarray = NULL;
6968
6969 register ajuint i = 0U;
6970
6971 EnsERepeatMaskType *Pmsktyp = NULL;
6972
6973 AjIList iter = NULL;
6974
6975 AjPStr key = NULL;
6976
6977 EnsPRepeatmaskedslice rmslice = NULL;
6978
6979 if (ajDebugTest("ensRepeatmaskedsliceNewIni"))
6980 {
6981 ajDebug("ensRepeatmaskedsliceNewIni\n"
6982 " slice %p\n"
6983 " annames %p\n"
6984 " masking %p\n",
6985 slice,
6986 annames,
6987 masking);
6988
6989 ensSliceTrace(slice, 1);
6990 }
6991
6992 if (!slice)
6993 return NULL;
6994
6995 AJNEW0(rmslice);
6996
6997 rmslice->Slice = ensSliceNewRef(slice);
6998
6999 /* Copy the AJAX List of (Ensembl Analysis name) AJAX String objects. */
7000
7001 if (annames)
7002 {
7003 rmslice->Analysisnames = ajListstrNew();
7004
7005 iter = ajListIterNew(annames);
7006
7007 while (!ajListIterDone(iter))
7008 {
7009 key = (AjPStr) ajListIterGet(iter);
7010
7011 if (key && ajStrGetLen(key))
7012 ajListPushAppend(rmslice->Analysisnames,
7013 (void *) ajStrNewS(key));
7014 }
7015
7016 ajListIterDel(&iter);
7017 }
7018
7019 /* Copy the AJAX Table of AJAX String key and AJAX Enum masking types. */
7020
7021 if (masking)
7022 {
7023 rmslice->Masking = ajTablestrNew(0U);
7024
7025 ajTableSetDestroyvalue(rmslice->Masking, &ajMemFree);
7026
7027 ajTableToarrayKeysValues(masking, &keyarray, &valarray);
7028
7029 for (i = 0U; keyarray[i]; i++)
7030 {
7031 key = ajStrNewS((AjPStr) keyarray[i]);
7032
7033 AJNEW0(Pmsktyp);
7034
7035 *Pmsktyp = *((EnsERepeatMaskType *) valarray[i]);
7036
7037 ajTablePut(rmslice->Masking, (void *) key, (void *) Pmsktyp);
7038 }
7039
7040 AJFREE(keyarray);
7041 AJFREE(valarray);
7042 }
7043
7044 rmslice->Use = 1U;
7045
7046 return rmslice;
7047 }
7048
7049
7050
7051
7052 /* @func ensRepeatmaskedsliceNewRef *******************************************
7053 **
7054 ** Ensembl Object referencing function, which returns a pointer to the
7055 ** Ensembl Object passed in and increases its reference count.
7056 **
7057 ** @param [u] rmslice [EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice
7058 **
7059 ** @return [EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice or NULL
7060 **
7061 ** @release 6.2.0
7062 ** @@
7063 ******************************************************************************/
7064
EnsPRepeatmaskedslice ensRepeatmaskedsliceNewRef(
    EnsPRepeatmaskedslice rmslice)
{
    /* Only an existing object can gain another reference. */
    if (rmslice != NULL)
        rmslice->Use++;

    /* A NULL argument is handed back unchanged. */
    return rmslice;
}
7075
7076
7077
7078
7079 /* @section destructors *******************************************************
7080 **
7081 ** Destruction destroys all internal data structures and frees the memory
7082 ** allocated for an Ensembl Repeat-Masked Slice object.
7083 **
7084 ** @fdata [EnsPRepeatmaskedslice]
7085 **
7086 ** @nam3rule Del Destroy (free) an Ensembl Repeat-Masked Slice
7087 **
7088 ** @argrule * Prmslice [EnsPRepeatmaskedslice*]
7089 ** Ensembl Repeat-Masked Slice address
7090 **
7091 ** @valrule * [void]
7092 **
7093 ** @fcategory delete
7094 ******************************************************************************/
7095
7096
7097
7098
7099 /* @func ensRepeatmaskedsliceDel **********************************************
7100 **
7101 ** Default destructor for an Ensembl Repeat-Masked Slice.
7102 **
7103 ** @param [d] Prmslice [EnsPRepeatmaskedslice*]
7104 ** Ensembl Repeat-Masked Slice address
7105 **
7106 ** @return [void]
7107 **
7108 ** @release 6.2.0
7109 ** @@
7110 ******************************************************************************/
7111
void ensRepeatmaskedsliceDel(EnsPRepeatmaskedslice *Prmslice)
{
    EnsPRepeatmaskedslice rmslice = NULL;

    if (Prmslice == NULL)
        return;

#if defined(AJ_DEBUG) && AJ_DEBUG >= 2
    if (ajDebugTest("ensRepeatmaskedsliceDel"))
    {
        ajDebug("ensRepeatmaskedsliceDel\n"
                " *Prmslice %p\n",
                *Prmslice);

        ensRepeatmaskedsliceTrace(*Prmslice, 1);
    }
#endif /* defined(AJ_DEBUG) && AJ_DEBUG >= 2 */

    /*
    ** Take over the object and clear the caller's pointer, which ends up
    ** NULL on every path through this function.
    */

    rmslice = *Prmslice;

    *Prmslice = NULL;

    if (rmslice == NULL)
        return;

    /* Drop one reference; the object survives while others remain. */
    if (--rmslice->Use != 0)
        return;

    ensSliceDel(&rmslice->Slice);

    /*
    ** Clear and delete the AJAX List of
    ** (Ensembl Analysis name) AJAX String objects.
    */

    ajListstrFreeData(&rmslice->Analysisnames);

    ajTableDel(&rmslice->Masking);

    ajMemFree((void **) &rmslice);

    return;
}
7152
7153
7154
7155
7156 /* @section debugging *********************************************************
7157 **
7158 ** Functions for reporting of an Ensembl Repeat-Masked Slice object.
7159 **
7160 ** @fdata [EnsPRepeatmaskedslice]
7161 **
7162 ** @nam3rule Trace Report Ensembl Repeat-Masked Slice members to debug file
7163 **
7164 ** @argrule Trace rmslice [const EnsPRepeatmaskedslice]
7165 ** Ensembl Repeat-Masked Slice
7166 ** @argrule Trace level [ajuint] Indentation level
7167 **
7168 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
7169 **
7170 ** @fcategory misc
7171 ******************************************************************************/
7172
7173
7174
7175
7176 /* @func ensRepeatmaskedsliceTrace ********************************************
7177 **
7178 ** Trace an Ensembl Repeat-Masked Slice.
7179 **
7180 ** @param [r] rmslice [const EnsPRepeatmaskedslice]
7181 ** Ensembl Repeat-Masked Slice
7182 ** @param [r] level [ajuint] Indentation level
7183 **
7184 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
7185 **
7186 ** @release 6.2.0
7187 ** @@
7188 ******************************************************************************/
7189
ensRepeatmaskedsliceTrace(const EnsPRepeatmaskedslice rmslice,ajuint level)7190 AjBool ensRepeatmaskedsliceTrace(const EnsPRepeatmaskedslice rmslice,
7191 ajuint level)
7192 {
7193 AjPStr indent = NULL;
7194
7195 if (!rmslice)
7196 return ajFalse;
7197
7198 indent = ajStrNew();
7199
7200 ajStrAppendCountK(&indent, ' ', level * 2);
7201
7202 ajDebug("%SensRepeatmaskedsliceTrace %p\n"
7203 "%S Slice %p\n"
7204 "%S Analysisnames %p\n"
7205 "%S Masking %p\n"
7206 "%S Use %u\n",
7207 indent, rmslice,
7208 indent, rmslice->Slice,
7209 indent, rmslice->Analysisnames,
7210 indent, rmslice->Masking,
7211 indent, rmslice->Use);
7212
7213 ensSliceTrace(rmslice->Slice, level + 1);
7214
7215 ajStrDel(&indent);
7216
7217 return ajTrue;
7218 }
7219
7220
7221
7222
7223 /* @section fetch *************************************************************
7224 **
7225 ** Functions for fetching objects of an Ensembl Repeat-Masked Slice object.
7226 **
7227 ** @fdata [EnsPRepeatmaskedslice]
7228 **
7229 ** @nam3rule Fetch Fetch Ensembl Repeat-Masked Slice objects
7230 ** @nam4rule All Fetch all objects
7231 ** @nam4rule Sequence Fetch the sequence
7232 ** @nam5rule Seq Fetch as AJAX Sequence object
7233 ** @nam5rule Str Fetch as AJAX String object
7234 **
7235 ** @argrule Sequence rmslice [EnsPRepeatmaskedslice] Ensembl Repeat-Masked
7236 ** Slice
7237 ** @argrule Sequence mtype [EnsERepeatMaskType] Repeat masking type
7238 ** @argrule Seq Psequence [AjPSeq*] AJAX Sequence address
7239 ** @argrule Str Psequence [AjPStr*] AJAX String address
7240 **
7241 ** @valrule * [AjBool] ajTrue upon success, ajFalse otherwise
7242 **
7243 ** @fcategory misc
7244 ******************************************************************************/
7245
7246
7247
7248
7249 /* @func ensRepeatmaskedsliceFetchSequenceSeq *********************************
7250 **
7251 ** Fetch the (masked) sequence of an Ensembl Repeat-Masked Slice as
7252 ** AJAX Sequence.
7253 **
7254 ** @param [u] rmslice [EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice
7255 ** @param [u] mtype [EnsERepeatMaskType] Repeat masking type
7256 ** @param [wP] Psequence [AjPSeq*] Sequence address
7257 **
7258 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
7259 **
7260 ** @release 6.2.0
7261 ** @@
7262 ******************************************************************************/
7263
ensRepeatmaskedsliceFetchSequenceSeq(EnsPRepeatmaskedslice rmslice,EnsERepeatMaskType mtype,AjPSeq * Psequence)7264 AjBool ensRepeatmaskedsliceFetchSequenceSeq(EnsPRepeatmaskedslice rmslice,
7265 EnsERepeatMaskType mtype,
7266 AjPSeq *Psequence)
7267 {
7268 AjPStr name = NULL;
7269 AjPStr sequence = NULL;
7270
7271 if (!rmslice)
7272 return ajFalse;
7273
7274 if (!Psequence)
7275 return ajFalse;
7276
7277 name = ajStrNew();
7278
7279 ensSliceFetchName(rmslice->Slice, &name);
7280
7281 sequence = ajStrNew();
7282
7283 ensRepeatmaskedsliceFetchSequenceStr(rmslice, mtype, &sequence);
7284
7285 if (*Psequence)
7286 {
7287 ajSeqClear(*Psequence);
7288
7289 ajSeqAssignSeqS(*Psequence, sequence);
7290 ajSeqAssignNameS(*Psequence, name);
7291 }
7292 else
7293 *Psequence = ajSeqNewNameS(sequence, name);
7294
7295 ajSeqSetNuc(*Psequence);
7296
7297 ajStrDel(&sequence);
7298 ajStrDel(&name);
7299
7300 return ajTrue;
7301 }
7302
7303
7304
7305
7306 /* @func ensRepeatmaskedsliceFetchSequenceStr *********************************
7307 **
7308 ** Fetch the (masked) sequence for an Ensembl Repeat-Masked Slice as
7309 ** AJAX String.
7310 **
7311 ** @cc Bio::EnsEMBL::RepeatMaskedSlice::seq
7312 ** @cc Bio::EnsEMBL::Slice::_mask_features
7313 ** @param [u] rmslice [EnsPRepeatmaskedslice] Ensembl Repeat-Masked Slice
7314 ** @param [u] mtype [EnsERepeatMaskType] Repeat masking type
7315 ** @param [wP] Psequence [AjPStr*] Sequence String address
7316 **
7317 ** @return [AjBool] ajTrue upon success, ajFalse otherwise
7318 **
7319 ** @release 6.2.0
7320 ** @@
7321 ** NOTE: The Bio::EnsEMBL::RepeatMaskedSlice::subseq method has not been
7322 ** implemented, because the Ensembl Base Adaptor has no global Ensembl Feature
7323 ** cache. Handling of polymorphic Bio::EnsEMBL::Feature subclasses is simple in
7324 ** object-oriented Perl, but quite complex in ANSI C. The Perl method
7325 ** retrieves Bio::EnsEMBL::RepeatFeature objects for larger blocks
7326 ** (2^18 or 256 Ki base pairs) for more efficient caching of subsequent
7327 ** requests.
7328 ** However, the performance hit should be minimal, as the relational database
7329 ** management system (RDBMS) caches the underlying queries. It is also possible
7330 ** to fetch the (masked) sequence for the whole Repeat-Masked Slice and
7331 ** subsequently extract the sub sequence with corresponding AJAX functions.
7332 ** Therefore, the Bio::EnsEMBL::RepeatMaskedSlice::subseq method offers
7333 ** only small benefits over Bio::EnsEMBL::RepeatMaskedSlice::seq.
7334 ** As a consequence, the private Bio::EnsEMBL::Slice::_mask_features method
7335 ** has been merged into this function to simplify parameter checking if both,
7336 ** sequence and Repeat Feature objects are fetched from the same Slice.
7337 ******************************************************************************/
7338
ensRepeatmaskedsliceFetchSequenceStr(EnsPRepeatmaskedslice rmslice,EnsERepeatMaskType mtype,AjPStr * Psequence)7339 AjBool ensRepeatmaskedsliceFetchSequenceStr(EnsPRepeatmaskedslice rmslice,
7340 EnsERepeatMaskType mtype,
7341 AjPStr *Psequence)
7342 {
7343 ajint start = 0;
7344 ajint end = 0;
7345
7346 EnsERepeatMaskType msktyp = ensERepeatMaskTypeNULL;
7347 EnsERepeatMaskType *Pmsktyp = NULL;
7348
7349 AjIList iter = NULL;
7350 AjPList rfs = NULL;
7351
7352 AjPStr key = NULL;
7353
7354 EnsPDatabaseadaptor dba = NULL;
7355
7356 EnsPFeature feature = NULL;
7357
7358 EnsPRepeatconsensus rc = NULL;
7359
7360 EnsPRepeatfeature rf = NULL;
7361 EnsPRepeatfeatureadaptor rfa = NULL;
7362
7363 EnsPSliceadaptor sla = NULL;
7364
7365 if (ajDebugTest("ensRepeatmaskedsliceFetchSequenceStr"))
7366 ajDebug("ensRepeatmaskedsliceFetchSequenceStr\n"
7367 " rmslice %p\n"
7368 " mtype %d\n"
7369 " *Psequence %p\n",
7370 rmslice,
7371 mtype,
7372 *Psequence);
7373
7374 if (!rmslice)
7375 return ajFalse;
7376
7377 if (!Psequence)
7378 return ajFalse;
7379
7380 if (*Psequence)
7381 ajStrAssignClear(Psequence);
7382 else
7383 *Psequence = ajStrNew();
7384
7385 if ((mtype <= ensERepeatMaskTypeNULL) || (mtype > ensERepeatMaskTypeHard))
7386 {
7387 ajDebug("ensRepeatmaskedsliceFetchSequenceStr got unsupported "
7388 "masking type %d\n", mtype);
7389
7390 return ajFalse;
7391 }
7392
7393 if (!rmslice->Slice)
7394 {
7395 ajDebug("ensRepeatmaskedsliceFetchSequenceStr got an "
7396 "Ensembl Repeat-Masked Slice without an "
7397 "Ensembl Slice.\n");
7398
7399 return ajFalse;
7400 }
7401
7402 ensSliceFetchSequenceAllStr(rmslice->Slice, Psequence);
7403
7404 /* Sanity check */
7405
7406 if (ensSliceCalculateLength(rmslice->Slice) != ajStrGetLen(*Psequence))
7407 ajWarn("ensRepeatmaskedsliceFetchSequenceStr got Slice of length %u, "
7408 "but DNA sequence of length %u.\n",
7409 ensSliceCalculateLength(rmslice->Slice),
7410 ajStrGetLen(*Psequence));
7411
7412 sla = ensSliceGetAdaptor(rmslice->Slice);
7413
7414 dba = ensSliceadaptorGetDatabaseadaptor(sla);
7415
7416 rfa = ensRegistryGetRepeatfeatureadaptor(dba);
7417
7418 rfs = ajListNew();
7419
7420 if (rmslice->Analysisnames)
7421 {
7422 iter = ajListIterNew(rmslice->Analysisnames);
7423
7424 while (!ajListIterDone(iter))
7425 {
7426 key = (AjPStr) ajListIterGet(iter);
7427
7428 ensRepeatfeatureadaptorFetchAllbySlice(rfa,
7429 rmslice->Slice,
7430 key,
7431 (AjPStr) NULL,
7432 (AjPStr) NULL,
7433 (AjPStr) NULL,
7434 rfs);
7435 }
7436
7437 ajListIterDel(&iter);
7438 }
7439 else
7440 ensRepeatfeatureadaptorFetchAllbySlice(rfa,
7441 rmslice->Slice,
7442 (AjPStr) NULL,
7443 (AjPStr) NULL,
7444 (AjPStr) NULL,
7445 (AjPStr) NULL,
7446 rfs);
7447
7448 while (ajListPop(rfs, (void **) &rf))
7449 {
7450 /*
7451 ** NOTE: Since this function merges methods from the RepeatMaskedSlice
7452 ** and Slice classes the Repeat Feature objects are automatically
7453 ** fetched from the same Slice and do not need transforming back onto
7454 ** the Slice that has been passed into the second method. It is also
7455 ** not necessary to check whether the Feature objects overlap with the
7456 ** Slice.
7457 **
7458 ** For the AJAX String masking functions, the start and end coordinates
7459 ** of Repeat Feature objects partly outside the Slice need to be
7460 ** trimmed.
7461 */
7462
7463 feature = ensRepeatfeatureGetFeature(rf);
7464
7465 start = (ensFeatureGetStart(feature) >= 1) ?
7466 ensFeatureGetStart(feature) :
7467 1;
7468
7469 end = (ensFeatureGetEnd(feature) <=
7470 (ajint) ensSliceCalculateLength(rmslice->Slice)) ?
7471 ensFeatureGetEnd(feature) :
7472 (ajint) ensSliceCalculateLength(rmslice->Slice);
7473
7474 if (rmslice->Masking)
7475 {
7476 rc = ensRepeatfeatureGetRepeatconsensus(rf);
7477
7478 /* Get the masking type for the Repeat Consensus type. */
7479
7480 key = ajFmtStr("repeat_type_%S", ensRepeatconsensusGetType(rc));
7481
7482 Pmsktyp = (EnsERepeatMaskType *) ajTableFetchmodV(
7483 rmslice->Masking,
7484 (const void *) key);
7485
7486 if (Pmsktyp)
7487 msktyp = *Pmsktyp;
7488
7489 ajStrDel(&key);
7490
7491 /* Get the masking type for the Repeat Consensus class. */
7492
7493 key = ajFmtStr("repeat_class_%S", ensRepeatconsensusGetClass(rc));
7494
7495 Pmsktyp = (EnsERepeatMaskType *) ajTableFetchmodV(
7496 rmslice->Masking,
7497 (const void *) key);
7498
7499 if (Pmsktyp)
7500 msktyp = *Pmsktyp;
7501
7502 ajStrDel(&key);
7503
7504 /* Get the masking type for the Repeat Consensus name. */
7505
7506 key = ajFmtStr("repeat_name_%S", ensRepeatconsensusGetName(rc));
7507
7508 Pmsktyp = (EnsERepeatMaskType *) ajTableFetchmodV(
7509 rmslice->Masking,
7510 (const void *) key);
7511
7512 if (Pmsktyp)
7513 msktyp = *Pmsktyp;
7514
7515 ajStrDel(&key);
7516 }
7517
7518 /* Set the default masking type, if no other type has been defined. */
7519
7520 if (!msktyp)
7521 msktyp = mtype;
7522
7523 /* Mask the DNA sequence. */
7524
7525 switch (msktyp)
7526 {
7527 case ensERepeatMaskTypeNULL:
7528
7529 break;
7530
7531 case ensERepeatMaskTypeSoft:
7532
7533 ajStrFmtLowerSub(Psequence, start - 1, end - 1);
7534
7535 break;
7536
7537 case ensERepeatMaskTypeHard:
7538
7539 ajStrMaskRange(Psequence, start - 1, end - 1, 'N');
7540
7541 break;
7542
7543 default:
7544
7545 ajDebug("ensRepeatmaskedsliceFetchSequenceStr got unsupported "
7546 "Ensembl Repeat Mask Type enumeration %d\n", msktyp);
7547 }
7548
7549 ensRepeatfeatureDel(&rf);
7550 }
7551
7552 ajListFree(&rfs);
7553
7554 return ajTrue;
7555 }
7556