1 /* @source ajtaxread **********************************************************
2 **
3 ** AJAX taxonomy reading functions
4 **
5 ** These functions control all aspects of AJAX taxonomy reading
6 **
7 ** @author Copyright (C) 2010 Peter Rice
8 ** @version $Revision: 1.31 $
9 ** @modified Oct 5 pmr First version
10 ** @modified $Date: 2012/12/07 10:10:52 $ by $Author: rice $
11 ** @@
12 **
13 ** This library is free software; you can redistribute it and/or
14 ** modify it under the terms of the GNU Lesser General Public
15 ** License as published by the Free Software Foundation; either
16 ** version 2.1 of the License, or (at your option) any later version.
17 **
18 ** This library is distributed in the hope that it will be useful,
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 ** Lesser General Public License for more details.
22 **
23 ** You should have received a copy of the GNU Lesser General Public
24 ** License along with this library; if not, write to the Free Software
25 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 ** MA  02110-1301,  USA.
27 **
28 ******************************************************************************/
29 
30 #include "ajlib.h"
31 
32 #include "ajtaxread.h"
33 #include "ajtax.h"
34 #include "ajcall.h"
35 #include "ajlist.h"
36 #include "ajquery.h"
37 #include "ajtextread.h"
38 #include "ajnam.h"
39 #include "ajfileio.h"
40 
41 #include <string.h>
42 
43 
44 AjPTable taxDbMethods = NULL;
45 
46 static AjPStr taxinReadLine     = NULL;
47 
48 #define TAXFLAG_INHERITDIV    0x01
49 #define TAXFLAG_INHERITCODE   0x02
50 #define TAXFLAG_INHERITMITO   0x04
51 #define TAXFLAG_HIDDENGENBANK 0x10
52 #define TAXFLAG_HIDDENSUBTREE 0x20
53 
54 
55 static AjBool taxinReadEbi(AjPTaxin thys, AjPTax tax);
56 static AjBool taxinReadNcbi(AjPTaxin thys, AjPTax tax);
57 
58 
59 
60 
61 /* @datastatic TaxPInFormat *************************************************
62 **
63 ** Taxonomy input formats data structure
64 **
65 ** @alias TaxSInFormat
66 ** @alias TaxOInFormat
67 **
68 ** @attr Name [const char*] Format name
69 ** @attr Desc [const char*] Format description
70 ** @attr Alias [AjBool] Name is an alias for an identical definition
71 ** @attr Try [AjBool] If true, try for an unknown input. Duplicate names
72 **                    and read-anything formats are set false
73 ** @attr Read [AjBool function] Input function, returns ajTrue on success
74 ** @@
75 ******************************************************************************/
76 
77 typedef struct TaxSInFormat
78 {
79     const char *Name;
80     const char *Desc;
81     AjBool Alias;
82     AjBool Try;
83     AjBool (*Read) (AjPTaxin thys, AjPTax tax);
84 } TaxOInFormat;
85 
86 #define TaxPInFormat TaxOInFormat*
87 
88 static TaxOInFormat taxinFormatDef[] =
89 {
90 /* "Name",        "Description" */
91 /*     Alias,   Try,     */
92 /*     ReadFunction */
93   {"unknown",     "Unknown format",
94        AJFALSE, AJFALSE,
95        &taxinReadNcbi}, /* default to first format */
96   {"ncbi",          "NCBI taxonomy format",
97        AJFALSE, AJTRUE,
98        &taxinReadNcbi},
99   {"taxonomy",      "EBI taxonomy format",
100        AJFALSE, AJTRUE,
101        &taxinReadEbi},
102   {NULL, NULL, 0, 0, NULL}
103 };
104 
105 
106 
107 static ajuint taxinReadFmt(AjPTaxin taxin, AjPTax tax,
108                            ajuint format);
109 static AjBool taxinRead(AjPTaxin taxin, AjPTax tax);
110 static AjBool taxinformatFind(const AjPStr format, ajint* iformat);
111 static AjBool taxinFormatSet(AjPTaxin taxin, AjPTax tax);
112 static AjBool taxinListProcess(AjPTaxin taxin, AjPTax tax,
113                                const AjPStr listfile);
114 static void taxinListNoComment(AjPStr* text);
115 static void taxinQryRestore(AjPTaxin taxin, const AjPQueryList node);
116 static void taxinQrySave(AjPQueryList node, const AjPTaxin taxin);
117 static AjBool taxDefine(AjPTax thys, AjPTaxin taxin);
118 static AjBool taxinQryProcess(AjPTaxin taxin, AjPTax tax);
119 static AjBool taxinQueryMatch(const AjPQuery thys, const AjPTax tax);
120 
121 
122 
123 
124 /* @filesection ajtaxread ****************************************************
125 **
126 ** @nam1rule aj Function belongs to the AJAX library.
127 **
128 */
129 
130 
131 
132 
133 /* @datasection [AjPTaxin] Taxonomy input objects ***************************
134 **
135 ** Function is for manipulating taxonomy input objects
136 **
137 ** @nam2rule Taxin
138 ******************************************************************************/
139 
140 
141 
142 
143 /* @section Taxonomy input constructors ***************************************
144 **
145 ** All constructors return a new taxonomy input object by pointer. It
146 ** is the responsibility of the user to first destroy any previous
147 ** taxonomy input object. The target pointer does not need to be
148 ** initialised to NULL, but it is good programming practice to do so
149 ** anyway.
150 **
151 ** @fdata [AjPTaxin]
152 **
153 ** @nam3rule  New     Construct a new taxonomy input object
154 **
155 ** @valrule   *  [AjPTaxin] New taxonomy input object
156 **
157 ** @fcategory new
158 **
159 ******************************************************************************/
160 
161 
162 
163 
164 /* @func ajTaxinNew ***********************************************************
165 **
166 ** Creates a new taxonomy input object.
167 **
168 ** @return [AjPTaxin] New taxonomy input object.
169 ** @category new [AjPTaxin] Default constructor
170 **
171 ** @release 6.4.0
172 ** @@
173 ******************************************************************************/
174 
ajTaxinNew(void)175 AjPTaxin ajTaxinNew(void)
176 {
177     AjPTaxin pthis;
178 
179     AJNEW0(pthis);
180 
181     pthis->Input = ajTextinNewDatatype(AJDATATYPE_TAXON);
182 
183     pthis->TaxData      = NULL;
184 
185     return pthis;
186 }
187 
188 
189 
190 
191 
192 /* @section taxonomy input destructors ****************************************
193 **
194 ** Destruction destroys all internal data structures and frees the
195 ** memory allocated for the taxonomy input object.
196 **
197 ** @fdata [AjPTaxin]
198 **
199 ** @nam3rule Del Destructor
200 **
201 ** @argrule Del pthis [AjPTaxin*] Taxonomy input object
202 **
203 ** @valrule * [void]
204 **
205 ** @fcategory delete
206 **
207 ******************************************************************************/
208 
209 
210 
211 
212 /* @func ajTaxinDel ***********************************************************
213 **
214 ** Deletes a taxonomy input object.
215 **
216 ** @param [d] pthis [AjPTaxin*] Taxonomy input
217 ** @return [void]
218 ** @category delete [AjPTaxin] Default destructor
219 **
220 ** @release 6.4.0
221 ** @@
222 ******************************************************************************/
223 
void ajTaxinDel(AjPTaxin* pthis)
{
    AjPTaxin taxin;

    /* nothing to do for a NULL address or an already-empty pointer */
    if(!pthis || !*pthis)
        return;

    taxin = *pthis;

    ajDebug("ajTaxinDel called qry:'%S'\n", taxin->Input->Qry);

    /* release the text input object, then the wrapper itself */
    ajTextinDel(&taxin->Input);

    AJFREE(*pthis);
}
244 
245 
246 
247 
248 /* @section taxonomy input modifiers ******************************************
249 **
250 ** These functions use the contents of a taxonomy input object and
251 ** update them.
252 **
253 ** @fdata [AjPTaxin]
254 **
255 ** @nam3rule Clear Clear all values
256 ** @nam3rule Qry Reset using a query string
257 ** @suffix C Character string input
258 ** @suffix S String input
259 **
260 ** @argrule * thys [AjPTaxin] Taxonomy input object
261 ** @argrule C txt [const char*] Query text
262 ** @argrule S str [const AjPStr] query string
263 **
264 ** @valrule * [void]
265 **
266 ** @fcategory modify
267 **
268 ******************************************************************************/
269 
270 
271 
272 
273 /* @func ajTaxinClear *********************************************************
274 **
275 ** Clears a taxonomy input object back to "as new" condition, except
276 ** for the query list which must be preserved.
277 **
278 ** @param [w] thys [AjPTaxin] Taxonomy input
279 ** @return [void]
280 ** @category modify [AjPTaxin] Resets ready for reuse.
281 **
282 ** @release 6.4.0
283 ** @@
284 ******************************************************************************/
285 
void ajTaxinClear(AjPTaxin thys)
{
    ajDebug("ajTaxinClear called\n");

    /* reset the text input object, then drop the data pointer
    ** (the pointer is only cleared here, nothing is freed) */
    ajTextinClear(thys->Input);
    thys->TaxData = NULL;
}
297 
298 
299 
300 
301 /* @func ajTaxinQryC **********************************************************
302 **
303 ** Resets a taxonomy input object using a new Universal
304 ** Query Address
305 **
306 ** @param [u] thys [AjPTaxin] Taxonomy input object.
307 ** @param [r] txt [const char*] Query
308 ** @return [void]
309 **
310 ** @release 6.4.0
311 ** @@
312 ******************************************************************************/
313 
void ajTaxinQryC(AjPTaxin thys, const char* txt)
{
    /* reset first, then store the new query text */
    ajTaxinClear(thys);

    ajStrAssignC(&thys->Input->Qry, txt);
}
321 
322 
323 
324 
325 
326 /* @func ajTaxinQryS **********************************************************
327 **
328 ** Resets a taxonomy input object using a new Universal
329 ** Query Address
330 **
331 ** @param [u] thys [AjPTaxin] Taxonomy input object.
332 ** @param [r] str [const AjPStr] Query
333 ** @return [void]
334 **
335 ** @release 6.4.0
336 ** @@
337 ******************************************************************************/
338 
void ajTaxinQryS(AjPTaxin thys, const AjPStr str)
{
    /* reset first, then store a copy of the new query string */
    ajTaxinClear(thys);

    ajStrAssignS(&thys->Input->Qry, str);
}
346 
347 
348 
349 
350 /* @section casts *************************************************************
351 **
352 ** Return values
353 **
354 ** @fdata [AjPTaxin]
355 **
356 ** @nam3rule Trace Write debugging output
357 **
358 ** @argrule * thys [const AjPTaxin] Taxonomy input object
359 **
360 ** @valrule * [void]
361 **
362 ** @fcategory cast
363 **
364 ******************************************************************************/
365 
366 
367 
368 
369 /* @func ajTaxinTrace *********************************************************
370 **
371 ** Debug calls to trace the data in a taxonomy input object.
372 **
373 ** @param [r] thys [const AjPTaxin] Taxonomy input object.
374 ** @return [void]
375 **
376 ** @release 6.4.0
377 ** @@
378 ******************************************************************************/
379 
ajTaxinTrace(const AjPTaxin thys)380 void ajTaxinTrace(const AjPTaxin thys)
381 {
382     ajDebug("taxonomy input trace\n");
383     ajDebug("====================\n\n");
384 
385     ajTextinTrace(thys->Input);
386 
387     if(thys->TaxData)
388 	ajDebug( "  TaxData: exists\n");
389 
390     return;
391 }
392 
393 
394 
395 
396 /* @section Taxonomy data inputs **********************************************
397 **
398 ** These functions read the taxonomy data provided by the first argument
399 **
400 ** @fdata [AjPTaxin]
401 **
402 ** @nam3rule Read Read taxonomy data
403 **
404 ** @argrule Read taxin [AjPTaxin] Taxonomy input object
405 ** @argrule Read tax [AjPTax] Taxonomy data
406 **
407 ** @valrule * [AjBool] true on success
408 **
409 ** @fcategory input
410 **
411 ******************************************************************************/
412 
413 
414 
415 
416 /* @func ajTaxinRead **********************************************************
417 **
418 ** If the file is not yet open, calls taxinQryProcess to convert the query
419 ** into an open file stream.
420 **
421 ** Uses taxinRead for the actual file reading.
422 **
423 ** Returns the results in the AjPTax object.
424 **
425 ** @param [u] taxin [AjPTaxin] Taxonomy data input definitions
426 ** @param [w] tax [AjPTax] Taxonomy data returned.
427 ** @return [AjBool] ajTrue on success.
428 ** @category input [AjPTax] Master taxonomy data input,
429 **                  calls specific functions for file access type
430 **                  and taxonomy data format.
431 **
432 ** @release 6.4.0
433 ** @@
434 ******************************************************************************/
435 
ajTaxinRead(AjPTaxin taxin,AjPTax tax)436 AjBool ajTaxinRead(AjPTaxin taxin, AjPTax tax)
437 {
438     AjBool ret       = ajFalse;
439     AjPQueryList node = NULL;
440     AjBool listdata  = ajFalse;
441 
442     ajDebug("ajTaxinRead: Filebuff:%x\n",
443             taxin->Input->Filebuff);
444 
445     if(taxin->Input->Filebuff)
446     {
447 	/* (a) if file still open, keep reading */
448 	ajDebug("ajTaxinRead: input file '%F' still there, try again\n",
449 		taxin->Input->Filebuff->File);
450 	ret = taxinRead(taxin, tax);
451 	ajDebug("ajTaxinRead: open buffer  qry: '%S' returns: %B\n",
452 		taxin->Input->Qry, ret);
453     }
454     else
455     {
456 	/* (b) if we have a list, try the next query in the list */
457 	if(ajListGetLength(taxin->Input->List))
458 	{
459 	    listdata = ajTrue;
460 	    ajListPop(taxin->Input->List, (void**) &node);
461 
462 	    ajDebug("++pop from list '%S'\n", node->Qry);
463 	    ajTaxinQryS(taxin, node->Qry);
464 	    ajDebug("++SAVE TAXIN '%S' '%S' %d\n",
465 		    taxin->Input->Qry,
466 		    taxin->Input->Formatstr, taxin->Input->Format);
467 
468             taxinQryRestore(taxin, node);
469 
470 	    ajStrDel(&node->Qry);
471 	    ajStrDel(&node->Formatstr);
472 	    AJFREE(node);
473 
474 	    ajDebug("ajTaxinRead: open list, try '%S'\n", taxin->Input->Qry);
475 
476 	    if(!taxinQryProcess(taxin, tax) &&
477                !ajListGetLength(taxin->Input->List))
478 		return ajFalse;
479 
480 	    ret = taxinRead(taxin, tax);
481 	    ajDebug("ajTaxinRead: list qry: '%S' returns: %B\n",
482 		    taxin->Input->Qry, ret);
483 	}
484 	else
485 	{
486 	    ajDebug("ajTaxinRead: no file yet - test query '%S'\n",
487                     taxin->Input->Qry);
488 
489 	    /* (c) Must be a query - decode it */
490 	    if(!taxinQryProcess(taxin, tax) &&
491                !ajListGetLength(taxin->Input->List))
492 		return ajFalse;
493 
494 	    if(ajListGetLength(taxin->Input->List)) /* could be a new list */
495 		listdata = ajTrue;
496 
497 	    ret = taxinRead(taxin, tax);
498 	    ajDebug("ajTaxinRead: new qry: '%S' returns: %B\n",
499 		    taxin->Input->Qry, ret);
500 	}
501     }
502 
503     /* Now read whatever we got */
504 
505     while(!ret && ajListGetLength(taxin->Input->List))
506     {
507 	/* Failed, but we have a list still - keep trying it */
508         if(listdata)
509 	    ajErr("Failed to read taxons '%S'", taxin->Input->Qry);
510 
511 	listdata = ajTrue;
512 	ajListPop(taxin->Input->List,(void**) &node);
513 	ajDebug("++try again: pop from list '%S'\n", node->Qry);
514 	ajTaxinQryS(taxin, node->Qry);
515 	ajDebug("++SAVE (AGAIN) TAXIN '%S' '%S' %d\n",
516 		taxin->Input->Qry,
517 		taxin->Input->Formatstr, taxin->Input->Format);
518 
519 	taxinQryRestore(taxin, node);
520 
521 	ajStrDel(&node->Qry);
522 	ajStrDel(&node->Formatstr);
523 	AJFREE(node);
524 
525 	if(!taxinQryProcess(taxin, tax))
526 	    continue;
527 
528 	ret = taxinRead(taxin, tax);
529 	ajDebug("ajTaxinRead: list retry qry: '%S' returns: %B\n",
530 		taxin->Input->Qry, ret);
531     }
532 
533     if(!ret)
534     {
535 	if(listdata)
536 	    ajErr("Failed to read taxon '%S'", taxin->Input->Qry);
537 
538 	return ajFalse;
539     }
540 
541 
542     taxDefine(tax, taxin);
543 
544     return ajTrue;
545 }
546 
547 
548 
549 
550 /* @funcstatic taxinQueryMatch ************************************************
551 **
552 ** Compares a taxonomy data item to a query and returns true if they match.
553 **
554 ** @param [r] thys [const AjPQuery] query.
555 ** @param [r] tax [const AjPTax] Taxonomy data.
556 ** @return [AjBool] ajTrue if the taxonomy data matches the query.
557 **
558 ** @release 6.4.0
559 ** @@
560 ******************************************************************************/
561 
taxinQueryMatch(const AjPQuery thys,const AjPTax tax)562 static AjBool taxinQueryMatch(const AjPQuery thys, const AjPTax tax)
563 {
564     AjBool tested = ajFalse;
565     AjIList iterfield  = NULL;
566     AjPQueryField field = NULL;
567     AjBool ok = ajFalse;
568 
569     ajDebug("taxinQueryMatch '%S' fields: %Lu Case %B Done %B\n",
570 	    tax->Id, ajListGetLength(thys->QueryFields),
571             thys->CaseId, thys->QryDone);
572 
573     if(!thys)			   /* no query to test, that's fine */
574 	return ajTrue;
575 
576     if(thys->QryDone)			/* do we need to test here? */
577 	return ajTrue;
578 
579     /* test the query field(s) */
580 
581     iterfield = ajListIterNewread(thys->QueryFields);
582     while(!ajListIterDone(iterfield))
583     {
584         field = ajListIterGet(iterfield);
585 
586         ajDebug("  field: '%S' Query: '%S'\n",
587                 field->Field, field->Wildquery);
588         if(ajStrMatchC(field->Field, "id"))
589         {
590             ajDebug("  id test: '%S'\n",
591                     tax->Id);
592             if(thys->CaseId)
593             {
594                 if(ajStrMatchWildS(tax->Id, field->Wildquery))
595                 {
596                     ajListIterDel(&iterfield);
597                     return ajTrue;
598                 }
599             }
600             else
601             {
602                 if(ajStrMatchWildCaseS(tax->Id, field->Wildquery))
603                 {
604                     ajListIterDel(&iterfield);
605                     return ajTrue;
606                 }
607             }
608 
609             ajDebug("id test failed\n");
610             tested = ajTrue;
611             ok = ajFalse;
612         }
613 
614         if(ajStrMatchC(field->Field, "acc")) /* test id, use trueid */
615         {
616             if(ajStrMatchWildCaseS(tax->Id, field->Wildquery))
617             {
618                 ajListIterDel(&iterfield);
619                 return ajTrue;
620             }
621         }
622 
623     }
624 
625     ajListIterDel(&iterfield);
626 
627     if(!tested)		    /* nothing to test, so accept it anyway */
628     {
629         ajDebug("  no tests: assume OK\n");
630 	return ajTrue;
631     }
632 
633     ajDebug("result: %B\n", ok);
634 
635     return ok;
636 }
637 
638 
639 
640 
641 /* @funcstatic taxDefine ******************************************************
642 **
643 ** Make sure all taxonomy data object attributes are defined
644 ** using values from the taxonomy input object if needed
645 **
646 ** @param [w] thys [AjPTax] Taxonomy data returned.
647 ** @param [u] taxin [AjPTaxin] Taxonomy data input definitions
648 ** @return [AjBool] ajTrue on success.
649 **
650 ** @release 6.4.0
651 ** @@
652 ******************************************************************************/
653 
taxDefine(AjPTax thys,AjPTaxin taxin)654 static AjBool taxDefine(AjPTax thys, AjPTaxin taxin)
655 {
656 
657     /* if values are missing in the taxonomy object, we can use defaults
658        from taxin or calculate where possible */
659 
660     /* assign the dbname if defined in the taxin object */
661     if(ajStrGetLen(taxin->Input->Db))
662       ajStrAssignS(&thys->Db, taxin->Input->Db);
663 
664     return ajTrue;
665 }
666 
667 
668 
669 
670 
671 /* @funcstatic taxinReadFmt ***************************************************
672 **
673 ** Tests whether taxonomy data can be read using the specified format.
674 ** Then tests whether the taxonomy data matches taxonomy data query criteria
675 ** and checks any specified type. Applies upper and lower case.
676 **
677 ** @param [u] taxin [AjPTaxin] Taxonomy data input object
678 ** @param [w] tax [AjPTax] Taxonomy data object
679 ** @param [r] format [ajuint] input format code
680 ** @return [ajuint] 0 if successful.
681 **                  1 if the query match failed.
682 **                  2 if the taxonomy data type failed
683 **                  3 if it failed to read any taxonomy data
684 **
685 ** @release 6.4.0
686 ** @@
687 ** This is the only function that calls the appropriate Read function
688 ** taxinReadXxxxxx where Xxxxxxx is the supported taxonomy data format.
689 **
690 ** Some of the taxinReadXxxxxx functions fail to reset the buffer correctly,
691 ** which is a very serious problem when cycling through all of them to
692 ** identify an unknown format. The extra ajFilebuffReset call at the end is
693 ** intended to address this problem. The individual functions should still
694 ** reset the buffer in case they are called from elsewhere.
695 **
696 ******************************************************************************/
697 
static ajuint taxinReadFmt(AjPTaxin taxin, AjPTax tax,
                           ajuint format)
{
    const char *fmtname = taxinFormatDef[format].Name;

    ajDebug("++taxinReadFmt format %d (%s) '%S'\n",
            format, fmtname,
            taxin->Input->Qry);

    taxin->Input->Records = 0;

    /* Calling funclist taxinFormatDef() */
    if(!(*taxinFormatDef[format].Read)(taxin, tax))
    {
        /* the reader found nothing it could use */
        ajDebug("Testing input buffer: IsBuff: %B Eof: %B\n",
                ajFilebuffIsBuffered(taxin->Input->Filebuff),
                ajFilebuffIsEof(taxin->Input->Filebuff));

        /* an unbuffered file at end-of-file cannot be rewound */
        if(!ajFilebuffIsBuffered(taxin->Input->Filebuff) &&
           ajFilebuffIsEof(taxin->Input->Filebuff))
            return FMT_EOF;

        /* rewind so that another format can be tried on the same data */
        ajFilebuffReset(taxin->Input->Filebuff);
        ajDebug("Format %d (%s) failed, file buffer reset by taxinReadFmt\n",
                format, fmtname);
        /* ajFilebuffTraceFull(taxin->Filebuff, 10, 10);*/

        ajDebug("++taxinReadFmt failed - nothing read\n");

        return FMT_FAIL;
    }

    /* the reader succeeded: record the format on both objects */
    ajDebug("taxinReadFmt success with format %d (%s)\n",
            format, fmtname);
    ajDebug("id: '%S'\n",
            tax->Id);

    taxin->Input->Format = format;
    ajStrAssignC(&taxin->Input->Formatstr, fmtname);
    ajStrAssignC(&tax->Formatstr, fmtname);
    ajStrAssignEmptyS(&tax->Db, taxin->Input->Db);
    ajStrAssignS(&tax->Filename, taxin->Input->Filename);

    if(!taxinQueryMatch(taxin->Input->Query, tax))
    {
        /* read correctly, but not the entry that was asked for */
        ajDebug("query match failed, continuing ...\n");
        ajTaxClear(tax);

        return FMT_NOMATCH;
    }

    /* ajTaxinTrace(taxin); */

    return FMT_OK;
}
752 
753 
754 
755 
756 /* @funcstatic taxinRead ******************************************************
757 **
758 ** Given data in a taxin structure, tries to read everything needed
759 ** using the specified format or by trial and error.
760 **
761 ** @param [u] taxin [AjPTaxin] Taxonomy data input object
762 ** @param [w] tax [AjPTax] Taxonomy data object
763 ** @return [AjBool] ajTrue on success
764 **
765 ** @release 6.4.0
766 ** @@
767 ******************************************************************************/
768 
taxinRead(AjPTaxin taxin,AjPTax tax)769 static AjBool taxinRead(AjPTaxin taxin, AjPTax tax)
770 {
771     ajuint i;
772     ajuint istat = 0;
773     ajuint jstat = 0;
774 
775     AjPFilebuff buff = taxin->Input->Filebuff;
776     AjBool ok;
777 
778     AjPTextAccess  textaccess  = taxin->Input->Query->TextAccess;
779     AjPTaxAccess taxaccess = taxin->Input->Query->Access;
780 
781     ajTaxClear(tax);
782     ajDebug("taxinRead: cleared\n");
783 
784     if(taxin->Input->Single && taxin->Input->Count)
785     {
786 	/*
787 	** One taxonomy data item at a time is read.
788 	** The first taxonomy data item was read by ACD
789 	** for the following ones we need to reset the AjPTaxin
790 	**
791 	** Single is set by the access method
792 	*/
793 
794 	ajDebug("taxinRead: single access - count %d - call access"
795 		" routine again\n",
796 		taxin->Input->Count);
797 	/* Calling funclist taxinAccess() */
798 	if(textaccess)
799         {
800             if(!(*textaccess->Access)(taxin->Input))
801             {
802                 ajDebug("taxinRead: (*textaccess->Access)(taxin->Input) "
803                         "*failed*\n");
804 
805                 return ajFalse;
806             }
807         }
808 
809 	if(taxaccess)
810         {
811             if(!(*taxaccess->Access)(taxin))
812             {
813                 ajDebug("taxinRead: (*taxaccess->Access)(taxin) "
814                         "*failed*\n");
815 
816                 return ajFalse;
817             }
818         }
819 
820         buff = taxin->Input->Filebuff;
821     }
822 
823     ajDebug("taxinRead: taxin format %d '%S'\n", taxin->Input->Format,
824 	    taxin->Input->Formatstr);
825 
826     taxin->Input->Count++;
827 
828     if(!taxin->Input->Filebuff)
829 	return ajFalse;
830 
831     ok = ajFilebuffIsBuffered(taxin->Input->Filebuff);
832 
833     while(ok)
834     {				/* skip blank lines */
835         ok = ajBuffreadLine(taxin->Input->Filebuff, &taxinReadLine);
836 
837         if(!ajStrIsWhite(taxinReadLine))
838         {
839             ajFilebuffClear(taxin->Input->Filebuff,1);
840             break;
841         }
842     }
843 
844     if(!taxin->Input->Format)
845     {			   /* no format specified, try all defaults */
846 	for(i = 1; taxinFormatDef[i].Name; i++)
847 	{
848 	    if(!taxinFormatDef[i].Try)	/* skip if Try is ajFalse */
849 		continue;
850 
851 	    ajDebug("taxinRead:try format %d (%s)\n",
852 		    i, taxinFormatDef[i].Name);
853 
854 	    istat = taxinReadFmt(taxin, tax, i);
855 
856 	    switch(istat)
857 	    {
858 	    case FMT_OK:
859 		ajDebug("++taxinRead OK, set format %d\n",
860                         taxin->Input->Format);
861 		taxDefine(tax, taxin);
862 
863 		return ajTrue;
864 	    case FMT_BADTYPE:
865 		ajDebug("taxinRead: (a1) "
866                         "taxinReadFmt stat == BADTYPE *failed*\n");
867 
868 		return ajFalse;
869 	    case FMT_FAIL:
870 		ajDebug("taxinRead: (b1) "
871                         "taxinReadFmt stat == FAIL *failed*\n");
872 		break;			/* we can try next format */
873 	    case FMT_NOMATCH:
874 		ajDebug("taxinRead: (c1) "
875                         "taxinReadFmt stat==NOMATCH try again\n");
876 		break;
877 	    case FMT_EOF:
878 		ajDebug("taxinRead: (d1) "
879                         "taxinReadFmt stat == EOF *failed*\n");
880 		return ajFalse;			/* EOF and unbuffered */
881 	    case FMT_EMPTY:
882 		ajWarn("taxonomy data '%S' has zero length, ignored",
883 		       ajTaxGetQryS(tax));
884 		ajDebug("taxinRead: (e1) "
885                         "taxinReadFmt stat==EMPTY try again\n");
886 		break;
887 	    default:
888 		ajDebug("unknown code %d from taxinReadFmt\n", stat);
889 	    }
890 
891 	    ajTaxClear(tax);
892 
893 	    if(taxin->Input->Format)
894 		break;			/* we read something */
895 
896             ajFilebuffTrace(taxin->Input->Filebuff);
897 	}
898 
899 	if(!taxin->Input->Format)
900 	{		     /* all default formats failed, give up */
901 	    ajDebug("taxinRead:all default formats failed, give up\n");
902 
903 	    return ajFalse;
904 	}
905 
906 	ajDebug("++taxinRead set format %d\n",
907                 taxin->Input->Format);
908     }
909     else
910     {					/* one format specified */
911 	ajDebug("taxinRead: one format specified\n");
912 	ajFilebuffSetUnbuffered(taxin->Input->Filebuff);
913 
914 	ajDebug("++taxinRead known format %d\n",
915                 taxin->Input->Format);
916 	istat = taxinReadFmt(taxin, tax, taxin->Input->Format);
917 
918 	switch(istat)
919 	{
920 	case FMT_OK:
921 	    taxDefine(tax, taxin);
922 
923 	    return ajTrue;
924 	case FMT_BADTYPE:
925 	    ajDebug("taxinRead: (a2) "
926                     "taxinReadFmt stat == BADTYPE *failed*\n");
927 
928 	    return ajFalse;
929 
930         case FMT_FAIL:
931 	    ajDebug("taxinRead: (b2) "
932                     "taxinReadFmt stat == FAIL *failed*\n");
933 
934 	    return ajFalse;
935 
936         case FMT_NOMATCH:
937 	    ajDebug("taxinRead: (c2) "
938                     "taxinReadFmt stat == NOMATCH *try again*\n");
939 	    break;
940 	case FMT_EOF:
941 	    ajDebug("taxinRead: (d2) "
942                     "taxinReadFmt stat == EOF *try again*\n");
943             if(taxin->Input->Records)
944                 ajErr("Error reading file '%F' with format '%s': "
945                       "end-of-file before end of data "
946                       "(read %u records)",
947                       ajFilebuffGetFile(taxin->Input->Filebuff),
948                       taxinFormatDef[taxin->Input->Format].Name,
949                       taxin->Input->Records);
950 	    break;		     /* simply end-of-file */
951 	case FMT_EMPTY:
952 	    ajWarn("assmebly data '%S' has zero length, ignored",
953 		   ajTaxGetQryS(tax));
954 	    ajDebug("taxinRead: (e2) "
955                     "taxinReadFmt stat == EMPTY *try again*\n");
956 	    break;
957 	default:
958 	    ajDebug("unknown code %d from taxinReadFmt\n", stat);
959 	}
960 
961 	ajTaxClear(tax); /* 1 : read, failed to match id/acc/query */
962     }
963 
964     /* failed - probably entry/accession query failed. Can we try again? */
965 
966     ajDebug("taxinRead failed - try again with format %d '%s' code %d\n",
967 	    taxin->Input->Format,
968             taxinFormatDef[taxin->Input->Format].Name, istat);
969 
970     ajDebug("Search:%B Chunk:%B Data:%x ajFileBuffEmpty:%B\n",
971 	    taxin->Input->Search, taxin->Input->ChunkEntries,
972             taxin->Input->TextData, ajFilebuffIsEmpty(buff));
973 
974     if(ajFilebuffIsEmpty(buff) && taxin->Input->ChunkEntries)
975     {
976 	if(textaccess && !(*textaccess->Access)(taxin->Input))
977             return ajFalse;
978 	else if(taxaccess && !(*taxaccess->Access)(taxin))
979             return ajFalse;
980         buff = taxin->Input->Filebuff;
981     }
982 
983 
984     /* need to check end-of-file to avoid repeats */
985     while(taxin->Input->Search &&
986           (taxin->Input->TextData || !ajFilebuffIsEmpty(buff)))
987     {
988 	jstat = taxinReadFmt(taxin, tax, taxin->Input->Format);
989 
990 	switch(jstat)
991 	{
992 	case FMT_OK:
993 	    taxDefine(tax, taxin);
994 
995 	    return ajTrue;
996 
997         case FMT_BADTYPE:
998 	    ajDebug("taxinRead: (a3) "
999                     "taxinReadFmt stat == BADTYPE *failed*\n");
1000 
1001 	    return ajFalse;
1002 
1003         case FMT_FAIL:
1004 	    ajDebug("taxinRead: (b3) "
1005                     "taxinReadFmt stat == FAIL *failed*\n");
1006 
1007 	    return ajFalse;
1008 
1009 	case FMT_NOMATCH:
1010 	    ajDebug("taxinRead: (c3) "
1011                     "taxinReadFmt stat == NOMATCH *try again*\n");
1012 	    break;
1013 	case FMT_EOF:
1014 	    ajDebug("taxinRead: (d3) "
1015                     "taxinReadFmt stat == EOF *failed*\n");
1016 
1017 	    return ajFalse;			/* we already tried again */
1018 
1019         case FMT_EMPTY:
1020 	    if(istat != FMT_EMPTY)
1021                 ajWarn("assmebly data '%S' has zero length, ignored",
1022                        ajTaxGetQryS(tax));
1023 	    ajDebug("taxinRead: (e3) "
1024                     "taxinReadFmt stat == EMPTY *try again*\n");
1025 	    break;
1026 
1027         default:
1028 	    ajDebug("unknown code %d from taxinReadFmt\n", stat);
1029 	}
1030 
1031 	ajTaxClear(tax); /* 1 : read, failed to match id/acc/query */
1032     }
1033 
1034     if(taxin->Input->Format)
1035 	ajDebug("taxinRead: *failed* to read taxonomy data %S "
1036                 "using format %s\n",
1037 		taxin->Input->Qry,
1038                 taxinFormatDef[taxin->Input->Format].Name);
1039     else
1040 	ajDebug("taxinRead: *failed* to read taxonomy data %S "
1041                 "using any format\n",
1042 		taxin->Input->Qry);
1043 
1044     return ajFalse;
1045 }
1046 
1047 
1048 
1049 
1050 /* @funcstatic taxinReadEbi ***************************************************
1051 **
1052 ** Given data in a taxonomy structure, tries to read everything needed
1053 ** using EBI format.
1054 **
1055 ** @param [u] taxin [AjPTaxin] Tax input object
1056 ** @param [w] tax [AjPTax] tax object
1057 ** @return [AjBool] ajTrue on success
1058 **
1059 ** @release 6.6.0
1060 ** @@
1061 ******************************************************************************/
1062 
taxinReadEbi(AjPTaxin taxin,AjPTax tax)1063 static AjBool taxinReadEbi(AjPTaxin taxin, AjPTax tax)
1064 {
1065     AjPFilebuff buff;
1066 
1067     ajlong fpos     = 0;
1068     ajuint linecnt = 0;
1069 
1070     AjPStrTok handle = NULL;
1071     AjPStr tmpstr = NULL;
1072 
1073     ajDebug("taxinReadEbi\n");
1074     ajTaxClear(tax);
1075     buff = taxin->Input->Filebuff;
1076 
1077     /* ajFilebuffTrace(buff); */
1078 
1079     while(ajBuffreadLinePos(buff, &taxinReadLine, &fpos))
1080     {
1081         linecnt++;
1082     }
1083 
1084     ajStrTokenDel(&handle);
1085     ajStrDel(&tmpstr);
1086 
1087     return ajTrue;
1088 }
1089 
1090 
1091 
1092 
1093 /* @funcstatic taxinReadNcbi **************************************************
1094 **
1095 ** Given data in a taxonomy structure, tries to read everything needed
1096 ** using NCBI format.
1097 **
1098 ** @param [u] taxin [AjPTaxin] Tax input object
1099 ** @param [w] tax [AjPTax] tax object
1100 ** @return [AjBool] ajTrue on success
1101 **
1102 ** @release 6.4.0
1103 ** @@
1104 ******************************************************************************/
1105 
taxinReadNcbi(AjPTaxin taxin,AjPTax tax)1106 static AjBool taxinReadNcbi(AjPTaxin taxin, AjPTax tax)
1107 {
1108     AjPFilebuff buff;
1109 
1110     ajlong fpos     = 0;
1111     ajuint linecnt = 0;
1112     ajuint icode;
1113 
1114     AjPStrTok handle = NULL;
1115     AjPStr tmpstr = NULL;
1116 
1117     AjPTaxname name = NULL;
1118 
1119     ajDebug("taxinReadNcbi\n");
1120     ajTaxClear(tax);
1121     buff = taxin->Input->Filebuff;
1122 
1123     /* ajFilebuffTrace(buff); */
1124 
1125     while(ajBuffreadLinePos(buff, &taxinReadLine, &fpos))
1126     {
1127         ajStrTokenAssignC(&handle, taxinReadLine, "|");
1128 
1129         linecnt++;
1130         if(linecnt == 1)
1131         {
1132             ajStrTokenNextParse(handle, &tax->Id);
1133             ajStrTrimWhite(&tax->Id);
1134             ajStrToUint(tax->Id, &tax->Taxid);
1135 
1136             ajStrTokenNextParse(handle, &tmpstr);
1137             ajStrTrimWhite(&tmpstr);
1138             ajStrToUint(tmpstr, &tax->Parent);
1139 
1140             ajStrTokenNextParse(handle, &tax->Rank);
1141             ajStrTrimWhite(&tax->Rank);
1142 
1143             ajStrTokenNextParse(handle, &tax->Emblcode);
1144             ajStrTrimWhite(&tax->Emblcode);
1145 
1146             ajStrTokenNextParse(handle, &tmpstr);
1147             ajStrTrimWhite(&tmpstr);
1148             ajStrToUint(tmpstr, &icode);
1149             tax->Divid = icode;
1150 
1151             ajStrTokenNextParse(handle, &tmpstr);
1152             ajStrTrimWhite(&tmpstr);
1153             if(ajStrMatchC(tmpstr, "1"))
1154                 tax->Flags |= 1;
1155 
1156             ajStrTokenNextParse(handle, &tmpstr);
1157             ajStrTrimWhite(&tmpstr);
1158             ajStrToUint(tmpstr, &icode);
1159             tax->Gencode = icode;
1160 
1161             ajStrTokenNextParse(handle, &tmpstr);
1162             ajStrTrimWhite(&tmpstr);
1163             if(ajStrMatchC(tmpstr, "1"))
1164                 tax->Flags |= 2;
1165 
1166             ajStrTokenNextParse(handle, &tmpstr);
1167             ajStrTrimWhite(&tmpstr);
1168             ajStrToUint(tmpstr, &icode);
1169             tax->Mitocode = icode;
1170 
1171             ajStrTokenNextParse(handle, &tmpstr);
1172             ajStrTrimWhite(&tmpstr);
1173             if(ajStrMatchC(tmpstr, "1"))
1174                 tax->Flags |= 4;
1175 
1176             ajStrTokenNextParse(handle, &tmpstr);
1177             ajStrTrimWhite(&tmpstr);
1178             if(ajStrMatchC(tmpstr, "1"))
1179                 tax->Flags |= 8;
1180 
1181             ajStrTokenNextParse(handle, &tmpstr);
1182             ajStrTrimWhite(&tmpstr);
1183             if(ajStrMatchC(tmpstr, "1"))
1184                 tax->Flags |= 16;
1185 
1186             ajStrTokenNextParse(handle, &tax->Comment);
1187             ajStrTrimWhite(&tax->Comment);
1188         }
1189         else
1190         {
1191             ajStrTokenNextParse(handle, &tmpstr);
1192             ajStrTrimWhite(&tmpstr);
1193             if(!ajStrMatchS(tax->Id, tmpstr))
1194             {
1195                 ajDebug("names.dmp id '%S' != nodes.dmp id '%S'\n",
1196                        tax->Id, tmpstr);
1197                 break;
1198             }
1199 
1200             name = ajTaxnameNew();
1201 
1202             ajStrTokenNextParse(handle, &name->Name);
1203             ajStrTrimWhite(&name->Name);
1204 
1205             ajStrTokenNextParse(handle, &name->UniqueName);
1206             ajStrTrimWhite(&name->UniqueName);
1207 
1208             ajStrTokenNextParse(handle, &name->NameClass);
1209             ajStrTrimWhite(&name->NameClass);
1210 
1211             if(ajStrMatchC(name->NameClass, "scientific name"))
1212                 ajStrAssignEmptyS(&tax->Name, name->Name);
1213 
1214             ajListPushAppend(tax->Namelist, name);
1215         }
1216 
1217         ajDebug("line %u:%S\n", linecnt, taxinReadLine);
1218 
1219         /* add line to AjPTax object */
1220     }
1221 
1222     ajStrTokenDel(&handle);
1223     ajStrDel(&tmpstr);
1224 
1225     return ajTrue;
1226 }
1227 
1228 
1229 
1230 
1231 /* @datasection [none] Miscellaneous ******************************************
1232 **
1233 ** Taxonomy input internals
1234 **
1235 ** @nam2rule Taxin Taxonomy input
1236 **
1237 ******************************************************************************/
1238 
1239 
1240 
1241 
1242 /* @section Printing **********************************************************
1243 **
1244 ** Printing details of the internals to a file
1245 **
1246 ** @fdata [none]
1247 **
1248 ** @nam2rule Taxinprint
1249 **
1250 ** @fcategory output
1251 **
1252 ******************************************************************************/
1253 
1254 
1255 
1256 
1257 /* @section Print *************************************************************
1258 **
1259 ** Printing to a file
1260 **
1261 ** @fdata [none]
1262 **
1263 ** @nam3rule Book Print as docbook table
1264 ** @nam3rule Html Print as html table
1265 ** @nam3rule Wiki Print as wiki table
1266 ** @nam3rule Text Print as text
1267 **
1268 ** @argrule * outf [AjPFile] output file
1269 ** @argrule Text full [AjBool] Print all details
1270 **
1271 ** @valrule * [void]
1272 **
1273 ** @fcategory cast
1274 **
1275 ******************************************************************************/
1276 
1277 
1278 
1279 
1280 /* @func ajTaxinprintBook *****************************************************
1281 **
1282 ** Reports the internal data structures as a Docbook table
1283 **
1284 ** @param [u] outf [AjPFile] Output file
1285 ** @return [void]
1286 **
1287 ** @release 6.4.0
1288 ** @@
1289 ******************************************************************************/
1290 
ajTaxinprintBook(AjPFile outf)1291 void ajTaxinprintBook(AjPFile outf)
1292 {
1293     ajuint i = 0;
1294     ajuint j = 0;
1295     AjPStr namestr = NULL;
1296     AjPList fmtlist;
1297     AjPStr* names;
1298 
1299     fmtlist = ajListstrNew();
1300 
1301     ajFmtPrintF(outf, "<para>The supported taxonomy formats are summarised "
1302                 "in the table below. "
1303                 "The columns are as follows: "
1304                 "<emphasis>Input format</emphasis> (format name), "
1305                 "<emphasis>Try</emphasis> (indicates whether the "
1306                 "format can be detected automatically on input), and "
1307                 "<emphasis>Description</emphasis> (short description of "
1308                 "the format).</para>\n\n");
1309 
1310     ajFmtPrintF(outf, "<table frame=\"box\" rules=\"cols\">\n");
1311     ajFmtPrintF(outf, "  <caption>Input taxonomy formats</caption>\n");
1312     ajFmtPrintF(outf, "  <thead>\n");
1313     ajFmtPrintF(outf, "    <tr align=\"center\">\n");
1314     ajFmtPrintF(outf, "      <th>Input Format</th>\n");
1315     ajFmtPrintF(outf, "      <th>Try</th>\n");
1316     ajFmtPrintF(outf, "      <th>Description</th>\n");
1317     ajFmtPrintF(outf, "    </tr>\n");
1318     ajFmtPrintF(outf, "  </thead>\n");
1319     ajFmtPrintF(outf, "  <tbody>\n");
1320 
1321     for(i=1; taxinFormatDef[i].Name; i++)
1322     {
1323 	if(!taxinFormatDef[i].Alias)
1324         {
1325             namestr = ajStrNewC(taxinFormatDef[i].Name);
1326             ajListPushAppend(fmtlist, namestr);
1327             namestr = NULL;
1328         }
1329     }
1330 
1331     ajListSort(fmtlist, &ajStrVcmp);
1332     ajListstrToarray(fmtlist, &names);
1333 
1334     for(i=0; names[i]; i++)
1335     {
1336         for(j=0; taxinFormatDef[j].Name; j++)
1337         {
1338             if(ajStrMatchC(names[i],taxinFormatDef[j].Name))
1339             {
1340                 ajFmtPrintF(outf, "    <tr>\n");
1341                 ajFmtPrintF(outf, "      <td>%s</td>\n",
1342                             taxinFormatDef[j].Name);
1343                 ajFmtPrintF(outf, "      <td>%B</td>\n",
1344                             taxinFormatDef[j].Try);
1345                 ajFmtPrintF(outf, "      <td>%s</td>\n",
1346                             taxinFormatDef[j].Desc);
1347                 ajFmtPrintF(outf, "    </tr>\n");
1348             }
1349         }
1350     }
1351 
1352 
1353     ajFmtPrintF(outf, "  </tbody>\n");
1354     ajFmtPrintF(outf, "</table>\n");
1355     ajStrDel(&namestr);
1356 
1357     names = NULL;
1358     ajListstrFreeData(&fmtlist);
1359 
1360     return;
1361 }
1362 
1363 
1364 
1365 
1366 /* @func ajTaxinprintHtml *****************************************************
1367 **
1368 ** Reports the internal data structures as an HTML table
1369 **
1370 ** @param [u] outf [AjPFile] Output file
1371 ** @return [void]
1372 **
1373 ** @release 6.4.0
1374 ** @@
1375 ******************************************************************************/
1376 
ajTaxinprintHtml(AjPFile outf)1377 void ajTaxinprintHtml(AjPFile outf)
1378 {
1379     ajuint i = 0;
1380     ajuint j = 0;
1381 
1382     AjPStr namestr = NULL;
1383 
1384     ajFmtPrintF(outf, "<table border=3>");
1385     ajFmtPrintF(outf, "<tr><th>Input Format</th><th>Auto</th>\n");
1386     ajFmtPrintF(outf, "<th>Multi</th><th>Description</th></tr>\n");
1387 
1388     for(i=1; taxinFormatDef[i].Name; i++)
1389     {
1390         ajStrAssignC(&namestr, taxinFormatDef[i].Name);
1391 
1392 	if(!taxinFormatDef[i].Alias)
1393         {
1394             for(j=i+1; taxinFormatDef[j].Name; j++)
1395             {
1396                 if(taxinFormatDef[j].Read == taxinFormatDef[i].Read)
1397                 {
1398                     ajFmtPrintAppS(&namestr, " %s", taxinFormatDef[j].Name);
1399                     if(!taxinFormatDef[j].Alias)
1400                     {
1401                         ajWarn("Input format '%s' same as '%s' but not alias",
1402                                taxinFormatDef[j].Name,
1403                                taxinFormatDef[i].Name);
1404                     }
1405                 }
1406             }
1407 
1408 	    ajFmtPrintF(outf, "<tr><td>\n%S\n</td><td>%B</td>\n",
1409                         namestr,
1410 			taxinFormatDef[i].Try);
1411             ajFmtPrintF(outf, "<td>\n%s\n</td></tr>\n",
1412 			taxinFormatDef[i].Desc);
1413         }
1414 
1415     }
1416 
1417     ajFmtPrintF(outf, "</table>\n");
1418     ajStrDel(&namestr);
1419 
1420     return;
1421 }
1422 
1423 
1424 
1425 
1426 /* @func ajTaxinprintText *****************************************************
1427 **
1428 ** Reports the internal data structures
1429 **
1430 ** @param [u] outf [AjPFile] Output file
1431 ** @param [r] full [AjBool] Full report (usually ajFalse)
1432 ** @return [void]
1433 **
1434 ** @release 6.4.0
1435 ** @@
1436 ******************************************************************************/
1437 
ajTaxinprintText(AjPFile outf,AjBool full)1438 void ajTaxinprintText(AjPFile outf, AjBool full)
1439 {
1440     ajuint i = 0;
1441 
1442     ajFmtPrintF(outf, "\n");
1443     ajFmtPrintF(outf, "# Taxonomy input formats\n");
1444     ajFmtPrintF(outf, "# Name  Format name (or alias)\n");
1445     ajFmtPrintF(outf, "# Alias Alias name\n");
1446     ajFmtPrintF(outf, "# Try   Test for unknown input files\n");
1447     ajFmtPrintF(outf, "# Name         Alias Try "
1448 		"Description");
1449     ajFmtPrintF(outf, "\n");
1450     ajFmtPrintF(outf, "InFormat {\n");
1451 
1452     for(i=0; taxinFormatDef[i].Name; i++)
1453 	if(full || !taxinFormatDef[i].Alias)
1454 	    ajFmtPrintF(outf,
1455 			"  %-12s %5B %3B \"%s\"\n",
1456 			taxinFormatDef[i].Name,
1457 			taxinFormatDef[i].Alias,
1458 			taxinFormatDef[i].Try,
1459 			taxinFormatDef[i].Desc);
1460 
1461     ajFmtPrintF(outf, "}\n\n");
1462 
1463     return;
1464 }
1465 
1466 
1467 
1468 
1469 /* @func ajTaxinprintWiki *****************************************************
1470 **
1471 ** Reports the internal data structures as a wiki table
1472 **
1473 ** @param [u] outf [AjPFile] Output file
1474 ** @return [void]
1475 **
1476 ** @release 6.4.0
1477 ** @@
1478 ******************************************************************************/
1479 
ajTaxinprintWiki(AjPFile outf)1480 void ajTaxinprintWiki(AjPFile outf)
1481 {
1482     ajuint i = 0;
1483     ajuint j = 0;
1484 
1485     AjPStr namestr = NULL;
1486 
1487     ajFmtPrintF(outf, "{| class=\"wikitable sortable\" border=\"2\"\n");
1488     ajFmtPrintF(outf, "|-\n");
1489     ajFmtPrintF(outf, "!Format!!Try!!"
1490                 "class=\"unsortable\"|Description\n");
1491 
1492     for(i=1; taxinFormatDef[i].Name; i++)
1493     {
1494         ajStrAssignC(&namestr, taxinFormatDef[i].Name);
1495 
1496 	if(!taxinFormatDef[i].Alias)
1497         {
1498             for(j=i+1; taxinFormatDef[j].Name; j++)
1499             {
1500                 if(taxinFormatDef[j].Read == taxinFormatDef[i].Read)
1501                 {
1502                     ajFmtPrintAppS(&namestr, "<br>%s",
1503                                    taxinFormatDef[j].Name);
1504                     if(!taxinFormatDef[j].Alias)
1505                     {
1506                         ajWarn("Input format '%s' same as '%s' but not alias",
1507                                taxinFormatDef[j].Name,
1508                                taxinFormatDef[i].Name);
1509                     }
1510                 }
1511             }
1512 
1513             ajFmtPrintF(outf, "|-\n");
1514 	    ajFmtPrintF(outf,
1515 			"|%S||%B||%s\n",
1516 			namestr,
1517 			taxinFormatDef[i].Try,
1518 			taxinFormatDef[i].Desc);
1519         }
1520 
1521     }
1522 
1523     ajFmtPrintF(outf, "|}\n\n");
1524     ajStrDel(&namestr);
1525 
1526     return;
1527 }
1528 
1529 
1530 
1531 
1532 /* @section Miscellaneous *****************************************************
1533 **
1534 ** Functions to initialise and clean up internals
1535 **
1536 ** @fdata [none]
1537 **
1538 ** @nam3rule Exit Clean up and exit
1539 **
1540 ** @valrule * [void]
1541 **
1542 ** @fcategory misc
1543 **
1544 ******************************************************************************/
1545 
1546 
1547 
1548 
1549 /* @func ajTaxinExit **********************************************************
1550 **
1551 ** Cleans up taxonomy input internal memory
1552 **
1553 ** @return [void]
1554 **
1555 ** @release 6.4.0
1556 ** @@
1557 ******************************************************************************/
1558 
ajTaxinExit(void)1559 void ajTaxinExit(void)
1560 {
1561     /* Query processing regular expressions */
1562 
1563     ajStrDel(&taxinReadLine);
1564 
1565     ajTableDel(&taxDbMethods);
1566 
1567     return;
1568 }
1569 
1570 
1571 
1572 
1573 /* @section Internals *********************************************************
1574 **
1575 ** Functions to return internal values
1576 **
1577 ** @fdata [none]
1578 **
1579 ** @nam3rule Type Internals for taxon datatype
1580 ** @nam4rule Get  Return a value
1581 ** @nam5rule Fields  Known query fields for ajTaxinRead
1582 ** @nam5rule Qlinks  Known query link operators for ajTaxinRead
1583 **
1584 ** @valrule * [const char*] Internal value
1585 **
1586 ** @fcategory misc
1587 **
1588 ******************************************************************************/
1589 
1590 
1591 
1592 
/* @func ajTaxinTypeGetFields *************************************************
**
** Returns the list of known field names for ajTaxinRead
**
** The value is a constant, space-separated string.
**
** @return [const char*] List of field names
**
** @release 6.4.0
** @@
******************************************************************************/

const char* ajTaxinTypeGetFields(void)
{
    static const char taxfields[] = "id acc";

    return taxfields;
}
1607 
1608 
1609 
1610 
/* @func ajTaxinTypeGetQlinks *************************************************
**
** Returns the list of known query link operators for ajTaxinRead
**
** The value is a constant string.
**
** @return [const char*] List of query link operators
**
** @release 6.4.0
** @@
******************************************************************************/

const char* ajTaxinTypeGetQlinks(void)
{
    static const char taxqlinks[] = "|";

    return taxqlinks;
}
1625 
1626 
1627 
1628 
1629 /* @datasection [AjPTable] Internal call register table ***********************
1630 **
1631 ** Functions to manage the internal call register table that links the
1632 ** ajaxdb library functions with code in the core AJAX library.
1633 **
1634 ** @nam2rule Taxaccess Functions to manage taxdb call tables.
1635 **
1636 ******************************************************************************/
1637 
1638 
1639 
1640 
1641 /* @section Cast **************************************************************
1642 **
1643 ** Return a reference to the call table
1644 **
1645 ** @fdata [AjPTable] taxdb functions call table
1646 **
1647 ** @nam3rule Get Return a value
1648 ** @nam4rule Db Database access functions table
1649 ** @nam3rule Method Lookup an access method by name
1650 ** @nam4rule Test Return true if the access method exists
1651 ** @nam4rule MethodGet Return a method value
1652 ** @nam5rule Qlinks Return known query links for a named method
1653 ** @nam5rule Scope Return scope (entry, query or all) for a named method
1654 **
1655 ** @argrule Method method [const AjPStr] Method name
1656 **
1657 ** @valrule *Db [AjPTable] Call table of function names and references
1658 ** @valrule *Qlinks [const char*] Query link operators
1659 ** @valrule *Scope [ajuint] Scope flags
1660 ** @valrule *Test [AjBool] True if found
1661 **
1662 ** @fcategory cast
1663 **
1664 ******************************************************************************/
1665 
1666 
1667 
1668 
1669 /* @func ajTaxaccessGetDb *****************************************************
1670 **
1671 ** Returns the table in which taxonomy database access details are registered
1672 **
1673 ** @return [AjPTable] Access functions hash table
1674 **
1675 ** @release 6.4.0
1676 ** @@
1677 ******************************************************************************/
1678 
ajTaxaccessGetDb(void)1679 AjPTable ajTaxaccessGetDb(void)
1680 {
1681     if(!taxDbMethods)
1682         taxDbMethods = ajCallTableNew();
1683     return taxDbMethods;
1684 
1685 }
1686 
1687 
1688 
1689 
1690 /* @func ajTaxaccessMethodGetQlinks *******************************************
1691 **
1692 ** Tests for a named method for taxonomy data reading returns the
1693 ** known query link operators
1694 **
1695 ** @param [r] method [const AjPStr] Method required.
1696 ** @return [const char*] Known link operators
1697 **
1698 ** @release 6.4.0
1699 ** @@
1700 ******************************************************************************/
1701 
ajTaxaccessMethodGetQlinks(const AjPStr method)1702 const char* ajTaxaccessMethodGetQlinks(const AjPStr method)
1703 {
1704     AjPTaxAccess methoddata;
1705 
1706     methoddata = ajCallTableGetS(taxDbMethods, method);
1707     if(!methoddata)
1708         return NULL;
1709 
1710     return methoddata->Qlink;
1711 }
1712 
1713 
1714 
1715 
1716 /* @func ajTaxaccessMethodGetScope ********************************************
1717 **
1718 ** Tests for a named method for taxonomy data reading and returns the scope
1719 ** (entry, query or all).
1720 *
1721 ** @param [r] method [const AjPStr] Method required.
1722 ** @return [ajuint] Scope flags
1723 **
1724 ** @release 6.4.0
1725 ** @@
1726 ******************************************************************************/
1727 
ajTaxaccessMethodGetScope(const AjPStr method)1728 ajuint ajTaxaccessMethodGetScope(const AjPStr method)
1729 {
1730     AjPTaxAccess methoddata;
1731     ajuint ret = 0;
1732 
1733     methoddata = ajCallTableGetS(taxDbMethods, method);
1734     if(!methoddata)
1735         return 0;
1736 
1737     if(methoddata->Entry)
1738         ret |= AJMETHOD_ENTRY;
1739     if(methoddata->Query)
1740         ret |= AJMETHOD_QUERY;
1741     if(methoddata->All)
1742         ret |= AJMETHOD_ALL;
1743 
1744     return ret;
1745 }
1746 
1747 
1748 
1749 
1750 /* @func ajTaxaccessMethodTest ************************************************
1751 **
1752 ** Tests for a named method for taxonomy data reading.
1753 **
1754 ** @param [r] method [const AjPStr] Method required.
1755 ** @return [AjBool] ajTrue on success.
1756 **
1757 ** @release 6.4.0
1758 ** @@
1759 ******************************************************************************/
1760 
ajTaxaccessMethodTest(const AjPStr method)1761 AjBool ajTaxaccessMethodTest(const AjPStr method)
1762 {
1763     if(ajCallTableGetS(taxDbMethods, method))
1764       return ajTrue;
1765 
1766     return ajFalse;
1767 }
1768 
1769 
1770 
1771 
1772 /* @funcstatic taxinQryRestore ************************************************
1773 **
1774 ** Restores a taxonomy input specification from an AjPQueryList node
1775 **
1776 ** @param [w] taxin [AjPTaxin] Taxonomy input object
1777 ** @param [r] node [const AjPQueryList] Query list node
1778 ** @return [void]
1779 **
1780 ** @release 6.4.0
1781 ******************************************************************************/
1782 
taxinQryRestore(AjPTaxin taxin,const AjPQueryList node)1783 static void taxinQryRestore(AjPTaxin taxin, const AjPQueryList node)
1784 {
1785     taxin->Input->Format = node->Format;
1786     taxin->Input->Fpos   = node->Fpos;
1787     ajStrAssignS(&taxin->Input->Formatstr, node->Formatstr);
1788     ajStrAssignS(&taxin->Input->QryFields, node->QryFields);
1789 
1790     return;
1791 }
1792 
1793 
1794 
1795 
1796 /* @funcstatic taxinQrySave ***************************************************
1797 **
1798 ** Saves a taxonomy input specification in an AjPQueryList node
1799 **
1800 ** @param [w] node [AjPQueryList] Query list node
1801 ** @param [r] taxin [const AjPTaxin] Taxonomy input object
1802 ** @return [void]
1803 **
1804 ** @release 6.4.0
1805 ******************************************************************************/
1806 
taxinQrySave(AjPQueryList node,const AjPTaxin taxin)1807 static void taxinQrySave(AjPQueryList node, const AjPTaxin taxin)
1808 {
1809     node->Format   = taxin->Input->Format;
1810     node->Fpos     = taxin->Input->Fpos;
1811     ajStrAssignS(&node->Formatstr, taxin->Input->Formatstr);
1812     ajStrAssignS(&node->QryFields, taxin->Input->QryFields);
1813 
1814     return;
1815 }
1816 
1817 
1818 
1819 
1820 /* @funcstatic taxinQryProcess ************************************************
1821 **
1822 ** Converts a taxonomy data query into an open file.
1823 **
1824 ** Tests for "format::" and sets this if it is found
1825 **
1826 ** Then tests for "list:" or "@" and processes as a list file
1827 ** using taxinListProcess which in turn invokes taxinQryProcess
1828 ** until a valid query is found.
1829 **
1830 ** Then tests for dbname:query and opens the file (at the correct position
1831 ** if the database definition defines it)
1832 **
1833 ** If there is no database, looks for file:query and opens the file.
1834 ** In this case the file position is not known and taxonomy data reading
1835 ** will have to scan for the entry/entries we need.
1836 **
1837 ** @param [u] taxin [AjPTaxin] Taxonomy data input structure.
1838 ** @param [u] tax [AjPTax] Taxonomy data to be read.
1839 **                         The format will be replaced
1840 **                         if defined in the query string.
1841 ** @return [AjBool] ajTrue on success.
1842 **
1843 ** @release 6.4.0
1844 ** @@
1845 ******************************************************************************/
1846 
taxinQryProcess(AjPTaxin taxin,AjPTax tax)1847 static AjBool taxinQryProcess(AjPTaxin taxin, AjPTax tax)
1848 {
1849     AjBool ret = ajTrue;
1850     AjPStr qrystr = NULL;
1851     AjBool taxmethod = ajFalse;
1852     const AjPStr fmtstr = NULL;
1853     AjPTextin textin;
1854     AjPQuery qry;
1855     AjPTaxAccess taxaccess = NULL;
1856 
1857     textin = taxin->Input;
1858     qry = textin->Query;
1859 
1860     /* pick up the original query string */
1861     qrystr = ajStrNewS(textin->Qry);
1862 
1863     ajDebug("taxinQryProcess '%S'\n", qrystr);
1864 
1865     /* look for a format:: prefix */
1866     fmtstr = ajQuerystrParseFormat(&qrystr, textin, taxinformatFind);
1867     ajDebug("taxinQryProcess ... fmtstr '%S' '%S'\n", fmtstr, qrystr);
1868 
1869     /* (seq/feat) DO NOT look for a [range] suffix */
1870 
1871     /* look for a list:: or @:: listfile of queries  - process and return */
1872     if(ajQuerystrParseListfile(&qrystr))
1873     {
1874         ajDebug("taxinQryProcess ... listfile '%S'\n", qrystr);
1875         ret = taxinListProcess(taxin, tax, qrystr);
1876         ajStrDel(&qrystr);
1877         return ret;
1878     }
1879 
1880     /* try general text access methods (file, asis, text database access */
1881     ajDebug("taxinQryProcess ... no listfile '%S'\n", qrystr);
1882     if(!ajQuerystrParseRead(&qrystr, textin, taxinformatFind, &taxmethod))
1883     {
1884         ajStrDel(&qrystr);
1885         return ajFalse;
1886     }
1887 
1888     taxinFormatSet(taxin, tax);
1889 
1890     ajDebug("taxinQryProcess ... read nontext: %B '%S'\n",
1891             taxmethod, qrystr);
1892     ajStrDel(&qrystr);
1893 
1894     /* we found a non-text method */
1895     if(taxmethod)
1896     {
1897         ajDebug("taxinQryProcess ... call method '%S'\n", qry->Method);
1898         ajDebug("taxinQryProcess ... textin format %d '%S'\n",
1899                 textin->Format, textin->Formatstr);
1900         ajDebug("taxinQryProcess ...  query format  '%S'\n",
1901                 qry->Formatstr);
1902         qry->Access = ajCallTableGetS(taxDbMethods,qry->Method);
1903         taxaccess = qry->Access;
1904         return (*taxaccess->Access)(taxin);
1905     }
1906 
1907     ajDebug("taxinQryProcess text method '%S' success\n", qry->Method);
1908 
1909     return ajTrue;
1910 }
1911 
1912 
1913 
1914 
1915 
1916 /* @datasection [AjPList] Query field list ************************************
1917 **
1918 ** Query fields lists are handled internally. Only static functions
1919 ** should appear here
1920 **
1921 ******************************************************************************/
1922 
1923 
1924 
1925 
1926 /* @funcstatic taxinListProcess ***********************************************
1927 **
1928 ** Processes a file of queries.
1929 ** This function is called by, and calls, taxinQryProcess. There is
1930 ** a depth check to avoid infinite loops, for example where a list file
1931 ** refers to itself.
1932 **
1933 ** This function produces a list (AjPList) of queries with all list references
1934 ** expanded into lists of queries.
1935 **
1936 ** Because queries in a list can have their own format
1937 ** the prior settings are stored with each query in the list node so that they
1938 ** can be restored after.
1939 **
1940 ** @param [u] taxin [AjPTaxin] Taxonomy data input
1941 ** @param [u] tax [AjPTax] Taxonomy data
1942 ** @param [r] listfile [const AjPStr] Name of list file.,
1943 ** @return [AjBool] ajTrue on success.
1944 **
1945 ** @release 6.4.0
1946 ** @@
1947 ******************************************************************************/
1948 
taxinListProcess(AjPTaxin taxin,AjPTax tax,const AjPStr listfile)1949 static AjBool taxinListProcess(AjPTaxin taxin, AjPTax tax,
1950                                const AjPStr listfile)
1951 {
1952     AjPList list  = NULL;
1953     AjPFile file  = NULL;
1954     AjPStr token  = NULL;
1955     AjPStr rest  = NULL;
1956     AjBool ret       = ajFalse;
1957     AjPQueryList node = NULL;
1958 
1959     ajuint recnum = 0;
1960     static ajint depth    = 0;
1961     static ajint MAXDEPTH = 16;
1962 
1963     depth++;
1964     ajDebug("++taxinListProcess %S depth %d\n",
1965 	    listfile, depth);
1966 
1967     if(depth > MAXDEPTH)
1968 	ajFatal("Query list too deep");
1969 
1970     if(!taxin->Input->List)
1971 	taxin->Input->List = ajListNew();
1972 
1973     list = ajListNew();
1974 
1975     file = ajFileNewInNameS(listfile);
1976 
1977     if(!file)
1978     {
1979 	ajErr("Failed to open list file '%S'", listfile);
1980 	depth--;
1981 
1982 	return ret;
1983     }
1984 
1985     while(ajReadlineTrim(file, &taxinReadLine))
1986     {
1987         ++recnum;
1988 	taxinListNoComment(&taxinReadLine);
1989 
1990         if(ajStrExtractWord(taxinReadLine, &rest, &token))
1991         {
1992 
1993             if(ajStrGetLen(rest))
1994             {
1995                 ajErr("Bad record %u in list file '%S'\n'%S'",
1996                       recnum, listfile, taxinReadLine);
1997             }
1998             else if(ajStrGetLen(token))
1999             {
2000                 ajDebug("++Add to list: '%S'\n", token);
2001                 AJNEW0(node);
2002                 ajStrAssignS(&node->Qry, token);
2003                 taxinQrySave(node, taxin);
2004                 ajListPushAppend(list, node);
2005             }
2006         }
2007     }
2008 
2009     ajFileClose(&file);
2010     ajStrDel(&token);
2011     ajStrDel(&rest);
2012 
2013     ajDebug("Trace taxin->Input->List\n");
2014     ajQuerylistTrace(taxin->Input->List);
2015     ajDebug("Trace new list\n");
2016     ajQuerylistTrace(list);
2017     ajListPushlist(taxin->Input->List, &list);
2018 
2019     ajDebug("Trace combined taxin->Input->List\n");
2020     ajQuerylistTrace(taxin->Input->List);
2021 
2022     /*
2023      ** now try the first item on the list
2024      ** this can descend recursively if it is also a list
2025      ** which is why we check the depth above
2026      */
2027 
2028     if(ajListPop(taxin->Input->List, (void**) &node))
2029     {
2030         ajDebug("++pop first item '%S'\n", node->Qry);
2031 	ajTaxinQryS(taxin, node->Qry);
2032 	taxinQryRestore(taxin, node);
2033 	ajStrDel(&node->Qry);
2034 	ajStrDel(&node->Formatstr);
2035 	AJFREE(node);
2036 	ajDebug("descending with query '%S'\n", taxin->Input->Qry);
2037 	ret = taxinQryProcess(taxin, tax);
2038     }
2039 
2040     depth--;
2041     ajDebug("++taxinListProcess depth: %d returns: %B\n", depth, ret);
2042 
2043     return ret;
2044 }
2045 
2046 
2047 
2048 
2049 /* @funcstatic taxinListNoComment *********************************************
2050 **
2051 ** Strips comments from a character string (a line from an ACD file).
2052 ** Comments are blank lines or any text following a "#" character.
2053 **
2054 ** @param [u] text [AjPStr*] Line of text from input file.
2055 ** @return [void]
2056 **
2057 ** @release 6.4.0
2058 ** @@
2059 ******************************************************************************/
2060 
taxinListNoComment(AjPStr * text)2061 static void taxinListNoComment(AjPStr* text)
2062 {
2063     ajuint i;
2064     char *cp;
2065 
2066     i = ajStrGetLen(*text);
2067 
2068     if(!i)				/* empty string */
2069 	return;
2070 
2071     MAJSTRGETUNIQUESTR(text);
2072 
2073     cp = strchr(ajStrGetPtr(*text), '#');
2074 
2075     if(cp)
2076     {					/* comment found */
2077 	*cp = '\0';
2078 	ajStrSetValid(text);
2079     }
2080 
2081     return;
2082 }
2083 
2084 
2085 
2086 
2087 /* @funcstatic taxinFormatSet *************************************************
2088 **
2089 ** Sets the input format for taxonomy data using the taxonomy data
2090 ** input object's defined format
2091 **
2092 ** @param [u] taxin [AjPTaxin] Taxonomy data input.
2093 ** @param [u] tax [AjPTax] Taxonomy data
2094 ** @return [AjBool] ajTrue on success.
2095 **
2096 ** @release 6.4.0
2097 ** @@
2098 ******************************************************************************/
2099 
taxinFormatSet(AjPTaxin taxin,AjPTax tax)2100 static AjBool taxinFormatSet(AjPTaxin taxin, AjPTax tax)
2101 {
2102 
2103     if(ajStrGetLen(taxin->Input->Formatstr))
2104     {
2105 	ajDebug("... input format value '%S'\n",
2106                 taxin->Input->Formatstr);
2107 
2108 	if(taxinformatFind(taxin->Input->Formatstr,
2109                              &taxin->Input->Format))
2110 	{
2111 	    ajStrAssignS(&tax->Formatstr,
2112                          taxin->Input->Formatstr);
2113 	    tax->Format = taxin->Input->Format;
2114 	    ajDebug("...format OK '%S' = %d\n",
2115                     taxin->Input->Formatstr,
2116 		    taxin->Input->Format);
2117 	}
2118 	else
2119 	    ajDebug("...format unknown '%S'\n",
2120                     taxin->Input->Formatstr);
2121 
2122 	return ajTrue;
2123     }
2124     else
2125 	ajDebug("...input format not set\n");
2126 
2127 
2128     return ajFalse;
2129 }
2130 
2131 
2132 
2133 
2134 /* @datasection [AjPTaxall] Taxon Input Stream ********************************
2135 **
2136 ** Functions for manipulating taxon input stream objects
2137 **
2138 ** @nam2rule Taxall Taxon input stream objects
2139 **
2140 ******************************************************************************/
2141 
2142 
2143 
2144 
2145 /* @section Taxon Input Constructors ******************************************
2146 **
2147 ** All constructors return a new taxon input stream object by pointer. It
2148 ** is the responsibility of the user to first destroy any previous
2149 ** taxon input object. The target pointer does not need to be
2150 ** initialised to NULL, but it is good programming practice to do so
2151 ** anyway.
2152 **
2153 ** @fdata [AjPTaxall]
2154 **
2155 ** @nam3rule New Constructor
2156 **
2157 ** @valrule * [AjPTaxall] Taxon input stream object
2158 **
2159 ** @fcategory new
2160 **
2161 ******************************************************************************/
2162 
2163 
2164 
2165 
2166 /* @func ajTaxallNew **********************************************************
2167 **
2168 ** Creates a new taxon input stream object.
2169 **
2170 ** @return [AjPTaxall] New taxon input stream object.
2171 **
2172 ** @release 6.4.0
2173 ** @@
2174 ******************************************************************************/
2175 
ajTaxallNew(void)2176 AjPTaxall ajTaxallNew(void)
2177 {
2178     AjPTaxall pthis;
2179 
2180     AJNEW0(pthis);
2181 
2182     pthis->Taxin = ajTaxinNew();
2183     pthis->Tax   = ajTaxNew();
2184 
2185     return pthis;
2186 }
2187 
2188 
2189 
2190 
2191 
2192 /* ==================================================================== */
2193 /* ========================== destructors ============================= */
2194 /* ==================================================================== */
2195 
2196 
2197 
2198 
2199 /* @section Taxon Input Stream Destructors ************************************
2200 **
2201 ** Destruction destroys all internal data structures and frees the
2202 ** memory allocated for the taxon input stream object.
2203 **
2204 ** @fdata [AjPTaxall]
2205 **
2206 ** @nam3rule Del Destructor
2207 **
2208 ** @argrule Del pthis [AjPTaxall*] Taxon input stream
2209 **
2210 ** @valrule * [void]
2211 **
2212 ** @fcategory delete
2213 **
2214 ******************************************************************************/
2215 
2216 
2217 
2218 
2219 /* @func ajTaxallDel **********************************************************
2220 **
2221 ** Deletes a taxon input stream object.
2222 **
2223 ** @param [d] pthis [AjPTaxall*] taxon input stream
2224 ** @return [void]
2225 **
2226 ** @release 6.4.0
2227 ** @@
2228 ******************************************************************************/
2229 
ajTaxallDel(AjPTaxall * pthis)2230 void ajTaxallDel(AjPTaxall* pthis)
2231 {
2232     AjPTaxall thys;
2233 
2234     if(!pthis)
2235         return;
2236 
2237     thys = *pthis;
2238 
2239     if(!thys)
2240         return;
2241 
2242     ajTaxinDel(&thys->Taxin);
2243     if(!thys->Returned)
2244         ajTaxDel(&thys->Tax);
2245 
2246     AJFREE(*pthis);
2247 
2248     return;
2249 }
2250 
2251 
2252 
2253 
2254 /* ==================================================================== */
2255 /* =========================== Modifiers ============================== */
2256 /* ==================================================================== */
2257 
2258 
2259 
2260 
2261 /* @section taxon input stream modifiers **************************************
2262 **
2263 ** These functions use the contents of a taxon input stream object and
2264 ** update them.
2265 **
2266 ** @fdata [AjPTaxall]
2267 **
2268 ** @nam3rule Clear Clear all values
2269 **
2270 ** @argrule * thys [AjPTaxall] Taxon input stream object
2271 **
2272 ** @valrule * [void]
2273 **
2274 ** @fcategory modify
2275 **
2276 ******************************************************************************/
2277 
2278 
2279 
2280 
2281 /* @func ajTaxallClear ********************************************************
2282 **
2283 ** Clears a taxon input stream object back to "as new" condition, except
2284 ** for the query list which must be preserved.
2285 **
2286 ** @param [w] thys [AjPTaxall] Taxon input stream
2287 ** @return [void]
2288 **
2289 ** @release 6.4.0
2290 ** @@
2291 ******************************************************************************/
2292 
ajTaxallClear(AjPTaxall thys)2293 void ajTaxallClear(AjPTaxall thys)
2294 {
2295 
2296     ajDebug("ajTaxallClear called\n");
2297 
2298     ajTaxinClear(thys->Taxin);
2299 
2300     ajTaxClear(thys->Tax);
2301 
2302     thys->Returned = ajFalse;
2303 
2304     return;
2305 }
2306 
2307 
2308 
2309 
2310 /* @section taxon input stream casts ******************************************
2311 **
2312 ** These functions return the contents of a taxon input stream object
2313 **
2314 ** @fdata [AjPTaxall]
2315 **
2316 ** @nam3rule Get Get taxon input stream values
2317 ** @nam3rule Gettax Get taxon values
2318 ** @nam4rule GettaxId Get taxon identifier
2319 **
2320 ** @argrule * thys [const AjPTaxall] Taxon input stream object
2321 **
2322 ** @valrule * [const AjPStr] String value
2323 **
2324 ** @fcategory cast
2325 **
2326 ******************************************************************************/
2327 
2328 
2329 
2330 
2331 /* @func ajTaxallGettaxId *****************************************************
2332 **
2333 ** Returns the identifier of the current taxon in an input stream
2334 **
2335 ** @param [r] thys [const AjPTaxall] Taxon input stream
2336 ** @return [const AjPStr] Identifier
2337 **
2338 ** @release 6.4.0
2339 ** @@
2340 ******************************************************************************/
2341 
ajTaxallGettaxId(const AjPTaxall thys)2342 const AjPStr ajTaxallGettaxId(const AjPTaxall thys)
2343 {
2344     if(!thys)
2345         return NULL;
2346 
2347     ajDebug("ajTaxallGettaxId called\n");
2348 
2349     return ajTaxGetId(thys->Tax);
2350 }
2351 
2352 
2353 
2354 
2355 /* @section taxon input *******************************************************
2356 **
2357 ** These functions use a taxon input stream object to read data
2358 **
2359 ** @fdata [AjPTaxall]
2360 **
2361 ** @nam3rule Next Read next taxon
2362 **
2363 ** @argrule * thys [AjPTaxall] Taxon input stream object
2364 ** @argrule * Ptax [AjPTax*] Taxon object
2365 **
2366 ** @valrule * [AjBool] True on success
2367 **
2368 ** @fcategory input
2369 **
2370 ******************************************************************************/
2371 
2372 
2373 
2374 
2375 /* @func ajTaxallNext *********************************************************
2376 **
2377 ** Parse a taxon query into format, access, file and entry
2378 **
2379 ** Split at delimiters. Check for the first part as a valid format
2380 ** Check for the remaining first part as a database name or as a file
2381 ** that can be opened.
2382 ** Anything left is an entryname spec.
2383 **
2384 ** Return the results in the AjPTax object but leave the file open for
2385 ** future calls.
2386 **
2387 ** @param [w] thys [AjPTaxall] Taxon input stream
2388 ** @param [u] Ptax [AjPTax*] Taxon returned
2389 ** @return [AjBool] ajTrue on success.
2390 **
2391 ** @release 6.4.0
2392 ** @@
2393 ******************************************************************************/
2394 
ajTaxallNext(AjPTaxall thys,AjPTax * Ptax)2395 AjBool ajTaxallNext(AjPTaxall thys, AjPTax *Ptax)
2396 {
2397     ajDebug("ajTaxallNext count:%u\n", thys->Count);
2398 
2399     if(!thys->Count)
2400     {
2401 	thys->Count = 1;
2402 
2403 	thys->Totterms++;
2404 
2405 	*Ptax = thys->Tax;
2406 	thys->Returned = ajTrue;
2407 
2408 	return ajTrue;
2409     }
2410 
2411 
2412     if(ajTaxinRead(thys->Taxin, thys->Tax))
2413     {
2414 	thys->Count++;
2415 
2416 	thys->Totterms++;
2417 
2418 	*Ptax = thys->Tax;
2419 	thys->Returned = ajTrue;
2420 
2421 	ajDebug("ajTaxallNext success\n");
2422 
2423 	return ajTrue;
2424     }
2425 
2426     *Ptax = NULL;
2427 
2428     ajDebug("ajTaxallNext failed\n");
2429 
2430     ajTaxallClear(thys);
2431 
2432     return ajFalse;
2433 }
2434 
2435 
2436 
2437 
2438 /* @datasection [none] Input formats ******************************************
2439 **
2440 ** Input formats internals
2441 **
2442 ** @nam2rule Taxinformat Taxonomy data input format specific
2443 **
2444 ******************************************************************************/
2445 
2446 
2447 
2448 
2449 /* @section cast **************************************************************
2450 **
2451 ** Values for input formats
2452 **
2453 ** @fdata [none]
2454 **
2455 ** @nam3rule Find Return index to named format
2456 ** @nam3rule Test Test format value
2457 **
2458 ** @argrule * format [const AjPStr] Format name
2459 ** @argrule Find iformat [ajint*] Index matching format name
2460 **
2461 ** @valrule * [AjBool] True if found
2462 **
2463 ** @fcategory cast
2464 **
2465 ******************************************************************************/
2466 
2467 
2468 
2469 
2470 /* @funcstatic taxinformatFind ************************************************
2471 **
2472 ** Looks for the specified format(s) in the internal definitions and
2473 ** returns the index.
2474 **
2475 ** Sets iformat as the recognised format, and returns ajTrue.
2476 **
2477 ** @param [r] format [const AjPStr] Format required.
2478 ** @param [w] iformat [ajint*] Index
2479 ** @return [AjBool] ajTrue on success.
2480 **
2481 ** @release 6.4.0
2482 ** @@
2483 ******************************************************************************/
2484 
taxinformatFind(const AjPStr format,ajint * iformat)2485 static AjBool taxinformatFind(const AjPStr format, ajint* iformat)
2486 {
2487     AjPStr tmpformat = NULL;
2488     ajuint i = 0;
2489 
2490     /* ajDebug("taxinformatFind '%S'\n", format); */
2491     if(!ajStrGetLen(format))
2492 	return ajFalse;
2493 
2494     ajStrAssignS(&tmpformat, format);
2495     ajStrFmtLower(&tmpformat);
2496 
2497     for(i=0; taxinFormatDef[i].Name; i++)
2498     {
2499 	/* ajDebug("test %d '%s'\n",
2500            i, taxinFormatDef[i].Name); */
2501 	if(ajStrMatchC(tmpformat, taxinFormatDef[i].Name))
2502 	{
2503 	    *iformat = i;
2504 	    ajStrDel(&tmpformat);
2505 	    /* ajDebug("found '%s' at %d\n", taxinFormatDef[i].Name, i); */
2506 	    return ajTrue;
2507 	}
2508     }
2509 
2510     ajErr("Unknown input format '%S'", format);
2511 
2512     ajStrDel(&tmpformat);
2513 
2514     return ajFalse;
2515 }
2516 
2517 
2518 
2519 
2520 /* @func ajTaxinformatTest ****************************************************
2521 **
2522 ** Tests whether a named taxonomy data input format is known
2523 **
2524 ** @param [r] format [const AjPStr] Format
2525 ** @return [AjBool] ajTrue if formats was accepted
2526 **
2527 ** @release 6.4.0
2528 ** @@
2529 ******************************************************************************/
2530 
ajTaxinformatTest(const AjPStr format)2531 AjBool ajTaxinformatTest(const AjPStr format)
2532 {
2533     ajuint i;
2534 
2535     for(i=0; taxinFormatDef[i].Name; i++)
2536 	if(ajStrMatchCaseC(format, taxinFormatDef[i].Name))
2537 	    return ajTrue;
2538 
2539     return ajFalse;
2540 }
2541