1 /* @source ajtextread *********************************************************
2 **
3 ** AJAX text data reading functions
4 **
5 ** These functions control all aspects of AJAX text data reading
6 **
7 ** @author Copyright (C) 2010 Peter Rice
8 ** @version $Revision: 1.40 $
9 ** @modified Oct 5 pmr First version
10 ** @modified $Date: 2013/07/15 20:54:52 $ by $Author: rice $
11 ** @@
12 **
13 ** This library is free software; you can redistribute it and/or
14 ** modify it under the terms of the GNU Lesser General Public
15 ** License as published by the Free Software Foundation; either
16 ** version 2.1 of the License, or (at your option) any later version.
17 **
18 ** This library is distributed in the hope that it will be useful,
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 ** Lesser General Public License for more details.
22 **
23 ** You should have received a copy of the GNU Lesser General Public
24 ** License along with this library; if not, write to the Free Software
25 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 ** MA 02110-1301, USA.
27 **
28 ******************************************************************************/
29
30
31 #include "ajlib.h"
32
33 #include "ajtextread.h"
34 #include "ajtext.h"
35 #include "ajcall.h"
36 #include "ajlist.h"
37 #include "ajquery.h"
38 #include "ajnam.h"
39 #include "ajfileio.h"
40 #include "ajhttp.h"
41 #include "ajftp.h"
42
43
44 #include <string.h>
45
46 #ifndef WIN32
47 #include <sys/socket.h>
48 #include <netinet/in.h>
49 #include <arpa/inet.h>
50 #include <netdb.h>
51 #else
52 #include <winsock2.h>
53 #include <ws2tcpip.h>
54 #endif
55
56
57 AjPTable textDbMethods = NULL;
58
59 static AjPStr textinReadLine = NULL;
60
61 static AjBool textinReadText(AjPTextin thys, AjPText text);
62 static AjBool textinReadXml(AjPTextin thys, AjPText text);
63 static AjBool textinReadObo(AjPTextin thys, AjPText text);
64 static AjBool textinReadEmbl(AjPTextin thys, AjPText text);
65 static AjBool textinReadGenbank(AjPTextin thys, AjPText text);
66 static AjBool textinReadPdb(AjPTextin thys, AjPText text);
67
68
69
70
71 /* @datastatic TextPInFormat **************************************************
72 **
73 ** Text input formats data structure
74 **
75 ** @alias TextSInFormat
76 ** @alias TextOInFormat
77 **
78 ** @attr Name [const char*] Format name
79 ** @attr Obo [const char*] Ontology term id from EDAM
80 ** @attr Desc [const char*] Format description
81 ** @attr Alias [AjBool] Name is an alias for an identical definition
82 ** @attr Try [AjBool] If true, try for an unknown input. Duplicate names
83 ** and read-anything formats are set false
84 ** @attr Read [AjBool function] Input function, returns ajTrue on success
85 ** @@
86 ******************************************************************************/
87
88 typedef struct TextSInFormat
89 {
90 const char *Name;
91 const char *Obo;
92 const char *Desc;
93 AjBool Alias;
94 AjBool Try;
95 AjBool (*Read) (AjPTextin thys, AjPText text);
96 } TextOInFormat;
97
98 #define TextPInFormat TextOInFormat*
99
100 static TextOInFormat textinFormatDef[] =
101 {
102 /* "Name", "Description" */
103 /* Alias, Try, */
104 /* ReadFunction */
105 {"unknown", "0000", "Unknown format",
106 AJFALSE, AJFALSE,
107 &textinReadText}, /* alias for text */
108 {"text", "2330", "Plain text format",
109 AJFALSE, AJTRUE,
110 &textinReadText},
111 {"xml", "2332", "XML data",
112 AJFALSE, AJTRUE,
113 &textinReadXml},
114 {"obo", "2196", "OBO data",
115 AJFALSE, AJTRUE,
116 &textinReadObo},
117 {"embl", "1927", "EMBL data",
118 AJFALSE, AJFALSE,
119 &textinReadEmbl},
120 {"genbank", "1936", "Genbank data",
121 AJFALSE, AJFALSE,
122 &textinReadGenbank},
123 {"swissprot", "1963", "SwissProt data",
124 AJTRUE, AJFALSE,
125 &textinReadEmbl},
126 {"swiss", "1963", "SwissProt data",
127 AJTRUE, AJFALSE,
128 &textinReadEmbl},
129 {"uniprot", "2188", "UniProt data",
130 AJTRUE, AJFALSE,
131 &textinReadEmbl},
132 {"uniprotkb", "2187", "UniProt-like data",
133 AJTRUE, AJFALSE,
134 &textinReadEmbl},
135 {"ipi", "2189", "UniProt-like data",
136 AJTRUE, AJFALSE,
137 &textinReadEmbl},
138 {"pdb", "1476", "PDB data",
139 AJFALSE, AJFALSE,
140 &textinReadPdb},
141 {"mmcif", "0000", "mmCIF structure data",
142 AJFALSE, AJFALSE,
143 &textinReadPdb},
144 {"csv", "0000", "Comma separated values",
145 AJFALSE, AJFALSE,
146 &textinReadText},
147 {"tab", "0000", "Tab separated values",
148 AJFALSE, AJFALSE,
149 &textinReadText},
150 {"tsv", "0000", "Tab separated values",
151 AJTRUE, AJFALSE,
152 &textinReadText},
153 {"iprmctab", "0000", "IPRMC tab separated values",
154 AJFALSE, AJFALSE,
155 &textinReadText},
156 {"iprmc", "0000", "IPRMC text",
157 AJFALSE, AJFALSE,
158 &textinReadText},
159 {"interpro", "0000", "InterPRO format",
160 AJFALSE, AJFALSE,
161 &textinReadText},
162 {"livelists", "0000", "EBI livelists format",
163 AJFALSE, AJFALSE,
164 &textinReadText},
165 {"hgbase", "0000", "HGBASE format",
166 AJFALSE, AJFALSE,
167 &textinReadText},
168 {"annot", "0000", "General annotation",
169 AJTRUE, AJFALSE,
170 &textinReadText},
171 {"entrysize", "0000", "Entry size",
172 AJTRUE, AJFALSE,
173 &textinReadText},
174 {"tinyseq", "0000", "Refseq tinyseq format",
175 AJTRUE, AJFALSE,
176 &textinReadText},
177 {"uniref50", "0000", "UniRef50 format",
178 AJTRUE, AJFALSE,
179 &textinReadText},
180 {"uniref90", "0000", "UniRef90 format",
181 AJTRUE, AJFALSE,
182 &textinReadText},
183 {"uniref100", "0000", "UniRef100 format",
184 AJTRUE, AJFALSE,
185 &textinReadText},
186 {"uniparc", "0000", "UniParc format",
187 AJTRUE, AJFALSE,
188 &textinReadText},
189 {"resid", "0000", "ResId format",
190 AJTRUE, AJFALSE,
191 &textinReadText},
192 {"nrl1", "0000", "Patent non-redundant level 1 format",
193 AJTRUE, AJFALSE,
194 &textinReadText},
195 {"nrl2", "0000", "Patent non-redundant level 2 format",
196 AJTRUE, AJFALSE,
197 &textinReadText},
198 {"ris", "0000", "MedLine RIS format",
199 AJTRUE, AJFALSE,
200 &textinReadText},
201 {"isi", "0000", "MedLine ISI format",
202 AJTRUE, AJFALSE,
203 &textinReadText},
204 {"bibtex", "0000", "MedLine BibTex format",
205 AJTRUE, AJFALSE,
206 &textinReadText},
207 {"endnote", "0000", "MedLine EndNote format",
208 AJTRUE, AJFALSE,
209 &textinReadText},
210 {"medlineref", "0000", "MedLine ref format",
211 AJTRUE, AJFALSE,
212 &textinReadText},
213 {"medlinefull", "0000", "MedLine full format",
214 AJTRUE, AJFALSE,
215 &textinReadText},
216 {NULL, NULL, NULL, 0, 0, NULL}
217 };
218
219
220
221 static ajuint textinReadFmt(AjPTextin textin, AjPText text,
222 ajuint format);
223 static AjBool textinRead(AjPTextin textin, AjPText text);
224 static AjBool textinformatFind(const AjPStr format, ajint* iformat);
225 static AjBool textinFormatSet(AjPTextin textin, AjPText text);
226 static AjBool textinListProcess(AjPTextin textin, AjPText text,
227 const AjPStr listfile);
228 static void textinListNoComment(AjPStr* text);
229 static void textinQryRestore(AjPTextin textin, const AjPQueryList node);
230 static void textinQrySave(AjPQueryList node, const AjPTextin textin);
231 static AjBool textDefine(AjPText thys, AjPTextin textin);
232 static AjBool textinQryProcess(AjPTextin textin, AjPText text);
233 static AjBool textinQueryMatch(const AjPQuery thys, const AjPText text);
234
235
236
237
238 /* @filesection ajtextread ****************************************************
239 **
240 ** @nam1rule aj Function belongs to the AJAX library.
241 **
242 */
243
244
245
246
247 /* @datasection [AjPTextin] Text input objects ********************************
248 **
249 ** Function is for manipulating text input objects
250 **
251 ** @nam2rule Textin
252 ******************************************************************************/
253
254
255
256
257 /* @section Text Input Constructors *******************************************
258 **
259 ** All constructors return a new text input object by pointer. It
260 ** is the responsibility of the user to first destroy any previous
261 ** text input object. The target pointer does not need to be
262 ** initialised to NULL, but it is good programming practice to do so
263 ** anyway.
264 **
265 ** @fdata [AjPTextin]
266 **
267 ** @nam3rule New Construct a new text input object
268 ** @nam4rule Datatype Construct a new text input object for a
269 ** specific datatype
270 **
271 ** @argrule Datatype datatype [const AjEDataType] Enumerated datatype
272 **
273 ** @valrule * [AjPTextin] New text input object
274 **
275 ** @fcategory new
276 **
277 ******************************************************************************/
278
279
280
281
282 /* @func ajTextinNew **********************************************************
283 **
284 ** Creates a new text input object.
285 **
286 ** @return [AjPTextin] New text input object.
287 ** @category new [AjPTextin] Default constructor
288 **
289 ** @release 6.4.0
290 ** @@
291 ******************************************************************************/
292
ajTextinNew(void)293 AjPTextin ajTextinNew(void)
294 {
295 return ajTextinNewDatatype(AJDATATYPE_UNKNOWN);
296 }
297
298
299
300
301 /* @func ajTextinNewDatatype **************************************************
302 **
303 ** Creates a new text input object for a specific datatype
304 **
305 ** @param [r] datatype [const AjEDataType] Enumerated datatype
306 ** @return [AjPTextin] New text input object.
307 ** @category new [AjPTextin] Default constructor
308 **
309 ** @release 6.4.0
310 ** @@
311 ******************************************************************************/
312
ajTextinNewDatatype(const AjEDataType datatype)313 AjPTextin ajTextinNewDatatype(const AjEDataType datatype)
314 {
315 AjPTextin pthis;
316
317 AJNEW0(pthis);
318
319 pthis->Db = ajStrNew();
320 pthis->Qry = ajStrNew();
321 pthis->Formatstr = ajStrNew();
322 pthis->QryFields = ajStrNew();
323 pthis->Filename = ajStrNew();
324
325 pthis->Query = ajQueryNew(datatype);
326
327 pthis->Search = ajTrue;
328
329 return pthis;
330 }
331
332
333
334
335 /* @section Text Input Destructors ********************************************
336 **
337 ** Destruction destroys all internal data structures and frees the
338 ** memory allocated for the text input object.
339 **
340 ** @fdata [AjPTextin]
341 **
342 ** @nam3rule Del Destructor
343 ** @nam4rule Nofile File buffer is a copy, do not delete
344 **
345 ** @argrule Del pthis [AjPTextin*] Text input
346 **
347 ** @valrule * [void]
348 **
349 ** @fcategory delete
350 **
351 ******************************************************************************/
352
353
354
355
356 /* @func ajTextinDel **********************************************************
357 **
358 ** Deletes a text input object.
359 **
360 ** @param [d] pthis [AjPTextin*] Text input
361 ** @return [void]
362 ** @category delete [AjPTextin] Default destructor
363 **
364 ** @release 6.4.0
365 ** @@
366 ******************************************************************************/
367
void ajTextinDel(AjPTextin* pthis)
{
    AjPQueryList qrynode = NULL;
    AjPTextin textin;

    /* nothing to do for a missing pointer or an already-empty one */
    if(!pthis || !*pthis)
        return;

    textin = *pthis;

    /* NOTE(review): %x is handed a pointer here - confirm ajDebug copes */
    ajDebug("ajTextinDel called qry:'%S' filebuff: %x\n",
            textin->Qry, textin->Filebuff);

    ajStrDel(&textin->Db);
    ajStrDel(&textin->Qry);
    ajStrDel(&textin->Formatstr);
    ajStrDel(&textin->QryFields);
    ajStrDel(&textin->Filename);

    /* drain any queued queries, releasing each node and its strings */
    while(ajListGetLength(textin->List))
    {
        ajListPop(textin->List, (void**) &qrynode);
        ajStrDel(&qrynode->Qry);
        ajStrDel(&qrynode->Formatstr);
        AJFREE(qrynode);
    }

    ajListFree(&textin->List);
    ajQueryDel(&textin->Query);

    /* this destructor also deletes the file buffer */
    if(textin->Filebuff)
        ajFilebuffDel(&textin->Filebuff);

    AJFREE(*pthis);
}
409
410
411
412
413 /* @func ajTextinDelNofile ****************************************************
414 **
415 ** Deletes a text input object but does not close the file, as it is a
416 ** copy of a file buffer owned elsewhere
417 **
418 ** @param [d] pthis [AjPTextin*] Text input
419 ** @return [void]
420 ** @category delete [AjPTextin] Default destructor
421 **
422 ** @release 6.4.0
423 ** @@
424 ******************************************************************************/
425
void ajTextinDelNofile(AjPTextin* pthis)
{
    AjPTextin thys;
    AjPQueryList node = NULL;

    /* a NULL pointer, or a pointer to NULL, is silently accepted */
    if(!pthis)
        return;

    thys = *pthis;

    if(!thys)
        return;

    /* trace under this function's own name, not that of ajTextinDel */
    ajDebug("ajTextinDelNofile called qry:'%S'\n", thys->Qry);

    ajStrDel(&thys->Db);
    ajStrDel(&thys->Qry);
    ajStrDel(&thys->Formatstr);
    ajStrDel(&thys->QryFields);
    ajStrDel(&thys->Filename);

    /* release every queued query node together with its strings */
    while(ajListGetLength(thys->List))
    {
        ajListPop(thys->List, (void**) &node);
        ajStrDel(&node->Qry);
        ajStrDel(&node->Formatstr);
        AJFREE(node);
    }

    ajListFree(&thys->List);

    ajQueryDel(&thys->Query);

    /*
    ** thys->Filebuff is deliberately not deleted: for this variant the
    ** file buffer is a copy of one held elsewhere.
    */

    AJFREE(*pthis);

    return;
}
463
464
465
466
467 /* @section text input modifiers **********************************************
468 **
469 ** These functions use the contents of a text input object and
470 ** update them.
471 **
472 ** @fdata [AjPTextin]
473 **
474 ** @nam3rule Clear Clear all values
475 ** @nam3rule Qry Reset using a query string
476 ** @nam4rule Newfile Starting a new file to continue input
477 ** @nam4rule Newinput Continue input from same file
478 ** @nam4rule Nofile File buffer is a copy, do not delete
479 ** @suffix C Character string input
480 ** @suffix S String input
481 **
482 ** @argrule * thys [AjPTextin] Text input object
483 ** @argrule C txt [const char*] Query text
484 ** @argrule S str [const AjPStr] query string
485 **
486 ** @valrule * [void]
487 **
488 ** @fcategory modify
489 **
490 ******************************************************************************/
491
492
493
494
495 /* @func ajTextinClear ********************************************************
496 **
497 ** Clears a text input object back to "as new" condition, except
498 ** for the query list which must be preserved.
499 **
500 ** @param [w] thys [AjPTextin] Text input
501 ** @return [void]
502 **
503 ** @release 6.4.0
504 ** @@
505 ******************************************************************************/
506
void ajTextinClear(AjPTextin thys)
{
    ajDebug("ajTextinClear called\n");

    /* a NULL object is accepted and ignored */
    if(thys == NULL)
        return;

    ajStrSetClear(&thys->Db);
    ajStrSetClear(&thys->Qry);
    ajStrSetClear(&thys->Formatstr);
    ajStrSetClear(&thys->QryFields);
    ajStrSetClear(&thys->Filename);

    /* thys->List is left alone: queued queries must survive a clear */

    if(thys->Filebuff)
    {
        ajFilebuffDel(&thys->Filebuff);

        /* the delete call is expected to reset the pointer */
        if(thys->Filebuff)
            ajFatal("ajTextinClear did not delete Filebuff");
    }

    ajQueryClear(thys->Query);
    thys->TextData = NULL;

    /* flags; CaseId, Multi and Text are intentionally not reset */
    thys->Search       = ajTrue;
    thys->Single       = ajFalse;
    thys->ChunkEntries = ajFalse;
    thys->Dataread     = ajFalse;
    thys->Datadone     = ajFalse;

    /* counters; Filecount and Entrycount are intentionally not reset */
    thys->Count      = 0;
    thys->Datacount  = 0;
    thys->Records    = 0;
    thys->TotRecords = 0;

    /* file positions */
    thys->Fpos   = 0L;
    thys->Curpos = 0L;
}
554
555
556
557
558 /* @func ajTextinClearNewfile *************************************************
559 **
560 ** Clears a text input object statistics for continued input with a new file
561 **
562 ** @param [w] thys [AjPTextin] Text input
563 ** @return [void]
564 ** @category modify [AjPTextin] Resets ready for reuse.
565 **
566 ** @release 6.6.0
567 ** @@
568 ******************************************************************************/
569
void ajTextinClearNewfile(AjPTextin thys)
{
    ajDebug("ajTextinClearNewfile called\n");

    /* one more file has been started */
    ++thys->Filecount;

    /* per-file record count and both file positions start again */
    thys->Records = 0;
    thys->Curpos  = 0L;
    thys->Fpos    = 0L;
}
581
582
583
584
585 /* @func ajTextinClearNewinput ************************************************
586 **
587 ** Clears a text input object statistics for continued input with same file
588 **
589 ** @param [w] thys [AjPTextin] Text input
590 ** @return [void]
591 ** @category modify [AjPTextin] Resets ready for reuse.
592 **
593 ** @release 6.6.0
594 ** @@
595 ******************************************************************************/
596
void ajTextinClearNewinput(AjPTextin thys)
{
    ajDebug("ajTextinClearNewinput called\n");

    /* only the per-input record count restarts; the file count, the
    ** file positions and TotRecords are left as they are */
    thys->Records = 0;
}
605
606
607
608
609 /* @func ajTextinClearNofile **************************************************
610 **
611 ** Clears a text input object back to "as new" condition, except for the
612 ** query list, which is preserved, and the file buffer, which is not deleted.
613 **
614 ** @param [w] thys [AjPTextin] Text input
615 ** @return [void]
616 ** @category modify [AjPTextin] Resets ready for reuse.
617 **
618 ** @release 6.4.0
619 ** @@
620 ******************************************************************************/
621
void ajTextinClearNofile(AjPTextin thys)
{

    ajDebug("ajTextinClearNofile called\n");

    /* accept and ignore a NULL object, in the same way as ajTextinClear */
    if(!thys)
        return;

    ajStrSetClear(&thys->Db);
    ajStrSetClear(&thys->Qry);
    ajStrSetClear(&thys->Formatstr);
    ajStrSetClear(&thys->QryFields);
    ajStrSetClear(&thys->Filename);

    /* preserve thys->List */

    /* thys->Filebuff is not touched: this variant never deletes it */

    ajQueryClear(thys->Query);
    thys->TextData = NULL;

    thys->Search = ajTrue;
    thys->Single = ajFalse;
    /* thys->CaseId= ajFalse;*/
    /* thys->Multi = ajFalse;*/
    /* thys->Text = ajFalse;*/
    thys->ChunkEntries = ajFalse;

    thys->Count = 0;

    /*
    ** NOTE(review): unlike ajTextinClear, the Dataread, Datadone and
    ** Datacount members are not reset here - confirm this is intended.
    */

    /* preserve thys->Filecount */
    /* preserve thys->Entrycount */

    thys->Records = 0;
    thys->TotRecords = 0;
    thys->Fpos = 0L;
    thys->Curpos = 0L;

    return;
}
657
658
659
660
661 /* @func ajTextinQryC *********************************************************
662 **
663 ** Resets a text input object using a new Universal
664 ** Query Address
665 **
666 ** @param [u] thys [AjPTextin] text input object.
667 ** @param [r] txt [const char*] Query
668 ** @return [void]
669 **
670 ** @release 6.4.0
671 ** @@
672 ******************************************************************************/
673
void ajTextinQryC(AjPTextin thys, const char* txt)
{
    /* wipe the previous state first (the query list is kept) */
    ajTextinClear(thys);

    /* then install the new query text */
    (void) ajStrAssignC(&thys->Qry, txt);
}
681
682
683
684
685
686 /* @func ajTextinQryS *********************************************************
687 **
688 ** Resets a text input object using a new Universal
689 ** Query Address
690 **
691 ** @param [u] thys [AjPTextin] Text input object.
692 ** @param [r] str [const AjPStr] Query
693 ** @return [void]
694 **
695 ** @release 6.4.0
696 ** @@
697 ******************************************************************************/
698
void ajTextinQryS(AjPTextin thys, const AjPStr str)
{
    /*
    ** Wipe the previous state first (the query list is kept).
    ** The clear also empties thys->Qry, so str must not be thys->Qry.
    */
    ajTextinClear(thys);

    /* then install the new query string */
    (void) ajStrAssignS(&thys->Qry, str);
}
706
707
708
709
710 /* @section store ************************************************************
711 **
712 ** Read text data using a text input object and if saved text is
713 ** defined, maintain a text argument.
714 **
715 ** @fdata [AjPTextin]
716 **
717 ** @nam3rule Store Manage a text buffer for all input to date
718 ** @nam4rule Clear Reset the text buffer to the end of the last text input
719 ** @nam4rule Readline Read the next line of input
720 ** and store in buffer if required
721 ** @nam4rule Reset Reset the file buffer and any buffered text.
722 **
723 ** @argrule * thys [AjPTextin] Text input object
724 ** @argrule Clear lines [ajint] Number of lines to keep in buffer
725 ** @argrule Clear rdline [const AjPStr] Most recent input line to trim
726 ** from buffer
727 ** @argrule Readline pdest [AjPStr*] Latest input line
728 ** @argrule Store astr [AjPStr*] Buffered text data
729 **
730 ** @valrule * [void]
731 ** @valrule *Readline [AjBool] True on success
732 **
733 ** @fcategory cast
734 **
735 ******************************************************************************/
736
737
738
739
740 /* @func ajTextinStoreClear ***************************************************
741 **
742 ** Clear the latest record from a text input buffer and any saved buffered text
743 **
744 ** @param [u] thys [AjPTextin] Text input object
745 ** @param [r] lines [ajint] Number of lines to keep
746 ** @param [r] rdline [const AjPStr] Last input record to trim
747 ** @param [u] astr [AjPStr*] Current text buffer
748 ** @return [void]
749 ******************************************************************************/
750
void ajTextinStoreClear(AjPTextin thys,
                        ajint lines, const AjPStr rdline,
                        AjPStr* astr)
{
    ajDebug("ajTextinStoreClear Records: %u lines: %d len: %Lu\n",
            thys->Records, lines, (ajulong) ajStrGetLen(rdline));

    ajFilebuffClearStore(thys->Filebuff, lines, rdline, thys->Text, astr);

    if(lines < 0)
    {
        /* negative count: take all current records out of the total */
        thys->TotRecords -= thys->Records;
        thys->Records = 0;
    }
    else if(lines > 0)
    {
        /* positive count: subtract it from both counters, stopping at 0 */
        thys->Records = ((ajint) thys->Records >= lines)
            ? thys->Records - lines
            : 0;

        thys->TotRecords = ((ajint) thys->TotRecords >= lines)
            ? thys->TotRecords - lines
            : 0;
    }

    /* lines == 0 leaves both counters unchanged */
}
781
782
783
784
785 /* @func ajTextinStoreReadline ************************************************
786 **
787 ** Read the next line of input and if required store in buffer
788 **
789 ** @param [u] thys [AjPTextin] Text input object
790 ** @param [w] pdest [AjPStr*] Next input record
791 ** @param [u] astr [AjPStr*] Current text buffer
792 ** @return [AjBool] True on success
793 ******************************************************************************/
794
ajTextinStoreReadline(AjPTextin thys,AjPStr * pdest,AjPStr * astr)795 AjBool ajTextinStoreReadline(AjPTextin thys,
796 AjPStr* pdest, AjPStr* astr)
797 {
798 AjBool ret;
799
800 ret = ajBuffreadLinePosStore(thys->Filebuff, pdest,
801 &thys->Curpos, thys->Text,
802 astr);
803 if(ret)
804 {
805 thys->Records++;
806 thys->TotRecords++;
807 }
808
809 return ret;
810 }
811
812
813
814
815 /* @func ajTextinStoreReset ***************************************************
816 **
817 ** Reset a text input object buffer and any saved buffered text
818 **
819 ** @param [u] thys [AjPTextin] Text input object
820 ** @param [u] astr [AjPStr*] Current text buffer
821 ** @return [void]
822 ******************************************************************************/
823
void ajTextinStoreReset(AjPTextin thys, AjPStr* astr)
{
    /* trace the record count before it is zeroed below */
    ajDebug("ajTextinStoreReset Records: %u\n", thys->Records);

    ajFilebuffResetStore(thys->Filebuff, thys->Text, astr);

    /* only Records restarts; TotRecords is not adjusted here */
    thys->Records = 0;
}
834
835
836
837
838 /* @section casts *************************************************************
839 **
840 ** Return values
841 **
842 ** @fdata [AjPTextin]
843 **
844 ** @nam3rule Get Get text input stream values
845 ** @nam4rule GetCount Get text input count
846 ** @nam4rule GetFpos Get text input file position for latest record
847 ** @nam4rule GetQry Get text query
848 ** @nam4rule GetRecords get text input records count for this file
849 ** @nam4rule GetTotrecords Get text input total record count
850 ** @nam3rule Trace Write debugging output
851 ** @suffix S Return as a string object
852 **
853 ** @argrule * thys [const AjPTextin] Text input object
854 **
855 ** @valrule * [void]
856 ** @valrule *Count [ajuint] Count
857 ** @valrule *Fpos [ajlong] File position
858 ** @valrule *Records [ajuint] Record count
859 ** @valrule *Totrecords [ajuint] Total record count
860 ** @valrule *S [const AjPStr] String value
861 **
862 ** @fcategory cast
863 **
864 ******************************************************************************/
865
866
867
868
869 /* @func ajTextinGetCount *****************************************************
870 **
871 ** Returns the input count of a text input object
872 **
873 ** @param [r] thys [const AjPTextin] Text input object.
874 ** @return [ajuint] Input count
875 **
876 ** @release 6.6.0
877 ** @@
878 ******************************************************************************/
879
ajTextinGetCount(const AjPTextin thys)880 ajuint ajTextinGetCount(const AjPTextin thys)
881 {
882 return thys->Count;
883 }
884
885
886
887
888 /* @func ajTextinGetFpos ******************************************************
889 **
890 ** Returns the file position of a text input object
891 **
892 ** @param [r] thys [const AjPTextin] Text input object.
893 ** @return [ajlong] File position
894 **
895 ** @release 6.6.0
896 ** @@
897 ******************************************************************************/
898
ajTextinGetFpos(const AjPTextin thys)899 ajlong ajTextinGetFpos(const AjPTextin thys)
900 {
901 return thys->Curpos;
902 }
903
904
905
906
907 /* @func ajTextinGetQryS ******************************************************
908 **
909 ** Returns the query of a text input object
910 **
911 ** @param [r] thys [const AjPTextin] Text input object.
912 ** @return [const AjPStr] Query string
913 **
914 ** @release 6.4.0
915 ** @@
916 ******************************************************************************/
917
ajTextinGetQryS(const AjPTextin thys)918 const AjPStr ajTextinGetQryS(const AjPTextin thys)
919 {
920 return thys->Qry;
921 }
922
923
924
925
926 /* @func ajTextinGetRecords ***************************************************
927 **
928 ** Returns the record count of a text input object
929 **
930 ** @param [r] thys [const AjPTextin] Text input object.
931 ** @return [ajuint] Record count
932 **
933 ** @release 6.6.0
934 ** @@
935 ******************************************************************************/
936
ajTextinGetRecords(const AjPTextin thys)937 ajuint ajTextinGetRecords(const AjPTextin thys)
938 {
939 return thys->Records;
940 }
941
942
943
944
945 /* @func ajTextinGetTotrecords ************************************************
946 **
947 ** Returns the total record count of a text input object
948 **
949 ** @param [r] thys [const AjPTextin] Text input object.
950 ** @return [ajuint] Total record count
951 **
952 ** @release 6.6.0
953 ** @@
954 ******************************************************************************/
955
ajTextinGetTotrecords(const AjPTextin thys)956 ajuint ajTextinGetTotrecords(const AjPTextin thys)
957 {
958 return thys->TotRecords;
959 }
960
961
962
963
964 /* @func ajTextinTrace ********************************************************
965 **
966 ** Debug calls to trace the data in a text input object.
967 **
968 ** @param [r] thys [const AjPTextin] Text input object.
969 ** @return [void]
970 **
971 ** @release 6.4.0
972 ** @@
973 ******************************************************************************/
974
ajTextinTrace(const AjPTextin thys)975 void ajTextinTrace(const AjPTextin thys)
976 {
977 ajDebug("text input trace\n");
978 ajDebug( "====================\n\n");
979
980 if(ajStrGetLen(thys->Db))
981 ajDebug( " Db: '%S'\n", thys->Db);
982
983 if(ajStrGetLen(thys->Formatstr))
984 ajDebug( " Format: '%S' (%u)\n", thys->Formatstr, thys->Format);
985
986 if(ajStrGetLen(thys->QryFields))
987 ajDebug( " Fields: '%S'\n", thys->QryFields);
988
989 if(ajStrGetLen(thys->Qry))
990 ajDebug( " Query: '%S'\n", thys->Qry);
991
992 if(ajStrGetLen(thys->Filename))
993 ajDebug( " Filename: '%S'\n", thys->Filename);
994
995 if(ajListGetLength(thys->List))
996 ajDebug( " List: (%Lu)\n", ajListGetLength(thys->List));
997
998 if(thys->Filebuff)
999 ajDebug( " Filebuff: %F (%Ld)\n",
1000 ajFilebuffGetFile(thys->Filebuff),
1001 ajFileResetPos(ajFilebuffGetFile(thys->Filebuff)));
1002
1003 if(thys->Search)
1004 ajDebug( " Search: %B\n", thys->Search);
1005
1006 if(thys->Single)
1007 ajDebug( " Single: %B\n", thys->Single);
1008
1009 if(thys->Multi)
1010 ajDebug( " Multi: %B\n", thys->Multi);
1011
1012 if(thys->CaseId)
1013 ajDebug( " CaseId: %B\n", thys->CaseId);
1014
1015 if(thys->Text)
1016 ajDebug( " Savetext: %B\n", thys->Text);
1017
1018 if(thys->Count)
1019 ajDebug( " Count: %u\n", thys->Count);
1020
1021 if(thys->Filecount)
1022 ajDebug( " File count: %u\n", thys->Filecount);
1023
1024 if(thys->Entrycount)
1025 ajDebug( " Entry count: %u\n", thys->Entrycount);
1026
1027 if(thys->Fpos)
1028 ajDebug( " Fpos: %Ld\n", thys->Fpos);
1029
1030 if(thys->Curpos)
1031 ajDebug( "Curpos: %Ld\n", thys->Curpos);
1032
1033 if(thys->Query)
1034 ajQueryTrace(thys->Query);
1035
1036 if(thys->TextData)
1037 ajDebug( " TextData: exists\n");
1038
1039 return;
1040 }
1041
1042
1043
1044
1045 /* @section Text data inputs **************************************************
1046 **
1047 ** These functions read the text data provided by the first argument
1048 **
1049 ** @fdata [AjPTextin]
1050 **
1051 ** @nam3rule Read Read text data
1052 **
1053 ** @argrule Read textin [AjPTextin] Text input object
1054 ** @argrule Read text [AjPText] Text data
1055 **
1056 ** @valrule * [AjBool] true on success
1057 **
1058 ** @fcategory input
1059 **
1060 ******************************************************************************/
1061
1062
1063
1064
1065 /* @func ajTextinRead *********************************************************
1066 **
1067 ** If the file is not yet open, calls textinQryProcess to convert the
1068 ** query into an open file stream.
1069 **
1070 ** Uses textinRead for the actual file reading.
1071 **
1072 ** Returns the results in the AjPText object.
1073 **
1074 ** @param [u] textin [AjPTextin] text data input definitions
1075 ** @param [w] text [AjPText] text data returned.
1076 ** @return [AjBool] ajTrue on success.
1077 ** @category input [AjPText] Master text data input, calls specific functions
1078 ** for file access type and text data format.
1079 **
1080 ** @release 6.4.0
1081 ** @@
1082 ******************************************************************************/
1083
ajTextinRead(AjPTextin textin,AjPText text)1084 AjBool ajTextinRead(AjPTextin textin, AjPText text)
1085 {
1086 AjBool ret = ajFalse;
1087 AjPQueryList node = NULL;
1088 AjBool listdata = ajFalse;
1089
1090 if(textin->Filebuff)
1091 {
1092 /* (a) if file still open, keep reading */
1093 ajDebug("ajTextinRead: input file '%F' still there, try again\n",
1094 textin->Filebuff->File);
1095 ret = textinRead(textin, text);
1096 ajDebug("ajTextinRead: open buffer qry: '%S' returns: %B\n",
1097 textin->Qry, ret);
1098 }
1099 else
1100 {
1101 /* (b) if we have a list, try the next query in the list */
1102 if(ajListGetLength(textin->List))
1103 {
1104 listdata = ajTrue;
1105 ajListPop(textin->List, (void**) &node);
1106
1107 ajDebug("++pop from list '%S'\n", node->Qry);
1108 ajTextinQryS(textin, node->Qry);
1109 ajDebug("++SAVE TEXTIN '%S' '%S' %d\n",
1110 textin->Qry,
1111 textin->Formatstr, textin->Format);
1112
1113 textinQryRestore(textin, node);
1114
1115 ajStrDel(&node->Qry);
1116 ajStrDel(&node->Formatstr);
1117 AJFREE(node);
1118
1119 ajDebug("ajTextinRead: open list, try '%S'\n", textin->Qry);
1120
1121 if(!textinQryProcess(textin, text) &&
1122 !ajListGetLength(textin->List))
1123 return ajFalse;
1124
1125 ret = textinRead(textin, text);
1126 ajDebug("ajTextinRead: list qry: '%S' returns: %B\n",
1127 textin->Qry, ret);
1128 }
1129 else
1130 {
1131 ajDebug("ajTextinRead: no file yet - test query '%S'\n",
1132 textin->Qry);
1133
1134 /* (c) Must be a query - decode it */
1135 if(!textinQryProcess(textin, text) &&
1136 !ajListGetLength(textin->List))
1137 return ajFalse;
1138
1139 if(ajListGetLength(textin->List)) /* could be a new list */
1140 listdata = ajTrue;
1141
1142 ret = textinRead(textin, text);
1143 ajDebug("ajTextinRead: new qry: '%S' returns: %B\n",
1144 textin->Qry, ret);
1145 }
1146 }
1147
1148 /* Now read whatever we got */
1149
1150 while(!ret && ajListGetLength(textin->List))
1151 {
1152 /* Failed, but we have a list still - keep trying it */
1153 if(listdata)
1154 ajErr("Failed to read text data '%S'", textin->Qry);
1155
1156 listdata = ajTrue;
1157 ajListPop(textin->List,(void**) &node);
1158 ajDebug("++try again: pop from list '%S'\n", node->Qry);
1159 ajTextinQryS(textin, node->Qry);
1160 ajDebug("++SAVE (AGAIN) TEXTIN '%S' '%S' %d\n",
1161 textin->Qry,
1162 textin->Formatstr, textin->Format);
1163
1164 textinQryRestore(textin, node);
1165
1166 ajStrDel(&node->Qry);
1167 ajStrDel(&node->Formatstr);
1168 AJFREE(node);
1169
1170 if(!textinQryProcess(textin, text))
1171 continue;
1172
1173 ret = textinRead(textin, text);
1174 ajDebug("ajTextinRead: list retry qry: '%S' returns: %B\n",
1175 textin->Qry, ret);
1176 }
1177
1178 if(!ret)
1179 {
1180 if(listdata)
1181 ajErr("Failed to read text data '%S'", textin->Qry);
1182
1183 return ajFalse;
1184 }
1185
1186
1187 textDefine(text, textin);
1188
1189 return ajTrue;
1190 }
1191
1192
1193
1194
1195 /* @funcstatic textinQueryMatch ***********************************************
1196 **
1197 ** Compares a text data item to a query and returns true if they match.
1198 **
1199 ** @param [r] thys [const AjPQuery] query.
1200 ** @param [r] text [const AjPText] Text data.
1201 ** @return [AjBool] ajTrue if the text data matches the query.
1202 **
1203 ** @release 6.4.0
1204 ** @@
1205 ******************************************************************************/
1206
textinQueryMatch(const AjPQuery thys,const AjPText text)1207 static AjBool textinQueryMatch(const AjPQuery thys, const AjPText text)
1208 {
1209 AjBool tested = ajFalse;
1210 AjIList iterfield = NULL;
1211 AjPQueryField field = NULL;
1212 AjBool ok = ajFalse;
1213
1214 ajDebug("textinQueryMatch '%S' fields: %Lu Case %B Done %B\n",
1215 text->Id, ajListGetLength(thys->QueryFields),
1216 thys->CaseId, thys->QryDone);
1217
1218 if(!thys) /* no query to test, that's fine */
1219 return ajTrue;
1220
1221 if(thys->QryDone) /* do we need to test here? */
1222 return ajTrue;
1223
1224 /* test the query field(s) */
1225
1226 iterfield = ajListIterNewread(thys->QueryFields);
1227 while(!ajListIterDone(iterfield))
1228 {
1229 field = ajListIterGet(iterfield);
1230
1231 ajDebug(" field: '%S' Query: '%S'\n",
1232 field->Field, field->Wildquery);
1233 if(ajStrMatchC(field->Field, "id"))
1234 {
1235 ajDebug(" id test: '%S'\n",
1236 text->Id);
1237 if(thys->CaseId)
1238 {
1239 if(ajStrMatchWildS(text->Id, field->Wildquery))
1240 {
1241 ajListIterDel(&iterfield);
1242 return ajTrue;
1243 }
1244 }
1245 else
1246 {
1247 if(ajStrMatchWildCaseS(text->Id, field->Wildquery))
1248 {
1249 ajListIterDel(&iterfield);
1250 return ajTrue;
1251 }
1252 }
1253
1254 ajDebug("id test failed\n");
1255 tested = ajTrue;
1256 ok = ajFalse;
1257 }
1258
1259 if(ajStrMatchC(field->Field, "acc")) /* test id, use trueid */
1260 {
1261 if(ajStrMatchWildCaseS(text->Id, field->Wildquery))
1262 {
1263 ajListIterDel(&iterfield);
1264 return ajTrue;
1265 }
1266 }
1267
1268 }
1269
1270 ajListIterDel(&iterfield);
1271
1272 if(!tested) /* nothing to test, so accept it anyway */
1273 {
1274 ajDebug(" no tests: assume OK\n");
1275 return ajTrue;
1276 }
1277
1278 ajDebug("result: %B\n", ok);
1279
1280 return ok;
1281 }
1282
1283
1284
1285
1286 /* @funcstatic textDefine *****************************************************
1287 **
1288 ** Make sure all text data object attributes are defined
1289 ** using values from the text input object if needed
1290 **
1291 ** @param [w] thys [AjPText] Text data returned.
1292 ** @param [u] textin [AjPTextin] Text data input definitions
1293 ** @return [AjBool] ajTrue on success.
1294 **
1295 ** @release 6.4.0
1296 ** @@
1297 ******************************************************************************/
1298
textDefine(AjPText thys,AjPTextin textin)1299 static AjBool textDefine(AjPText thys, AjPTextin textin)
1300 {
1301
1302 /* if values are missing in the text object, we can use defaults
1303 from textin or calculate where possible */
1304
1305 /* assign the dbname if defined in the textin object */
1306 if(ajStrGetLen(textin->Db))
1307 ajStrAssignS(&thys->Db, textin->Db);
1308
1309 return ajTrue;
1310 }
1311
1312
1313
1314
1315
1316 /* @funcstatic textinReadFmt **************************************************
1317 **
1318 ** Tests whether an text data can be read using the specified format.
1319 ** Then tests whether the text data matches text data query criteria
1320 ** and checks any specified type. Applies upper and lower case.
1321 **
1322 ** @param [u] textin [AjPTextin] text data input object
1323 ** @param [w] text [AjPText] text data object
1324 ** @param [r] format [ajuint] input format code
1325 ** @return [ajuint] 0 if successful.
1326 ** 1 if the query match failed.
1327 ** 2 if the text data type failed
1328 ** 3 if it failed to read an text data
1329 **
1330 ** @release 6.4.0
1331 ** @@
1332 ** This is the only function that calls the appropriate Read function
1333 ** textinReadXxxxxx where Xxxxxxx is the supported text data format.
1334 **
1335 ** Some of the textReadXxxxxx functions fail to reset the buffer correctly,
1336 ** which is a very serious problem when cycling through all of them to
1337 ** identify an unknown format. The extra ajFileBuffReset call at the end is
1338 ** intended to address this problem. The individual functions should still
1339 ** reset the buffer in case they are called from elsewhere.
1340 **
1341 ******************************************************************************/
1342
textinReadFmt(AjPTextin textin,AjPText text,ajuint format)1343 static ajuint textinReadFmt(AjPTextin textin, AjPText text,
1344 ajuint format)
1345 {
1346 ajDebug("++textinReadFmt format %d (%s) '%S'\n",
1347 format, textinFormatDef[format].Name,
1348 textin->Qry);
1349
1350 textin->Records = 0;
1351
1352 /* Calling funclist textinFormatDef() */
1353 if((*textinFormatDef[format].Read)(textin, text))
1354 {
1355 ajDebug("textinReadFmt success with format %d (%s)\n",
1356 format, textinFormatDef[format].Name);
1357 ajDebug("id: '%S'\n",
1358 text->Id);
1359 textin->Format = format;
1360 ajStrAssignC(&textin->Formatstr, textinFormatDef[format].Name);
1361 ajStrAssignC(&text->Formatstr, textinFormatDef[format].Name);
1362 ajStrAssignEmptyS(&text->Db, textin->Db);
1363 ajStrAssignS(&text->Filename, textin->Filename);
1364 if(!ajStrGetLen(text->Id))
1365 {
1366 ajStrAssignS(&text->Id,
1367 ajFileGetPrintnameS(ajFilebuffGetFile(textin->Filebuff)));
1368 ajFilenameTrimAll(&text->Id);
1369 ajDebug("filename as id: '%S'\n",
1370 text->Id);
1371 }
1372
1373 if(textinQueryMatch(textin->Query, text))
1374 {
1375 /* ajTextinTrace(textin); */
1376
1377 return FMT_OK;
1378 }
1379
1380 ajDebug("query match failed, continuing ...\n");
1381 ajTextClear(text);
1382
1383 return FMT_NOMATCH;
1384 }
1385 else
1386 {
1387 ajDebug("Testing input buffer: IsBuff: %B Eof: %B\n",
1388 ajFilebuffIsBuffered(textin->Filebuff),
1389 ajFilebuffIsEof(textin->Filebuff));
1390
1391 if (!ajFilebuffIsBuffered(textin->Filebuff) &&
1392 ajFilebuffIsEof(textin->Filebuff))
1393 return FMT_EOF;
1394
1395 ajFilebuffReset(textin->Filebuff);
1396 ajDebug("Format %d (%s) failed, file buffer reset by textinReadFmt\n",
1397 format, textinFormatDef[format].Name);
1398 /* ajFilebuffTraceFull(textin->Filebuff, 10, 10);*/
1399 }
1400
1401 ajDebug("++textinReadFmt failed - nothing read\n");
1402
1403 return FMT_FAIL;
1404 }
1405
1406
1407
1408
1409 /* @funcstatic textinRead *****************************************************
1410 **
1411 ** Given data in a textin structure, tries to read everything needed
1412 ** using the specified format or by trial and error.
1413 **
1414 ** @param [u] textin [AjPTextin] text data input object
1415 ** @param [w] text [AjPText] text data object
1416 ** @return [AjBool] ajTrue on success
1417 **
1418 ** @release 6.4.0
1419 ** @@
1420 ******************************************************************************/
1421
textinRead(AjPTextin textin,AjPText text)1422 static AjBool textinRead(AjPTextin textin, AjPText text)
1423 {
1424 ajuint i;
1425 ajuint istat = 0;
1426 ajuint jstat = 0;
1427
1428 AjPFilebuff buff = textin->Filebuff;
1429 AjBool ok;
1430
1431 AjPTextAccess textaccess = textin->Query->TextAccess;
1432 AjPTextAccess textonlyaccess = textin->Query->Access;
1433
1434 ajTextClear(text);
1435 ajDebug("textinRead: cleared\n");
1436
1437 if(textin->Single && textin->Count)
1438 {
1439 /*
1440 ** One text data at a time is read.
1441 ** The first text data was read by ACD
1442 ** for the following ones we need to reset the AjPTextin
1443 **
1444 ** Single is set by the access method
1445 */
1446
1447 ajDebug("textinRead: single access - count %d - call access"
1448 " routine again\n",
1449 textin->Count);
1450 /* Calling funclist textinAccess() */
1451 if(textaccess)
1452 {
1453 if(!(*textaccess->Access)(textin))
1454 {
1455 ajDebug("textinRead: (*textaccess->Access)(textin) "
1456 "*failed*\n");
1457
1458 return ajFalse;
1459 }
1460 }
1461
1462 if(textonlyaccess)
1463 {
1464 if(!(*textonlyaccess->Access)(textin))
1465 {
1466 ajDebug("textinRead: (*textonlyaccess->Access)(textin) "
1467 "*failed*\n");
1468
1469 return ajFalse;
1470 }
1471 }
1472
1473 buff = textin->Filebuff;
1474 }
1475
1476 ajDebug("textinRead: textin format %d '%S'\n", textin->Format,
1477 textin->Formatstr);
1478
1479 textin->Count++;
1480
1481 if(!textin->Filebuff)
1482 return ajFalse;
1483
1484 ok = ajFilebuffIsBuffered(textin->Filebuff);
1485
1486 while(ok)
1487 { /* skip blank lines */
1488 ok = ajBuffreadLine(textin->Filebuff, &textinReadLine);
1489
1490 if(!ajStrIsWhite(textinReadLine))
1491 {
1492 ajFilebuffClear(textin->Filebuff,1);
1493 break;
1494 }
1495 }
1496
1497 if(!textin->Format)
1498 { /* no format specified, try all defaults */
1499 for(i = 1; textinFormatDef[i].Name; i++)
1500 {
1501 if(!textinFormatDef[i].Try) /* skip if Try is ajFalse */
1502 continue;
1503
1504 ajDebug("textinRead:try format %d (%s)\n",
1505 i, textinFormatDef[i].Name);
1506
1507 istat = textinReadFmt(textin, text, i);
1508
1509 switch(istat)
1510 {
1511 case FMT_OK:
1512 ajDebug("++textinRead OK, set format %d\n", textin->Format);
1513 textDefine(text, textin);
1514
1515 return ajTrue;
1516 case FMT_BADTYPE:
1517 ajDebug("textinRead: (a1) textinReadFmt stat == BADTYPE "
1518 "*failed*\n");
1519
1520 return ajFalse;
1521 case FMT_FAIL:
1522 ajDebug("textinRead: (b1) textinReadFmt stat == FAIL "
1523 "*failed*\n");
1524 break; /* we can try next format */
1525 case FMT_NOMATCH:
1526 ajDebug("textinRead: (c1) textinReadFmt stat==NOMATCH "
1527 "try again\n");
1528 break;
1529 case FMT_EOF:
1530 ajDebug("textinRead: (d1) textinReadFmt stat == EOF "
1531 "*failed*\n");
1532 return ajFalse; /* EOF and unbuffered */
1533 case FMT_EMPTY:
1534 ajWarn("text data '%S' has zero length, ignored",
1535 ajTextGetQryS(text));
1536 ajDebug("textinRead: (e1) textinReadFmt stat==EMPTY "
1537 "try again\n");
1538 break;
1539 default:
1540 ajDebug("unknown code %d from textinReadFmt\n", stat);
1541 }
1542
1543 ajTextClear(text);
1544
1545 if(textin->Format)
1546 break; /* we read something */
1547
1548 ajFilebuffTrace(textin->Filebuff);
1549 }
1550
1551 if(!textin->Format)
1552 { /* all default formats failed, give up */
1553 ajDebug("textinRead:all default formats failed, give up\n");
1554
1555 return ajFalse;
1556 }
1557
1558 ajDebug("++textinRead set format %d\n", textin->Format);
1559 }
1560 else
1561 { /* one format specified */
1562 ajDebug("textinRead: one format specified\n");
1563 ajFilebuffSetUnbuffered(textin->Filebuff);
1564
1565 ajDebug("++textinRead known format %d\n", textin->Format);
1566 istat = textinReadFmt(textin, text, textin->Format);
1567
1568 switch(istat)
1569 {
1570 case FMT_OK:
1571 textDefine(text, textin);
1572
1573 return ajTrue;
1574 case FMT_BADTYPE:
1575 ajDebug("textinRead: (a2) textinReadFmt stat == BADTYPE "
1576 "*failed*\n");
1577
1578 return ajFalse;
1579
1580 case FMT_FAIL:
1581 ajDebug("textinRead: (b2) textinReadFmt stat == FAIL "
1582 "*failed*\n");
1583
1584 return ajFalse;
1585
1586 case FMT_NOMATCH:
1587 ajDebug("textinRead: (c2) textinReadFmt stat == NOMATCH "
1588 "*try again*\n");
1589 break;
1590 case FMT_EOF:
1591 ajDebug("textinRead: (d2) textinReadFmt stat == EOF "
1592 "*try again*\n");
1593 if(textin->Records)
1594 ajErr("Error reading file '%F' with format '%s': "
1595 "end-of-file before end of data "
1596 "(read %u records)",
1597 ajFilebuffGetFile(textin->Filebuff),
1598 textinFormatDef[textin->Format].Name,
1599 textin->Records);
1600 break; /* simply end-of-file */
1601 case FMT_EMPTY:
1602 ajWarn("text data '%S' has zero length, ignored",
1603 ajTextGetQryS(text));
1604 ajDebug("textinRead: (e2) textinReadFmt stat == EMPTY "
1605 "*try again*\n");
1606 break;
1607 default:
1608 ajDebug("unknown code %d from textinReadFmt\n", stat);
1609 }
1610
1611 ajTextClear(text); /* 1 : read, failed to match id/acc/query */
1612 }
1613
1614 /* failed - probably entry/accession query failed. Can we try again? */
1615
1616 ajDebug("textinRead failed - try again with format %d '%s' code %d\n",
1617 textin->Format,
1618 textinFormatDef[textin->Format].Name, istat);
1619
1620 ajDebug("Search:%B Chunk:%B Data:%x ajFileBuffEmpty:%B\n",
1621 textin->Search, textin->ChunkEntries,
1622 textin->TextData, ajFilebuffIsEmpty(buff));
1623
1624 if(ajFilebuffIsEmpty(buff) && textin->ChunkEntries)
1625 {
1626 if(textaccess && !(*textaccess->Access)(textin))
1627 return ajFalse;
1628 else if(textonlyaccess && !(*textonlyaccess->Access)(textin))
1629 return ajFalse;
1630 buff = textin->Filebuff;
1631 }
1632
1633
1634 /* need to check end-of-file to avoid repeats */
1635 while(textin->Search &&
1636 (textin->TextData || !ajFilebuffIsEmpty(buff)))
1637 {
1638 jstat = textinReadFmt(textin, text, textin->Format);
1639
1640 switch(jstat)
1641 {
1642 case FMT_OK:
1643 textDefine(text, textin);
1644
1645 return ajTrue;
1646
1647 case FMT_BADTYPE:
1648 ajDebug("textinRead: (a3) textinReadFmt stat == BADTYPE "
1649 "*failed*\n");
1650
1651 return ajFalse;
1652
1653 case FMT_FAIL:
1654 ajDebug("textinRead: (b3) textinReadFmt stat == FAIL "
1655 "*failed*\n");
1656
1657 return ajFalse;
1658
1659 case FMT_NOMATCH:
1660 ajDebug("textinRead: (c3) textinReadFmt stat == NOMATCH "
1661 "*try again*\n");
1662 break;
1663 case FMT_EOF:
1664 ajDebug("textinRead: (d3) textinReadFmt stat == EOF "
1665 "*failed*\n");
1666
1667 return ajFalse; /* we already tried again */
1668
1669 case FMT_EMPTY:
1670 if(istat != FMT_EMPTY)
1671 ajWarn("text data '%S' has zero length, ignored",
1672 ajTextGetQryS(text));
1673 ajDebug("textinRead: (e3) textinReadFmt stat == EMPTY "
1674 "*try again*\n");
1675 break;
1676
1677 default:
1678 ajDebug("unknown code %d from textinReadFmt\n", stat);
1679 }
1680
1681 ajTextClear(text); /* 1 : read, failed to match id/acc/query */
1682 }
1683
1684 if(textin->Format)
1685 ajDebug("textinRead: *failed* to read text data %S using format %s\n",
1686 textin->Qry, textinFormatDef[textin->Format].Name);
1687 else
1688 ajDebug("textinRead: *failed* to read text data %S using any format\n",
1689 textin->Qry);
1690
1691 return ajFalse;
1692 }
1693
1694
1695
1696
1697 /* @funcstatic textinReadText *************************************************
1698 **
1699 ** Given data in a text structure, tries to read everything needed
1700 ** using the TEXT format.
1701 **
1702 ** @param [u] textin [AjPTextin] Text input object
1703 ** @param [w] text [AjPText] Text object
1704 ** @return [AjBool] ajTrue on success
1705 **
1706 ** @release 6.4.0
1707 ** @@
1708 ******************************************************************************/
1709
textinReadText(AjPTextin textin,AjPText text)1710 static AjBool textinReadText(AjPTextin textin, AjPText text)
1711 {
1712 AjPFilebuff buff;
1713
1714 ajuint linecnt = 0;
1715
1716 ajDebug("textinReadText\n");
1717 ajTextClear(text);
1718 buff = textin->Filebuff;
1719
1720 /* ajFilebuffTrace(buff); */
1721
1722 textin->Curpos = 0L;
1723 while (ajBuffreadLinePos(buff, &textinReadLine, &textin->Curpos))
1724 {
1725 linecnt++;
1726 ajStrTrimEndC(&textinReadLine, "\r\n");
1727
1728 ajDebug("line %u:%S\n", linecnt, textinReadLine);
1729
1730 /* add line to AjPText object */
1731 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
1732 }
1733 ajDebug("textinReadText read %u lines\n", linecnt);
1734
1735 if(!linecnt)
1736 return ajFalse;
1737
1738 return ajTrue;
1739 }
1740
1741
1742
1743
1744 /* @funcstatic textinReadXml **************************************************
1745 **
1746 ** Given data in a text structure, tries to read everything needed
1747 ** using the XML format.
1748 **
1749 ** @param [u] textin [AjPTextin] Text input object
1750 ** @param [w] text [AjPText] Text object
1751 ** @return [AjBool] ajTrue on success
1752 **
1753 ** @release 6.4.0
1754 ** @@
1755 ******************************************************************************/
1756
textinReadXml(AjPTextin textin,AjPText text)1757 static AjBool textinReadXml(AjPTextin textin, AjPText text)
1758 {
1759 AjPFilebuff buff;
1760 ajuint linecnt = 0;
1761
1762 ajDebug("textinReadXml\n");
1763 ajTextClear(text);
1764 buff = textin->Filebuff;
1765
1766 /* ajFilebuffTrace(buff); */
1767
1768 textin->Curpos = 0L;
1769 while (ajBuffreadLinePos(buff, &textinReadLine, &textin->Curpos))
1770 {
1771 linecnt++;
1772 ajStrTrimEndC(&textinReadLine, "\r\n");
1773
1774 ajDebug("line %u:%S\n", linecnt, textinReadLine);
1775
1776 /* add line to AjPText object */
1777 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
1778 }
1779 ajDebug("textinReadXml read %u lines\n", linecnt);
1780
1781 if(!linecnt)
1782 return ajFalse;
1783
1784 return ajTrue;
1785 }
1786
1787
1788
1789
1790 /* @funcstatic textinReadEmbl *************************************************
1791 **
1792 ** Given data in a text input stream, tries to read text
1793 ** using the EMBL format.
1794 **
1795 ** @param [u] textin [AjPTextin] Text input object
1796 ** @param [w] text [AjPText] Text object
1797 ** @return [AjBool] ajTrue on success
1798 **
1799 ** @release 6.4.0
1800 ** @@
1801 ******************************************************************************/
1802
textinReadEmbl(AjPTextin textin,AjPText text)1803 static AjBool textinReadEmbl(AjPTextin textin, AjPText text)
1804 {
1805 AjPFilebuff buff;
1806 ajuint linecnt = 0;
1807 AjBool ok = ajFalse;
1808
1809 ajDebug("textinReadObo\n");
1810 ajTextClear(text);
1811 buff = textin->Filebuff;
1812
1813 /* ajFilebuffTrace(buff); */
1814 ok = ajBuffreadLine(buff, &textinReadLine);
1815 while(ok && !ajStrPrefixC(textinReadLine, "ID "))
1816 ok = ajBuffreadLine(buff, &textinReadLine);
1817
1818 if(!ok)
1819 return ajFalse;
1820
1821 ajStrTrimEndC(&textinReadLine, "\r\n");
1822
1823 ajDebug("line %u:%S\n", linecnt, textinReadLine);
1824
1825 /* add line to AjPText object */
1826 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
1827
1828 ajFilebuffClear(buff, 1);
1829 ok = ajBuffreadLine(buff, &textinReadLine);
1830 if(ok)
1831 ok = ajBuffreadLine(buff, &textinReadLine);
1832
1833 while (ok)
1834 {
1835 ajStrTrimWhite(&textinReadLine);
1836
1837 if(!ajStrGetLen(textinReadLine))
1838 break;
1839
1840
1841 linecnt++;
1842 ajStrTrimEndC(&textinReadLine, "\r\n");
1843
1844 ajDebug("line %u:%S\n", linecnt, textinReadLine);
1845
1846 /* add line to AjPText object */
1847 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
1848
1849 if(ajStrMatchC(textinReadLine, "//"))
1850 break;
1851
1852 ok = ajBuffreadLine(buff, &textinReadLine);
1853 }
1854
1855 return ajTrue;
1856 }
1857
1858
1859
1860
1861 /* @funcstatic textinReadGenbank **********************************************
1862 **
1863 ** Given data in a text inpur stream, tries to read text
1864 ** using the GENBANK format.
1865 **
1866 ** @param [u] textin [AjPTextin] Text input object
1867 ** @param [w] text [AjPText] Text object
1868 ** @return [AjBool] ajTrue on success
1869 **
1870 ** @release 6.6.0
1871 ** @@
1872 ******************************************************************************/
1873
textinReadGenbank(AjPTextin textin,AjPText text)1874 static AjBool textinReadGenbank(AjPTextin textin, AjPText text)
1875 {
1876 AjPFilebuff buff;
1877 ajuint linecnt = 0;
1878 AjBool ok = ajFalse;
1879
1880 ajDebug("textinReadObo\n");
1881 ajTextClear(text);
1882 buff = textin->Filebuff;
1883
1884 /* ajFilebuffTrace(buff); */
1885 ok = ajBuffreadLine(buff, &textinReadLine);
1886 while(ok && !ajStrPrefixC(textinReadLine, "LOCUS "))
1887 ok = ajBuffreadLine(buff, &textinReadLine);
1888
1889 if(!ok)
1890 return ajFalse;
1891
1892 ajStrTrimEndC(&textinReadLine, "\r\n");
1893
1894 ajDebug("line %u:%S\n", linecnt, textinReadLine);
1895
1896 /* add line to AjPText object */
1897 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
1898
1899 ajFilebuffClear(buff, 1);
1900 ok = ajBuffreadLine(buff, &textinReadLine);
1901 if(ok)
1902 ok = ajBuffreadLine(buff, &textinReadLine);
1903
1904 while (ok)
1905 {
1906 ajStrTrimWhite(&textinReadLine);
1907
1908 if(!ajStrGetLen(textinReadLine))
1909 break;
1910
1911
1912 linecnt++;
1913 ajStrTrimEndC(&textinReadLine, "\r\n");
1914
1915 ajDebug("line %u:%S\n", linecnt, textinReadLine);
1916
1917 /* add line to AjPText object */
1918 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
1919
1920 if(ajStrMatchC(textinReadLine, "//"))
1921 break;
1922
1923 ok = ajBuffreadLine(buff, &textinReadLine);
1924 }
1925
1926 return ajTrue;
1927 }
1928
1929
1930
1931
1932 /* @funcstatic textinReadObo **************************************************
1933 **
1934 ** Given data in a text input stream, tries to read text
1935 ** using the OBO format.
1936 **
1937 ** @param [u] textin [AjPTextin] Text input object
1938 ** @param [w] text [AjPText] Text object
1939 ** @return [AjBool] ajTrue on success
1940 **
1941 ** @release 6.4.0
1942 ** @@
1943 ******************************************************************************/
1944
textinReadObo(AjPTextin textin,AjPText text)1945 static AjBool textinReadObo(AjPTextin textin, AjPText text)
1946 {
1947 AjPFilebuff buff;
1948 ajuint linecnt = 0;
1949 AjBool ok = ajFalse;
1950
1951 ajDebug("textinReadObo\n");
1952 ajTextClear(text);
1953 buff = textin->Filebuff;
1954
1955 /* ajFilebuffTrace(buff); */
1956 ok = ajBuffreadLine(buff, &textinReadLine);
1957 while(ok && !ajStrPrefixC(textinReadLine, "[Term]"))
1958 ok = ajBuffreadLine(buff, &textinReadLine);
1959
1960 if(!ok)
1961 return ajFalse;
1962
1963 ajStrTrimEndC(&textinReadLine, "\r\n");
1964
1965 ajDebug("line %u:%S\n", linecnt, textinReadLine);
1966
1967 /* add line to AjPText object */
1968 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
1969
1970 ajFilebuffClear(buff, 1);
1971 ok = ajBuffreadLine(buff, &textinReadLine);
1972 if(ok)
1973 ok = ajBuffreadLine(buff, &textinReadLine);
1974
1975 while (ok)
1976 {
1977 ajStrTrimWhite(&textinReadLine);
1978
1979 if(!ajStrGetLen(textinReadLine))
1980 break;
1981
1982 if(ajStrGetCharFirst(textinReadLine) == '[') /* new stanza */
1983 break;
1984
1985 linecnt++;
1986 ajStrTrimEndC(&textinReadLine, "\r\n");
1987
1988 ajDebug("line %u:%S\n", linecnt, textinReadLine);
1989
1990 /* add line to AjPText object */
1991 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
1992
1993 ok = ajBuffreadLine(buff, &textinReadLine);
1994 }
1995
1996 return ajTrue;
1997 }
1998
1999
2000
2001
2002 /* @funcstatic textinReadPdb * ************************************************
2003 **
2004 ** Given data in a text input stream, tries to read text
2005 ** using the PDB format.
2006 **
2007 ** @param [u] textin [AjPTextin] Text input object
2008 ** @param [w] text [AjPText] Text object
2009 ** @return [AjBool] ajTrue on success
2010 **
2011 ** @release 6.4.0
2012 ** @@
2013 ******************************************************************************/
2014
textinReadPdb(AjPTextin textin,AjPText text)2015 static AjBool textinReadPdb(AjPTextin textin, AjPText text)
2016 {
2017 AjPFilebuff buff;
2018 ajuint linecnt = 0;
2019 AjBool ok = ajFalse;
2020
2021 ajDebug("textinReadPdb\n");
2022 ajTextClear(text);
2023 buff = textin->Filebuff;
2024
2025 /* ajFilebuffTrace(buff); */
2026 ok = ajBuffreadLine(buff, &textinReadLine);
2027 while(ok && !ajStrPrefixC(textinReadLine, "HEADER "))
2028 ok = ajBuffreadLine(buff, &textinReadLine);
2029
2030 if(!ok)
2031 return ajFalse;
2032
2033 ajStrTrimEndC(&textinReadLine, "\r\n");
2034
2035 ajDebug("line %u:%S\n", linecnt, textinReadLine);
2036
2037 /* add line to AjPText object */
2038 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
2039
2040 ajFilebuffClear(buff, 1);
2041 ok = ajBuffreadLine(buff, &textinReadLine);
2042 if(ok)
2043 ok = ajBuffreadLine(buff, &textinReadLine);
2044
2045 while (ok)
2046 {
2047 ajStrTrimWhite(&textinReadLine);
2048
2049 if(!ajStrGetLen(textinReadLine))
2050 break;
2051
2052
2053 linecnt++;
2054 ajStrTrimEndC(&textinReadLine, "\r\n");
2055
2056 ajDebug("line %u:%S\n", linecnt, textinReadLine);
2057
2058 /* add line to AjPText object */
2059 ajListPushAppend(text->Lines, ajStrNewS(textinReadLine));
2060
2061 if(ajStrMatchC(textinReadLine, "END"))
2062 break;
2063
2064 ok = ajBuffreadLine(buff, &textinReadLine);
2065 }
2066
2067 return ajTrue;
2068 }
2069
2070
2071
2072
2073 /* @section File Access *******************************************************
2074 **
2075 ** These functions manage the text file access methods.
2076 **
2077 ** @fdata [AjPTextin]
2078 **
2079 ** @nam3rule Access Access method
2080 ** @nam4rule Asis Reads text using the 'filename' as the single input line
2081 ** @nam4rule File Reading an input file
2082 ** @nam4rule Ftp Reads text using the 'filename' as an FTP URL
2083 ** @nam4rule Http Reads text using the 'filename' as an HTTP URL
2084 ** @nam4rule Offset Reading an input file starting at a given offset position
2085 ** within the text input query
2086 **
2087 ** @argrule Access textin [AjPTextin] Text input object
2088 ** @valrule * [AjBool] True on success
2089 **
2090 ** @fcategory input
2091 **
2092 ******************************************************************************/
2093
2094
2095
2096
2097 /* @func ajTextinAccessAsis ***************************************************
2098 **
2099 ** Reads text using the 'filename' as the single input line
2100 **
2101 ** @param [u] textin [AjPTextin] Text input.
2102 ** @return [AjBool] ajTrue on success.
2103 **
2104 ** @release 6.4.0
2105 ** @@
2106 ******************************************************************************/
2107
ajTextinAccessAsis(AjPTextin textin)2108 AjBool ajTextinAccessAsis(AjPTextin textin)
2109 {
2110 AjPQuery qry;
2111
2112 qry = textin->Query;
2113
2114 if(!ajStrGetLen(qry->Filename))
2115 {
2116 ajErr("ASIS access: no text");
2117
2118 return ajFalse;
2119 }
2120
2121 ajDebug("ajTextinAccessAsis %S\n", qry->Filename);
2122
2123 ajFilebuffDel(&textin->Filebuff);
2124 textin->Filebuff = ajFilebuffNewLine(qry->Filename);
2125
2126 if(!textin->Filebuff)
2127 {
2128 ajDebug("Asis access: unable to use text '%S'\n", qry->Filename);
2129
2130 return ajFalse;
2131 }
2132
2133 ajStrAssignC(&textin->Filename, "asis");
2134 /*ajFilebuffTrace(textin->Filebuff);*/
2135
2136 return ajTrue;
2137 }
2138
2139
2140
2141
2142 /* @func ajTextinAccessFile ***************************************************
2143 **
2144 ** Reads data from a named file.
2145 **
2146 ** @param [u] textin [AjPTextin] Text input.
2147 ** @return [AjBool] ajTrue on success.
2148 **
2149 ** @release 6.4.0
2150 ** @@
2151 ******************************************************************************/
2152
ajTextinAccessFile(AjPTextin textin)2153 AjBool ajTextinAccessFile(AjPTextin textin)
2154 {
2155 AjPQuery qry;
2156
2157 qry = textin->Query;
2158
2159 if(!ajStrGetLen(qry->Filename))
2160 {
2161 ajErr("FILE access: no filename");
2162
2163 return ajFalse;
2164 }
2165
2166 ajDebug("ajTextinAccessFile %S\n", qry->Filename);
2167
2168 /* ajStrTraceT(qry->Filename, "qry->Filename (before):"); */
2169
2170 ajFilebuffDel(&textin->Filebuff);
2171 textin->Filebuff = ajFilebuffNewNameS(qry->Filename);
2172
2173 if(!textin->Filebuff)
2174 {
2175 ajDebug("FILE access: unable to open file '%S'\n", qry->Filename);
2176
2177 return ajFalse;
2178 }
2179
2180 /* ajStrTraceT(textin->Filename, "textin->Filename:"); */
2181 /* ajStrTraceT(qry->Filename, "qry->Filename (after):"); */
2182
2183 ajStrAssignS(&textin->Filename, qry->Filename);
2184
2185 return ajTrue;
2186 }
2187
2188
2189
2190
2191 /* @func ajTextinAccessFtp ****************************************************
2192 **
2193 ** Reads data from an FTP URL
2194 **
2195 ** @param [u] textin [AjPTextin] Text input.
2196 ** @return [AjBool] ajTrue on success.
2197 **
2198 ** @release 6.5.0
2199 ** @@
2200 ******************************************************************************/
2201
ajTextinAccessFtp(AjPTextin textin)2202 AjBool ajTextinAccessFtp(AjPTextin textin)
2203 {
2204 AjPQuery qry;
2205 AjPStr url = NULL;
2206
2207 AjPStr host = NULL;
2208 ajint iport;
2209
2210 AjPStr urlget = NULL;
2211 AjPUrlref urlref = NULL;
2212
2213 iport = 21;
2214 qry = textin->Query;
2215
2216 if(!ajStrGetLen(qry->Filename))
2217 {
2218 ajErr("FILE access: no filename");
2219
2220 return ajFalse;
2221 }
2222
2223 ajDebug("ajTextinAccessFtp %S\n", qry->Filename);
2224
2225 /* ajStrTraceT(qry->Filename, "qry->Filename (before):"); */
2226
2227 ajStrAssignS(&url, qry->Filename);
2228
2229 urlref = ajHttpUrlrefNew();
2230 ajHttpUrlrefParseS(&urlref, url);
2231 ajHttpUrlrefSplitPort(urlref);
2232 ajStrAssignS(&host,urlref->Host);
2233 if(ajStrGetLen(urlref->Port))
2234 ajStrToInt(urlref->Port, &iport);
2235 ajFmtPrintS(&urlget,"/%S",urlref->Absolute);
2236 ajHttpUrlrefDel(&urlref);
2237
2238 ajFilebuffDel(&textin->Filebuff);
2239 textin->Filebuff = ajFtpRead(NULL, host, iport, textin->Fpos, urlget);
2240
2241 ajStrDel(&host);
2242 ajStrDel(&urlget);
2243
2244 if(!textin->Filebuff)
2245 {
2246 ajDebug("FTP access: unable to open file '%S'\n", qry->Filename);
2247
2248 return ajFalse;
2249 }
2250
2251 /* ajStrTraceT(textin->Filename, "textin->Filename:"); */
2252 /* ajStrTraceT(qry->Filename, "qry->Filename (after):"); */
2253
2254 ajStrAssignS(&textin->Filename, qry->Filename);
2255
2256 ajDebug("FTP access: opened file '%S'\n", qry->Filename);
2257
2258 ajStrDel(&url);
2259
2260 return ajTrue;
2261 }
2262
2263
2264
2265
2266 /* @func ajTextinAccessHttp ***************************************************
2267 **
2268 ** Reads data from an HTTP URL. No HTML is stripped.
2269 **
2270 ** @param [u] textin [AjPTextin] Text input.
2271 ** @return [AjBool] ajTrue on success.
2272 **
2273 ** @release 6.5.0
2274 ** @@
2275 ******************************************************************************/
2276
ajTextinAccessHttp(AjPTextin textin)2277 AjBool ajTextinAccessHttp(AjPTextin textin)
2278 {
2279 AjPQuery qry;
2280 AjPStr url = NULL;
2281
2282 AjPStr host = NULL;
2283 ajint iport;
2284
2285 AjPStr urlget = NULL;
2286 AjPUrlref urlref = NULL;
2287 AjPStr version10 = NULL;
2288 AjBool ok;
2289
2290 iport = 80;
2291 qry = textin->Query;
2292
2293 if(!ajStrGetLen(qry->Filename))
2294 {
2295 ajErr("HTTP access: no filename");
2296
2297 return ajFalse;
2298 }
2299
2300 ajDebug("ajTextinAccessHttp %S\n", qry->Filename);
2301
2302 ajStrAssignS(&url, qry->Filename);
2303
2304 urlref = ajHttpUrlrefNew();
2305 ajHttpUrlrefParseS(&urlref, url);
2306 ajHttpUrlrefSplitPort(urlref);
2307 ajStrAssignS(&host,urlref->Host);
2308 if(ajStrGetLen(urlref->Port))
2309 ajStrToInt(urlref->Port, &iport);
2310 ajFmtPrintS(&urlget,"/%S",urlref->Absolute);
2311 ajHttpUrlrefDel(&urlref);
2312
2313 version10 = ajStrNewC("1.0");
2314
2315 ajFilebuffDel(&textin->Filebuff);
2316 textin->Filebuff = ajHttpReadPos(version10, url,
2317 NULL, host, iport, urlget, qry->Fpos);
2318 ajStrDel(&version10);
2319
2320 if(!textin->Filebuff)
2321 {
2322 if(iport == 80)
2323 ajErr("Cannot open HTTP connection 'http://%S%S'",
2324 host, urlget);
2325 else
2326 ajErr("Cannot open HTTP connection 'http://%S:%d%S'",
2327 host, iport, urlget);
2328 return ajFalse;
2329 }
2330
2331 /* skip past the header */
2332
2333 ok = ajBuffreadLine(textin->Filebuff, &textinReadLine);
2334
2335 switch(ajStrGetCharPos(textinReadLine, 9))
2336 {
2337 case '4':
2338 return ajFalse;
2339 default:
2340 break;
2341 }
2342
2343 while(ok && ajStrFindRestC(textinReadLine, "\r\n") >= 0)
2344 ajBuffreadLine(textin->Filebuff, &textinReadLine);
2345
2346 ajFilebuffClear(textin->Filebuff,1);
2347
2348
2349
2350 ajStrDel(&host);
2351 ajStrDel(&urlget);
2352
2353 if(!textin->Filebuff)
2354 {
2355 ajDebug("HTTP access: unable to open file '%S'\n", qry->Filename);
2356
2357 return ajFalse;
2358 }
2359
2360 /* ajStrTraceT(textin->Filename, "textin->Filename:"); */
2361 /* ajStrTraceT(qry->Filename, "qry->Filename (after):"); */
2362
2363 ajStrAssignS(&textin->Filename, url);
2364
2365 ajStrDel(&url);
2366
2367 return ajTrue;
2368 }
2369
2370
2371
2372
2373 /* @func ajTextinAccessOffset *************************************************
2374 **
2375 ** Reads a text from a named file, at a given offset within the file.
2376 **
2377 ** @param [u] textin [AjPTextin] Text input.
2378 ** @return [AjBool] ajTrue on success.
2379 **
2380 ** @release 6.4.0
2381 ** @@
2382 ******************************************************************************/
2383
ajTextinAccessOffset(AjPTextin textin)2384 AjBool ajTextinAccessOffset(AjPTextin textin)
2385 {
2386 AjPQuery qry;
2387
2388 qry = textin->Query;
2389
2390 if(!ajStrGetLen(qry->Filename))
2391 {
2392 ajErr("FILE access: no filename");
2393
2394 return ajFalse;
2395 }
2396
2397 ajDebug("ajTextinAccessOffset %S %Ld\n", qry->Filename, qry->Fpos);
2398
2399 /* ajStrTraceT(qry->Filename, "qry->Filename (before):"); */
2400
2401 ajFilebuffDel(&textin->Filebuff);
2402 textin->Filebuff = ajFilebuffNewNameS(qry->Filename);
2403
2404 if(!textin->Filebuff)
2405 {
2406 ajDebug("OFFSET access: unable to open file '%S'\n", qry->Filename);
2407
2408 return ajFalse;
2409 }
2410
2411 ajFileSeek(ajFilebuffGetFile(textin->Filebuff), qry->Fpos, 0);
2412 /* ajStrTraceT(textin->Filename, "textin->Filename:"); */
2413 /* ajStrTraceT(qry->Filename, "qry->Filename (after):"); */
2414 ajStrAssignS(&textin->Filename, qry->Filename);
2415
2416 return ajTrue;
2417 }
2418
2419
2420
2421
2422
2423 /* @datasection [none] Miscellaneous ******************************************
2424 **
2425 ** Text input internals
2426 **
2427 ** @nam2rule Textin Text input
2428 **
2429 ******************************************************************************/
2430
2431
2432
2433
2434 /* @section Printing **********************************************************
2435 **
2436 ** Printing details of the internals to a file
2437 **
2438 ** @fdata [none]
2439 **
2440 ** @nam2rule Textinprint
2441 **
2442 ** @fcategory output
2443 **
2444 ******************************************************************************/
2445
2446
2447
2448
2449 /* @section Print *************************************************************
2450 **
2451 ** Printing to a file
2452 **
2453 ** @fdata [none]
2454 **
2455 ** @nam3rule Book Print as docbook table
2456 ** @nam3rule Html Print as html table
2457 ** @nam3rule Wiki Print as wiki table
2458 ** @nam3rule Text Print as text
2459 **
2460 ** @argrule * outf [AjPFile] output file
2461 ** @argrule Text full [AjBool] Print all details
2462 **
2463 ** @valrule * [void]
2464 **
2465 ** @fcategory cast
2466 **
2467 ******************************************************************************/
2468
2469
2470
2471
2472 /* @func ajTextinprintBook ****************************************************
2473 **
2474 ** Reports the internal data structures as a Docbook table
2475 **
2476 ** @param [u] outf [AjPFile] Output file
2477 ** @return [void]
2478 **
2479 ** @release 6.4.0
2480 ** @@
2481 ******************************************************************************/
2482
void ajTextinprintBook(AjPFile outf)
{
    ajuint i = 0;
    ajuint j = 0;
    AjPStr namestr = NULL;
    AjPList fmtlist;
    AjPStr* names = NULL;

    fmtlist = ajListstrNew();

    ajFmtPrintF(outf, "<para>The supported text formats are summarised "
                "in the table below. "
                "The columns are as follows: "
                "<emphasis>Input format</emphasis> (format name), "
                "<emphasis>Try</emphasis> (indicates whether the "
                "format can be detected automatically on input), and "
                "<emphasis>Description</emphasis> (short description of "
                "the format).</para>\n\n");

    ajFmtPrintF(outf, "<table frame=\"box\" rules=\"cols\">\n");
    ajFmtPrintF(outf, " <caption>Input text formats</caption>\n");
    ajFmtPrintF(outf, " <thead>\n");
    ajFmtPrintF(outf, " <tr align=\"center\">\n");
    ajFmtPrintF(outf, " <th>Input Format</th>\n");
    ajFmtPrintF(outf, " <th>Try</th>\n");
    ajFmtPrintF(outf, " <th>Description</th>\n");
    ajFmtPrintF(outf, " </tr>\n");
    ajFmtPrintF(outf, " </thead>\n");
    ajFmtPrintF(outf, " <tbody>\n");

    /* collect the non-alias format names; entry 0 is skipped */
    for(i=1; textinFormatDef[i].Name; i++)
    {
        if(!textinFormatDef[i].Alias)
        {
            namestr = ajStrNewC(textinFormatDef[i].Name);
            ajListPushAppend(fmtlist, namestr);
            namestr = NULL;     /* now owned by the list */
        }
    }

    ajListSort(fmtlist, &ajStrVcmp);
    ajListstrToarray(fmtlist, &names);

    /*
    ** One table row per sorted name. 'names' is guarded because no array
    ** may be returned for an empty list (NOTE(review): confirm against
    ** ajListstrToarray).
    */
    for(i=0; names && names[i]; i++)
    {
        for(j=0; textinFormatDef[j].Name; j++)
        {
            if(ajStrMatchC(names[i],textinFormatDef[j].Name))
            {
                ajFmtPrintF(outf, " <tr>\n");
                ajFmtPrintF(outf, " <td>%s</td>\n",
                            textinFormatDef[j].Name);
                ajFmtPrintF(outf, " <td>%B</td>\n",
                            textinFormatDef[j].Try);
                ajFmtPrintF(outf, " <td>%s</td>\n",
                            textinFormatDef[j].Desc);
                ajFmtPrintF(outf, " </tr>\n");
            }
        }
    }

    ajFmtPrintF(outf, " </tbody>\n");
    ajFmtPrintF(outf, "</table>\n");
    ajStrDel(&namestr);

    /*
    ** Free the array itself (previously leaked by assigning NULL). The
    ** strings it points to are released with the list below.
    */
    AJFREE(names);
    ajListstrFreeData(&fmtlist);

    return;
}
2554
2555
2556
2557
2558 /* @func ajTextinprintHtml ****************************************************
2559 **
2560 ** Reports the internal data structures as an HTML table
2561 **
2562 ** @param [u] outf [AjPFile] Output file
2563 ** @return [void]
2564 **
2565 ** @release 6.4.0
2566 ** @@
2567 ******************************************************************************/
2568
void ajTextinprintHtml(AjPFile outf)
{
    ajuint i = 0;
    ajuint j = 0;

    AjPStr namestr = NULL;

    /*
    ** Header row: three columns to match the three cells written for
    ** each format below (name(s), Try flag, description). The former
    ** "Multi" heading had no corresponding data cell.
    */
    ajFmtPrintF(outf, "<table border=3>");
    ajFmtPrintF(outf, "<tr><th>Input Format</th><th>Auto</th>\n");
    ajFmtPrintF(outf, "<th>Description</th></tr>\n");

    /* entry 0 is skipped; aliases are reported with their main format */
    for(i=1; textinFormatDef[i].Name; i++)
    {
        if(textinFormatDef[i].Alias)
            continue;

        ajStrAssignC(&namestr, textinFormatDef[i].Name);

        /* append the names of later entries sharing the same reader */
        for(j=i+1; textinFormatDef[j].Name; j++)
        {
            if(textinFormatDef[j].Read == textinFormatDef[i].Read)
            {
                ajFmtPrintAppS(&namestr, " %s", textinFormatDef[j].Name);

                if(!textinFormatDef[j].Alias)
                {
                    ajWarn("Input format '%s' same as '%s' but not alias",
                           textinFormatDef[j].Name,
                           textinFormatDef[i].Name);
                }
            }
        }

        ajFmtPrintF(outf, "<tr><td>\n%S\n</td><td>%B</td>\n",
                    namestr,
                    textinFormatDef[i].Try);
        ajFmtPrintF(outf, "<td>\n%s\n</td></tr>\n",
                    textinFormatDef[i].Desc);
    }

    ajFmtPrintF(outf, "</table>\n");
    ajStrDel(&namestr);

    return;
}
2614
2615
2616
2617
2618 /* @func ajTextinprintText ****************************************************
2619 **
2620 ** Reports the internal data structures
2621 **
2622 ** @param [u] outf [AjPFile] Output file
2623 ** @param [r] full [AjBool] Full report (usually ajFalse)
2624 ** @return [void]
2625 **
2626 ** @release 6.4.0
2627 ** @@
2628 ******************************************************************************/
2629
void ajTextinprintText(AjPFile outf, AjBool full)
{
    ajuint ifmt;

    /* commented legend describing the columns */
    ajFmtPrintF(outf, "\n");
    ajFmtPrintF(outf, "# Text input formats\n");
    ajFmtPrintF(outf, "# Name Format name (or alias)\n");
    ajFmtPrintF(outf, "# Alias Alias name\n");
    ajFmtPrintF(outf, "# Try Test for unknown input files\n");
    ajFmtPrintF(outf, "# Name Alias Try "
                "Description");
    ajFmtPrintF(outf, "\n");
    ajFmtPrintF(outf, "InFormat {\n");

    /* one line per format; aliases only appear in a full report */
    for(ifmt = 0; textinFormatDef[ifmt].Name; ifmt++)
    {
        if(!full && textinFormatDef[ifmt].Alias)
            continue;

        ajFmtPrintF(outf,
                    " %-12s %5B %3B \"%s\"\n",
                    textinFormatDef[ifmt].Name,
                    textinFormatDef[ifmt].Alias,
                    textinFormatDef[ifmt].Try,
                    textinFormatDef[ifmt].Desc);
    }

    ajFmtPrintF(outf, "}\n\n");
}
2657
2658
2659
2660
2661 /* @func ajTextinprintWiki ****************************************************
2662 **
2663 ** Reports the internal data structures as a wiki table
2664 **
2665 ** @param [u] outf [AjPFile] Output file
2666 ** @return [void]
2667 **
2668 ** @release 6.4.0
2669 ** @@
2670 ******************************************************************************/
2671
void ajTextinprintWiki(AjPFile outf)
{
    ajuint ifmt;
    ajuint jfmt;
    AjPStr label = NULL;

    /* table opening and heading row */
    ajFmtPrintF(outf, "{| class=\"wikitable sortable\" border=\"2\"\n");
    ajFmtPrintF(outf, "|-\n");
    ajFmtPrintF(outf, "!Format!!Try!!"
                "class=\"unsortable\"|Description\n");

    /* entry 0 is skipped; alias entries are folded into their main row */
    for(ifmt = 1; textinFormatDef[ifmt].Name; ifmt++)
    {
        if(textinFormatDef[ifmt].Alias)
            continue;

        ajStrAssignC(&label, textinFormatDef[ifmt].Name);

        /* add every later entry that uses the same read function */
        for(jfmt = ifmt + 1; textinFormatDef[jfmt].Name; jfmt++)
        {
            if(textinFormatDef[jfmt].Read != textinFormatDef[ifmt].Read)
                continue;

            ajFmtPrintAppS(&label, "<br>%s",
                           textinFormatDef[jfmt].Name);

            if(!textinFormatDef[jfmt].Alias)
                ajWarn("Input format '%s' same as '%s' but not alias",
                       textinFormatDef[jfmt].Name,
                       textinFormatDef[ifmt].Name);
        }

        ajFmtPrintF(outf, "|-\n");
        ajFmtPrintF(outf,
                    "|%S||%B||%s\n",
                    label,
                    textinFormatDef[ifmt].Try,
                    textinFormatDef[ifmt].Desc);
    }

    ajFmtPrintF(outf, "|}\n\n");
    ajStrDel(&label);
}
2720
2721
2722
2723
2724 /* @datasection [none] Miscellaneous ******************************************
2725 **
2726 ** Text internals
2727 **
2728 ** @nam2rule Textin Text input
2729 **
2730 ******************************************************************************/
2731
2732
2733
2734
2735 /* @section Miscellaneous *****************************************************
2736 **
2737 ** Functions to initialise and clean up internals
2738 **
2739 ** @fdata [none]
2740 **
2741 ** @nam3rule Exit Clean up and exit
2742 **
2743 ** @valrule * [void]
2744 **
2745 ** @fcategory misc
2746 **
2747 ******************************************************************************/
2748
2749
2750
2751
2752 /* @func ajTextinExit *********************************************************
2753 **
2754 ** Cleans up text data input internal memory
2755 **
2756 ** @return [void]
2757 **
2758 ** @release 6.4.0
2759 ** @@
2760 ******************************************************************************/
2761
ajTextinExit(void)2762 void ajTextinExit(void)
2763 {
2764 /* Query processing regular expressions */
2765
2766 ajStrDel(&textinReadLine);
2767
2768 ajTableDel(&textDbMethods);
2769
2770 return;
2771 }
2772
2773
2774
2775
2776 /* @section Internals *********************************************************
2777 **
2778 ** Functions to return internal values
2779 **
2780 ** @fdata [none]
2781 **
2782 ** @nam3rule Type Internals for text datatype
2783 ** @nam4rule Get Return a value
2784 ** @nam5rule Fields Known query fields for ajTextinRead
2785 ** @nam5rule Qlinks Known query link operators for ajTextinRead
2786 **
2787 ** @valrule * [const char*] Internal value
2788 **
2789 ** @fcategory misc
2790 **
2791 ******************************************************************************/
2792
2793
2794
2795
2796 /* @func ajTextinTypeGetFields ************************************************
2797 **
** Returns the list of known field names for ajTextinRead
2799 **
2800 ** @return [const char*] List of field names
2801 **
2802 ** @release 6.4.0
2803 ** @@
2804 ******************************************************************************/
2805
const char* ajTextinTypeGetFields(void)
{
    /* the only query field known for text input */
    static const char knownfields[] = "id";

    return knownfields;
}
2810
2811
2812
2813
2814 /* @func ajTextinTypeGetQlinks ************************************************
2815 **
** Returns the list of known query link operators for ajTextinRead
**
** @return [const char*] List of query link operators
2819 **
2820 ** @release 6.4.0
2821 ** @@
2822 ******************************************************************************/
2823
const char* ajTextinTypeGetQlinks(void)
{
    /* the only query link operator known for text input */
    static const char knownlinks[] = "|";

    return knownlinks;
}
2828
2829
2830
2831
2832 /* @datasection [AjPTable] Internal call register table ***********************
2833 **
2834 ** Functions to manage the internal call register table that links the
2835 ** ajaxdb library functions with code in the core AJAX library.
2836 **
2837 ** @nam2rule Textaccess Functions to manage textdb call tables.
2838 **
2839 ******************************************************************************/
2840
2841
2842
2843
2844 /* @section Cast **************************************************************
2845 **
2846 ** Return a reference to the call table
2847 **
2848 ** @fdata [AjPTable] textdb functions call table
2849 **
2850 ** @nam3rule Get Return a value
2851 ** @nam4rule Db Database access functions table
2852 ** @nam3rule Method Lookup an access method by name
2853 ** @nam4rule Test Return true if the access method exists
2854 ** @nam4rule MethodGet Return a method value
2855 ** @nam5rule Qlinks Return query link operators
2856 ** @nam5rule Scope Return scope (entry, query or all) for a named method
2857 **
2858 ** @argrule Method method [const AjPStr] Method name
2859 **
2860 ** @valrule *Db [AjPTable] Call table of function names and references
2861 ** @valrule *Qlinks [const char*] Query link operators
2862 ** @valrule *Scope [ajuint] Scope flags
2863 ** @valrule *Test [AjBool] True if found
2864 **
2865 ** @fcategory cast
2866 **
2867 ******************************************************************************/
2868
2869
2870
2871
2872 /* @func ajTextaccessGetDb ****************************************************
2873 **
2874 ** Returns the table in which text database access details are registered
2875 **
2876 ** @return [AjPTable] Access functions hash table
2877 **
2878 ** @release 6.4.0
2879 ** @@
2880 ******************************************************************************/
2881
ajTextaccessGetDb(void)2882 AjPTable ajTextaccessGetDb(void)
2883 {
2884 if(!textDbMethods)
2885 textDbMethods = ajCallTableNew();
2886 return textDbMethods;
2887
2888 }
2889
2890
2891
2892
2893 /* @func ajTextaccessMethodGetQlinks ******************************************
2894 **
2895 ** Tests for a named method for text file reading and returns the
2896 ** known query link operators
2897 **
2898 ** @param [r] method [const AjPStr] Method required.
2899 ** @return [const char*] Known link operators
2900 **
2901 ** @release 6.4.0
2902 ** @@
2903 ******************************************************************************/
2904
ajTextaccessMethodGetQlinks(const AjPStr method)2905 const char* ajTextaccessMethodGetQlinks(const AjPStr method)
2906 {
2907 AjPTextAccess methoddata;
2908
2909 methoddata = ajCallTableGetS(textDbMethods, method);
2910 if(!methoddata)
2911 return NULL;
2912
2913 return methoddata->Qlink;
2914 }
2915
2916
2917
2918
2919 /* @func ajTextaccessMethodGetScope *******************************************
2920 **
2921 ** Tests for a named method for text file reading and returns the scope
2922 ** (entry, query or all).
**
2924 ** @param [r] method [const AjPStr] Method required.
2925 ** @return [ajuint] Scope flags
2926 **
2927 ** @release 6.4.0
2928 ** @@
2929 ******************************************************************************/
2930
ajTextaccessMethodGetScope(const AjPStr method)2931 ajuint ajTextaccessMethodGetScope(const AjPStr method)
2932 {
2933 AjPTextAccess methoddata;
2934 ajuint ret = 0;
2935
2936 methoddata = ajCallTableGetS(textDbMethods, method);
2937 if(!methoddata)
2938 return 0;
2939
2940 if(methoddata->Entry)
2941 ret |= AJMETHOD_ENTRY;
2942 if(methoddata->Query)
2943 ret |= AJMETHOD_QUERY;
2944 if(methoddata->All)
2945 ret |= AJMETHOD_ALL;
2946
2947 return ret;
2948 }
2949
2950
2951
2952
2953 /* @func ajTextaccessMethodTest ***********************************************
2954 ** Tests for a named method for text reading.
2955 **
2956 ** @param [r] method [const AjPStr] Method required.
2957 ** @return [AjBool] ajTrue on success.
2958 **
2959 ** @release 6.4.0
2960 ** @@
2961 ******************************************************************************/
2962
ajTextaccessMethodTest(const AjPStr method)2963 AjBool ajTextaccessMethodTest(const AjPStr method)
2964 {
2965 if(ajCallTableGetS(textDbMethods, method))
2966 return ajTrue;
2967
2968 return ajFalse;
2969 }
2970
2971
2972
2973
2974 /* @funcstatic textinQryRestore ***********************************************
2975 **
** Restores a text input specification from an AjPQueryList node
2977 **
2978 ** @param [w] textin [AjPTextin] Text input object
2979 ** @param [r] node [const AjPQueryList] Query list node
2980 ** @return [void]
2981 **
2982 ** @release 6.4.0
2983 ******************************************************************************/
2984
static void textinQryRestore(AjPTextin textin, const AjPQueryList node)
{
    /* string settings saved with the node */
    ajStrAssignS(&textin->Formatstr, node->Formatstr);
    ajStrAssignS(&textin->QryFields, node->QryFields);

    /* format index, and the saved position as both start and current */
    textin->Format = node->Format;
    textin->Fpos   = node->Fpos;
    textin->Curpos = node->Fpos;
}
2995
2996
2997
2998
2999 /* @funcstatic textinQrySave **************************************************
3000 **
** Saves a text input specification in an AjPQueryList node
3002 **
3003 ** @param [w] node [AjPQueryList] Query list node
3004 ** @param [r] textin [const AjPTextin] Text input object
3005 ** @return [void]
3006 **
3007 ** @release 6.4.0
3008 ******************************************************************************/
3009
static void textinQrySave(AjPQueryList node, const AjPTextin textin)
{
    /* copy the strings so the node keeps its own values */
    ajStrAssignS(&node->Formatstr, textin->Formatstr);
    ajStrAssignS(&node->QryFields, textin->QryFields);

    /* plain values */
    node->Format = textin->Format;
    node->Fpos   = textin->Fpos;
}
3019
3020
3021
3022
3023 /* @funcstatic textinQryProcess ***********************************************
3024 **
** Converts a text query into an open file.
3026 **
3027 ** Tests for "format::" and sets this if it is found
3028 **
3029 ** Then tests for "list:" or "@" and processes as a list file
3030 ** using textinListProcess which in turn invokes textinQryProcess
3031 ** until a valid query is found.
3032 **
3033 ** Then tests for dbname:query and opens the file (at the correct position
3034 ** if the database definition defines it)
3035 **
3036 ** If there is no database, looks for file:query and opens the file.
3037 ** In this case the file position is not known and text data reading
3038 ** will have to scan for the entry/entries we need.
3039 **
3040 ** @param [u] textin [AjPTextin] text data input structure.
3041 ** @param [u] text [AjPText] text data to be read. The format will be replaced
3042 ** if defined in the query string.
3043 ** @return [AjBool] ajTrue on success.
3044 **
3045 ** @release 6.4.0
3046 ** @@
3047 ******************************************************************************/
3048
textinQryProcess(AjPTextin textin,AjPText text)3049 static AjBool textinQryProcess(AjPTextin textin, AjPText text)
3050 {
3051 AjBool ret = ajTrue;
3052 AjPStr qrystr = NULL;
3053 AjBool nontextmethod = ajFalse;
3054 const AjPStr fmtstr = NULL;
3055 AjPQuery qry;
3056
3057 qry = textin->Query;
3058
3059 /* pick up the original query string */
3060 qrystr = ajStrNewS(textin->Qry);
3061
3062 ajDebug("++textinQryProcess '%S' \n", qrystr);
3063
3064 /* look for a format:: prefix */
3065 fmtstr = ajQuerystrParseFormat(&qrystr, textin, textinformatFind);
3066 ajDebug("textinQryProcess ... fmtstr '%S' '%S'\n", fmtstr, qrystr);
3067
3068 /* (seq/feat only) DO NOT look for a [range] suffix */
3069 /* look for a list:: or @:: listfile of queries - process and return */
3070 if(ajQuerystrParseListfile(&qrystr))
3071 {
3072 ajDebug("textinQryProcess ... listfile '%S'\n", qrystr);
3073 ret = textinListProcess(textin, text, qrystr);
3074 ajStrDel(&qrystr);
3075 return ret;
3076 }
3077
3078 /* try general text access methods (file, asis, text database access */
3079 ajDebug("textinQryProcess ... no listfile '%S'\n", qrystr);
3080 if(!ajQuerystrParseRead(&qrystr, textin, textinformatFind, &nontextmethod))
3081 {
3082 ajStrDel(&qrystr);
3083 return ajFalse;
3084 }
3085
3086 textinFormatSet(textin, text);
3087
3088 ajDebug("textinQryProcess ... read nontext: %B '%S'\n",
3089 nontextmethod, qrystr);
3090 ajStrDel(&qrystr);
3091
3092 if(nontextmethod)
3093 {
3094 ajDebug("textinQryProcess ... call method '%S'\n", qry->Method);
3095 ajDebug("textinQryProcess ... textin format %d '%S'\n",
3096 textin->Format, textin->Formatstr);
3097 ajDebug("textinQryProcess ... query format '%S'\n",
3098 qry->Formatstr);
3099 /*
3100 ** skip this for text .... we already tried text access methods!
3101 qry->Access = ajCallTableGetS(xxxDbMethods,qry->Method);
3102 xxxaccess = qry->Access;
3103 return (*xxxaccess->Access)(oboin);
3104 */ }
3105
3106 ajDebug("seqinUsaProcess text method '%S' success\n", qry->Method);
3107
3108 return ajTrue;
3109 }
3110
3111
3112
3113
3114 /* @datasection [AjPList] Query field list ************************************
3115 **
3116 ** Query fields lists are handled internally. Only static functions
3117 ** should appear here
3118 **
3119 ******************************************************************************/
3120
3121
3122
3123
3124 /* @funcstatic textinListProcess **********************************************
3125 **
3126 ** Processes a file of queries.
3127 ** This function is called by, and calls, textinQryProcess. There is
3128 ** a depth check to avoid infinite loops, for example where a list file
3129 ** refers to itself.
3130 **
3131 ** This function produces a list (AjPList) of queries with all list references
3132 ** expanded into lists of queries.
3133 **
3134 ** Because queries in a list can have their own format
3135 ** the prior settings are stored with each query in the list node so that they
3136 ** can be restored after.
3137 **
3138 ** @param [u] textin [AjPTextin] text data input
3139 ** @param [u] text [AjPText] text data
** @param [r] listfile [const AjPStr] Name of list file.
3141 ** @return [AjBool] ajTrue on success.
3142 **
3143 ** @release 6.4.0
3144 ** @@
3145 ******************************************************************************/
3146
textinListProcess(AjPTextin textin,AjPText text,const AjPStr listfile)3147 static AjBool textinListProcess(AjPTextin textin, AjPText text,
3148 const AjPStr listfile)
3149 {
3150 AjPList list = NULL;
3151 AjPFile file = NULL;
3152 AjPStr token = NULL;
3153 AjPStr rest = NULL;
3154 AjBool ret = ajFalse;
3155 AjPQueryList node = NULL;
3156
3157 ajuint recnum = 0;
3158 static ajint depth = 0;
3159 static ajint MAXDEPTH = 16;
3160
3161 depth++;
3162 ajDebug("++textListProcess %S depth %d\n",
3163 listfile, depth);
3164
3165 if(depth > MAXDEPTH)
3166 ajFatal("Query list too deep");
3167
3168 if(!textin->List)
3169 textin->List = ajListNew();
3170
3171 list = ajListNew();
3172
3173 file = ajFileNewInNameS(listfile);
3174
3175 if(!file)
3176 {
3177 ajErr("Failed to open list file '%S'", listfile);
3178 depth--;
3179
3180 return ret;
3181 }
3182
3183 while(ajReadlineTrim(file, &textinReadLine))
3184 {
3185 ++recnum;
3186 textinListNoComment(&textinReadLine);
3187
3188 if(ajStrExtractWord(textinReadLine, &rest, &token))
3189 {
3190 if(ajStrGetLen(rest))
3191 {
3192 ajErr("Bad record %u in list file '%S'\n'%S'",
3193 recnum, listfile, textinReadLine);
3194 }
3195 else if(ajStrGetLen(token))
3196 {
3197 ajDebug("++Add to list: '%S'\n", token);
3198 AJNEW0(node);
3199 ajStrAssignS(&node->Qry, token);
3200 textinQrySave(node, textin);
3201 ajListPushAppend(list, node);
3202 }
3203 }
3204 }
3205
3206 ajFileClose(&file);
3207 ajStrDel(&token);
3208 ajStrDel(&rest);
3209
3210 ajDebug("Trace textin->List\n");
3211 ajQuerylistTrace(textin->List);
3212 ajDebug("Trace new list\n");
3213 ajQuerylistTrace(list);
3214 ajListPushlist(textin->List, &list);
3215
3216 ajDebug("Trace combined textin->List\n");
3217 ajQuerylistTrace(textin->List);
3218
3219 /*
3220 ** now try the first item on the list
3221 ** this can descend recursively if it is also a list
3222 ** which is why we check the depth above
3223 */
3224
3225 if(ajListPop(textin->List, (void**) &node))
3226 {
3227 ajDebug("++pop first item '%S'\n", node->Qry);
3228 ajTextinQryS(textin, node->Qry);
3229 textinQryRestore(textin, node);
3230 ajStrDel(&node->Qry);
3231 ajStrDel(&node->Formatstr);
3232 AJFREE(node);
3233 ajDebug("descending with query '%S'\n", textin->Qry);
3234 ret = textinQryProcess(textin, text);
3235 }
3236
3237 depth--;
3238 ajDebug("++textListProcess depth: %d returns: %B\n", depth, ret);
3239
3240 return ret;
3241 }
3242
3243
3244
3245
3246 /* @funcstatic textinListNoComment ********************************************
3247 **
** Strips comments from a character string (a line from a list file).
3249 ** Comments are blank lines or any text following a "#" character.
3250 **
3251 ** @param [u] text [AjPStr*] Line of text from input file.
3252 ** @return [void]
3253 **
3254 ** @release 6.4.0
3255 ** @@
3256 ******************************************************************************/
3257
static void textinListNoComment(AjPStr* text)
{
    char *hash;

    /* nothing to strip from an empty string */
    if(!ajStrGetLen(*text))
        return;

    /* the string is edited in place below */
    MAJSTRGETUNIQUESTR(text);

    hash = strchr(ajStrGetPtr(*text), '#');

    if(!hash)
        return;

    /* truncate at the '#' and let the string object resync itself */
    *hash = '\0';
    ajStrSetValid(text);
}
3280
3281
3282
3283
3284 /* @funcstatic textinFormatSet ************************************************
3285 **
3286 ** Sets the input format for text data using the text data input object's
3287 ** defined format
3288 **
3289 ** @param [u] textin [AjPTextin] text term input.
3290 ** @param [u] text [AjPText] text term.
3291 ** @return [AjBool] ajTrue on success.
3292 **
3293 ** @release 6.4.0
3294 ** @@
3295 ******************************************************************************/
3296
textinFormatSet(AjPTextin textin,AjPText text)3297 static AjBool textinFormatSet(AjPTextin textin, AjPText text)
3298 {
3299
3300 if(ajStrGetLen(textin->Formatstr))
3301 {
3302 ajDebug("... input format value '%S'\n", textin->Formatstr);
3303
3304 if(textinformatFind(textin->Formatstr, &textin->Format))
3305 {
3306 ajStrAssignS(&text->Formatstr, textin->Formatstr);
3307 text->Format = textin->Format;
3308 ajDebug("...format OK '%S' = %d\n", textin->Formatstr,
3309 textin->Format);
3310 }
3311 else
3312 ajDebug("...format unknown '%S'\n", textin->Formatstr);
3313
3314 return ajTrue;
3315 }
3316 else
3317 ajDebug("...input format not set\n");
3318
3319
3320 return ajFalse;
3321 }
3322
3323
3324
3325
3326 /* @datasection [AjPTextall] Text Input Stream ********************************
3327 **
3328 ** Function is for manipulating text block input stream objects
3329 **
3330 ** @nam2rule Textall Text input stream objects
3331 **
3332 ******************************************************************************/
3333
3334
3335
3336
3337 /* @section Text Input Constructors *******************************************
3338 **
3339 ** All constructors return a new text input stream object by pointer. It
3340 ** is the responsibility of the user to first destroy any previous
3341 ** text input object. The target pointer does not need to be
3342 ** initialised to NULL, but it is good programming practice to do so
3343 ** anyway.
3344 **
3345 ** @fdata [AjPTextall]
3346 **
3347 ** @nam3rule New Constructor
3348 **
3349 ** @valrule * [AjPTextall] Text input stream object
3350 **
3351 ** @fcategory new
3352 **
3353 ******************************************************************************/
3354
3355
3356
3357
3358 /* @func ajTextallNew *********************************************************
3359 **
3360 ** Creates a new text input stream object.
3361 **
3362 ** @return [AjPTextall] New text input stream object.
3363 **
3364 ** @release 6.4.0
3365 ** @@
3366 ******************************************************************************/
3367
ajTextallNew(void)3368 AjPTextall ajTextallNew(void)
3369 {
3370 AjPTextall pthis;
3371
3372 AJNEW0(pthis);
3373
3374 pthis->Textin = ajTextinNew();
3375 pthis->Text = ajTextNew();
3376
3377 return pthis;
3378 }
3379
3380
3381
3382
3383
3384 /* ==================================================================== */
3385 /* ========================== destructors ============================= */
3386 /* ==================================================================== */
3387
3388
3389
3390
3391 /* @section Text Input Stream Destructors *************************************
3392 **
3393 ** Destruction destroys all internal data structures and frees the
3394 ** memory allocated for the text input stream object.
3395 **
3396 ** @fdata [AjPTextall]
3397 **
3398 ** @nam3rule Del Destructor
3399 **
3400 ** @argrule Del pthis [AjPTextall*] Text input stream
3401 **
3402 ** @valrule * [void]
3403 **
3404 ** @fcategory delete
3405 **
3406 ******************************************************************************/
3407
3408
3409
3410
3411 /* @func ajTextallDel *********************************************************
3412 **
3413 ** Deletes a text input stream object.
3414 **
3415 ** @param [d] pthis [AjPTextall*] Text input stream
3416 ** @return [void]
3417 **
3418 ** @release 6.4.0
3419 ** @@
3420 ******************************************************************************/
3421
void ajTextallDel(AjPTextall* pthis)
{
    AjPTextall thys;

    /* nothing to do for a null handle or an already-deleted object */
    if(!pthis || !*pthis)
        return;

    thys = *pthis;

    ajTextinDel(&thys->Textin);

    /* the text object is only deleted here if not flagged as returned */
    if(!thys->Returned)
        ajTextDel(&thys->Text);

    AJFREE(*pthis);
}
3442
3443
3444
3445
3446 /* ==================================================================== */
3447 /* =========================== Modifiers ============================== */
3448 /* ==================================================================== */
3449
3450
3451
3452
3453 /* @section Text input stream modifiers ***************************************
3454 **
3455 ** These functions use the contents of a text input stream object and
3456 ** update them.
3457 **
3458 ** @fdata [AjPTextall]
3459 **
3460 ** @nam3rule Clear Clear all values
3461 **
3462 ** @argrule * thys [AjPTextall] Text input stream object
3463 **
3464 ** @valrule * [void]
3465 **
3466 ** @fcategory modify
3467 **
3468 ******************************************************************************/
3469
3470
3471
3472
3473 /* @func ajTextallClear *******************************************************
3474 **
3475 ** Clears a text input stream object back to "as new" condition, except
3476 ** for the query list which must be preserved.
3477 **
3478 ** @param [w] thys [AjPTextall] Text input stream
3479 ** @return [void]
3480 **
3481 ** @release 6.4.0
3482 ** @@
3483 ******************************************************************************/
3484
ajTextallClear(AjPTextall thys)3485 void ajTextallClear(AjPTextall thys)
3486 {
3487
3488 ajDebug("ajTextallClear called\n");
3489
3490 if(!thys)
3491 return;
3492
3493 ajTextinClear(thys->Textin);
3494
3495 ajTextClear(thys->Text);
3496
3497 thys->Returned = ajFalse;
3498
3499 return;
3500 }
3501
3502
3503
3504
3505 /* @section Text input ********************************************************
3506 **
3507 ** These functions use a text input stream object to read text
3508 **
3509 ** @fdata [AjPTextall]
3510 **
3511 ** @nam3rule Next Read next text block
3512 **
3513 ** @argrule * thys [AjPTextall] Text input stream object
3514 ** @argrule * Ptext [AjPText*] Text object
3515 **
3516 ** @valrule * [AjBool] True on success
3517 **
3518 ** @fcategory input
3519 **
3520 ******************************************************************************/
3521
3522
3523
3524
3525 /* @func ajTextallNext ********************************************************
3526 **
3527 ** Parse a text query into format, access, file and entry
3528 **
3529 ** Split at delimiters. Check for the first part as a valid format
3530 ** Check for the remaining first part as a database name or as a file
3531 ** that can be opened.
3532 ** Anything left is an entryname spec.
3533 **
3534 ** Return the results in the AjPText object but leave the file open for
3535 ** future calls.
3536 **
3537 ** @param [w] thys [AjPTextall] Text input stream
3538 ** @param [u] Ptext [AjPText*] Text block returned
3539 ** @return [AjBool] ajTrue on success.
3540 **
3541 ** @release 6.4.0
3542 ** @@
3543 ******************************************************************************/
3544
ajTextallNext(AjPTextall thys,AjPText * Ptext)3545 AjBool ajTextallNext(AjPTextall thys, AjPText *Ptext)
3546 {
3547 if(!thys->Count)
3548 {
3549 thys->Count = 1;
3550
3551 thys->Totterms++;
3552
3553 *Ptext = thys->Text;
3554 thys->Returned = ajTrue;
3555
3556 return ajTrue;
3557 }
3558
3559
3560 if(ajTextinRead(thys->Textin, thys->Text))
3561 {
3562 thys->Count++;
3563
3564 thys->Totterms++;
3565
3566 *Ptext = thys->Text;
3567 thys->Returned = ajTrue;
3568
3569 ajDebug("ajTextallNext success\n");
3570
3571 return ajTrue;
3572 }
3573
3574 *Ptext = NULL;
3575
3576 ajDebug("ajTextallNext failed\n");
3577
3578 ajTextallClear(thys);
3579
3580 return ajFalse;
3581 }
3582
3583
3584
3585
3586 /* @datasection [none] Input formats ******************************************
3587 **
3588 ** Input formats internals
3589 **
3590 ** @nam2rule Textinformat Text data input format specific
3591 **
3592 ******************************************************************************/
3593
3594
3595
3596
3597 /* @section cast **************************************************************
3598 **
3599 ** Values for input formats
3600 **
3601 ** @fdata [none]
3602 **
3603 ** @nam3rule Find Return index to named format
3604 ** @nam3rule Term Test format EDAM term
3605 ** @nam3rule Test Test format value
3606 **
3607 ** @argrule Find format [const AjPStr] Format name
3608 ** @argrule Term term [const AjPStr] Format EDAM term
3609 ** @argrule Test format [const AjPStr] Format name
3610 ** @argrule Find iformat [ajint*] Index matching format name
3611 **
3612 ** @valrule * [AjBool] True if found
3613 **
3614 ** @fcategory cast
3615 **
3616 ******************************************************************************/
3617
3618
3619
3620
3621 /* @funcstatic textinformatFind ***********************************************
3622 **
3623 ** Looks for the specified format(s) in the internal definitions and
3624 ** returns the index.
3625 **
3626 ** Sets iformat as the recognised format, and returns ajTrue.
3627 **
3628 ** @param [r] format [const AjPStr] Format required.
3629 ** @param [w] iformat [ajint*] Index
3630 ** @return [AjBool] ajTrue on success.
3631 **
3632 ** @release 6.4.0
3633 ** @@
3634 ******************************************************************************/
3635
textinformatFind(const AjPStr format,ajint * iformat)3636 static AjBool textinformatFind(const AjPStr format, ajint* iformat)
3637 {
3638 AjPStr tmpformat = NULL;
3639 ajuint i = 0;
3640
3641 ajDebug("textinformatFind '%S'\n", format);
3642 if(!ajStrGetLen(format))
3643 return ajFalse;
3644
3645 ajStrAssignS(&tmpformat, format);
3646 ajStrFmtLower(&tmpformat);
3647
3648 for(i=0; textinFormatDef[i].Name; i++)
3649 {
3650 ajDebug("test %d '%s' '%s' '%s'\n",
3651 i, textinFormatDef[i].Name,
3652 textinFormatDef[i].Obo,
3653 textinFormatDef[i].Desc);
3654 if(ajStrMatchC(tmpformat, textinFormatDef[i].Name) ||
3655 ajStrMatchC(format, textinFormatDef[i].Obo))
3656 {
3657 *iformat = i;
3658 ajStrDel(&tmpformat);
3659 ajDebug("found '%s' at %d\n", textinFormatDef[i].Name, i);
3660 return ajTrue;
3661 }
3662 }
3663
3664 ajStrDel(&tmpformat);
3665
3666 return ajFalse;
3667 }
3668
3669
3670
3671
3672 /* @func ajTextinformatTerm ***************************************************
3673 **
3674 ** tests whether a text input format term is known
3675 **
3676 ** @param [r] term [const AjPStr] Format term EDAM ID
3677 ** @return [AjBool] ajTrue if term was accepted
3678 **
3679 ** @release 6.4.0
3680 ** @@
3681 ******************************************************************************/
3682
ajTextinformatTerm(const AjPStr term)3683 AjBool ajTextinformatTerm(const AjPStr term)
3684 {
3685 ajuint i;
3686
3687 for(i=0; textinFormatDef[i].Name; i++)
3688 if(ajStrMatchC(term, textinFormatDef[i].Obo))
3689 return ajTrue;
3690
3691 return ajFalse;
3692 }
3693
3694
3695
3696
3697 /* @func ajTextinformatTest ***************************************************
3698 **
3699 ** tests whether a named text input format is known
3700 **
3701 ** @param [r] format [const AjPStr] Format
3702 ** @return [AjBool] ajTrue if format was accepted
3703 **
3704 ** @release 6.4.0
3705 ** @@
3706 ******************************************************************************/
3707
ajTextinformatTest(const AjPStr format)3708 AjBool ajTextinformatTest(const AjPStr format)
3709 {
3710 ajuint i;
3711
3712 for(i=0; textinFormatDef[i].Name; i++)
3713 {
3714 if(ajStrMatchCaseC(format, textinFormatDef[i].Name))
3715 return ajTrue;
3716 if(ajStrMatchC(format, textinFormatDef[i].Obo))
3717 return ajTrue;
3718 }
3719
3720 return ajFalse;
3721 }
3722