1 /* @source ajurlread **********************************************************
2 **
3 ** AJAX url reading functions
4 **
5 ** These functions control all aspects of AJAX url reading
6 **
7 ** @author Copyright (C) 2010 Peter Rice
8 ** @version $Revision: 1.19 $
9 ** @modified Oct 5 pmr First version
10 ** @modified $Date: 2012/12/07 10:07:32 $ by $Author: rice $
11 ** @@
12 **
13 ** This library is free software; you can redistribute it and/or
14 ** modify it under the terms of the GNU Lesser General Public
15 ** License as published by the Free Software Foundation; either
16 ** version 2.1 of the License, or (at your option) any later version.
17 **
18 ** This library is distributed in the hope that it will be useful,
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 ** Lesser General Public License for more details.
22 **
23 ** You should have received a copy of the GNU Lesser General Public
24 ** License along with this library; if not, write to the Free Software
25 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 ** MA 02110-1301, USA.
27 **
28 ******************************************************************************/
29
30 #include "ajlib.h"
31
32 #include "ajurlread.h"
33 #include "ajurl.h"
34 #include "ajcall.h"
35 #include "ajlist.h"
36 #include "ajquery.h"
37 #include "ajtextread.h"
38 #include "ajnam.h"
39 #include "ajfileio.h"
40 #include "ajresource.h"
41
42 #include <string.h>
43
/* Call register table for url database access methods. Created on first
** use by ajUrlaccessGetDb and deleted by ajUrlinExit. */
AjPTable urlDbMethods = NULL;

/* Line buffer reused across calls to urlinReadHtml; deleted by
** ajUrlinExit. */
static AjPStr urlinReadLine = NULL;


/* Declared ahead of the format table below, which stores its address. */
static AjBool urlinReadHtml(AjPUrlin thys, AjPUrl url);
50
51
52
53
54 /* @datastatic UrlPInFormat *************************************************
55 **
56 ** Url input formats data structure
57 **
58 ** @alias UrlSInFormat
59 ** @alias UrlOInFormat
60 **
61 ** @attr Name [const char*] Format name
62 ** @attr Obo [const char*] Ontology term id from EDAM
63 ** @attr Desc [const char*] Format description
64 ** @attr Alias [AjBool] Name is an alias for an identical definition
65 ** @attr Try [AjBool] If true, try for an unknown input. Duplicate names
66 ** and read-anything formats are set false
67 ** @attr Read [AjBool function] Input function, returns ajTrue on success
68 ** @@
69 ******************************************************************************/
70
typedef struct UrlSInFormat
{
    const char *Name;   /* format name */
    const char *Obo;    /* EDAM ontology term id */
    const char *Desc;   /* short description printed in the reports */
    AjBool Alias;       /* true if Name duplicates another definition */
    AjBool Try;         /* true if tried for input of unknown format */
    AjBool (*Read) (AjPUrlin thys, AjPUrl url); /* reader, ajTrue on success */
} UrlOInFormat;

/* The pointer type is a macro rather than a typedef, so a declaration
** such as "UrlPInFormat a, b;" would make only a a pointer. */
#define UrlPInFormat UrlOInFormat*
82
/* Table of known url input formats, terminated by an entry whose Name is
** NULL. Entry 0 ("unknown") is left out of the Book, Html and Wiki
** reports, whose loops over this table start at index 1; the Text report
** starts at index 0. */
static UrlOInFormat urlinFormatDef[] =
{
/* "Name",        "OBOterm", "Description" */
/*     Alias,   Try,     */
/*     ReadFunction */
    {"unknown",     "0000", "Unknown format",
     AJFALSE, AJFALSE,
     &urlinReadHtml}, /* default to first format */
    {"html",        "2331", "Html format",
     AJFALSE, AJTRUE,
     &urlinReadHtml},
    {"HTML",        "2331", "Html format",
     AJTRUE,  AJFALSE,
     &urlinReadHtml},
    {NULL, NULL, NULL, 0, 0, NULL}
};
99
100
101
/* Forward declarations of file-local (static) functions */
static AjBool urlinRead(AjPUrlin urlin, AjPUrl url);
static AjBool urlinformatFind(const AjPStr format, ajint* iformat);
static AjBool urlinFormatSet(AjPUrlin urlin, AjPUrl url);
static AjBool urlinListProcess(AjPUrlin urlin, AjPUrl url,
                               const AjPStr listfile);
static void urlinListNoComment(AjPStr* text);
static void urlinQryRestore(AjPUrlin urlin, const AjPQueryList node);
static void urlinQrySave(AjPQueryList node, const AjPUrlin urlin);
static AjBool urlDefine(AjPUrl thys, AjPUrlin urlin);
static AjBool urlinQryProcess(AjPUrlin urlin, AjPUrl url);
112
113
114
115
116 /* @filesection ajurlread ****************************************************
117 **
118 ** @nam1rule aj Function belongs to the AJAX library.
119 **
120 */
121
122
123
124
125 /* @datasection [AjPUrlin] Url input objects ***************************
126 **
127 ** Function is for manipulating url input objects
128 **
129 ** @nam2rule Urlin
130 ******************************************************************************/
131
132
133
134
135 /* @section Url input constructors ***************************************
136 **
137 ** All constructors return a new url input object by pointer. It
138 ** is the responsibility of the user to first destroy any previous
139 ** url input object. The target pointer does not need to be
140 ** initialised to NULL, but it is good programming practice to do so
141 ** anyway.
142 **
143 ** @fdata [AjPUrlin]
144 **
145 ** @nam3rule New Construct a new url input object
146 **
147 ** @valrule * [AjPUrlin] New url input object
148 **
149 ** @fcategory new
150 **
151 ******************************************************************************/
152
153
154
155
156 /* @func ajUrlinNew ***********************************************************
157 **
158 ** Creates a new url input object.
159 **
160 ** @return [AjPUrlin] New url input object.
161 ** @category new [AjPUrlin] Default constructor
162 **
163 ** @release 6.4.0
164 ** @@
165 ******************************************************************************/
166
ajUrlinNew(void)167 AjPUrlin ajUrlinNew(void)
168 {
169 AjPUrlin pthis;
170
171 AJNEW0(pthis);
172
173 pthis->Input = ajTextinNewDatatype(AJDATATYPE_URL);
174
175 pthis->Resource = NULL;
176 pthis->UrlData = NULL;
177
178 return pthis;
179 }
180
181
182
183
184
185 /* @section url input destructors *********************************************
186 **
187 ** Destruction destroys all internal data structures and frees the
188 ** memory allocated for the url input object.
189 **
190 ** @fdata [AjPUrlin]
191 **
192 ** @nam3rule Del Destructor
193 **
194 ** @argrule Del pthis [AjPUrlin*] Url input object
195 **
196 ** @valrule * [void]
197 **
198 ** @fcategory delete
199 **
200 ******************************************************************************/
201
202
203
204
205 /* @func ajUrlinDel ***********************************************************
206 **
207 ** Deletes an url input object.
208 **
209 ** @param [d] pthis [AjPUrlin*] Url input
210 ** @return [void]
211 ** @category delete [AjPUrlin] Default destructor
212 **
213 ** @release 6.4.0
214 ** @@
215 ******************************************************************************/
216
ajUrlinDel(AjPUrlin * pthis)217 void ajUrlinDel(AjPUrlin* pthis)
218 {
219 AjPUrlin thys;
220 AjPResquery resqry;
221
222 if(!pthis)
223 return;
224
225 thys = *pthis;
226
227 if(!thys)
228 return;
229
230 ajDebug("ajUrlinDel called qry:'%S'\n", thys->Input->Qry);
231
232 ajTextinDel(&thys->Input);
233 ajResourceDel(&thys->Resource);
234
235 ajStrDel(&thys->Identifiers);
236 ajStrDel(&thys->Accession);
237 ajStrDel(&thys->IdTypes);
238
239 ajListstrFree(&thys->UrlList);
240
241 while(ajListPop(thys->QryList, (void**)&resqry))
242 ajResqueryDel(&resqry);
243 ajListFree(&thys->QryList);
244
245 AJFREE(*pthis);
246
247 return;
248 }
249
250
251
252
253 /* @section url input modifiers ******************************************
254 **
255 ** These functions use the contents of an url input object and
256 ** update them.
257 **
258 ** @fdata [AjPUrlin]
259 **
260 ** @nam3rule Clear Clear all values
261 ** @nam3rule Qry Reset using a query string
262 ** @suffix C Character string input
263 ** @suffix S String input
264 **
265 ** @argrule * thys [AjPUrlin] Url input object
266 ** @argrule C txt [const char*] Query text
267 ** @argrule S str [const AjPStr] query string
268 **
269 ** @valrule * [void]
270 **
271 ** @fcategory modify
272 **
273 ******************************************************************************/
274
275
276
277
278 /* @func ajUrlinClear *********************************************************
279 **
280 ** Clears an url input object back to "as new" condition, except
281 ** for the query list which must be preserved.
282 **
283 ** @param [w] thys [AjPUrlin] Url input
284 ** @return [void]
285 ** @category modify [AjPUrlin] Resets ready for reuse.
286 **
287 ** @release 6.4.0
288 ** @@
289 ******************************************************************************/
290
ajUrlinClear(AjPUrlin thys)291 void ajUrlinClear(AjPUrlin thys)
292 {
293
294 ajDebug("ajUrlinClear called\n");
295
296 ajTextinClear(thys->Input);
297 ajResourceDel(&thys->Resource);
298 ajStrDel(&thys->Identifiers);
299 ajStrDel(&thys->Accession);
300 ajStrDel(&thys->IdTypes);
301 ajListstrFree(&thys->UrlList);
302
303 thys->IsSwiss = ajFalse;
304 thys->IsEmbl = ajFalse;
305
306 thys->UrlData = NULL;
307
308 return;
309 }
310
311
312
313
314 /* @func ajUrlinQryC **********************************************************
315 **
316 ** Resets an url input object using a new Universal
317 ** Query Address
318 **
319 ** @param [u] thys [AjPUrlin] Url input object.
320 ** @param [r] txt [const char*] Query
321 ** @return [void]
322 **
323 ** @release 6.4.0
324 ** @@
325 ******************************************************************************/
326
ajUrlinQryC(AjPUrlin thys,const char * txt)327 void ajUrlinQryC(AjPUrlin thys, const char* txt)
328 {
329 ajUrlinClear(thys);
330 ajStrAssignC(&thys->Input->Qry, txt);
331
332 return;
333 }
334
335
336
337
338
339 /* @func ajUrlinQryS **********************************************************
340 **
341 ** Resets an url input object using a new Universal
342 ** Query Address
343 **
344 ** @param [u] thys [AjPUrlin] Url input object.
345 ** @param [r] str [const AjPStr] Query
346 ** @return [void]
347 **
348 ** @release 6.4.0
349 ** @@
350 ******************************************************************************/
351
ajUrlinQryS(AjPUrlin thys,const AjPStr str)352 void ajUrlinQryS(AjPUrlin thys, const AjPStr str)
353 {
354 ajUrlinClear(thys);
355 ajStrAssignS(&thys->Input->Qry, str);
356
357 return;
358 }
359
360
361
362
363 /* @section casts *************************************************************
364 **
365 ** Return values
366 **
367 ** @fdata [AjPUrlin]
368 **
369 ** @nam3rule Trace Write debugging output
370 **
371 ** @argrule * thys [const AjPUrlin] Url input object
372 **
373 ** @valrule * [void]
374 **
375 ** @fcategory cast
376 **
377 ******************************************************************************/
378
379
380
381
382 /* @func ajUrlinTrace *********************************************************
383 **
384 ** Debug calls to trace the data in an url input object.
385 **
386 ** @param [r] thys [const AjPUrlin] Url input object.
387 ** @return [void]
388 **
389 ** @release 6.4.0
390 ** @@
391 ******************************************************************************/
392
ajUrlinTrace(const AjPUrlin thys)393 void ajUrlinTrace(const AjPUrlin thys)
394 {
395 ajDebug("url input trace\n");
396 ajDebug("====================\n\n");
397
398 ajTextinTrace(thys->Input);
399
400 if(thys->UrlData)
401 ajDebug( " UrlData: exists\n");
402
403 return;
404 }
405
406
407
408
409 /* @section Url data inputs **********************************************
410 **
** These functions read the url data provided by the first argument
412 **
413 ** @fdata [AjPUrlin]
414 **
415 ** @nam3rule Read Read url data
416 **
417 ** @argrule Read urlin [AjPUrlin] Url input object
418 ** @argrule Read url [AjPUrl] Url data
419 **
420 ** @valrule * [AjBool] true on success
421 **
422 ** @fcategory input
423 **
424 ******************************************************************************/
425
426
427
428
429 /* @func ajUrlinRead **********************************************************
430 **
431 ** If the file is not yet open, calls urlinQryProcess to convert the query
432 ** into an open file stream.
433 **
434 ** Uses urlinRead for the actual file reading.
435 **
436 ** Returns the results in the AjPUrl object.
437 **
438 ** @param [u] urlin [AjPUrlin] Url data input definitions
439 ** @param [w] url [AjPUrl] Url data returned.
440 ** @return [AjBool] ajTrue on success.
441 ** @category input [AjPUrl] Master url data input,
442 ** calls specific functions for file access type
443 ** and url data format.
444 **
445 ** @release 6.4.0
446 ** @@
447 ******************************************************************************/
448
ajUrlinRead(AjPUrlin urlin,AjPUrl url)449 AjBool ajUrlinRead(AjPUrlin urlin, AjPUrl url)
450 {
451 AjBool ret = ajFalse;
452 AjPQueryList node = NULL;
453 AjBool listdata = ajFalse;
454
455 if(urlin->Resource)
456 {
457 /* (a) if file still open, keep reading */
458 ajDebug("ajUrlinRead: input resource '%S' still there, try again\n",
459 urlin->Resource->Id);
460 ret = urlinRead(urlin, url);
461 ajDebug("ajUrlinRead: open buffer qry: '%S' returns: %B\n",
462 urlin->Input->Qry, ret);
463 }
464 else
465 {
466 /* (b) if we have a list, try the next query in the list */
467 if(ajListGetLength(urlin->Input->List))
468 {
469 listdata = ajTrue;
470 ajListPop(urlin->Input->List, (void**) &node);
471
472 ajDebug("++pop from list '%S'\n", node->Qry);
473 ajUrlinQryS(urlin, node->Qry);
474 ajDebug("++SAVE WXYZIN '%S' '%S' %d\n",
475 urlin->Input->Qry,
476 urlin->Input->Formatstr, urlin->Input->Format);
477
478 urlinQryRestore(urlin, node);
479
480 ajStrDel(&node->Qry);
481 ajStrDel(&node->Formatstr);
482 AJFREE(node);
483
484 ajDebug("ajUrlinRead: open list, try '%S'\n",
485 urlin->Input->Qry);
486
487 if(!urlinQryProcess(urlin, url) &&
488 !ajListGetLength(urlin->Input->List))
489 return ajFalse;
490
491 ret = urlinRead(urlin, url);
492 ajDebug("ajUrlinRead: list qry: '%S' returns: %B\n",
493 urlin->Input->Qry, ret);
494 }
495 else
496 {
497 ajDebug("ajUrlinRead: no file yet - test query '%S'\n",
498 urlin->Input->Qry);
499
500 /* (c) Must be a query - decode it */
501 if(!urlinQryProcess(urlin, url) &&
502 !ajListGetLength(urlin->Input->List))
503 return ajFalse;
504
505 if(ajListGetLength(urlin->Input->List)) /* could be a new list */
506 listdata = ajTrue;
507
508 ret = urlinRead(urlin, url);
509 ajDebug("ajUrlinRead: new qry: '%S' returns: %B\n",
510 urlin->Input->Qry, ret);
511 }
512 }
513
514 /* Now read whatever we got */
515
516 while(!ret && ajListGetLength(urlin->Input->List))
517 {
518 /* Failed, but we have a list still - keep trying it */
519 if(listdata)
520 ajErr("Failed to read url data '%S'",
521 urlin->Input->Qry);
522
523 listdata = ajTrue;
524 ajListPop(urlin->Input->List,(void**) &node);
525 ajDebug("++try again: pop from list '%S'\n", node->Qry);
526 ajUrlinQryS(urlin, node->Qry);
527 ajDebug("++SAVE (AGAIN) WXYZIN '%S' '%S' %d\n",
528 urlin->Input->Qry,
529 urlin->Input->Formatstr, urlin->Input->Format);
530
531 urlinQryRestore(urlin, node);
532
533 ajStrDel(&node->Qry);
534 ajStrDel(&node->Formatstr);
535 AJFREE(node);
536
537 if(!urlinQryProcess(urlin, url))
538 continue;
539
540 ret = urlinRead(urlin, url);
541 ajDebug("ajUrlinRead: list retry qry: '%S' returns: %B\n",
542 urlin->Input->Qry, ret);
543 }
544
545 if(!ret)
546 {
547 if(listdata)
548 ajErr("Failed to read url data '%S'",
549 urlin->Input->Qry);
550
551 return ajFalse;
552 }
553
554
555 urlDefine(url, urlin);
556
557 return ajTrue;
558 }
559
560
561
562
563 /* @funcstatic urlDefine ******************************************************
564 **
565 ** Make sure all url data object attributes are defined
566 ** using values from the url input object if needed
567 **
568 ** @param [w] thys [AjPUrl] Url data returned.
569 ** @param [u] urlin [AjPUrlin] Url data input definitions
570 ** @return [AjBool] ajTrue on success.
571 **
572 ** @release 6.4.0
573 ** @@
574 ******************************************************************************/
575
urlDefine(AjPUrl thys,AjPUrlin urlin)576 static AjBool urlDefine(AjPUrl thys, AjPUrlin urlin)
577 {
578
579 /* if values are missing in the url object, we can use defaults
580 from urlin or calculate where possible */
581
582 /* assign the dbname if defined in the urlin object */
583 if(ajStrGetLen(urlin->Input->Db))
584 ajStrAssignS(&thys->Db, urlin->Input->Db);
585
586 return ajTrue;
587 }
588
589
590
591
592
593 /* @funcstatic urlinRead ******************************************************
594 **
595 ** Given data in an urlin structure, tries to read everything needed
596 ** using the specified format or by trial and error.
597 **
598 ** @param [u] urlin [AjPUrlin] Url data input object
599 ** @param [w] url [AjPUrl] Url data object
600 ** @return [AjBool] ajTrue on success
601 **
602 ** @release 6.4.0
603 ** @@
604 ******************************************************************************/
605
urlinRead(AjPUrlin urlin,AjPUrl url)606 static AjBool urlinRead(AjPUrlin urlin, AjPUrl url)
607 {
608 AjPResource drcat = NULL;
609 AjPResquery resqry = NULL;
610 AjPReslink reslnk = NULL;
611 AjPStr urlstr = NULL;
612 AjIList iter;
613 /* AjPTextin textin = urlin->Input;*/
614 AjPQuery qry = urlin->Input->Query;
615 AjPUrlAccess urlaccess = qry->Access;
616 AjBool ret = ajFalse;
617 ajuint nids = 0;
618 ajuint ntids = 0;
619 ajuint i;
620 AjPStrTok handle = NULL;
621 AjPStr snstr = NULL;
622 AjPStr idstr = NULL;
623 AjPStr tmpstr = NULL;
624
625 ajUrlClear(url);
626
627 ajDebug("urlinRead: swiss %B embl %B acc '%S' idtype '%S' id '%S'\n",
628 urlin->IsSwiss, urlin->IsEmbl,
629 urlin->Accession, urlin->IdTypes,
630 urlin->Identifiers);
631
632 if(!urlin->Resource)
633 {
634 if(!(*urlaccess->Access)(urlin))
635 return ajFalse;
636 }
637
638 drcat = urlin->Resource;
639 if(!urlin->UrlList)
640 {
641 urlin->UrlList = ajListstrNew();
642 urlin->QryList = ajListNew();
643
644 if(urlin->IsSwiss || urlin->IsEmbl)
645 {
646 iter = ajListIterNew(drcat->Xref);
647 while(!ajListIterDone(iter))
648 {
649 reslnk = ajListIterGet(iter);
650 ajDebug("test reslnk '%S' : '%S'\n",
651 reslnk->Source, reslnk->Term);
652 if(urlin->IsSwiss && ajStrPrefixC(reslnk->Source, "SP_"))
653 {
654 ajStrAssignS(&urlin->IdTypes, reslnk->Term);
655 ajStrCutBraces(&urlin->IdTypes);
656 ajDebug("idtypes %S: '%S'\n",
657 reslnk->Source, urlin->IdTypes);
658 }
659 if(urlin->IsEmbl && ajStrPrefixC(reslnk->Source, "EMBL_"))
660 {
661 ajStrAssignS(&urlin->IdTypes, reslnk->Term);
662 ajStrCutBraces(&urlin->IdTypes);
663 ajDebug("idtypes %S: '%S'\n",
664 reslnk->Source, urlin->IdTypes);
665 }
666 }
667 ajListIterDel(&iter);
668 }
669 if(ajStrGetLen(urlin->Accession))
670 {
671 }
672 if(ajStrGetLen(urlin->IdTypes))
673 {
674 ajStrCutBraces(&urlin->IdTypes);
675 }
676
677 nids = 1 + (ajuint) ajStrCalcCountK(urlin->Identifiers, ';');
678
679 iter = ajListIterNewread(drcat->Query);
680
681 while(!ajListIterDone(iter))
682 {
683 resqry = ajListIterGet(iter);
684
685 ajDebug("test qry %S | %S | %S\n",
686 resqry->Datatype, resqry->Format, resqry->Term);
687
688 ntids = 1 + (ajuint) ajStrCalcCountK(resqry->Term, ';');
689 if(ntids != nids)
690 continue;
691
692 if(!ajStrMatchC(resqry->Format, "HTML"))
693 continue;
694
695 if(ajStrGetLen(urlin->IdTypes))
696 {
697 if(!ajStrPrefixS(resqry->Term, urlin->IdTypes))
698 continue;
699 if(!ajStrMatchS(resqry->Term, urlin->IdTypes))
700 {
701 ajStrAssignS(&tmpstr, resqry->Term);
702 ajStrCutBraces(&tmpstr);
703 if(!ajStrMatchS(tmpstr, urlin->IdTypes))
704 continue;
705 }
706 }
707
708 urlstr = ajStrNewS(resqry->Url);
709 if(nids > 1)
710 {
711 handle = ajStrTokenNewC(urlin->Identifiers, ";");
712 for(i=0; i<nids; i++)
713 {
714 ajStrTokenNextParse(handle, &idstr);
715 ajFmtPrintS(&snstr, "%%s%u", (i+1));
716 ajStrExchangeSS(&urlstr, snstr, idstr);
717 }
718 ajStrTokenDel(&handle);
719 ajStrDel(&snstr);
720 ajStrDel(&idstr);
721 }
722 ajStrExchangeCS(&urlstr, "%s", urlin->Identifiers);
723 if(ajStrGetLen(urlin->Accession))
724 ajStrExchangeCS(&urlstr, "%u", urlin->Accession);
725
726 ajDebug("save query '%S'\n", urlstr);
727
728 ajListPushAppend(urlin->UrlList, urlstr);
729 urlstr = NULL;
730 ajListPushAppend(urlin->QryList, ajResqueryNewResquery(resqry));
731 }
732
733 ajListIterDel(&iter);
734 }
735
736 if(ajListGetLength(urlin->UrlList))
737 {
738 ajListstrPop(urlin->UrlList, &url->Full);
739 ajListPop(urlin->QryList, (void**) &url->Resqry);
740 ajStrAssignS(&url->Id, urlin->Identifiers);
741 ret = ajTrue;
742 }
743
744 if(!urlin->UrlList)
745 {
746 ajListstrFree(&urlin->UrlList);
747 }
748
749 ajStrDel(&tmpstr);
750 return ret;
751 }
752
753
754
755
756 /* @funcstatic urlinReadHtml **************************************************
757 **
758 ** Given data in an url structure, tries to read everything needed
759 ** using HTML format.
760 **
761 ** @param [u] urlin [AjPUrlin] Url input object
762 ** @param [w] url [AjPUrl] url object
763 ** @return [AjBool] ajTrue on success
764 **
765 ** @release 6.4.0
766 ** @@
767 ******************************************************************************/
768
urlinReadHtml(AjPUrlin urlin,AjPUrl url)769 static AjBool urlinReadHtml(AjPUrlin urlin, AjPUrl url)
770 {
771 AjPFilebuff buff;
772
773 ajlong fpos = 0;
774 ajuint linecnt = 0;
775
776 ajDebug("urlinReadHtml\n");
777 ajUrlClear(url);
778 buff = urlin->Input->Filebuff;
779
780 /* ajFilebuffTrace(buff); */
781
782 while (ajBuffreadLinePos(buff, &urlinReadLine, &fpos))
783 {
784 linecnt++;
785
786 if(ajStrGetCharLast(urlinReadLine) == '\n')
787 ajStrCutEnd(&urlinReadLine, 1);
788
789 if(ajStrGetCharLast(urlinReadLine) == '\r')
790 ajStrCutEnd(&urlinReadLine, 1);
791
792 ajDebug("line %u:%S\n", linecnt, urlinReadLine);
793
794 /* add line to AjPUrl object */
795 }
796
797 return ajTrue;
798 }
799
800
801
802
803 /* @datasection [none] Miscellaneous ******************************************
804 **
805 ** Url input internals
806 **
807 ** @nam2rule Urlin Url input
808 **
809 ******************************************************************************/
810
811
812
813
814 /* @section Printing **********************************************************
815 **
816 ** Printing details of the internals to a file
817 **
818 ** @fdata [none]
819 **
820 ** @nam2rule Urlinprint
821 **
822 ** @fcategory output
823 **
824 ******************************************************************************/
825
826
827
828
829 /* @section Print *************************************************************
830 **
831 ** Printing to a file
832 **
833 ** @fdata [none]
834 **
835 ** @nam3rule Book Print as docbook table
836 ** @nam3rule Html Print as html table
837 ** @nam3rule Wiki Print as wiki table
838 ** @nam3rule Text Print as text
839 **
840 ** @argrule * outf [AjPFile] output file
841 ** @argrule Text full [AjBool] Print all details
842 **
843 ** @valrule * [void]
844 **
845 ** @fcategory cast
846 **
847 ******************************************************************************/
848
849
850
851
852 /* @func ajUrlinprintBook *****************************************************
853 **
854 ** Reports the internal data structures as a Docbook table
855 **
856 ** @param [u] outf [AjPFile] Output file
857 ** @return [void]
858 **
859 ** @release 6.4.0
860 ** @@
861 ******************************************************************************/
862
ajUrlinprintBook(AjPFile outf)863 void ajUrlinprintBook(AjPFile outf)
864 {
865 ajuint i = 0;
866 ajuint j = 0;
867 AjPStr namestr = NULL;
868 AjPList fmtlist;
869 AjPStr* names;
870
871 fmtlist = ajListstrNew();
872
873 ajFmtPrintF(outf, "<para>The supported url formats are summarised "
874 "in the table below. "
875 "The columns are as follows: "
876 "<emphasis>Input format</emphasis> (format name), "
877 "<emphasis>Try</emphasis> (indicates whether the "
878 "format can be detected automatically on input), and "
879 "<emphasis>Description</emphasis> (short description of "
880 "the format).</para>\n\n");
881
882 ajFmtPrintF(outf, "<table frame=\"box\" rules=\"cols\">\n");
883 ajFmtPrintF(outf, " <caption>Input url formats</caption>\n");
884 ajFmtPrintF(outf, " <thead>\n");
885 ajFmtPrintF(outf, " <tr align=\"center\">\n");
886 ajFmtPrintF(outf, " <th>Input Format</th>\n");
887 ajFmtPrintF(outf, " <th>Try</th>\n");
888 ajFmtPrintF(outf, " <th>Description</th>\n");
889 ajFmtPrintF(outf, " </tr>\n");
890 ajFmtPrintF(outf, " </thead>\n");
891 ajFmtPrintF(outf, " <tbody>\n");
892
893 for(i=1; urlinFormatDef[i].Name; i++)
894 {
895 if(!urlinFormatDef[i].Alias)
896 {
897 namestr = ajStrNewC(urlinFormatDef[i].Name);
898 ajListPushAppend(fmtlist, namestr);
899 namestr = NULL;
900 }
901 }
902
903 ajListSort(fmtlist, &ajStrVcmp);
904 ajListstrToarray(fmtlist, &names);
905
906 for(i=0; names[i]; i++)
907 {
908 for(j=0; urlinFormatDef[j].Name; j++)
909 {
910 if(ajStrMatchC(names[i],urlinFormatDef[j].Name))
911 {
912 ajFmtPrintF(outf, " <tr>\n");
913 ajFmtPrintF(outf, " <td>%s</td>\n",
914 urlinFormatDef[j].Name);
915 ajFmtPrintF(outf, " <td>%B</td>\n",
916 urlinFormatDef[j].Try);
917 ajFmtPrintF(outf, " <td>%s</td>\n",
918 urlinFormatDef[j].Desc);
919 ajFmtPrintF(outf, " </tr>\n");
920 }
921 }
922 }
923
924
925 ajFmtPrintF(outf, " </tbody>\n");
926 ajFmtPrintF(outf, "</table>\n");
927 ajStrDel(&namestr);
928
929 names = NULL;
930 ajListstrFreeData(&fmtlist);
931
932 return;
933 }
934
935
936
937
938 /* @func ajUrlinprintHtml *****************************************************
939 **
940 ** Reports the internal data structures as an HTML table
941 **
942 ** @param [u] outf [AjPFile] Output file
943 ** @return [void]
944 **
945 ** @release 6.4.0
946 ** @@
947 ******************************************************************************/
948
ajUrlinprintHtml(AjPFile outf)949 void ajUrlinprintHtml(AjPFile outf)
950 {
951 ajuint i = 0;
952 ajuint j = 0;
953
954 AjPStr namestr = NULL;
955
956 ajFmtPrintF(outf, "<table border=3>");
957 ajFmtPrintF(outf, "<tr><th>Input Format</th><th>Auto</th>\n");
958 ajFmtPrintF(outf, "<th>Multi</th><th>Description</th></tr>\n");
959
960 for(i=1; urlinFormatDef[i].Name; i++)
961 {
962 ajStrAssignC(&namestr, urlinFormatDef[i].Name);
963
964 if(!urlinFormatDef[i].Alias)
965 {
966 for(j=i+1; urlinFormatDef[j].Name; j++)
967 {
968 if(urlinFormatDef[j].Read == urlinFormatDef[i].Read)
969 {
970 ajFmtPrintAppS(&namestr, " %s",
971 urlinFormatDef[j].Name);
972 if(!urlinFormatDef[j].Alias)
973 {
974 ajWarn("Input format '%s' same as '%s' but not alias",
975 urlinFormatDef[j].Name,
976 urlinFormatDef[i].Name);
977 }
978 }
979 }
980
981 ajFmtPrintF(outf, "<tr><td>\n%S\n</td><td>%B</td>\n",
982 namestr,
983 urlinFormatDef[i].Try);
984 ajFmtPrintF(outf, "<td>\n%s\n</td></tr>\n",
985 urlinFormatDef[i].Desc);
986 }
987
988 }
989
990 ajFmtPrintF(outf, "</table>\n");
991 ajStrDel(&namestr);
992
993 return;
994 }
995
996
997
998
999 /* @func ajUrlinprintText *****************************************************
1000 **
1001 ** Reports the internal data structures
1002 **
1003 ** @param [u] outf [AjPFile] Output file
1004 ** @param [r] full [AjBool] Full report (usually ajFalse)
1005 ** @return [void]
1006 **
1007 ** @release 6.4.0
1008 ** @@
1009 ******************************************************************************/
1010
ajUrlinprintText(AjPFile outf,AjBool full)1011 void ajUrlinprintText(AjPFile outf, AjBool full)
1012 {
1013 ajuint i = 0;
1014
1015 ajFmtPrintF(outf, "\n");
1016 ajFmtPrintF(outf, "# Url input formats\n");
1017 ajFmtPrintF(outf, "# Name Format name (or alias)\n");
1018 ajFmtPrintF(outf, "# Alias Alias name\n");
1019 ajFmtPrintF(outf, "# Try Test for unknown input files\n");
1020 ajFmtPrintF(outf, "# Name Alias Try "
1021 "Description");
1022 ajFmtPrintF(outf, "\n");
1023 ajFmtPrintF(outf, "InFormat {\n");
1024
1025 for(i=0; urlinFormatDef[i].Name; i++)
1026 if(full || !urlinFormatDef[i].Alias)
1027 ajFmtPrintF(outf,
1028 " %-12s %5B %3B \"%s\"\n",
1029 urlinFormatDef[i].Name,
1030 urlinFormatDef[i].Alias,
1031 urlinFormatDef[i].Try,
1032 urlinFormatDef[i].Desc);
1033
1034 ajFmtPrintF(outf, "}\n\n");
1035
1036 return;
1037 }
1038
1039
1040
1041
1042 /* @func ajUrlinprintWiki *****************************************************
1043 **
1044 ** Reports the internal data structures as a wiki table
1045 **
1046 ** @param [u] outf [AjPFile] Output file
1047 ** @return [void]
1048 **
1049 ** @release 6.4.0
1050 ** @@
1051 ******************************************************************************/
1052
ajUrlinprintWiki(AjPFile outf)1053 void ajUrlinprintWiki(AjPFile outf)
1054 {
1055 ajuint i = 0;
1056 ajuint j = 0;
1057
1058 AjPStr namestr = NULL;
1059
1060 ajFmtPrintF(outf, "{| class=\"wikitable sortable\" border=\"2\"\n");
1061 ajFmtPrintF(outf, "|-\n");
1062 ajFmtPrintF(outf, "!Format!!Try!!"
1063 "class=\"unsortable\"|Description\n");
1064
1065 for(i=1; urlinFormatDef[i].Name; i++)
1066 {
1067 ajStrAssignC(&namestr, urlinFormatDef[i].Name);
1068
1069 if(!urlinFormatDef[i].Alias)
1070 {
1071 for(j=i+1; urlinFormatDef[j].Name; j++)
1072 {
1073 if(urlinFormatDef[j].Read == urlinFormatDef[i].Read)
1074 {
1075 ajFmtPrintAppS(&namestr, "<br>%s",
1076 urlinFormatDef[j].Name);
1077 if(!urlinFormatDef[j].Alias)
1078 {
1079 ajWarn("Input format '%s' same as '%s' but not alias",
1080 urlinFormatDef[j].Name,
1081 urlinFormatDef[i].Name);
1082 }
1083 }
1084 }
1085
1086 ajFmtPrintF(outf, "|-\n");
1087 ajFmtPrintF(outf,
1088 "|%S||%B||%s\n",
1089 namestr,
1090 urlinFormatDef[i].Try,
1091 urlinFormatDef[i].Desc);
1092 }
1093
1094 }
1095
1096 ajFmtPrintF(outf, "|}\n\n");
1097 ajStrDel(&namestr);
1098
1099 return;
1100 }
1101
1102
1103
1104
1105 /* @section Miscellaneous *****************************************************
1106 **
1107 ** Functions to initialise and clean up internals
1108 **
1109 ** @fdata [none]
1110 **
1111 ** @nam3rule Exit Clean up and exit
1112 **
1113 ** @valrule * [void]
1114 **
1115 ** @fcategory misc
1116 **
1117 ******************************************************************************/
1118
1119
1120
1121
1122 /* @func ajUrlinExit **********************************************************
1123 **
1124 ** Cleans up url input internal memory
1125 **
1126 ** @return [void]
1127 **
1128 ** @release 6.4.0
1129 ** @@
1130 ******************************************************************************/
1131
ajUrlinExit(void)1132 void ajUrlinExit(void)
1133 {
1134 ajStrDel(&urlinReadLine);
1135
1136 ajTableDel(&urlDbMethods);
1137
1138 return;
1139 }
1140
1141
1142
1143
1144 /* @section Internals *********************************************************
1145 **
1146 ** Functions to return internal values
1147 **
1148 ** @fdata [none]
1149 **
1150 ** @nam3rule Type Internals for url datatype
1151 ** @nam4rule Get Return a value
1152 ** @nam5rule Fields Known query fields for ajUrlinRead
1153 ** @nam5rule Qlinks Known query link operators for ajUrlinRead
1154 **
1155 ** @valrule * [const char*] Internal value
1156 **
1157 ** @fcategory misc
1158 **
1159 ******************************************************************************/
1160
1161
1162
1163
/* @func ajUrlinTypeGetFields *************************************************
**
** Returns the list of known field names for ajUrlinRead, as a single
** space-separated string.
**
** @return [const char*] List of field names
**
** @release 6.4.0
** @@
******************************************************************************/

const char* ajUrlinTypeGetFields(void)
{
    static const char fieldnames[] = "id acc";

    return fieldnames;
}
1178
1179
1180
1181
/* @func ajUrlinTypeGetQlinks *************************************************
**
** Returns the list of known query link operators for ajUrlinRead
**
** @return [const char*] List of query link operators
**
** @release 6.4.0
** @@
******************************************************************************/

const char* ajUrlinTypeGetQlinks(void)
{
    static const char linkops[] = "|";

    return linkops;
}
1196
1197
1198
1199
1200 /* @datasection [AjPTable] Internal call register table ***********************
1201 **
1202 ** Functions to manage the internal call register table that links the
1203 ** ajaxdb library functions with code in the core AJAX library.
1204 **
1205 ** @nam2rule Urlaccess Functions to manage urldb call tables.
1206 **
1207 ******************************************************************************/
1208
1209
1210
1211
1212 /* @section Cast **************************************************************
1213 **
1214 ** Return a reference to the call table
1215 **
1216 ** @fdata [AjPTable] urldb functions call table
1217 **
1218 ** @nam3rule Get Return a value
1219 ** @nam4rule Db Database access functions table
1220 ** @nam3rule Method Lookup an access method by name
1221 ** @nam4rule Test Return true if the access method exists
1222 ** @nam4rule MethodGet Return a method value
1223 ** @nam5rule Qlinks Return known query links for a named method
1224 ** @nam5rule Scope Return scope (entry, query or all) for a named method
1225 **
1226 ** @argrule Method method [const AjPStr] Method name
1227 **
1228 ** @valrule *Db [AjPTable] Call table of function names and references
1229 ** @valrule *Qlinks [const char*] Query link operators
1230 ** @valrule *Scope [ajuint] Scope flags
1231 ** @valrule *Test [AjBool] True if found
1232 **
1233 ** @fcategory cast
1234 **
1235 ******************************************************************************/
1236
1237
1238
1239
1240 /* @func ajUrlaccessGetDb *****************************************************
1241 **
1242 ** Returns the table in which url database access details are registered
1243 **
1244 ** @return [AjPTable] Access functions hash table
1245 **
1246 ** @release 6.4.0
1247 ** @@
1248 ******************************************************************************/
1249
ajUrlaccessGetDb(void)1250 AjPTable ajUrlaccessGetDb(void)
1251 {
1252 if(!urlDbMethods)
1253 urlDbMethods = ajCallTableNew();
1254 return urlDbMethods;
1255
1256 }
1257
1258
1259
1260
1261 /* @func ajUrlaccessMethodGetQlinks *******************************************
1262 **
1263 ** Tests for a named method for url data reading returns the
1264 ** known query link operators
1265 **
1266 ** @param [r] method [const AjPStr] Method required.
1267 ** @return [const char*] Known link operators
1268 **
1269 ** @release 6.4.0
1270 ** @@
1271 ******************************************************************************/
1272
ajUrlaccessMethodGetQlinks(const AjPStr method)1273 const char* ajUrlaccessMethodGetQlinks(const AjPStr method)
1274 {
1275 AjPUrlAccess methoddata;
1276
1277 methoddata = ajCallTableGetS(urlDbMethods, method);
1278 if(!methoddata)
1279 return NULL;
1280
1281 return methoddata->Qlink;
1282 }
1283
1284
1285
1286
1287 /* @func ajUrlaccessMethodGetScope ********************************************
1288 **
1289 ** Tests for a named method for url data reading and returns the scope
1290 ** (entry, query or all).
1291 *
1292 ** @param [r] method [const AjPStr] Method required.
1293 ** @return [ajuint] Scope flags
1294 **
1295 ** @release 6.4.0
1296 ** @@
1297 ******************************************************************************/
1298
ajUrlaccessMethodGetScope(const AjPStr method)1299 ajuint ajUrlaccessMethodGetScope(const AjPStr method)
1300 {
1301 AjPUrlAccess methoddata;
1302 ajuint ret = 0;
1303
1304 methoddata = ajCallTableGetS(urlDbMethods, method);
1305 if(!methoddata)
1306 return 0;
1307
1308 if(methoddata->Entry)
1309 ret |= AJMETHOD_ENTRY;
1310 if(methoddata->Query)
1311 ret |= AJMETHOD_QUERY;
1312 if(methoddata->All)
1313 ret |= AJMETHOD_ALL;
1314
1315 return ret;
1316 }
1317
1318
1319
1320
1321 /* @func ajUrlaccessMethodTest ************************************************
1322 **
1323 ** Tests for a named method for url data reading.
1324 **
1325 ** @param [r] method [const AjPStr] Method required.
1326 ** @return [AjBool] ajTrue on success.
1327 **
1328 ** @release 6.4.0
1329 ** @@
1330 ******************************************************************************/
1331
ajUrlaccessMethodTest(const AjPStr method)1332 AjBool ajUrlaccessMethodTest(const AjPStr method)
1333 {
1334 if(ajCallTableGetS(urlDbMethods, method))
1335 return ajTrue;
1336
1337 return ajFalse;
1338 }
1339
1340
1341
1342
1343 /* @funcstatic urlinQryRestore ************************************************
1344 **
1345 ** Restores an url input specification from an AjPQueryList node
1346 **
1347 ** @param [w] urlin [AjPUrlin] Url input object
1348 ** @param [r] node [const AjPQueryList] Query list node
1349 ** @return [void]
1350 **
1351 ** @release 6.4.0
1352 ******************************************************************************/
1353
urlinQryRestore(AjPUrlin urlin,const AjPQueryList node)1354 static void urlinQryRestore(AjPUrlin urlin, const AjPQueryList node)
1355 {
1356 urlin->Input->Format = node->Format;
1357 ajStrAssignS(&urlin->Input->Formatstr, node->Formatstr);
1358
1359 return;
1360 }
1361
1362
1363
1364
1365 /* @funcstatic urlinQrySave ***************************************************
1366 **
1367 ** Saves an url input specification in an AjPQueryList node
1368 **
1369 ** @param [w] node [AjPQueryList] Query list node
1370 ** @param [r] urlin [const AjPUrlin] Url input object
1371 ** @return [void]
1372 **
1373 ** @release 6.4.0
1374 ******************************************************************************/
1375
urlinQrySave(AjPQueryList node,const AjPUrlin urlin)1376 static void urlinQrySave(AjPQueryList node, const AjPUrlin urlin)
1377 {
1378 node->Format = urlin->Input->Format;
1379 ajStrAssignS(&node->Formatstr, urlin->Input->Formatstr);
1380
1381 return;
1382 }
1383
1384
1385
1386
1387 /* @funcstatic urlinQryProcess ************************************************
1388 **
1389 ** Converts an url data query into an open file.
1390 **
1391 ** Tests for "format::" and sets this if it is found
1392 **
1393 ** Then tests for "list:" or "@" and processes as a list file
1394 ** using urlinListProcess which in turn invokes urlinQryProcess
1395 ** until a valid query is found.
1396 **
1397 ** Then tests for dbname:query and opens the file (at the correct position
1398 ** if the database definition defines it)
1399 **
1400 ** If there is no database, looks for file:query and opens the file.
1401 ** In this case the file position is not known and url data reading
1402 ** will have to scan for the entry/entries we need.
1403 **
1404 ** @param [u] urlin [AjPUrlin] Url data input structure.
1405 ** @param [u] url [AjPUrl] Url data to be read.
1406 ** The format will be replaced
1407 ** if defined in the query string.
1408 ** @return [AjBool] ajTrue on success.
1409 **
1410 ** @release 6.4.0
1411 ** @@
1412 ******************************************************************************/
1413
urlinQryProcess(AjPUrlin urlin,AjPUrl url)1414 static AjBool urlinQryProcess(AjPUrlin urlin, AjPUrl url)
1415 {
1416 AjBool ret = ajTrue;
1417 AjPStr qrystr = NULL;
1418 AjBool urlmethod = ajFalse;
1419 const AjPStr fmtstr = NULL;
1420 AjPTextin textin;
1421 AjPQuery qry;
1422 AjPUrlAccess urlaccess = NULL;
1423
1424 textin = urlin->Input;
1425 qry = textin->Query;
1426
1427 /* pick up the original query string */
1428 qrystr = ajStrNewS(textin->Qry);
1429
1430 ajDebug("urlinQryProcess '%S'\n", qrystr);
1431
1432 /* look for a format:: prefix */
1433 fmtstr = ajQuerystrParseFormat(&qrystr, textin, urlinformatFind);
1434 ajDebug("urlinQryProcess ... fmtstr '%S' '%S'\n", fmtstr, qrystr);
1435
1436 /* (seq/feat) DO NOT look for a [range] suffix */
1437
1438 /* look for a list:: or @:: listfile of queries - process and return */
1439 if(ajQuerystrParseListfile(&qrystr))
1440 {
1441 ajDebug("urlinQryProcess ... listfile '%S'\n", qrystr);
1442 ret = urlinListProcess(urlin, url, qrystr);
1443 ajStrDel(&qrystr);
1444 return ret;
1445 }
1446
1447 /* try general text access methods (file, asis, text database access */
1448 ajDebug("urlinQryProcess ... no listfile '%S'\n", qrystr);
1449 if(!ajQuerystrParseRead(&qrystr, textin, urlinformatFind, &urlmethod))
1450 {
1451 ajStrDel(&qrystr);
1452 return ajFalse;
1453 }
1454
1455 urlinFormatSet(urlin, url);
1456
1457 ajDebug("urlinQryProcess ... read nontext: %B '%S'\n",
1458 urlmethod, qrystr);
1459 ajStrDel(&qrystr);
1460
1461 /* we found a non-text method */
1462 if(urlmethod)
1463 {
1464 ajDebug("urlinQryProcess ... call method '%S'\n", qry->Method);
1465 ajDebug("urlinQryProcess ... textin format %d '%S'\n",
1466 textin->Format, textin->Formatstr);
1467 ajDebug("urlinQryProcess ... query format '%S'\n",
1468 qry->Formatstr);
1469 qry->Access = ajCallTableGetS(urlDbMethods,qry->Method);
1470 urlaccess = qry->Access;
1471 return (*urlaccess->Access)(urlin);
1472 }
1473
1474 ajDebug("urlinQryProcess text method '%S' success\n", qry->Method);
1475
1476 return ajTrue;
1477 }
1478
1479
1480
1481
1482
1483 /* @datasection [AjPList] Query field list ************************************
1484 **
1485 ** Query fields lists are handled internally. Only static functions
1486 ** should appear here
1487 **
1488 ******************************************************************************/
1489
1490
1491
1492
1493 /* @funcstatic urlinListProcess ***********************************************
1494 **
1495 ** Processes a file of queries.
1496 ** This function is called by, and calls, urlinQryProcess. There is
1497 ** a depth check to avoid infinite loops, for example where a list file
1498 ** refers to itself.
1499 **
1500 ** This function produces a list (AjPList) of queries with all list references
1501 ** expanded into lists of queries.
1502 **
1503 ** Because queries in a list can have their own format
1504 ** the prior settings are stored with each query in the list node so that they
1505 ** can be restored after.
1506 **
1507 ** @param [u] urlin [AjPUrlin] Url data input
1508 ** @param [u] url [AjPUrl] Url data
1509 ** @param [r] listfile [const AjPStr] Name of list file.,
1510 ** @return [AjBool] ajTrue on success.
1511 **
1512 ** @release 6.4.0
1513 ** @@
1514 ******************************************************************************/
1515
urlinListProcess(AjPUrlin urlin,AjPUrl url,const AjPStr listfile)1516 static AjBool urlinListProcess(AjPUrlin urlin, AjPUrl url,
1517 const AjPStr listfile)
1518 {
1519 AjPList list = NULL;
1520 AjPFile file = NULL;
1521 AjPStr token = NULL;
1522 AjPStr rest = NULL;
1523 AjBool ret = ajFalse;
1524 AjPQueryList node = NULL;
1525
1526 ajuint recnum = 0;
1527 static ajint depth = 0;
1528 static ajint MAXDEPTH = 16;
1529
1530 depth++;
1531 ajDebug("++urlinListProcess %S depth %d\n",
1532 listfile, depth);
1533
1534 if(depth > MAXDEPTH)
1535 ajFatal("Query list too deep");
1536
1537 if(!urlin->Input->List)
1538 urlin->Input->List = ajListNew();
1539
1540 list = ajListNew();
1541
1542 file = ajFileNewInNameS(listfile);
1543
1544 if(!file)
1545 {
1546 ajErr("Failed to open list file '%S'", listfile);
1547 depth--;
1548
1549 return ret;
1550 }
1551
1552 while(ajReadlineTrim(file, &urlinReadLine))
1553 {
1554 ++recnum;
1555 urlinListNoComment(&urlinReadLine);
1556
1557 if(ajStrExtractWord(urlinReadLine, &rest, &token))
1558 {
1559 if(ajStrGetLen(rest))
1560 {
1561 ajErr("Bad record %u in list file '%S'\n'%S'",
1562 recnum, listfile, urlinReadLine);
1563 }
1564 else if(ajStrGetLen(token))
1565 {
1566 ajDebug("++Add to list: '%S'\n", token);
1567 AJNEW0(node);
1568 ajStrAssignS(&node->Qry, token);
1569 urlinQrySave(node, urlin);
1570 ajListPushAppend(list, node);
1571 }
1572 }
1573 }
1574
1575 ajFileClose(&file);
1576 ajStrDel(&token);
1577 ajStrDel(&rest);
1578
1579 ajDebug("Trace urlin->Input->List\n");
1580 ajQuerylistTrace(urlin->Input->List);
1581 ajDebug("Trace new list\n");
1582 ajQuerylistTrace(list);
1583 ajListPushlist(urlin->Input->List, &list);
1584
1585 ajDebug("Trace combined urlin->Input->List\n");
1586 ajQuerylistTrace(urlin->Input->List);
1587
1588 /*
1589 ** now try the first item on the list
1590 ** this can descend recursively if it is also a list
1591 ** which is why we check the depth above
1592 */
1593
1594 if(ajListPop(urlin->Input->List, (void**) &node))
1595 {
1596 ajDebug("++pop first item '%S'\n", node->Qry);
1597 ajUrlinQryS(urlin, node->Qry);
1598 urlinQryRestore(urlin, node);
1599 ajStrDel(&node->Qry);
1600 ajStrDel(&node->Formatstr);
1601 AJFREE(node);
1602 ajDebug("descending with query '%S'\n", urlin->Input->Qry);
1603 ret = urlinQryProcess(urlin, url);
1604 }
1605
1606 depth--;
1607 ajDebug("++urlinListProcess depth: %d returns: %B\n", depth, ret);
1608
1609 return ret;
1610 }
1611
1612
1613
1614
1615 /* @funcstatic urlinListNoComment *********************************************
1616 **
1617 ** Strips comments from a character string (a line from an ACD file).
1618 ** Comments are blank lines or any text following a "#" character.
1619 **
1620 ** @param [u] text [AjPStr*] Line of text from input file.
1621 ** @return [void]
1622 **
1623 ** @release 6.4.0
1624 ** @@
1625 ******************************************************************************/
1626
urlinListNoComment(AjPStr * text)1627 static void urlinListNoComment(AjPStr* text)
1628 {
1629 ajuint i;
1630 char *cp;
1631
1632 i = ajStrGetLen(*text);
1633
1634 if(!i) /* empty string */
1635 return;
1636
1637 MAJSTRGETUNIQUESTR(text);
1638
1639 cp = strchr(ajStrGetPtr(*text), '#');
1640
1641 if(cp)
1642 { /* comment found */
1643 *cp = '\0';
1644 ajStrSetValid(text);
1645 }
1646
1647 return;
1648 }
1649
1650
1651
1652
1653 /* @funcstatic urlinFormatSet *************************************************
1654 **
1655 ** Sets the input format for url data using the url data
1656 ** input object's defined format
1657 **
1658 ** @param [u] urlin [AjPUrlin] Url data input.
1659 ** @param [u] url [AjPUrl] Url data
1660 ** @return [AjBool] ajTrue on success.
1661 **
1662 ** @release 6.4.0
1663 ** @@
1664 ******************************************************************************/
1665
urlinFormatSet(AjPUrlin urlin,AjPUrl url)1666 static AjBool urlinFormatSet(AjPUrlin urlin, AjPUrl url)
1667 {
1668
1669 if(ajStrGetLen(urlin->Input->Formatstr))
1670 {
1671 ajDebug("... input format value '%S'\n",
1672 urlin->Input->Formatstr);
1673
1674 if(urlinformatFind(urlin->Input->Formatstr,
1675 &urlin->Input->Format))
1676 {
1677 ajStrAssignS(&url->Formatstr,
1678 urlin->Input->Formatstr);
1679 url->Format = urlin->Input->Format;
1680 ajDebug("...format OK '%S' = %d\n",
1681 urlin->Input->Formatstr,
1682 urlin->Input->Format);
1683 }
1684 else
1685 ajDebug("...format unknown '%S'\n",
1686 urlin->Input->Formatstr);
1687
1688 return ajTrue;
1689 }
1690 else
1691 ajDebug("...input format not set\n");
1692
1693
1694 return ajFalse;
1695 }
1696
1697
1698
1699
1700 /* @datasection [AjPUrlall] Url Input Stream ********************************
1701 **
1702 ** Functions for manipulating url input stream objects
1703 **
1704 ** @nam2rule Urlall Url input stream objects
1705 **
1706 ******************************************************************************/
1707
1708
1709
1710
1711 /* @section Url Input Constructors ******************************************
1712 **
1713 ** All constructors return a new url input stream object by pointer. It
1714 ** is the responsibility of the user to first destroy any previous
1715 ** url input object. The target pointer does not need to be
1716 ** initialised to NULL, but it is good programming practice to do so
1717 ** anyway.
1718 **
1719 ** @fdata [AjPUrlall]
1720 **
1721 ** @nam3rule New Constructor
1722 **
1723 ** @valrule * [AjPUrlall] Url input stream object
1724 **
1725 ** @fcategory new
1726 **
1727 ******************************************************************************/
1728
1729
1730
1731
1732 /* @func ajUrlallNew **********************************************************
1733 **
1734 ** Creates a new url input stream object.
1735 **
1736 ** @return [AjPUrlall] New url input stream object.
1737 **
1738 ** @release 6.4.0
1739 ** @@
1740 ******************************************************************************/
1741
ajUrlallNew(void)1742 AjPUrlall ajUrlallNew(void)
1743 {
1744 AjPUrlall pthis;
1745
1746 AJNEW0(pthis);
1747
1748 pthis->Urlin = ajUrlinNew();
1749 pthis->Url = ajUrlNew();
1750
1751 return pthis;
1752 }
1753
1754
1755
1756
1757
1758 /* ==================================================================== */
1759 /* ========================== destructors ============================= */
1760 /* ==================================================================== */
1761
1762
1763
1764
1765 /* @section Url Input Stream Destructors ************************************
1766 **
1767 ** Destruction destroys all internal data structures and frees the
1768 ** memory allocated for the url input stream object.
1769 **
1770 ** @fdata [AjPUrlall]
1771 **
1772 ** @nam3rule Del Destructor
1773 **
1774 ** @argrule Del pthis [AjPUrlall*] Url input stream
1775 **
1776 ** @valrule * [void]
1777 **
1778 ** @fcategory delete
1779 **
1780 ******************************************************************************/
1781
1782
1783
1784
1785 /* @func ajUrlallDel **********************************************************
1786 **
1787 ** Deletes a url input stream object.
1788 **
1789 ** @param [d] pthis [AjPUrlall*] Url input stream
1790 ** @return [void]
1791 **
1792 ** @release 6.4.0
1793 ** @@
1794 ******************************************************************************/
1795
ajUrlallDel(AjPUrlall * pthis)1796 void ajUrlallDel(AjPUrlall* pthis)
1797 {
1798 AjPUrlall thys;
1799
1800 if(!pthis)
1801 return;
1802
1803 thys = *pthis;
1804
1805 if(!thys)
1806 return;
1807
1808 ajUrlinDel(&thys->Urlin);
1809 if(!thys->Returned)
1810 ajUrlDel(&thys->Url);
1811
1812 AJFREE(*pthis);
1813
1814 return;
1815 }
1816
1817
1818
1819
1820 /* ==================================================================== */
1821 /* =========================== Modifiers ============================== */
1822 /* ==================================================================== */
1823
1824
1825
1826
1827 /* @section Url input stream modifiers **************************************
1828 **
1829 ** These functions use the contents of a url input stream object and
1830 ** update them.
1831 **
1832 ** @fdata [AjPUrlall]
1833 **
1834 ** @nam3rule Clear Clear all values
1835 **
1836 ** @argrule * thys [AjPUrlall] Url input stream object
1837 **
1838 ** @valrule * [void]
1839 **
1840 ** @fcategory modify
1841 **
1842 ******************************************************************************/
1843
1844
1845
1846
1847 /* @func ajUrlallClear ********************************************************
1848 **
1849 ** Clears a url input stream object back to "as new" condition, except
1850 ** for the query list which must be preserved.
1851 **
1852 ** @param [w] thys [AjPUrlall] Url input stream
1853 ** @return [void]
1854 **
1855 ** @release 6.4.0
1856 ** @@
1857 ******************************************************************************/
1858
ajUrlallClear(AjPUrlall thys)1859 void ajUrlallClear(AjPUrlall thys)
1860 {
1861
1862 ajDebug("ajUrlallClear called\n");
1863
1864 ajUrlinClear(thys->Urlin);
1865
1866 ajUrlClear(thys->Url);
1867
1868 thys->Returned = ajFalse;
1869
1870 return;
1871 }
1872
1873
1874
1875
1876 /* @section Url input stream casts ******************************************
1877 **
1878 ** These functions return the contents of a url input stream object
1879 **
1880 ** @fdata [AjPUrlall]
1881 **
1882 ** @nam3rule Get Get url input stream values
1883 ** @nam3rule Geturl Get url values
1884 ** @nam4rule GeturlId Get url identifier
1885 **
1886 ** @argrule * thys [const AjPUrlall] Url input stream object
1887 **
1888 ** @valrule * [const AjPStr] String value
1889 **
1890 ** @fcategory cast
1891 **
1892 ******************************************************************************/
1893
1894
1895
1896
1897 /* @func ajUrlallGeturlId *****************************************************
1898 **
1899 ** Returns the identifier of the current url in an input stream
1900 **
1901 ** @param [r] thys [const AjPUrlall] Url input stream
1902 ** @return [const AjPStr] Identifier
1903 **
1904 ** @release 6.4.0
1905 ** @@
1906 ******************************************************************************/
1907
ajUrlallGeturlId(const AjPUrlall thys)1908 const AjPStr ajUrlallGeturlId(const AjPUrlall thys)
1909 {
1910 if(!thys)
1911 return NULL;
1912
1913 ajDebug("ajUrlallGeturlId called\n");
1914
1915 return ajUrlGetId(thys->Url);
1916 }
1917
1918
1919
1920
1921 /* @section url input *******************************************************
1922 **
1923 ** These functions use a url input stream object to read data
1924 **
1925 ** @fdata [AjPUrlall]
1926 **
1927 ** @nam3rule Next Read next url
1928 **
1929 ** @argrule * thys [AjPUrlall] Url input stream object
1930 ** @argrule * Purl [AjPUrl*] Url object
1931 **
1932 ** @valrule * [AjBool] True on success
1933 **
1934 ** @fcategory input
1935 **
1936 ******************************************************************************/
1937
1938
1939
1940
1941 /* @func ajUrlallNext *********************************************************
1942 **
1943 ** Parse a url query into format, access, file and entry
1944 **
1945 ** Split at delimiters. Check for the first part as a valid format
1946 ** Check for the remaining first part as a database name or as a file
1947 ** that can be opened.
1948 ** Anything left is an entryname spec.
1949 **
1950 ** Return the results in the AjPUrl object but leave the file open for
1951 ** future calls.
1952 **
1953 ** @param [w] thys [AjPUrlall] Url input stream
1954 ** @param [u] Purl [AjPUrl*] Url returned
1955 ** @return [AjBool] ajTrue on success.
1956 **
1957 ** @release 6.4.0
1958 ** @@
1959 ******************************************************************************/
1960
ajUrlallNext(AjPUrlall thys,AjPUrl * Purl)1961 AjBool ajUrlallNext(AjPUrlall thys, AjPUrl *Purl)
1962 {
1963 ajDebug("ajUrlallNext count:%u\n", thys->Count);
1964
1965 if(!thys->Count)
1966 {
1967 thys->Count = 1;
1968
1969 thys->Totterms++;
1970
1971 *Purl = thys->Url;
1972 thys->Returned = ajTrue;
1973
1974 return ajTrue;
1975 }
1976
1977
1978 if(ajUrlinRead(thys->Urlin, thys->Url))
1979 {
1980 thys->Count++;
1981
1982 thys->Totterms++;
1983
1984 *Purl = thys->Url;
1985 thys->Returned = ajTrue;
1986
1987 ajDebug("ajUrlallNext success\n");
1988
1989 return ajTrue;
1990 }
1991
1992 *Purl = NULL;
1993
1994 ajDebug("ajUrlallNext failed\n");
1995
1996 ajUrlallClear(thys);
1997
1998 return ajFalse;
1999 }
2000
2001
2002
2003
2004 /* @datasection [none] Input formats ******************************************
2005 **
2006 ** Input formats internals
2007 **
2008 ** @nam2rule Urlinformat Url data input format specific
2009 **
2010 ******************************************************************************/
2011
2012
2013
2014
2015 /* @section cast **************************************************************
2016 **
2017 ** Values for input formats
2018 **
2019 ** @fdata [none]
2020 **
2021 ** @nam3rule Find Return index to named format
2022 ** @nam3rule Term Test format EDAM term
2023 ** @nam3rule Test Test format value
2024 **
2025 ** @argrule Find format [const AjPStr] Format name
2026 ** @argrule Term term [const AjPStr] Format EDAM term
2027 ** @argrule Test format [const AjPStr] Format name
2028 ** @argrule Find iformat [ajint*] Index matching format name
2029 **
2030 ** @valrule * [AjBool] True if found
2031 **
2032 ** @fcategory cast
2033 **
2034 ******************************************************************************/
2035
2036
2037
2038
2039 /* @funcstatic urlinformatFind ************************************************
2040 **
2041 ** Looks for the specified format(s) in the internal definitions and
2042 ** returns the index.
2043 **
2044 ** Sets iformat as the recognised format, and returns ajTrue.
2045 **
2046 ** @param [r] format [const AjPStr] Format required.
2047 ** @param [w] iformat [ajint*] Index
2048 ** @return [AjBool] ajTrue on success.
2049 **
2050 ** @release 6.4.0
2051 ** @@
2052 ******************************************************************************/
2053
urlinformatFind(const AjPStr format,ajint * iformat)2054 static AjBool urlinformatFind(const AjPStr format, ajint* iformat)
2055 {
2056 AjPStr tmpformat = NULL;
2057 ajuint i = 0;
2058
2059 /* ajDebug("urlinformatFind '%S'\n", format); */
2060 if(!ajStrGetLen(format))
2061 return ajFalse;
2062
2063 ajStrAssignS(&tmpformat, format);
2064 ajStrFmtLower(&tmpformat);
2065
2066 for(i=0; urlinFormatDef[i].Name; i++)
2067 {
2068 /* ajDebug("test %d '%s' '%s' '%s'\n",
2069 i, urlinFormatDef[i].Name,
2070 urlinFormatDef[i].Obo,
2071 urlinFormatDef[i].Desc); */
2072 if(ajStrMatchCaseC(tmpformat, urlinFormatDef[i].Name) ||
2073 ajStrMatchC(format, urlinFormatDef[i].Obo))
2074 {
2075 *iformat = i;
2076 ajStrDel(&tmpformat);
2077 /* ajDebug("found '%s' at %d\n", urlinFormatDef[i].Name, i); */
2078 return ajTrue;
2079 }
2080 }
2081
2082 ajErr("Unknown input format '%S'", format);
2083
2084 ajStrDel(&tmpformat);
2085
2086 return ajFalse;
2087 }
2088
2089
2090
2091
2092 /* @func ajUrlinformatTerm ****************************************************
2093 **
2094 ** Tests whether a url data input format term is known
2095 **
2096 ** @param [r] term [const AjPStr] Format term EDAM ID
2097 ** @return [AjBool] ajTrue if term was accepted
2098 **
2099 ** @release 6.4.0
2100 ** @@
2101 ******************************************************************************/
2102
ajUrlinformatTerm(const AjPStr term)2103 AjBool ajUrlinformatTerm(const AjPStr term)
2104 {
2105 ajuint i;
2106
2107 for(i=0; urlinFormatDef[i].Name; i++)
2108 if(ajStrMatchC(term, urlinFormatDef[i].Obo))
2109 return ajTrue;
2110
2111 return ajFalse;
2112 }
2113
2114
2115
2116
2117 /* @func ajUrlinformatTest ****************************************************
2118 **
2119 ** Tests whether a named url data input format is known
2120 **
2121 ** @param [r] format [const AjPStr] Format
2122 ** @return [AjBool] ajTrue if format was accepted
2123 **
2124 ** @release 6.4.0
2125 ** @@
2126 ******************************************************************************/
2127
ajUrlinformatTest(const AjPStr format)2128 AjBool ajUrlinformatTest(const AjPStr format)
2129 {
2130 ajuint i;
2131
2132 for(i=0; urlinFormatDef[i].Name; i++)
2133 {
2134 if(ajStrMatchCaseC(format, urlinFormatDef[i].Name))
2135 return ajTrue;
2136 if(ajStrMatchC(format, urlinFormatDef[i].Obo))
2137 return ajTrue;
2138 }
2139
2140 return ajFalse;
2141 }
2142