1 /* @source ajurlread **********************************************************
2 **
3 ** AJAX url reading functions
4 **
5 ** These functions control all aspects of AJAX url reading
6 **
7 ** @author Copyright (C) 2010 Peter Rice
8 ** @version $Revision: 1.19 $
9 ** @modified Oct 5 pmr First version
10 ** @modified $Date: 2012/12/07 10:07:32 $ by $Author: rice $
11 ** @@
12 **
13 ** This library is free software; you can redistribute it and/or
14 ** modify it under the terms of the GNU Lesser General Public
15 ** License as published by the Free Software Foundation; either
16 ** version 2.1 of the License, or (at your option) any later version.
17 **
18 ** This library is distributed in the hope that it will be useful,
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 ** Lesser General Public License for more details.
22 **
23 ** You should have received a copy of the GNU Lesser General Public
24 ** License along with this library; if not, write to the Free Software
25 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 ** MA  02110-1301,  USA.
27 **
28 ******************************************************************************/
29 
30 #include "ajlib.h"
31 
32 #include "ajurlread.h"
33 #include "ajurl.h"
34 #include "ajcall.h"
35 #include "ajlist.h"
36 #include "ajquery.h"
37 #include "ajtextread.h"
38 #include "ajnam.h"
39 #include "ajfileio.h"
40 #include "ajresource.h"
41 
42 #include <string.h>
43 
44 AjPTable urlDbMethods = NULL;
45 
46 static AjPStr urlinReadLine     = NULL;
47 
48 
49 static AjBool urlinReadHtml(AjPUrlin thys, AjPUrl url);
50 
51 
52 
53 
54 /* @datastatic UrlPInFormat *************************************************
55 **
56 ** Url input formats data structure
57 **
58 ** @alias UrlSInFormat
59 ** @alias UrlOInFormat
60 **
61 ** @attr Name [const char*] Format name
62 ** @attr Obo  [const char*] Ontology term id from EDAM
63 ** @attr Desc [const char*] Format description
64 ** @attr Alias [AjBool] Name is an alias for an identical definition
65 ** @attr Try [AjBool] If true, try for an unknown input. Duplicate names
66 **                    and read-anything formats are set false
67 ** @attr Read [AjBool function] Input function, returns ajTrue on success
68 ** @@
69 ******************************************************************************/
70 
71 typedef struct UrlSInFormat
72 {
73     const char *Name;
74     const char *Obo;
75     const char *Desc;
76     AjBool Alias;
77     AjBool Try;
78     AjBool (*Read) (AjPUrlin thys, AjPUrl url);
79 } UrlOInFormat;
80 
81 #define UrlPInFormat UrlOInFormat*
82 
83 static UrlOInFormat urlinFormatDef[] =
84 {
85 /* "Name",        "OBOterm", "Description" */
86 /*     Alias,   Try,     */
87 /*     ReadFunction */
88   {"unknown",     "0000", "Unknown format",
89        AJFALSE, AJFALSE,
90        &urlinReadHtml}, /* default to first format */
91   {"html",        "2331", "Html format",
92        AJFALSE, AJTRUE,
93        &urlinReadHtml},
94   {"HTML",        "2331", "Html format",
95        AJTRUE, AJFALSE,
96        &urlinReadHtml},
97   {NULL, NULL, NULL, 0, 0, NULL}
98 };
99 
100 
101 
102 static AjBool urlinRead(AjPUrlin urlin, AjPUrl url);
103 static AjBool urlinformatFind(const AjPStr format, ajint* iformat);
104 static AjBool urlinFormatSet(AjPUrlin urlin, AjPUrl url);
105 static AjBool urlinListProcess(AjPUrlin urlin, AjPUrl url,
106                                const AjPStr listfile);
107 static void urlinListNoComment(AjPStr* text);
108 static void urlinQryRestore(AjPUrlin urlin, const AjPQueryList node);
109 static void urlinQrySave(AjPQueryList node, const AjPUrlin urlin);
110 static AjBool urlDefine(AjPUrl thys, AjPUrlin urlin);
111 static AjBool urlinQryProcess(AjPUrlin urlin, AjPUrl url);
112 
113 
114 
115 
116 /* @filesection ajurlread ****************************************************
117 **
118 ** @nam1rule aj Function belongs to the AJAX library.
119 **
120 */
121 
122 
123 
124 
125 /* @datasection [AjPUrlin] Url input objects ***************************
126 **
127 ** Function is for manipulating url input objects
128 **
129 ** @nam2rule Urlin
130 ******************************************************************************/
131 
132 
133 
134 
135 /* @section Url input constructors ***************************************
136 **
137 ** All constructors return a new url input object by pointer. It
138 ** is the responsibility of the user to first destroy any previous
139 ** url input object. The target pointer does not need to be
140 ** initialised to NULL, but it is good programming practice to do so
141 ** anyway.
142 **
143 ** @fdata [AjPUrlin]
144 **
145 ** @nam3rule  New     Construct a new url input object
146 **
147 ** @valrule   *  [AjPUrlin] New url input object
148 **
149 ** @fcategory new
150 **
151 ******************************************************************************/
152 
153 
154 
155 
156 /* @func ajUrlinNew ***********************************************************
157 **
158 ** Creates a new url input object.
159 **
160 ** @return [AjPUrlin] New url input object.
161 ** @category new [AjPUrlin] Default constructor
162 **
163 ** @release 6.4.0
164 ** @@
165 ******************************************************************************/
166 
ajUrlinNew(void)167 AjPUrlin ajUrlinNew(void)
168 {
169     AjPUrlin pthis;
170 
171     AJNEW0(pthis);
172 
173     pthis->Input = ajTextinNewDatatype(AJDATATYPE_URL);
174 
175     pthis->Resource = NULL;
176     pthis->UrlData  = NULL;
177 
178     return pthis;
179 }
180 
181 
182 
183 
184 
185 /* @section url input destructors *********************************************
186 **
187 ** Destruction destroys all internal data structures and frees the
188 ** memory allocated for the url input object.
189 **
190 ** @fdata [AjPUrlin]
191 **
192 ** @nam3rule Del Destructor
193 **
194 ** @argrule Del pthis [AjPUrlin*] Url input object
195 **
196 ** @valrule * [void]
197 **
198 ** @fcategory delete
199 **
200 ******************************************************************************/
201 
202 
203 
204 
205 /* @func ajUrlinDel ***********************************************************
206 **
207 ** Deletes an url input object.
208 **
209 ** @param [d] pthis [AjPUrlin*] Url input
210 ** @return [void]
211 ** @category delete [AjPUrlin] Default destructor
212 **
213 ** @release 6.4.0
214 ** @@
215 ******************************************************************************/
216 
void ajUrlinDel(AjPUrlin* pthis)
{
    AjPUrlin urlin = NULL;
    AjPResquery qry = NULL;

    /* nothing to do for a missing pointer or an already deleted object */
    if(!pthis || !*pthis)
        return;

    urlin = *pthis;

    ajDebug("ajUrlinDel called qry:'%S'\n", urlin->Input->Qry);

    ajTextinDel(&urlin->Input);
    ajResourceDel(&urlin->Resource);

    ajStrDel(&urlin->Identifiers);
    ajStrDel(&urlin->Accession);
    ajStrDel(&urlin->IdTypes);

    ajListstrFree(&urlin->UrlList);

    /* each remaining resource query is deleted before the list itself */
    while(ajListPop(urlin->QryList, (void**) &qry))
        ajResqueryDel(&qry);

    ajListFree(&urlin->QryList);

    AJFREE(*pthis);
}
249 
250 
251 
252 
253 /* @section url input modifiers ******************************************
254 **
255 ** These functions use the contents of an url input object and
256 ** update them.
257 **
258 ** @fdata [AjPUrlin]
259 **
260 ** @nam3rule Clear Clear all values
261 ** @nam3rule Qry Reset using a query string
262 ** @suffix C Character string input
263 ** @suffix S String input
264 **
265 ** @argrule * thys [AjPUrlin] Url input object
266 ** @argrule C txt [const char*] Query text
267 ** @argrule S str [const AjPStr] query string
268 **
269 ** @valrule * [void]
270 **
271 ** @fcategory modify
272 **
273 ******************************************************************************/
274 
275 
276 
277 
278 /* @func ajUrlinClear *********************************************************
279 **
280 ** Clears an url input object back to "as new" condition, except
281 ** for the query list which must be preserved.
282 **
283 ** @param [w] thys [AjPUrlin] Url input
284 ** @return [void]
285 ** @category modify [AjPUrlin] Resets ready for reuse.
286 **
287 ** @release 6.4.0
288 ** @@
289 ******************************************************************************/
290 
ajUrlinClear(AjPUrlin thys)291 void ajUrlinClear(AjPUrlin thys)
292 {
293 
294     ajDebug("ajUrlinClear called\n");
295 
296     ajTextinClear(thys->Input);
297     ajResourceDel(&thys->Resource);
298     ajStrDel(&thys->Identifiers);
299     ajStrDel(&thys->Accession);
300     ajStrDel(&thys->IdTypes);
301     ajListstrFree(&thys->UrlList);
302 
303     thys->IsSwiss = ajFalse;
304     thys->IsEmbl = ajFalse;
305 
306     thys->UrlData = NULL;
307 
308     return;
309 }
310 
311 
312 
313 
314 /* @func ajUrlinQryC **********************************************************
315 **
316 ** Resets an url input object using a new Universal
317 ** Query Address
318 **
319 ** @param [u] thys [AjPUrlin] Url input object.
320 ** @param [r] txt [const char*] Query
321 ** @return [void]
322 **
323 ** @release 6.4.0
324 ** @@
325 ******************************************************************************/
326 
void ajUrlinQryC(AjPUrlin thys, const char* txt)
{
    /* reset the input object first, then store the new query text */
    ajUrlinClear(thys);

    ajStrAssignC(&thys->Input->Qry, txt);
}
334 
335 
336 
337 
338 
339 /* @func ajUrlinQryS **********************************************************
340 **
341 ** Resets an url input object using a new Universal
342 ** Query Address
343 **
344 ** @param [u] thys [AjPUrlin] Url input object.
345 ** @param [r] str [const AjPStr] Query
346 ** @return [void]
347 **
348 ** @release 6.4.0
349 ** @@
350 ******************************************************************************/
351 
void ajUrlinQryS(AjPUrlin thys, const AjPStr str)
{
    /* reset the input object first, then store the new query string */
    ajUrlinClear(thys);

    ajStrAssignS(&thys->Input->Qry, str);
}
359 
360 
361 
362 
363 /* @section casts *************************************************************
364 **
365 ** Return values
366 **
367 ** @fdata [AjPUrlin]
368 **
369 ** @nam3rule Trace Write debugging output
370 **
371 ** @argrule * thys [const AjPUrlin] Url input object
372 **
373 ** @valrule * [void]
374 **
375 ** @fcategory cast
376 **
377 ******************************************************************************/
378 
379 
380 
381 
382 /* @func ajUrlinTrace *********************************************************
383 **
384 ** Debug calls to trace the data in an url input object.
385 **
386 ** @param [r] thys [const AjPUrlin] Url input object.
387 ** @return [void]
388 **
389 ** @release 6.4.0
390 ** @@
391 ******************************************************************************/
392 
ajUrlinTrace(const AjPUrlin thys)393 void ajUrlinTrace(const AjPUrlin thys)
394 {
395     ajDebug("url input trace\n");
396     ajDebug("====================\n\n");
397 
398     ajTextinTrace(thys->Input);
399 
400     if(thys->UrlData)
401 	ajDebug( "  UrlData: exists\n");
402 
403     return;
404 }
405 
406 
407 
408 
409 /* @section Url data inputs **********************************************
410 **
** These functions read the url data provided by the first argument
412 **
413 ** @fdata [AjPUrlin]
414 **
415 ** @nam3rule Read Read url data
416 **
417 ** @argrule Read urlin [AjPUrlin] Url input object
418 ** @argrule Read url [AjPUrl] Url data
419 **
420 ** @valrule * [AjBool] true on success
421 **
422 ** @fcategory input
423 **
424 ******************************************************************************/
425 
426 
427 
428 
429 /* @func ajUrlinRead **********************************************************
430 **
431 ** If the file is not yet open, calls urlinQryProcess to convert the query
432 ** into an open file stream.
433 **
434 ** Uses urlinRead for the actual file reading.
435 **
436 ** Returns the results in the AjPUrl object.
437 **
438 ** @param [u] urlin [AjPUrlin] Url data input definitions
439 ** @param [w] url [AjPUrl] Url data returned.
440 ** @return [AjBool] ajTrue on success.
441 ** @category input [AjPUrl] Master url data input,
442 **                  calls specific functions for file access type
443 **                  and url data format.
444 **
445 ** @release 6.4.0
446 ** @@
447 ******************************************************************************/
448 
ajUrlinRead(AjPUrlin urlin,AjPUrl url)449 AjBool ajUrlinRead(AjPUrlin urlin, AjPUrl url)
450 {
451     AjBool ret       = ajFalse;
452     AjPQueryList node = NULL;
453     AjBool listdata  = ajFalse;
454 
455     if(urlin->Resource)
456     {
457 	/* (a) if file still open, keep reading */
458 	ajDebug("ajUrlinRead: input resource '%S' still there, try again\n",
459 		urlin->Resource->Id);
460 	ret = urlinRead(urlin, url);
461 	ajDebug("ajUrlinRead: open buffer  qry: '%S' returns: %B\n",
462 		urlin->Input->Qry, ret);
463     }
464     else
465     {
466 	/* (b) if we have a list, try the next query in the list */
467 	if(ajListGetLength(urlin->Input->List))
468 	{
469 	    listdata = ajTrue;
470 	    ajListPop(urlin->Input->List, (void**) &node);
471 
472 	    ajDebug("++pop from list '%S'\n", node->Qry);
473 	    ajUrlinQryS(urlin, node->Qry);
474 	    ajDebug("++SAVE WXYZIN '%S' '%S' %d\n",
475 		    urlin->Input->Qry,
476 		    urlin->Input->Formatstr, urlin->Input->Format);
477 
478             urlinQryRestore(urlin, node);
479 
480 	    ajStrDel(&node->Qry);
481 	    ajStrDel(&node->Formatstr);
482 	    AJFREE(node);
483 
484 	    ajDebug("ajUrlinRead: open list, try '%S'\n",
485                     urlin->Input->Qry);
486 
487 	    if(!urlinQryProcess(urlin, url) &&
488                !ajListGetLength(urlin->Input->List))
489 		return ajFalse;
490 
491 	    ret = urlinRead(urlin, url);
492 	    ajDebug("ajUrlinRead: list qry: '%S' returns: %B\n",
493 		    urlin->Input->Qry, ret);
494 	}
495 	else
496 	{
497 	    ajDebug("ajUrlinRead: no file yet - test query '%S'\n",
498                     urlin->Input->Qry);
499 
500 	    /* (c) Must be a query - decode it */
501 	    if(!urlinQryProcess(urlin, url) &&
502                !ajListGetLength(urlin->Input->List))
503 		return ajFalse;
504 
505 	    if(ajListGetLength(urlin->Input->List)) /* could be a new list */
506 		listdata = ajTrue;
507 
508 	    ret = urlinRead(urlin, url);
509 	    ajDebug("ajUrlinRead: new qry: '%S' returns: %B\n",
510 		    urlin->Input->Qry, ret);
511 	}
512     }
513 
514     /* Now read whatever we got */
515 
516     while(!ret && ajListGetLength(urlin->Input->List))
517     {
518 	/* Failed, but we have a list still - keep trying it */
519         if(listdata)
520 	    ajErr("Failed to read url data '%S'",
521                   urlin->Input->Qry);
522 
523 	listdata = ajTrue;
524 	ajListPop(urlin->Input->List,(void**) &node);
525 	ajDebug("++try again: pop from list '%S'\n", node->Qry);
526 	ajUrlinQryS(urlin, node->Qry);
527 	ajDebug("++SAVE (AGAIN) WXYZIN '%S' '%S' %d\n",
528 		urlin->Input->Qry,
529 		urlin->Input->Formatstr, urlin->Input->Format);
530 
531 	urlinQryRestore(urlin, node);
532 
533 	ajStrDel(&node->Qry);
534 	ajStrDel(&node->Formatstr);
535 	AJFREE(node);
536 
537 	if(!urlinQryProcess(urlin, url))
538 	    continue;
539 
540 	ret = urlinRead(urlin, url);
541 	ajDebug("ajUrlinRead: list retry qry: '%S' returns: %B\n",
542 		urlin->Input->Qry, ret);
543     }
544 
545     if(!ret)
546     {
547 	if(listdata)
548 	    ajErr("Failed to read url data '%S'",
549                   urlin->Input->Qry);
550 
551 	return ajFalse;
552     }
553 
554 
555     urlDefine(url, urlin);
556 
557     return ajTrue;
558 }
559 
560 
561 
562 
563 /* @funcstatic urlDefine ******************************************************
564 **
565 ** Make sure all url data object attributes are defined
566 ** using values from the url input object if needed
567 **
568 ** @param [w] thys [AjPUrl] Url data returned.
569 ** @param [u] urlin [AjPUrlin] Url data input definitions
570 ** @return [AjBool] ajTrue on success.
571 **
572 ** @release 6.4.0
573 ** @@
574 ******************************************************************************/
575 
urlDefine(AjPUrl thys,AjPUrlin urlin)576 static AjBool urlDefine(AjPUrl thys, AjPUrlin urlin)
577 {
578 
579     /* if values are missing in the url object, we can use defaults
580        from urlin or calculate where possible */
581 
582     /* assign the dbname if defined in the urlin object */
583     if(ajStrGetLen(urlin->Input->Db))
584       ajStrAssignS(&thys->Db, urlin->Input->Db);
585 
586     return ajTrue;
587 }
588 
589 
590 
591 
592 
593 /* @funcstatic urlinRead ******************************************************
594 **
595 ** Given data in an urlin structure, tries to read everything needed
596 ** using the specified format or by trial and error.
597 **
598 ** @param [u] urlin [AjPUrlin] Url data input object
599 ** @param [w] url [AjPUrl] Url data object
600 ** @return [AjBool] ajTrue on success
601 **
602 ** @release 6.4.0
603 ** @@
604 ******************************************************************************/
605 
urlinRead(AjPUrlin urlin,AjPUrl url)606 static AjBool urlinRead(AjPUrlin urlin, AjPUrl url)
607 {
608     AjPResource drcat = NULL;
609     AjPResquery resqry = NULL;
610     AjPReslink reslnk = NULL;
611     AjPStr urlstr = NULL;
612     AjIList iter;
613 /*    AjPTextin textin = urlin->Input;*/
614     AjPQuery qry = urlin->Input->Query;
615     AjPUrlAccess urlaccess = qry->Access;
616     AjBool ret = ajFalse;
617     ajuint nids = 0;
618     ajuint ntids = 0;
619     ajuint i;
620     AjPStrTok handle = NULL;
621     AjPStr snstr = NULL;
622     AjPStr idstr = NULL;
623     AjPStr tmpstr = NULL;
624 
625     ajUrlClear(url);
626 
627     ajDebug("urlinRead: swiss %B embl %B acc  '%S' idtype '%S' id '%S'\n",
628             urlin->IsSwiss, urlin->IsEmbl,
629             urlin->Accession, urlin->IdTypes,
630             urlin->Identifiers);
631 
632     if(!urlin->Resource)
633     {
634         if(!(*urlaccess->Access)(urlin))
635             return ajFalse;
636     }
637 
638     drcat = urlin->Resource;
639     if(!urlin->UrlList)
640     {
641         urlin->UrlList = ajListstrNew();
642         urlin->QryList = ajListNew();
643 
644         if(urlin->IsSwiss || urlin->IsEmbl)
645         {
646             iter = ajListIterNew(drcat->Xref);
647             while(!ajListIterDone(iter))
648             {
649                 reslnk = ajListIterGet(iter);
650                 ajDebug("test reslnk '%S' : '%S'\n",
651                         reslnk->Source, reslnk->Term);
652                 if(urlin->IsSwiss && ajStrPrefixC(reslnk->Source, "SP_"))
653                 {
654                     ajStrAssignS(&urlin->IdTypes, reslnk->Term);
655                     ajStrCutBraces(&urlin->IdTypes);
656                     ajDebug("idtypes %S: '%S'\n",
657                             reslnk->Source, urlin->IdTypes);
658                 }
659                 if(urlin->IsEmbl  && ajStrPrefixC(reslnk->Source, "EMBL_"))
660                 {
661                     ajStrAssignS(&urlin->IdTypes, reslnk->Term);
662                     ajStrCutBraces(&urlin->IdTypes);
663                     ajDebug("idtypes %S: '%S'\n",
664                             reslnk->Source, urlin->IdTypes);
665                 }
666             }
667             ajListIterDel(&iter);
668         }
669         if(ajStrGetLen(urlin->Accession))
670         {
671         }
672         if(ajStrGetLen(urlin->IdTypes))
673         {
674             ajStrCutBraces(&urlin->IdTypes);
675         }
676 
677         nids = 1 + (ajuint) ajStrCalcCountK(urlin->Identifiers, ';');
678 
679         iter = ajListIterNewread(drcat->Query);
680 
681         while(!ajListIterDone(iter))
682         {
683             resqry = ajListIterGet(iter);
684 
685             ajDebug("test qry %S | %S | %S\n",
686                     resqry->Datatype, resqry->Format, resqry->Term);
687 
688             ntids = 1 + (ajuint) ajStrCalcCountK(resqry->Term, ';');
689             if(ntids != nids)
690                 continue;
691 
692             if(!ajStrMatchC(resqry->Format, "HTML"))
693                 continue;
694 
695             if(ajStrGetLen(urlin->IdTypes))
696             {
697                 if(!ajStrPrefixS(resqry->Term, urlin->IdTypes))
698                     continue;
699                 if(!ajStrMatchS(resqry->Term, urlin->IdTypes))
700                 {
701                     ajStrAssignS(&tmpstr, resqry->Term);
702                     ajStrCutBraces(&tmpstr);
703                     if(!ajStrMatchS(tmpstr, urlin->IdTypes))
704                        continue;
705                 }
706             }
707 
708             urlstr = ajStrNewS(resqry->Url);
709             if(nids > 1)
710             {
711                 handle = ajStrTokenNewC(urlin->Identifiers, ";");
712                 for(i=0; i<nids; i++)
713                 {
714                     ajStrTokenNextParse(handle, &idstr);
715                     ajFmtPrintS(&snstr, "%%s%u", (i+1));
716                     ajStrExchangeSS(&urlstr, snstr, idstr);
717                 }
718                 ajStrTokenDel(&handle);
719                 ajStrDel(&snstr);
720                 ajStrDel(&idstr);
721             }
722             ajStrExchangeCS(&urlstr, "%s", urlin->Identifiers);
723             if(ajStrGetLen(urlin->Accession))
724                 ajStrExchangeCS(&urlstr, "%u", urlin->Accession);
725 
726             ajDebug("save query '%S'\n", urlstr);
727 
728             ajListPushAppend(urlin->UrlList, urlstr);
729             urlstr = NULL;
730             ajListPushAppend(urlin->QryList, ajResqueryNewResquery(resqry));
731         }
732 
733         ajListIterDel(&iter);
734     }
735 
736     if(ajListGetLength(urlin->UrlList))
737     {
738         ajListstrPop(urlin->UrlList, &url->Full);
739         ajListPop(urlin->QryList, (void**) &url->Resqry);
740         ajStrAssignS(&url->Id, urlin->Identifiers);
741         ret = ajTrue;
742     }
743 
744     if(!urlin->UrlList)
745     {
746         ajListstrFree(&urlin->UrlList);
747     }
748 
749     ajStrDel(&tmpstr);
750     return ret;
751 }
752 
753 
754 
755 
756 /* @funcstatic urlinReadHtml **************************************************
757 **
758 ** Given data in an url structure, tries to read everything needed
759 ** using HTML format.
760 **
761 ** @param [u] urlin [AjPUrlin] Url input object
762 ** @param [w] url [AjPUrl] url object
763 ** @return [AjBool] ajTrue on success
764 **
765 ** @release 6.4.0
766 ** @@
767 ******************************************************************************/
768 
urlinReadHtml(AjPUrlin urlin,AjPUrl url)769 static AjBool urlinReadHtml(AjPUrlin urlin, AjPUrl url)
770 {
771     AjPFilebuff buff;
772 
773     ajlong fpos     = 0;
774     ajuint linecnt = 0;
775 
776     ajDebug("urlinReadHtml\n");
777     ajUrlClear(url);
778     buff = urlin->Input->Filebuff;
779 
780     /* ajFilebuffTrace(buff); */
781 
782     while (ajBuffreadLinePos(buff, &urlinReadLine, &fpos))
783     {
784         linecnt++;
785 
786         if(ajStrGetCharLast(urlinReadLine) == '\n')
787             ajStrCutEnd(&urlinReadLine, 1);
788 
789         if(ajStrGetCharLast(urlinReadLine) == '\r')
790             ajStrCutEnd(&urlinReadLine, 1);
791 
792         ajDebug("line %u:%S\n", linecnt, urlinReadLine);
793 
794         /* add line to AjPUrl object */
795     }
796 
797     return ajTrue;
798 }
799 
800 
801 
802 
803 /* @datasection [none] Miscellaneous ******************************************
804 **
805 ** Url input internals
806 **
807 ** @nam2rule Urlin Url input
808 **
809 ******************************************************************************/
810 
811 
812 
813 
814 /* @section Printing **********************************************************
815 **
816 ** Printing details of the internals to a file
817 **
818 ** @fdata [none]
819 **
820 ** @nam2rule Urlinprint
821 **
822 ** @fcategory output
823 **
824 ******************************************************************************/
825 
826 
827 
828 
829 /* @section Print *************************************************************
830 **
831 ** Printing to a file
832 **
833 ** @fdata [none]
834 **
835 ** @nam3rule Book Print as docbook table
836 ** @nam3rule Html Print as html table
837 ** @nam3rule Wiki Print as wiki table
838 ** @nam3rule Text Print as text
839 **
840 ** @argrule * outf [AjPFile] output file
841 ** @argrule Text full [AjBool] Print all details
842 **
843 ** @valrule * [void]
844 **
845 ** @fcategory cast
846 **
847 ******************************************************************************/
848 
849 
850 
851 
852 /* @func ajUrlinprintBook *****************************************************
853 **
854 ** Reports the internal data structures as a Docbook table
855 **
856 ** @param [u] outf [AjPFile] Output file
857 ** @return [void]
858 **
859 ** @release 6.4.0
860 ** @@
861 ******************************************************************************/
862 
void ajUrlinprintBook(AjPFile outf)
{
    ajuint i = 0;
    ajuint j = 0;
    AjPStr namestr = NULL;
    AjPList fmtlist = NULL;
    AjPStr* names = NULL;

    fmtlist = ajListstrNew();

    ajFmtPrintF(outf, "<para>The supported url formats are summarised "
                "in the table below. "
                "The columns are as follows: "
                "<emphasis>Input format</emphasis> (format name), "
                "<emphasis>Try</emphasis> (indicates whether the "
                "format can be detected automatically on input), and "
                "<emphasis>Description</emphasis> (short description of "
                "the format).</para>\n\n");

    ajFmtPrintF(outf, "<table frame=\"box\" rules=\"cols\">\n");
    ajFmtPrintF(outf, "  <caption>Input url formats</caption>\n");
    ajFmtPrintF(outf, "  <thead>\n");
    ajFmtPrintF(outf, "    <tr align=\"center\">\n");
    ajFmtPrintF(outf, "      <th>Input Format</th>\n");
    ajFmtPrintF(outf, "      <th>Try</th>\n");
    ajFmtPrintF(outf, "      <th>Description</th>\n");
    ajFmtPrintF(outf, "    </tr>\n");
    ajFmtPrintF(outf, "  </thead>\n");
    ajFmtPrintF(outf, "  <tbody>\n");

    /* collect the non-alias format names, skipping the default entry 0 */
    for(i=1; urlinFormatDef[i].Name; i++)
    {
        if(!urlinFormatDef[i].Alias)
        {
            namestr = ajStrNewC(urlinFormatDef[i].Name);
            ajListPushAppend(fmtlist, namestr);
            namestr = NULL;     /* the list now owns the string */
        }
    }

    ajListSort(fmtlist, &ajStrVcmp);
    ajListstrToarray(fmtlist, &names);

    /* names is tested too, in case no array was returned */
    for(i=0; names && names[i]; i++)
    {
        for(j=0; urlinFormatDef[j].Name; j++)
        {
            if(ajStrMatchC(names[i],urlinFormatDef[j].Name))
            {
                ajFmtPrintF(outf, "    <tr>\n");
                ajFmtPrintF(outf, "      <td>%s</td>\n",
                            urlinFormatDef[j].Name);
                ajFmtPrintF(outf, "      <td>%B</td>\n",
                            urlinFormatDef[j].Try);
                ajFmtPrintF(outf, "      <td>%s</td>\n",
                            urlinFormatDef[j].Desc);
                ajFmtPrintF(outf, "    </tr>\n");
            }
        }
    }


    ajFmtPrintF(outf, "  </tbody>\n");
    ajFmtPrintF(outf, "</table>\n");
    ajStrDel(&namestr);

    /* Free the pointer array itself rather than just dropping it; the
    ** strings it points to are freed with the list below. */
    AJFREE(names);
    ajListstrFreeData(&fmtlist);

    return;
}
934 
935 
936 
937 
938 /* @func ajUrlinprintHtml *****************************************************
939 **
940 ** Reports the internal data structures as an HTML table
941 **
942 ** @param [u] outf [AjPFile] Output file
943 ** @return [void]
944 **
945 ** @release 6.4.0
946 ** @@
947 ******************************************************************************/
948 
void ajUrlinprintHtml(AjPFile outf)
{
    ajuint i = 0;
    ajuint j = 0;

    AjPStr namestr = NULL;

    /* Three header cells to match the three cells written per row
    ** (names, Try flag, description). The former "Multi" header had
    ** no matching data cell. */
    ajFmtPrintF(outf, "<table border=3>");
    ajFmtPrintF(outf, "<tr><th>Input Format</th><th>Auto</th>\n");
    ajFmtPrintF(outf, "<th>Description</th></tr>\n");

    /* entry 0 is the default format and is not listed */
    for(i=1; urlinFormatDef[i].Name; i++)
    {
        ajStrAssignC(&namestr, urlinFormatDef[i].Name);

        if(!urlinFormatDef[i].Alias)
        {
            /* append later entries that share the same read function */
            for(j=i+1; urlinFormatDef[j].Name; j++)
            {
                if(urlinFormatDef[j].Read == urlinFormatDef[i].Read)
                {
                    ajFmtPrintAppS(&namestr, " %s",
                                   urlinFormatDef[j].Name);
                    if(!urlinFormatDef[j].Alias)
                    {
                        ajWarn("Input format '%s' same as '%s' but not alias",
                               urlinFormatDef[j].Name,
                               urlinFormatDef[i].Name);
                    }
                }
            }

            ajFmtPrintF(outf, "<tr><td>\n%S\n</td><td>%B</td>\n",
                        namestr,
                        urlinFormatDef[i].Try);
            ajFmtPrintF(outf, "<td>\n%s\n</td></tr>\n",
                        urlinFormatDef[i].Desc);
        }

    }

    ajFmtPrintF(outf, "</table>\n");
    ajStrDel(&namestr);

    return;
}
995 
996 
997 
998 
999 /* @func ajUrlinprintText *****************************************************
1000 **
1001 ** Reports the internal data structures
1002 **
1003 ** @param [u] outf [AjPFile] Output file
1004 ** @param [r] full [AjBool] Full report (usually ajFalse)
1005 ** @return [void]
1006 **
1007 ** @release 6.4.0
1008 ** @@
1009 ******************************************************************************/
1010 
void ajUrlinprintText(AjPFile outf, AjBool full)
{
    const UrlOInFormat *fmt = NULL;

    ajFmtPrintF(outf, "\n");
    ajFmtPrintF(outf, "# Url input formats\n");
    ajFmtPrintF(outf, "# Name  Format name (or alias)\n");
    ajFmtPrintF(outf, "# Alias Alias name\n");
    ajFmtPrintF(outf, "# Try   Test for unknown input files\n");
    ajFmtPrintF(outf, "# Name         Alias Try "
                "Description");
    ajFmtPrintF(outf, "\n");
    ajFmtPrintF(outf, "InFormat {\n");

    /* every entry is visited, including the default entry at index 0 */
    for(fmt = urlinFormatDef; fmt->Name; fmt++)
    {
        /* aliases are listed only in a full report */
        if(!full && fmt->Alias)
            continue;

        ajFmtPrintF(outf,
                    "  %-12s %5B %3B \"%s\"\n",
                    fmt->Name,
                    fmt->Alias,
                    fmt->Try,
                    fmt->Desc);
    }

    ajFmtPrintF(outf, "}\n\n");
}
1038 
1039 
1040 
1041 
1042 /* @func ajUrlinprintWiki *****************************************************
1043 **
1044 ** Reports the internal data structures as a wiki table
1045 **
1046 ** @param [u] outf [AjPFile] Output file
1047 ** @return [void]
1048 **
1049 ** @release 6.4.0
1050 ** @@
1051 ******************************************************************************/
1052 
void ajUrlinprintWiki(AjPFile outf)
{
    const UrlOInFormat *cur   = NULL;
    const UrlOInFormat *other = NULL;
    AjPStr rownames = NULL;

    ajFmtPrintF(outf, "{| class=\"wikitable sortable\" border=\"2\"\n");
    ajFmtPrintF(outf, "|-\n");
    ajFmtPrintF(outf, "!Format!!Try!!"
                "class=\"unsortable\"|Description\n");

    /* entry 0 is the default format and is not listed */
    for(cur = &urlinFormatDef[1]; cur->Name; cur++)
    {
        ajStrAssignC(&rownames, cur->Name);

        /* aliases appear only in the row of the format they duplicate */
        if(cur->Alias)
            continue;

        /* append later entries that share the same read function */
        for(other = cur + 1; other->Name; other++)
        {
            if(other->Read != cur->Read)
                continue;

            ajFmtPrintAppS(&rownames, "<br>%s",
                           other->Name);

            if(!other->Alias)
                ajWarn("Input format '%s' same as '%s' but not alias",
                       other->Name,
                       cur->Name);
        }

        ajFmtPrintF(outf, "|-\n");
        ajFmtPrintF(outf,
                    "|%S||%B||%s\n",
                    rownames,
                    cur->Try,
                    cur->Desc);
    }

    ajFmtPrintF(outf, "|}\n\n");
    ajStrDel(&rownames);
}
1101 
1102 
1103 
1104 
1105 /* @section Miscellaneous *****************************************************
1106 **
1107 ** Functions to initialise and clean up internals
1108 **
1109 ** @fdata [none]
1110 **
1111 ** @nam3rule Exit Clean up and exit
1112 **
1113 ** @valrule * [void]
1114 **
1115 ** @fcategory misc
1116 **
1117 ******************************************************************************/
1118 
1119 
1120 
1121 
1122 /* @func ajUrlinExit **********************************************************
1123 **
1124 ** Cleans up url input internal memory
1125 **
1126 ** @return [void]
1127 **
1128 ** @release 6.4.0
1129 ** @@
1130 ******************************************************************************/
1131 
ajUrlinExit(void)1132 void ajUrlinExit(void)
1133 {
1134     ajStrDel(&urlinReadLine);
1135 
1136     ajTableDel(&urlDbMethods);
1137 
1138     return;
1139 }
1140 
1141 
1142 
1143 
1144 /* @section Internals *********************************************************
1145 **
1146 ** Functions to return internal values
1147 **
1148 ** @fdata [none]
1149 **
1150 ** @nam3rule Type Internals for url datatype
1151 ** @nam4rule Get  Return a value
1152 ** @nam5rule Fields  Known query fields for ajUrlinRead
1153 ** @nam5rule Qlinks  Known query link operators for ajUrlinRead
1154 **
1155 ** @valrule * [const char*] Internal value
1156 **
1157 ** @fcategory misc
1158 **
1159 ******************************************************************************/
1160 
1161 
1162 
1163 
1164 /* @func ajUrlinTypeGetFields *************************************************
1165 **
** Returns the list of known field names for ajUrlinRead
1167 **
1168 ** @return [const char*] List of field names
1169 **
1170 ** @release 6.4.0
1171 ** @@
1172 ******************************************************************************/
1173 
const char* ajUrlinTypeGetFields(void)
{
    /* space-separated field names; static storage, never freed by callers */
    static const char urlFields[] = "id acc";

    return urlFields;
}
1178 
1179 
1180 
1181 
1182 /* @func ajUrlinTypeGetQlinks *************************************************
1183 **
1184 ** Returns the list of known query link operators for ajUrlinRead
1185 **
** @return [const char*] List of query link operators
1187 **
1188 ** @release 6.4.0
1189 ** @@
1190 ******************************************************************************/
1191 
const char* ajUrlinTypeGetQlinks(void)
{
    /* the only link operator reported here is "|" */
    static const char urlQlinks[] = "|";

    return urlQlinks;
}
1196 
1197 
1198 
1199 
1200 /* @datasection [AjPTable] Internal call register table ***********************
1201 **
1202 ** Functions to manage the internal call register table that links the
1203 ** ajaxdb library functions with code in the core AJAX library.
1204 **
1205 ** @nam2rule Urlaccess Functions to manage urldb call tables.
1206 **
1207 ******************************************************************************/
1208 
1209 
1210 
1211 
1212 /* @section Cast **************************************************************
1213 **
1214 ** Return a reference to the call table
1215 **
1216 ** @fdata [AjPTable] urldb functions call table
1217 **
1218 ** @nam3rule Get Return a value
1219 ** @nam4rule Db Database access functions table
1220 ** @nam3rule Method Lookup an access method by name
1221 ** @nam4rule Test Return true if the access method exists
1222 ** @nam4rule MethodGet Return a method value
1223 ** @nam5rule Qlinks Return known query links for a named method
1224 ** @nam5rule Scope Return scope (entry, query or all) for a named method
1225 **
1226 ** @argrule Method method [const AjPStr] Method name
1227 **
1228 ** @valrule *Db [AjPTable] Call table of function names and references
1229 ** @valrule *Qlinks [const char*] Query link operators
1230 ** @valrule *Scope [ajuint] Scope flags
1231 ** @valrule *Test [AjBool] True if found
1232 **
1233 ** @fcategory cast
1234 **
1235 ******************************************************************************/
1236 
1237 
1238 
1239 
1240 /* @func ajUrlaccessGetDb *****************************************************
1241 **
1242 ** Returns the table in which url database access details are registered
1243 **
1244 ** @return [AjPTable] Access functions hash table
1245 **
1246 ** @release 6.4.0
1247 ** @@
1248 ******************************************************************************/
1249 
ajUrlaccessGetDb(void)1250 AjPTable ajUrlaccessGetDb(void)
1251 {
1252     if(!urlDbMethods)
1253         urlDbMethods = ajCallTableNew();
1254     return urlDbMethods;
1255 
1256 }
1257 
1258 
1259 
1260 
1261 /* @func ajUrlaccessMethodGetQlinks *******************************************
1262 **
** Tests for a named method for url data reading and returns the
** known query link operators
1265 **
1266 ** @param [r] method [const AjPStr] Method required.
1267 ** @return [const char*] Known link operators
1268 **
1269 ** @release 6.4.0
1270 ** @@
1271 ******************************************************************************/
1272 
ajUrlaccessMethodGetQlinks(const AjPStr method)1273 const char* ajUrlaccessMethodGetQlinks(const AjPStr method)
1274 {
1275     AjPUrlAccess methoddata;
1276 
1277     methoddata = ajCallTableGetS(urlDbMethods, method);
1278     if(!methoddata)
1279         return NULL;
1280 
1281     return methoddata->Qlink;
1282 }
1283 
1284 
1285 
1286 
1287 /* @func ajUrlaccessMethodGetScope ********************************************
1288 **
1289 ** Tests for a named method for url data reading and returns the scope
1290 ** (entry, query or all).
**
1292 ** @param [r] method [const AjPStr] Method required.
1293 ** @return [ajuint] Scope flags
1294 **
1295 ** @release 6.4.0
1296 ** @@
1297 ******************************************************************************/
1298 
ajUrlaccessMethodGetScope(const AjPStr method)1299 ajuint ajUrlaccessMethodGetScope(const AjPStr method)
1300 {
1301     AjPUrlAccess methoddata;
1302     ajuint ret = 0;
1303 
1304     methoddata = ajCallTableGetS(urlDbMethods, method);
1305     if(!methoddata)
1306         return 0;
1307 
1308     if(methoddata->Entry)
1309         ret |= AJMETHOD_ENTRY;
1310     if(methoddata->Query)
1311         ret |= AJMETHOD_QUERY;
1312     if(methoddata->All)
1313         ret |= AJMETHOD_ALL;
1314 
1315     return ret;
1316 }
1317 
1318 
1319 
1320 
1321 /* @func ajUrlaccessMethodTest ************************************************
1322 **
1323 ** Tests for a named method for url data reading.
1324 **
1325 ** @param [r] method [const AjPStr] Method required.
1326 ** @return [AjBool] ajTrue on success.
1327 **
1328 ** @release 6.4.0
1329 ** @@
1330 ******************************************************************************/
1331 
ajUrlaccessMethodTest(const AjPStr method)1332 AjBool ajUrlaccessMethodTest(const AjPStr method)
1333 {
1334     if(ajCallTableGetS(urlDbMethods, method))
1335       return ajTrue;
1336 
1337     return ajFalse;
1338 }
1339 
1340 
1341 
1342 
1343 /* @funcstatic urlinQryRestore ************************************************
1344 **
1345 ** Restores an url input specification from an AjPQueryList node
1346 **
1347 ** @param [w] urlin [AjPUrlin] Url input object
1348 ** @param [r] node [const AjPQueryList] Query list node
1349 ** @return [void]
1350 **
1351 ** @release 6.4.0
1352 ******************************************************************************/
1353 
static void urlinQryRestore(AjPUrlin urlin, const AjPQueryList node)
{
    /* copy the saved format name, then the saved format index, */
    /* from the node back into the text input object            */
    ajStrAssignS(&urlin->Input->Formatstr, node->Formatstr);
    urlin->Input->Format = node->Format;
}
1361 
1362 
1363 
1364 
1365 /* @funcstatic urlinQrySave ***************************************************
1366 **
1367 ** Saves an url input specification in an AjPQueryList node
1368 **
1369 ** @param [w] node [AjPQueryList] Query list node
1370 ** @param [r] urlin [const AjPUrlin] Url input object
1371 ** @return [void]
1372 **
1373 ** @release 6.4.0
1374 ******************************************************************************/
1375 
static void urlinQrySave(AjPQueryList node, const AjPUrlin urlin)
{
    /* record the current format name and index of the text input */
    /* object in the node so that urlinQryRestore can reinstate it */
    ajStrAssignS(&node->Formatstr, urlin->Input->Formatstr);
    node->Format = urlin->Input->Format;
}
1383 
1384 
1385 
1386 
1387 /* @funcstatic urlinQryProcess ************************************************
1388 **
1389 ** Converts an url data query into an open file.
1390 **
1391 ** Tests for "format::" and sets this if it is found
1392 **
1393 ** Then tests for "list:" or "@" and processes as a list file
1394 ** using urlinListProcess which in turn invokes urlinQryProcess
1395 ** until a valid query is found.
1396 **
1397 ** Then tests for dbname:query and opens the file (at the correct position
1398 ** if the database definition defines it)
1399 **
1400 ** If there is no database, looks for file:query and opens the file.
1401 ** In this case the file position is not known and url data reading
1402 ** will have to scan for the entry/entries we need.
1403 **
1404 ** @param [u] urlin [AjPUrlin] Url data input structure.
1405 ** @param [u] url [AjPUrl] Url data to be read.
1406 **                         The format will be replaced
1407 **                         if defined in the query string.
1408 ** @return [AjBool] ajTrue on success.
1409 **
1410 ** @release 6.4.0
1411 ** @@
1412 ******************************************************************************/
1413 
urlinQryProcess(AjPUrlin urlin,AjPUrl url)1414 static AjBool urlinQryProcess(AjPUrlin urlin, AjPUrl url)
1415 {
1416     AjBool ret = ajTrue;
1417     AjPStr qrystr = NULL;
1418     AjBool urlmethod = ajFalse;
1419     const AjPStr fmtstr = NULL;
1420     AjPTextin textin;
1421     AjPQuery qry;
1422     AjPUrlAccess urlaccess = NULL;
1423 
1424     textin = urlin->Input;
1425     qry = textin->Query;
1426 
1427     /* pick up the original query string */
1428     qrystr = ajStrNewS(textin->Qry);
1429 
1430     ajDebug("urlinQryProcess '%S'\n", qrystr);
1431 
1432     /* look for a format:: prefix */
1433     fmtstr = ajQuerystrParseFormat(&qrystr, textin, urlinformatFind);
1434     ajDebug("urlinQryProcess ... fmtstr '%S' '%S'\n", fmtstr, qrystr);
1435 
1436     /* (seq/feat) DO NOT look for a [range] suffix */
1437 
1438     /* look for a list:: or @:: listfile of queries  - process and return */
1439     if(ajQuerystrParseListfile(&qrystr))
1440     {
1441         ajDebug("urlinQryProcess ... listfile '%S'\n", qrystr);
1442         ret = urlinListProcess(urlin, url, qrystr);
1443         ajStrDel(&qrystr);
1444         return ret;
1445     }
1446 
1447     /* try general text access methods (file, asis, text database access */
1448     ajDebug("urlinQryProcess ... no listfile '%S'\n", qrystr);
1449     if(!ajQuerystrParseRead(&qrystr, textin, urlinformatFind, &urlmethod))
1450     {
1451         ajStrDel(&qrystr);
1452         return ajFalse;
1453     }
1454 
1455     urlinFormatSet(urlin, url);
1456 
1457     ajDebug("urlinQryProcess ... read nontext: %B '%S'\n",
1458             urlmethod, qrystr);
1459     ajStrDel(&qrystr);
1460 
1461     /* we found a non-text method */
1462     if(urlmethod)
1463     {
1464         ajDebug("urlinQryProcess ... call method '%S'\n", qry->Method);
1465         ajDebug("urlinQryProcess ... textin format %d '%S'\n",
1466                 textin->Format, textin->Formatstr);
1467         ajDebug("urlinQryProcess ...  query format  '%S'\n",
1468                 qry->Formatstr);
1469         qry->Access = ajCallTableGetS(urlDbMethods,qry->Method);
1470         urlaccess = qry->Access;
1471         return (*urlaccess->Access)(urlin);
1472     }
1473 
1474     ajDebug("urlinQryProcess text method '%S' success\n", qry->Method);
1475 
1476     return ajTrue;
1477 }
1478 
1479 
1480 
1481 
1482 
1483 /* @datasection [AjPList] Query field list ************************************
1484 **
1485 ** Query fields lists are handled internally. Only static functions
1486 ** should appear here
1487 **
1488 ******************************************************************************/
1489 
1490 
1491 
1492 
1493 /* @funcstatic urlinListProcess ***********************************************
1494 **
1495 ** Processes a file of queries.
1496 ** This function is called by, and calls, urlinQryProcess. There is
1497 ** a depth check to avoid infinite loops, for example where a list file
1498 ** refers to itself.
1499 **
1500 ** This function produces a list (AjPList) of queries with all list references
1501 ** expanded into lists of queries.
1502 **
1503 ** Because queries in a list can have their own format
1504 ** the prior settings are stored with each query in the list node so that they
1505 ** can be restored after.
1506 **
1507 ** @param [u] urlin [AjPUrlin] Url data input
1508 ** @param [u] url [AjPUrl] Url data
** @param [r] listfile [const AjPStr] Name of list file.
1510 ** @return [AjBool] ajTrue on success.
1511 **
1512 ** @release 6.4.0
1513 ** @@
1514 ******************************************************************************/
1515 
urlinListProcess(AjPUrlin urlin,AjPUrl url,const AjPStr listfile)1516 static AjBool urlinListProcess(AjPUrlin urlin, AjPUrl url,
1517                                const AjPStr listfile)
1518 {
1519     AjPList list  = NULL;
1520     AjPFile file  = NULL;
1521     AjPStr token  = NULL;
1522     AjPStr rest  = NULL;
1523     AjBool ret       = ajFalse;
1524     AjPQueryList node = NULL;
1525 
1526     ajuint recnum = 0;
1527     static ajint depth    = 0;
1528     static ajint MAXDEPTH = 16;
1529 
1530     depth++;
1531     ajDebug("++urlinListProcess %S depth %d\n",
1532 	    listfile, depth);
1533 
1534     if(depth > MAXDEPTH)
1535 	ajFatal("Query list too deep");
1536 
1537     if(!urlin->Input->List)
1538 	urlin->Input->List = ajListNew();
1539 
1540     list = ajListNew();
1541 
1542     file = ajFileNewInNameS(listfile);
1543 
1544     if(!file)
1545     {
1546 	ajErr("Failed to open list file '%S'", listfile);
1547 	depth--;
1548 
1549 	return ret;
1550     }
1551 
1552     while(ajReadlineTrim(file, &urlinReadLine))
1553     {
1554         ++recnum;
1555 	urlinListNoComment(&urlinReadLine);
1556 
1557         if(ajStrExtractWord(urlinReadLine, &rest, &token))
1558         {
1559             if(ajStrGetLen(rest))
1560             {
1561                 ajErr("Bad record %u in list file '%S'\n'%S'",
1562                       recnum, listfile, urlinReadLine);
1563             }
1564             else if(ajStrGetLen(token))
1565             {
1566                 ajDebug("++Add to list: '%S'\n", token);
1567                 AJNEW0(node);
1568                 ajStrAssignS(&node->Qry, token);
1569                 urlinQrySave(node, urlin);
1570                 ajListPushAppend(list, node);
1571             }
1572         }
1573     }
1574 
1575     ajFileClose(&file);
1576     ajStrDel(&token);
1577     ajStrDel(&rest);
1578 
1579     ajDebug("Trace urlin->Input->List\n");
1580     ajQuerylistTrace(urlin->Input->List);
1581     ajDebug("Trace new list\n");
1582     ajQuerylistTrace(list);
1583     ajListPushlist(urlin->Input->List, &list);
1584 
1585     ajDebug("Trace combined urlin->Input->List\n");
1586     ajQuerylistTrace(urlin->Input->List);
1587 
1588     /*
1589      ** now try the first item on the list
1590      ** this can descend recursively if it is also a list
1591      ** which is why we check the depth above
1592      */
1593 
1594     if(ajListPop(urlin->Input->List, (void**) &node))
1595     {
1596         ajDebug("++pop first item '%S'\n", node->Qry);
1597 	ajUrlinQryS(urlin, node->Qry);
1598 	urlinQryRestore(urlin, node);
1599 	ajStrDel(&node->Qry);
1600 	ajStrDel(&node->Formatstr);
1601 	AJFREE(node);
1602 	ajDebug("descending with query '%S'\n", urlin->Input->Qry);
1603 	ret = urlinQryProcess(urlin, url);
1604     }
1605 
1606     depth--;
1607     ajDebug("++urlinListProcess depth: %d returns: %B\n", depth, ret);
1608 
1609     return ret;
1610 }
1611 
1612 
1613 
1614 
1615 /* @funcstatic urlinListNoComment *********************************************
1616 **
** Strips comments from a character string (a line from a list file).
1618 ** Comments are blank lines or any text following a "#" character.
1619 **
1620 ** @param [u] text [AjPStr*] Line of text from input file.
1621 ** @return [void]
1622 **
1623 ** @release 6.4.0
1624 ** @@
1625 ******************************************************************************/
1626 
static void urlinListNoComment(AjPStr* text)
{
    char *hash = NULL;

    /* nothing to strip from an empty string */
    if(!ajStrGetLen(*text))
        return;

    MAJSTRGETUNIQUESTR(text);

    hash = strchr(ajStrGetPtr(*text), '#');

    /* no '#' means no comment on this line */
    if(!hash)
        return;

    /* cut the text at the '#', then have the string object revalidated */
    *hash = '\0';
    ajStrSetValid(text);

    return;
}
1649 
1650 
1651 
1652 
1653 /* @funcstatic urlinFormatSet *************************************************
1654 **
1655 ** Sets the input format for url data using the url data
1656 ** input object's defined format
1657 **
1658 ** @param [u] urlin [AjPUrlin] Url data input.
1659 ** @param [u] url [AjPUrl] Url data
1660 ** @return [AjBool] ajTrue on success.
1661 **
1662 ** @release 6.4.0
1663 ** @@
1664 ******************************************************************************/
1665 
urlinFormatSet(AjPUrlin urlin,AjPUrl url)1666 static AjBool urlinFormatSet(AjPUrlin urlin, AjPUrl url)
1667 {
1668 
1669     if(ajStrGetLen(urlin->Input->Formatstr))
1670     {
1671 	ajDebug("... input format value '%S'\n",
1672                 urlin->Input->Formatstr);
1673 
1674 	if(urlinformatFind(urlin->Input->Formatstr,
1675                              &urlin->Input->Format))
1676 	{
1677 	    ajStrAssignS(&url->Formatstr,
1678                          urlin->Input->Formatstr);
1679 	    url->Format = urlin->Input->Format;
1680 	    ajDebug("...format OK '%S' = %d\n",
1681                     urlin->Input->Formatstr,
1682 		    urlin->Input->Format);
1683 	}
1684 	else
1685 	    ajDebug("...format unknown '%S'\n",
1686                     urlin->Input->Formatstr);
1687 
1688 	return ajTrue;
1689     }
1690     else
1691 	ajDebug("...input format not set\n");
1692 
1693 
1694     return ajFalse;
1695 }
1696 
1697 
1698 
1699 
1700 /* @datasection [AjPUrlall] Url Input Stream ********************************
1701 **
** Functions for manipulating url input stream objects
1703 **
1704 ** @nam2rule Urlall Url input stream objects
1705 **
1706 ******************************************************************************/
1707 
1708 
1709 
1710 
1711 /* @section Url Input Constructors ******************************************
1712 **
1713 ** All constructors return a new url input stream object by pointer. It
1714 ** is the responsibility of the user to first destroy any previous
1715 ** url input object. The target pointer does not need to be
1716 ** initialised to NULL, but it is good programming practice to do so
1717 ** anyway.
1718 **
1719 ** @fdata [AjPUrlall]
1720 **
1721 ** @nam3rule New Constructor
1722 **
1723 ** @valrule * [AjPUrlall] Url input stream object
1724 **
1725 ** @fcategory new
1726 **
1727 ******************************************************************************/
1728 
1729 
1730 
1731 
1732 /* @func ajUrlallNew **********************************************************
1733 **
1734 ** Creates a new url input stream object.
1735 **
1736 ** @return [AjPUrlall] New url input stream object.
1737 **
1738 ** @release 6.4.0
1739 ** @@
1740 ******************************************************************************/
1741 
ajUrlallNew(void)1742 AjPUrlall ajUrlallNew(void)
1743 {
1744     AjPUrlall pthis;
1745 
1746     AJNEW0(pthis);
1747 
1748     pthis->Urlin = ajUrlinNew();
1749     pthis->Url   = ajUrlNew();
1750 
1751     return pthis;
1752 }
1753 
1754 
1755 
1756 
1757 
1758 /* ==================================================================== */
1759 /* ========================== destructors ============================= */
1760 /* ==================================================================== */
1761 
1762 
1763 
1764 
1765 /* @section Url Input Stream Destructors ************************************
1766 **
1767 ** Destruction destroys all internal data structures and frees the
1768 ** memory allocated for the url input stream object.
1769 **
1770 ** @fdata [AjPUrlall]
1771 **
1772 ** @nam3rule Del Destructor
1773 **
1774 ** @argrule Del pthis [AjPUrlall*] Url input stream
1775 **
1776 ** @valrule * [void]
1777 **
1778 ** @fcategory delete
1779 **
1780 ******************************************************************************/
1781 
1782 
1783 
1784 
1785 /* @func ajUrlallDel **********************************************************
1786 **
1787 ** Deletes a url input stream object.
1788 **
1789 ** @param [d] pthis [AjPUrlall*] Url input stream
1790 ** @return [void]
1791 **
1792 ** @release 6.4.0
1793 ** @@
1794 ******************************************************************************/
1795 
void ajUrlallDel(AjPUrlall* pthis)
{
    /* nothing to do for a NULL address or an already-empty pointer */
    if(!pthis || !*pthis)
        return;

    ajUrlinDel(&(*pthis)->Urlin);

    /* the url object is deleted here only when Returned is not set */
    if(!(*pthis)->Returned)
        ajUrlDel(&(*pthis)->Url);

    AJFREE(*pthis);

    return;
}
1816 
1817 
1818 
1819 
1820 /* ==================================================================== */
1821 /* =========================== Modifiers ============================== */
1822 /* ==================================================================== */
1823 
1824 
1825 
1826 
1827 /* @section Url input stream modifiers **************************************
1828 **
1829 ** These functions use the contents of a url input stream object and
1830 ** update them.
1831 **
1832 ** @fdata [AjPUrlall]
1833 **
1834 ** @nam3rule Clear Clear all values
1835 **
1836 ** @argrule * thys [AjPUrlall] Url input stream object
1837 **
1838 ** @valrule * [void]
1839 **
1840 ** @fcategory modify
1841 **
1842 ******************************************************************************/
1843 
1844 
1845 
1846 
1847 /* @func ajUrlallClear ********************************************************
1848 **
1849 ** Clears a url input stream object back to "as new" condition, except
1850 ** for the query list which must be preserved.
1851 **
1852 ** @param [w] thys [AjPUrlall] Url input stream
1853 ** @return [void]
1854 **
1855 ** @release 6.4.0
1856 ** @@
1857 ******************************************************************************/
1858 
void ajUrlallClear(AjPUrlall thys)
{
    ajDebug("ajUrlallClear called\n");

    /* clear the input object and the url object in turn */
    ajUrlinClear(thys->Urlin);
    ajUrlClear(thys->Url);

    /* the stream no longer has a url handed out to the caller */
    thys->Returned = ajFalse;
}
1872 
1873 
1874 
1875 
1876 /* @section Url input stream casts ******************************************
1877 **
1878 ** These functions return the contents of a url input stream object
1879 **
1880 ** @fdata [AjPUrlall]
1881 **
1882 ** @nam3rule Get Get url input stream values
1883 ** @nam3rule Geturl Get url values
1884 ** @nam4rule GeturlId Get url identifier
1885 **
1886 ** @argrule * thys [const AjPUrlall] Url input stream object
1887 **
1888 ** @valrule * [const AjPStr] String value
1889 **
1890 ** @fcategory cast
1891 **
1892 ******************************************************************************/
1893 
1894 
1895 
1896 
1897 /* @func ajUrlallGeturlId *****************************************************
1898 **
1899 ** Returns the identifier of the current url in an input stream
1900 **
1901 ** @param [r] thys [const AjPUrlall] Url input stream
1902 ** @return [const AjPStr] Identifier
1903 **
1904 ** @release 6.4.0
1905 ** @@
1906 ******************************************************************************/
1907 
ajUrlallGeturlId(const AjPUrlall thys)1908 const AjPStr ajUrlallGeturlId(const AjPUrlall thys)
1909 {
1910     if(!thys)
1911         return NULL;
1912 
1913     ajDebug("ajUrlallGeturlId called\n");
1914 
1915     return ajUrlGetId(thys->Url);
1916 }
1917 
1918 
1919 
1920 
1921 /* @section url input *******************************************************
1922 **
1923 ** These functions use a url input stream object to read data
1924 **
1925 ** @fdata [AjPUrlall]
1926 **
1927 ** @nam3rule Next Read next url
1928 **
1929 ** @argrule * thys [AjPUrlall] Url input stream object
1930 ** @argrule * Purl [AjPUrl*] Url object
1931 **
1932 ** @valrule * [AjBool] True on success
1933 **
1934 ** @fcategory input
1935 **
1936 ******************************************************************************/
1937 
1938 
1939 
1940 
1941 /* @func ajUrlallNext *********************************************************
1942 **
1943 ** Parse a url query into format, access, file and entry
1944 **
1945 ** Split at delimiters. Check for the first part as a valid format
1946 ** Check for the remaining first part as a database name or as a file
1947 ** that can be opened.
1948 ** Anything left is an entryname spec.
1949 **
1950 ** Return the results in the AjPUrl object but leave the file open for
1951 ** future calls.
1952 **
1953 ** @param [w] thys [AjPUrlall] Url input stream
1954 ** @param [u] Purl [AjPUrl*] Url returned
1955 ** @return [AjBool] ajTrue on success.
1956 **
1957 ** @release 6.4.0
1958 ** @@
1959 ******************************************************************************/
1960 
ajUrlallNext(AjPUrlall thys,AjPUrl * Purl)1961 AjBool ajUrlallNext(AjPUrlall thys, AjPUrl *Purl)
1962 {
1963     ajDebug("ajUrlallNext count:%u\n", thys->Count);
1964 
1965     if(!thys->Count)
1966     {
1967 	thys->Count = 1;
1968 
1969 	thys->Totterms++;
1970 
1971 	*Purl = thys->Url;
1972 	thys->Returned = ajTrue;
1973 
1974 	return ajTrue;
1975     }
1976 
1977 
1978     if(ajUrlinRead(thys->Urlin, thys->Url))
1979     {
1980 	thys->Count++;
1981 
1982 	thys->Totterms++;
1983 
1984 	*Purl = thys->Url;
1985 	thys->Returned = ajTrue;
1986 
1987 	ajDebug("ajUrlallNext success\n");
1988 
1989 	return ajTrue;
1990     }
1991 
1992     *Purl = NULL;
1993 
1994     ajDebug("ajUrlallNext failed\n");
1995 
1996     ajUrlallClear(thys);
1997 
1998     return ajFalse;
1999 }
2000 
2001 
2002 
2003 
2004 /* @datasection [none] Input formats ******************************************
2005 **
2006 ** Input formats internals
2007 **
2008 ** @nam2rule Urlinformat Url data input format specific
2009 **
2010 ******************************************************************************/
2011 
2012 
2013 
2014 
2015 /* @section cast **************************************************************
2016 **
2017 ** Values for input formats
2018 **
2019 ** @fdata [none]
2020 **
2021 ** @nam3rule Find Return index to named format
2022 ** @nam3rule Term Test format EDAM term
2023 ** @nam3rule Test Test format value
2024 **
2025 ** @argrule Find format [const AjPStr] Format name
2026 ** @argrule Term term [const AjPStr] Format EDAM term
2027 ** @argrule Test format [const AjPStr] Format name
2028 ** @argrule Find iformat [ajint*] Index matching format name
2029 **
2030 ** @valrule * [AjBool] True if found
2031 **
2032 ** @fcategory cast
2033 **
2034 ******************************************************************************/
2035 
2036 
2037 
2038 
2039 /* @funcstatic urlinformatFind ************************************************
2040 **
2041 ** Looks for the specified format(s) in the internal definitions and
2042 ** returns the index.
2043 **
2044 ** Sets iformat as the recognised format, and returns ajTrue.
2045 **
2046 ** @param [r] format [const AjPStr] Format required.
2047 ** @param [w] iformat [ajint*] Index
2048 ** @return [AjBool] ajTrue on success.
2049 **
2050 ** @release 6.4.0
2051 ** @@
2052 ******************************************************************************/
2053 
urlinformatFind(const AjPStr format,ajint * iformat)2054 static AjBool urlinformatFind(const AjPStr format, ajint* iformat)
2055 {
2056     AjPStr tmpformat = NULL;
2057     ajuint i = 0;
2058 
2059     /* ajDebug("urlinformatFind '%S'\n", format); */
2060     if(!ajStrGetLen(format))
2061 	return ajFalse;
2062 
2063     ajStrAssignS(&tmpformat, format);
2064     ajStrFmtLower(&tmpformat);
2065 
2066     for(i=0; urlinFormatDef[i].Name; i++)
2067     {
2068 	/* ajDebug("test %d '%s' '%s' '%s'\n",
2069            i, urlinFormatDef[i].Name,
2070            urlinFormatDef[i].Obo,
2071            urlinFormatDef[i].Desc); */
2072 	if(ajStrMatchCaseC(tmpformat, urlinFormatDef[i].Name) ||
2073            ajStrMatchC(format, urlinFormatDef[i].Obo))
2074 	{
2075 	    *iformat = i;
2076 	    ajStrDel(&tmpformat);
2077 	    /* ajDebug("found '%s' at %d\n", urlinFormatDef[i].Name, i); */
2078 	    return ajTrue;
2079 	}
2080     }
2081 
2082     ajErr("Unknown input format '%S'", format);
2083 
2084     ajStrDel(&tmpformat);
2085 
2086     return ajFalse;
2087 }
2088 
2089 
2090 
2091 
2092 /* @func ajUrlinformatTerm ****************************************************
2093 **
2094 ** Tests whether a url data input format term is known
2095 **
2096 ** @param [r] term [const AjPStr] Format term EDAM ID
2097 ** @return [AjBool] ajTrue if term was accepted
2098 **
2099 ** @release 6.4.0
2100 ** @@
2101 ******************************************************************************/
2102 
ajUrlinformatTerm(const AjPStr term)2103 AjBool ajUrlinformatTerm(const AjPStr term)
2104 {
2105     ajuint i;
2106 
2107     for(i=0; urlinFormatDef[i].Name; i++)
2108 	if(ajStrMatchC(term, urlinFormatDef[i].Obo))
2109 	    return ajTrue;
2110 
2111     return ajFalse;
2112 }
2113 
2114 
2115 
2116 
2117 /* @func ajUrlinformatTest ****************************************************
2118 **
2119 ** Tests whether a named url data input format is known
2120 **
2121 ** @param [r] format [const AjPStr] Format
2122 ** @return [AjBool] ajTrue if format was accepted
2123 **
2124 ** @release 6.4.0
2125 ** @@
2126 ******************************************************************************/
2127 
ajUrlinformatTest(const AjPStr format)2128 AjBool ajUrlinformatTest(const AjPStr format)
2129 {
2130     ajuint i;
2131 
2132     for(i=0; urlinFormatDef[i].Name; i++)
2133     {
2134 	if(ajStrMatchCaseC(format, urlinFormatDef[i].Name))
2135 	    return ajTrue;
2136 	if(ajStrMatchC(format, urlinFormatDef[i].Obo))
2137 	    return ajTrue;
2138     }
2139 
2140     return ajFalse;
2141 }
2142