1 /* @source dbxgcg application
2 **
3 ** Index GCG and PIR/NBRF format databases
4 **
5 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21 ******************************************************************************/
22 
23 #include "emboss.h"
24 
25 
26 
27 
/*
** Line-type codes assigned by the reference-file parsers to each line
** they read. The code decides which index field (if any) receives the
** remainder of the line.
*/
#define GCGTYPE_OTHER 0   /* line carries nothing that is indexed */
#define GCGTYPE_ID 1      /* entry identifier (EMBL "ID", GenBank "LOCUS") */
#define GCGTYPE_ACC 2     /* accession (EMBL "AC", GenBank "ACCESSION") */
#define GCGTYPE_DES 3     /* description (EMBL "DE") */
#define GCGTYPE_KEY 4     /* keywords (EMBL "KW") */
#define GCGTYPE_TAX 5     /* taxonomy (EMBL "OS" and "OC") */
#define GCGTYPE_VER 6     /* sequence version (EMBL "SV", GenBank "VERSION") */
35 
/*
** Working strings shared by the functions in this file.
** All of them are released at the end of main().
*/
static AjPStr dbxgcgRefline = NULL;     /* current line of the reference file */
static AjPStr dbxgcgSeqline = NULL;     /* current line of the sequence file */
static AjPStr dbxgcgTmpfd   = NULL;
static AjPStr dbxgcgTmpstr  = NULL;     /* general scratch string */
static AjPStr dbxgcgTmpline = NULL;     /* remainder of a line after its type code */
static AjPStr dbxgcgGcgtype = NULL;     /* 3rd field of a GCG ">>>>" sequence header */
static AjPStr dbxgcgGcgdate = NULL;     /* 2nd field of a GCG ">>>>" sequence header */
static AjPStr dbxgcgTypstr  = NULL;     /* line type code text, e.g. "ID" or "LOCUS" */
static AjPStr dbxgcgReflibstr = NULL;   /* entry name read from the reference file */

/*
** Regular expressions. Those whose compilation is visible in this file
** are compiled on first use; all are freed at the end of main().
*/

/* EMBL / SWISSPROT reference parsing */
static AjPRegexp dbxgcg_embl_typexp = NULL;
static AjPRegexp dbxgcg_embl_idexp  = NULL;
static AjPRegexp dbxgcg_embl_verexp = NULL;
static AjPRegexp dbxgcg_embl_wrdexp = NULL;
static AjPRegexp dbxgcg_embl_phrexp = NULL;
static AjPRegexp dbxgcg_embl_taxexp = NULL;

/* GCG ">>>>" header lines (ref and seq files) and split-entry suffix */
static AjPRegexp dbxgcg_gcg_rexp = NULL;
static AjPRegexp dbxgcg_gcg_sexp = NULL;
static AjPRegexp dbxgcg_splitexp = NULL;

/* PIR/NBRF parsing */
static AjPRegexp dbxgcg_pir_idexp  = NULL;
static AjPRegexp dbxgcg_pir_acexp  = NULL;
static AjPRegexp dbxgcg_pir_ac2exp = NULL;
static AjPRegexp dbxgcg_pir_keyexp = NULL;
static AjPRegexp dbxgcg_pir_taxexp = NULL;
static AjPRegexp dbxgcg_pir_tax2exp = NULL;
static AjPRegexp dbxgcg_pir_wrdexp = NULL;
static AjPRegexp dbxgcg_pir_phrexp = NULL;
static AjPRegexp dbxgcg_pir_pirexp = NULL;

/* GenBank reference parsing */
static AjPRegexp dbxgcg_genbank_typexp = NULL;
static AjPRegexp dbxgcg_genbank_morexp = NULL;
static AjPRegexp dbxgcg_genbank_wrdexp = NULL;
static AjPRegexp dbxgcg_genbank_phrexp = NULL;
static AjPRegexp dbxgcg_genbank_taxexp = NULL;
static AjPRegexp dbxgcg_genbank_verexp = NULL;
73 
/* Reference-file parsers, one per supported format (see parser[] table) */
static AjBool dbxgcg_ParseEmbl(AjPFile infr, AjPStr *reflibstr);
static AjBool dbxgcg_ParseGenbank( AjPFile infr, AjPStr *reflibstr);
static AjBool dbxgcg_ParsePir(AjPFile infr, AjPStr *reflibstr);

/* Reads one entry from the paired sequence/reference files */
static AjBool dbxgcg_NextEntry(EmbPBtreeEntry entry, AjPFile infs,
			       AjPFile infr, const AjPStr dbtype);

/* Format-specific entry readers called by dbxgcg_NextEntry */
static ajlong dbxgcg_gcggetent(EmbPBtreeEntry entry, AjPFile infs,
			       AjPFile infr, const AjPStr dbtype);
static ajlong dbxgcg_pirgetent(EmbPBtreeEntry entry, AjPFile infs,
			       AjPFile infr, const AjPStr dbtype);

/* Skips the remaining sections of a split GCG entry */
static ajlong dbxgcg_gcgappent(AjPFile infr, AjPFile infs,
			       AjPRegexp rexp, AjPRegexp sexp,
			       AjPStr* libstr);
89 
90 
91 
/*
** Index fields requested through the "fields" option. Each pointer is
** set in main() when the matching field name is given and stays NULL
** otherwise; the parsers and the indexing loop test them before use.
*/
EmbPBtreeField accfield = NULL;   /* "acc" */
EmbPBtreeField svfield = NULL;    /* "sv"  */
EmbPBtreeField orgfield = NULL;   /* "org" */
EmbPBtreeField desfield = NULL;   /* "des" */
EmbPBtreeField keyfield = NULL;   /* "key" */
97 
98 
99 
100 
101 /* @datastatic DbxgcgPParser *************************************************
102 **
103 ** Parser definition structure
104 **
105 ** @alias DbxgcgSParser
106 ** @alias DbxgcgOParser
107 **
108 ** @attr Name [const char*] Parser name
109 ** @attr GcgType [AjBool] Gcg type parser if true, PIR type if false
110 ** @attr Padding [char[4]] Padding to alignment boundary
111 ** @attr Parser [AjBool function] Parser function
112 ** @@
113 ******************************************************************************/
114 
typedef struct DbxgcgSParser
{
    const char* Name;       /* format name compared with the idformat value */
    AjBool GcgType;         /* true: read by dbxgcg_gcggetent,
                            ** false: read by dbxgcg_pirgetent */
    char   Padding[4];      /* padding to alignment boundary, never read */
    AjBool (*Parser) (AjPFile infr, AjPStr *reflibstr); /* reference-file
                                                        ** parser callback */
} DbxgcgOParser;
#define DbxgcgPParser DbxgcgOParser*
123 
124 
125 
126 
/*
** Table of supported formats. It is searched by name in the entry
** readers; the entry with a NULL name terminates the search.
*/
static DbxgcgOParser parser[] =
{
    {"EMBL", AJTRUE, "", dbxgcg_ParseEmbl},
    {"SWISS", AJTRUE, "", dbxgcg_ParseEmbl},    /* shares the EMBL parser */
    {"GENBANK", AJTRUE, "", dbxgcg_ParseGenbank},
    {"PIR", AJFALSE, "", dbxgcg_ParsePir},
    {NULL, 0, "", NULL}                         /* end-of-table marker */
};
135 
136 
137 
138 
139 
140 /* @prog dbxgcg **************************************************************
141 **
142 ** Index a flat file database
143 **
144 ******************************************************************************/
145 
main(int argc,char ** argv)146 int main(int argc, char **argv)
147 {
148     EmbPBtreeEntry entry = NULL;
149 
150     AjPStr dbname   = NULL;
151     AjPStr dbrs     = NULL;
152     AjPStr release  = NULL;
153     AjPStr datestr  = NULL;
154     AjBool statistics;
155     AjBool compressed;
156 
157     AjPStr directory;
158     AjPStr indexdir;
159     AjPStr filename;
160     AjPStr exclude;
161     AjPStr dbtype = NULL;
162     AjPFile outf = NULL;
163 
164     AjPStr *fieldarray = NULL;
165 
166     ajint nfields;
167     ajint nfiles;
168 
169     AjPStr refname = NULL;
170     AjPStr seqname = NULL;
171     AjPStr thysfile = NULL;
172 
173     ajint i;
174     AjPFile infs = NULL;
175     AjPFile infr = NULL;
176 
177     ajulong nentries = 0L;
178     ajulong ientries = 0L;
179     AjPTime starttime = NULL;
180     AjPTime begintime = NULL;
181     AjPTime nowtime = NULL;
182 
183     ajulong idpricache=0L, idpriread = 0L, idpriwrite = 0L, idprisize= 0L;
184     ajulong idseccache=0L, idsecread = 0L, idsecwrite = 0L, idsecsize= 0L;
185     ajulong acpricache=0L, acpriread = 0L, acpriwrite = 0L, acprisize= 0L;
186     ajulong acseccache=0L, acsecread = 0L, acsecwrite = 0L, acsecsize= 0L;
187     ajulong svpricache=0L, svpriread = 0L, svpriwrite = 0L, svprisize= 0L;
188     ajulong svseccache=0L, svsecread = 0L, svsecwrite = 0L, svsecsize= 0L;
189     ajulong kwpricache=0L, kwpriread = 0L, kwpriwrite = 0L, kwprisize= 0L;
190     ajulong kwseccache=0L, kwsecread = 0L, kwsecwrite = 0L, kwsecsize= 0L;
191     ajulong depricache=0L, depriread = 0L, depriwrite = 0L, deprisize= 0L;
192     ajulong deseccache=0L, desecread = 0L, desecwrite = 0L, desecsize= 0L;
193     ajulong txpricache=0L, txpriread = 0L, txpriwrite = 0L, txprisize= 0L;
194     ajulong txseccache=0L, txsecread = 0L, txsecwrite = 0L, txsecsize= 0L;
195 
196     embInit("dbxgcg", argc, argv);
197 
198     dbtype     = ajAcdGetListSingle("idformat");
199     fieldarray = ajAcdGetList("fields");
200     directory  = ajAcdGetDirectoryName("directory");
201     outf       = ajAcdGetOutfile("outfile");
202     indexdir   = ajAcdGetOutdirName("indexoutdir");
203     filename   = ajAcdGetString("filenames");
204     exclude    = ajAcdGetString("exclude");
205     dbname     = ajAcdGetString("dbname");
206     dbrs       = ajAcdGetString("dbresource");
207     release    = ajAcdGetString("release");
208     datestr    = ajAcdGetString("date");
209     statistics = ajAcdGetBoolean("statistics");
210     compressed = ajAcdGetBoolean("compressed");
211 
212     entry = embBtreeEntryNew(1);
213     if(compressed)
214         embBtreeEntrySetCompressed(entry);
215 
216     nfields = embBtreeSetFields(entry,fieldarray);
217     embBtreeSetDbInfo(entry,dbname,dbrs,datestr,release,dbtype,directory,
218 		      indexdir);
219 
220     for(i=0; i< nfields; i++)
221     {
222         if(ajStrMatchC(fieldarray[i], "acc"))
223         {
224             accfield = embBtreeGetFieldS(entry, fieldarray[i]);
225             if(compressed)
226                 embBtreeFieldSetCompressed(accfield);
227         }
228         else if(ajStrMatchC(fieldarray[i], "sv"))
229         {
230             svfield = embBtreeGetFieldS(entry, fieldarray[i]);
231             if(compressed)
232                 embBtreeFieldSetCompressed(svfield);
233         }
234         else if(ajStrMatchC(fieldarray[i], "des"))
235         {
236             desfield = embBtreeGetFieldS(entry, fieldarray[i]);
237             if(compressed)
238                 embBtreeFieldSetCompressed(desfield);
239         }
240         else if(ajStrMatchC(fieldarray[i], "key"))
241         {
242             keyfield = embBtreeGetFieldS(entry, fieldarray[i]);
243             if(compressed)
244                 embBtreeFieldSetCompressed(keyfield);
245         }
246         else if(ajStrMatchC(fieldarray[i], "org"))
247         {
248             orgfield = embBtreeGetFieldS(entry, fieldarray[i]);
249             if(compressed)
250                 embBtreeFieldSetCompressed(orgfield);
251         }
252         else if(!ajStrMatchC(fieldarray[i], "id"))
253             ajErr("Unknown field '%S' specified for indexing", fieldarray[i]);
254     }
255 
256     embBtreeGetRsInfo(entry);
257 
258     nfiles = embBtreeGetFiles(entry,directory,filename,exclude);
259     if(!nfiles)
260         ajDie("No input files in '%S' matched filename '%S'",
261               directory, filename);
262 
263 
264     for(i=0; i<nfiles; ++i)
265     {
266 	ajListPop(entry->files,(void **) &seqname);
267 	refname = ajStrNew();
268 	ajStrAssignS(&refname,seqname);
269 	ajFilenameReplaceExtC(&seqname,"seq");
270 	ajFilenameReplaceExtC(&refname,"ref");
271 	ajListstrPushAppend(entry->files, seqname);
272 	ajListstrPushAppend(entry->reffiles[0], refname);
273     }
274 
275 
276     embBtreeWriteEntryFile(entry);
277 
278     embBtreeOpenCaches(entry);
279 
280     starttime = ajTimeNewToday();
281 
282     ajFmtPrintF(outf, "Processing directory: %S\n", directory);
283 
284     for(i=0;i<nfiles;++i)
285     {
286         begintime = ajTimeNewToday();
287 
288 	ajListPop(entry->reffiles[0],(void **)&thysfile);
289 	ajListstrPushAppend(entry->files, thysfile);
290 	ajFmtPrintS(&dbxgcgTmpstr,"%S%S",entry->directory,thysfile);
291 	if(!(infr=ajFileNewInNameS(dbxgcgTmpstr)))
292 	    ajFatal("Cannot open input file %S\n",dbxgcgTmpstr);
293 
294 	ajListPop(entry->files,(void **)&thysfile);
295 	ajListstrPushAppend(entry->files, thysfile);
296 	ajFmtPrintS(&dbxgcgTmpstr,"%S%S",entry->directory,thysfile);
297 	if(!(infs=ajFileNewInNameS(dbxgcgTmpstr)))
298 	    ajFatal("Cannot open input file %S\n",dbxgcgTmpstr);
299 
300 	ajFilenameTrimPath(&dbxgcgTmpstr);
301 	ajFmtPrintF(outf,"Processing file: %S\n",dbxgcgTmpstr);
302 
303 	ientries = 0L;
304 
305 	while(dbxgcg_NextEntry(entry,infs,infr,dbtype))
306 	{
307 	    ++ientries;
308 
309 	    if(entry->do_id)
310                 embBtreeIndexEntry(entry, i);
311 
312 	    if(accfield)
313                 embBtreeIndexPrimary(accfield, entry, i);
314 
315 	    if(svfield)
316                 embBtreeIndexPrimary(svfield, entry, i);
317 
318 	    if(keyfield)
319                 embBtreeIndexSecondary(keyfield, entry);
320 
321 	    if(desfield)
322                 embBtreeIndexSecondary(desfield, entry);
323 
324 	    if(orgfield)
325                 embBtreeIndexSecondary(orgfield, entry);
326 	}
327 
328 	ajFileClose(&infs);
329 	ajFileClose(&infr);
330 	nentries += ientries;
331 	nowtime = ajTimeNewToday();
332 	ajFmtPrintF(outf, "entries: %Lu (%Lu) time: %.1fs (%.1fs)\n",
333 		    nentries, ientries,
334 		    ajTimeDiff(starttime, nowtime),
335 		    ajTimeDiff(begintime, nowtime));
336 
337         if(statistics)
338         {
339             if(entry->do_id)
340                 ajBtreeCacheStatsOut(outf, entry->idcache,
341                                      &idpricache, &idseccache,
342                                      &idpriread, &idsecread,
343                                      &idpriwrite, &idsecwrite,
344                                      &idprisize, &idsecsize);
345             if(accfield)
346                 ajBtreeCacheStatsOut(outf, accfield->cache,
347                                      &acpricache, &acseccache,
348                                      &acpriread, &acsecread,
349                                      &acpriwrite, &acsecwrite,
350                                      &acprisize, &acsecsize);
351             if(svfield)
352                 ajBtreeCacheStatsOut(outf, svfield->cache,
353                                      &svpricache, &svseccache,
354                                      &svpriread, &svsecread,
355                                      &svpriwrite, &svsecwrite,
356                                      &svprisize, &svsecsize);
357             if(keyfield)
358                 ajBtreeCacheStatsOut(outf, keyfield->cache,
359                                      &kwpricache, &kwseccache,
360                                      &kwpriread, &kwsecread,
361                                      &kwpriwrite, &kwsecwrite,
362                                      &kwprisize, &kwsecsize);
363             if(desfield)
364                 ajBtreeCacheStatsOut(outf, desfield->cache,
365                                      &depricache, &deseccache,
366                                      &depriread, &desecread,
367                                      &depriwrite, &desecwrite,
368                                      &deprisize, &desecsize);
369             if(orgfield)
370                 ajBtreeCacheStatsOut(outf, orgfield->cache,
371                                      &txpricache, &txseccache,
372                                      &txpriread, &txsecread,
373                                      &txpriwrite, &txsecwrite,
374                                      &txprisize, &txsecsize);
375         }
376 
377 	ajTimeDel(&begintime);
378 	ajTimeDel(&nowtime);
379     }
380 
381 
382     nowtime = ajTimeNewToday();
383     ajFmtPrintF(outf, "Total time: %.1fs\n", ajTimeDiff(starttime, nowtime));
384     ajTimeDel(&nowtime);
385     ajTimeDel(&starttime);
386 
387     embBtreeReportEntry(outf, entry);
388 
389     if(accfield)
390         embBtreeReportField(outf, accfield);
391     if(svfield)
392         embBtreeReportField(outf, svfield);
393     if(orgfield)
394         embBtreeReportField(outf, orgfield);
395     if(desfield)
396         embBtreeReportField(outf, desfield);
397     if(keyfield)
398         embBtreeReportField(outf, keyfield);
399 
400     embBtreeDumpParameters(entry);
401     embBtreeCloseCaches(entry);
402 
403     ajFileClose(&outf);
404     embBtreeEntryDel(&entry);
405 
406     ajStrDel(&filename);
407     ajStrDel(&exclude);
408     ajStrDel(&dbname);
409     ajStrDel(&dbrs);
410     ajStrDel(&release);
411     ajStrDel(&datestr);
412     ajStrDel(&directory);
413     ajStrDel(&indexdir);
414     ajStrDel(&dbtype);
415 
416     ajStrDel(&dbxgcgRefline);
417     ajStrDel(&dbxgcgSeqline);
418     ajStrDel(&dbxgcgTmpfd);
419     ajStrDel(&dbxgcgTmpstr);
420     ajStrDel(&dbxgcgTmpline);
421     ajStrDel(&dbxgcgGcgtype);
422     ajStrDel(&dbxgcgGcgdate);
423     ajStrDel(&dbxgcgTypstr);
424     ajStrDel(&dbxgcgReflibstr);
425 
426     nfields = 0;
427     while(fieldarray[nfields])
428 	ajStrDel(&fieldarray[nfields++]);
429     AJFREE(fieldarray);
430 
431     ajRegFree(&dbxgcg_embl_typexp);
432     ajRegFree(&dbxgcg_embl_idexp);
433     ajRegFree(&dbxgcg_embl_verexp);
434     ajRegFree(&dbxgcg_embl_wrdexp);
435     ajRegFree(&dbxgcg_embl_phrexp);
436     ajRegFree(&dbxgcg_embl_taxexp);
437 
438     ajRegFree(&dbxgcg_gcg_rexp);
439     ajRegFree(&dbxgcg_gcg_sexp);
440 
441     ajRegFree(&dbxgcg_splitexp);
442 
443     ajRegFree(&dbxgcg_pir_idexp);
444     ajRegFree(&dbxgcg_pir_acexp);
445     ajRegFree(&dbxgcg_pir_ac2exp);
446     ajRegFree(&dbxgcg_pir_keyexp);
447     ajRegFree(&dbxgcg_pir_taxexp);
448     ajRegFree(&dbxgcg_pir_tax2exp);
449     ajRegFree(&dbxgcg_pir_wrdexp);
450     ajRegFree(&dbxgcg_pir_phrexp);
451     ajRegFree(&dbxgcg_pir_pirexp);
452 
453     ajRegFree(&dbxgcg_genbank_typexp);
454     ajRegFree(&dbxgcg_genbank_morexp);
455     ajRegFree(&dbxgcg_genbank_wrdexp);
456     ajRegFree(&dbxgcg_genbank_phrexp);
457     ajRegFree(&dbxgcg_genbank_taxexp);
458     ajRegFree(&dbxgcg_genbank_verexp);
459 
460     embExit();
461 
462     return 0;
463 }
464 
465 
466 
467 
468 /* @funcstatic dbxgcg_NextEntry ***********************************************
469 **
** Reads the next database entry into the b+tree entry object
471 **
472 ** @param [u] entry [EmbPBtreeEntry] b+tree entry pointer
473 ** @param [u] infs [AjPFile] sequence file
474 ** @param [u] infr [AjPFile] reference file
475 ** @param [r] dbtype [const AjPStr] Id format in GCG file
476 ** @return [AjBool] ajTrue if successful read
477 ** @@
478 ******************************************************************************/
479 
dbxgcg_NextEntry(EmbPBtreeEntry entry,AjPFile infs,AjPFile infr,const AjPStr dbtype)480 static AjBool dbxgcg_NextEntry(EmbPBtreeEntry entry, AjPFile infs,
481 			       AjPFile infr, const AjPStr dbtype)
482 {
483     char *p;
484 
485     if(!dbxgcg_splitexp)
486 	dbxgcg_splitexp = ajRegCompC("_0+$");
487 
488     entry->reffpos[0] = ajFileResetPos(infr);
489     entry->fpos    = ajFileResetPos(infs);
490 
491     if(!dbxgcg_gcggetent(entry, infs, infr, dbtype) &&
492        !dbxgcg_pirgetent(entry, infs, infr, dbtype))
493 	return ajFalse;
494 
495     ajDebug("id '%S' seqfpos:%d reffpos:%d\n",
496 	    entry->id, entry->fpos, entry->reffpos);
497 
498     ajStrAssignC(&dbxgcgTmpstr,ajStrGetPtr(entry->id));
499 
500     if(ajRegExec(dbxgcg_splitexp, entry->id))
501     {
502 	p  = strrchr(ajStrGetPtr(dbxgcgTmpstr),'_');
503 	*p = '\0';
504 	ajStrAssignC(&entry->id,ajStrGetPtr(dbxgcgTmpstr));
505     }
506 
507     return ajTrue;
508 }
509 
510 
511 
512 
513 /* @funcstatic dbxgcg_gcggetent ***********************************************
514 **
515 ** get a single entry from the GCG database files
516 **
517 ** @param [u] entry [EmbPBtreeEntry] b+tree entry pointer
518 ** @param [u] infs [AjPFile] sequence file
519 ** @param [u] infr [AjPFile] reference file
520 ** @param [r] dbtype [const AjPStr] Id format in GCG file
521 ** @return [ajlong] Sequence length
522 ** @@
523 ******************************************************************************/
524 
dbxgcg_gcggetent(EmbPBtreeEntry entry,AjPFile infs,AjPFile infr,const AjPStr dbtype)525 static ajlong dbxgcg_gcggetent(EmbPBtreeEntry entry, AjPFile infs,
526 			       AjPFile infr, const AjPStr dbtype)
527 {
528     static ajint called   = 0;
529     static ajint iparser  = -1;
530     ajlong gcglen = 0;
531     ajlong rblock;
532     ajint i;
533 
534     ajStrAssignC(&dbxgcgSeqline, "");
535     ajStrAssignC(&dbxgcgRefline, "");
536 
537     if(!called)
538     {
539 	for(i=0; parser[i].Name; i++)
540 	    if(ajStrMatchC(dbtype, parser[i].Name))
541 	    {
542 		iparser = i;
543 		break;
544 	    }
545 
546 	if(iparser < 0)
547 	    ajFatal("dbtype '%S' unknown", dbtype);
548 
549 	ajDebug("dbtype '%S' Parser %d\n", dbtype, iparser);
550 	called = 1;
551     }
552 
553     if(!parser[iparser].GcgType)
554     {
555       return 0;
556     }
557 
558     if(!dbxgcg_gcg_rexp)
559 	dbxgcg_gcg_rexp = ajRegCompC("^>>>>([^ \t\n]+)");
560 
561     if(!dbxgcg_gcg_sexp)
562 	dbxgcg_gcg_sexp = ajRegCompC("^>>>>([^ \t]+)[ \t]+"
563 				     "(Dummy Header|[^ \t]+)[ \t]+([^ \t]+)"
564 				     "[ \t]+([^ \t]+)[ \t]+([0-9]+)");
565 
566     /* check for seqid first line */
567     while(ajStrGetCharFirst(dbxgcgSeqline)!='>')
568     {
569 	if(!ajReadline(infs, &dbxgcgSeqline))
570 	  {
571               return 0;			/* end of file */
572 	  }
573 	ajDebug("... read until next seq %Ld '%S'\n",
574 		ajFileResetPos(infs), dbxgcgSeqline);
575     }
576 
577     ajDebug("dbxgcg_gcggetent .seq (%S) %Ld '%S'\n",
578 	    dbtype, ajFileResetPos(infs), dbxgcgSeqline);
579 
580     /* get the encoding/sequence length info */
581     if(!ajRegExec(dbxgcg_gcg_sexp, dbxgcgSeqline))
582     {
583         ajDebug("dbxgcg_gcggetent sequence expression FAILED\n");
584 	return 0;
585     }
586 
587     ajRegSubI(dbxgcg_gcg_sexp, 1, &entry->id);		/* Entry ID returned */
588 
589     ajRegSubI(dbxgcg_gcg_sexp, 2, &dbxgcgGcgdate);
590     ajRegSubI(dbxgcg_gcg_sexp, 3, &dbxgcgGcgtype);
591     ajRegSubI(dbxgcg_gcg_sexp, 5, &dbxgcgTmpstr);
592     ajStrToLong(dbxgcgTmpstr, &gcglen);
593 
594     ajDebug("new entry '%S' date:'%S' type:'%S' len:'%S'=%Ld\n",
595 	    entry->id, dbxgcgGcgdate, dbxgcgGcgtype, dbxgcgTmpstr, gcglen);
596 
597     ajDebug("dbxgcg_gcggetent .ref (%S) %Ld '%S'\n",
598 	    dbtype, ajFileResetPos(infr), dbxgcgRefline);
599 
600     /* check for refid first line */
601     while(ajStrGetCharFirst(dbxgcgRefline)!='>')
602     {
603 	if(!ajReadline(infr, &dbxgcgRefline))
604 	{
605 	    ajErr("ref ended before seq");
606 	    break;			/* end of file */
607 	}
608 	ajDebug("... read until next ref %Ld '%S'\n", ajFileResetPos(infr), dbxgcgRefline);
609     }
610 
611     /* get the encoding/sequence length info */
612 
613     ajRegExec(dbxgcg_gcg_rexp, dbxgcgRefline);
614     ajRegSubI(dbxgcg_gcg_rexp, 1, &dbxgcgReflibstr);
615 
616     (*parser[iparser].Parser)(infr,
617                               &dbxgcgReflibstr); /* writes alistfile data */
618 
619     /* get the description line */
620     ajReadline(infs, &dbxgcgSeqline);
621 
622     /* seek to the end of the sequence; +1 to jump over newline */
623     if(ajStrGetCharFirst(dbxgcgGcgtype)=='2')
624     {
625 	rblock = (gcglen+3)/4;
626 	ajFileSeek(infs,rblock+1,SEEK_CUR);
627     }
628     else
629 	ajFileSeek(infs,gcglen+1,SEEK_CUR);
630 
631     /*
632     **  for big entries, need to append until we have all the parts.
633     **  They are named with _0 on the first part, _1 on the second and so on.
634     **  or _00 on the first part, _01 on the second and so on.
635     **  We can look for the "id_" prefix.
636     */
637 
638     if(!ajStrSuffixC(entry->id, "_0") &&
639        !ajStrSuffixC(entry->id,"_00") &&
640        !ajStrSuffixC(entry->id,"_000") &&
641        !ajStrSuffixC(entry->id,"_0000"))
642 	return gcglen;
643 
644     gcglen += dbxgcg_gcgappent(infr, infs, dbxgcg_gcg_rexp, dbxgcg_gcg_sexp,
645 			       &entry->id);
646 
647     return gcglen;
648 }
649 
650 
651 
652 
653 /* @funcstatic dbxgcg_pirgetent ***********************************************
654 **
655 ** Get a single entry from the PIR database files
656 **
657 ** @param [u] entry [EmbPBtreeEntry] b+tree entry pointer
658 ** @param [u] infs [AjPFile] sequence file
659 ** @param [u] infr [AjPFile] reference file
660 ** @param [r] dbtype [const AjPStr] Id format in GCG file
661 ** @return [ajlong] Sequence length
662 ** @@
663 ******************************************************************************/
664 
dbxgcg_pirgetent(EmbPBtreeEntry entry,AjPFile infs,AjPFile infr,const AjPStr dbtype)665 static ajlong dbxgcg_pirgetent(EmbPBtreeEntry entry, AjPFile infs,
666 			       AjPFile infr, const AjPStr dbtype)
667 {
668     ajint i;
669     static ajint called  = 0;
670     static ajint iparser = -1;
671     ajlong gcglen;
672     ajlong spos = 0;
673 
674     ajStrAssignC(&dbxgcgSeqline, "");
675     ajStrAssignC(&dbxgcgRefline, "");
676 
677     if(!called)
678     {
679 	for(i=0; parser[i].Name; i++)
680 	    if(ajStrMatchC(dbtype, parser[i].Name))
681 	    {
682 		iparser = i;
683 		break;
684 	    }
685 
686 	if(iparser < 0)
687 	    ajFatal("dbtype '%S' unknown", dbtype);
688 	ajDebug("dbtype '%S' Parser %d\n", dbtype, iparser);
689 	called = 1;
690     }
691 
692     if(parser[iparser].GcgType)
693 	return 0;
694 
695     if(!dbxgcg_pir_pirexp)
696 	dbxgcg_pir_pirexp = ajRegCompC("^>..;([^ \t\n]+)");
697 
698     /* skip to seqid first line */
699     while(ajStrGetCharFirst(dbxgcgSeqline)!='>')
700 	if(!ajReadline(infs, &dbxgcgSeqline))
701         {
702 	    return 0;			/* end of file */
703         }
704 
705     ajDebug("dbxgcg_pirgetent .seq (%S) %Ld '%S' \n",
706 	    dbtype, ajFileResetPos(infs), dbxgcgSeqline);
707 
708     ajRegExec(dbxgcg_pir_pirexp, dbxgcgSeqline);
709 
710     /* skip to refid first line */
711     while(ajStrGetCharFirst(dbxgcgRefline)!='>')
712 	if(!ajReadline(infr, &dbxgcgRefline))
713 	{
714 	    ajErr("ref ended before seq"); /* end of file */
715 	    break;
716 	}
717 
718     /* get the encoding/sequence length info */
719 
720     ajRegExec(dbxgcg_pir_pirexp, dbxgcgRefline);
721     ajRegSubI(dbxgcg_pir_pirexp, 1, &dbxgcgReflibstr);
722     ajRegSubI(dbxgcg_pir_pirexp, 1, &entry->id);
723 
724     ajDebug("dbigcg_pirgetent seqid '%S' spos: %Ld\n",
725 	    entry->id, ajFileResetPos(infs));
726     ajDebug("dbxgcg_pirgetent refid '%S' spos: %Ld\n",
727 	    entry->id, ajFileResetPos(infr));
728 
729     (*parser[iparser].Parser)(infr,
730                               &dbxgcgReflibstr);/* writes alistfile data */
731 
732     /* get the description line */
733     ajReadline(infs, &dbxgcgSeqline);
734     gcglen = 0;
735 
736     /* seek to the end of the sequence; +1 to jump over newline */
737     while(ajStrGetCharFirst(dbxgcgSeqline)!='>')
738     {
739 	spos = ajFileResetPos(infs);
740 	if(!ajReadline(infs, &dbxgcgSeqline))
741 	{
742 	    spos = 0;
743 	    break;
744 	}
745 	gcglen += ajStrGetLen(dbxgcgSeqline);
746     }
747 
748     if(spos)
749 	ajFileSeek(infs, spos, 0);
750 
751     ajDebug("dbxgcg_pirgetent end spos %Ld line '%S'\n", spos, dbxgcgSeqline);
752 
753     return gcglen;
754 }
755 
756 
757 
758 
759 /* @funcstatic dbxgcg_gcgappent ***********************************************
760 **
761 ** Go to end of a split GCG entry
762 **
763 ** @param [u] infr [AjPFile] Reference file
764 ** @param [u] infs [AjPFile] Sequence file
765 ** @param [u] rexp [AjPRegexp] Regular expression to find ID in ref file
766 ** @param [u] sexp [AjPRegexp] Regular expression to find ID in seq file
767 ** @param [w] libstr [AjPStr*] ID
** @return [ajlong] Always 1; the length of the skipped sections is not computed
769 ** @@
770 ******************************************************************************/
771 
dbxgcg_gcgappent(AjPFile infr,AjPFile infs,AjPRegexp rexp,AjPRegexp sexp,AjPStr * libstr)772 static ajlong dbxgcg_gcgappent(AjPFile infr, AjPFile infs,
773 			       AjPRegexp rexp, AjPRegexp sexp,
774 			       AjPStr* libstr)
775 {
776     AjPStr reflibstr = NULL;
777     AjPStr seqlibstr = NULL;
778     AjPStr testlibstr = NULL;
779     ajint ilen;
780 
781     AjBool isend;
782     const char *p;
783     char *q;
784     ajlong rpos;
785     ajlong spos;
786 
787     /*
788     ** keep reading until the end of entry is reached
789     ** and return the extra number of bases
790     */
791 
792     if(!testlibstr)
793 	testlibstr = ajStrNew();
794 
795     ajStrAssignS(&dbxgcgTmpstr,*libstr);
796 
797     ajDebug("dbi_gcgappent '%S'\n", dbxgcgTmpstr);
798 
799     p = ajStrGetPtr(dbxgcgTmpstr);
800     q = strrchr(p,'_');
801     *q = '\0';
802 
803 
804     ajFmtPrintS(&testlibstr, "%s_",p);
805     ilen = ajStrGetLen(testlibstr);
806 
807     isend = ajFalse;
808 
809     while(!isend)
810     {
811         spos = ajFileResetPos(infs);
812 	ajReadline(infs,&dbxgcgSeqline);
813 	while(strncmp(ajStrGetPtr(dbxgcgSeqline),">>>>",4))
814 	{
815 	    spos = ajFileResetPos(infs);
816 	    if(!ajReadline(infs, &dbxgcgSeqline))
817 	    {
818 	      ajStrDel(&reflibstr);
819 	      ajStrDel(&seqlibstr);
820 	      ajStrDel(&testlibstr);
821 	      ajDebug("end of file on seq\n");
822 	      return 1L;
823 	    }
824 	}
825 
826 	ajRegExec(sexp, dbxgcgSeqline);
827 	ajRegSubI(sexp, 1, &seqlibstr);
828 
829 	rpos = ajFileResetPos(infr);
830 	ajReadline(infr, &dbxgcgRefline);
831 
832 	while(ajStrGetCharFirst(dbxgcgRefline)!='>')
833 	{
834 	  rpos = ajFileResetPos(infr);
835 	  if(!ajReadline(infr, &dbxgcgRefline))
836 	  {
837 	    ajDebug("end of file on seq\n");
838 	    ajDebug("ref ended before seq\n");
839 	    ajErr("ref ended before seq\n");
840 	    break;
841 	  }
842 	}
843 
844 	ajRegExec(rexp, dbxgcgRefline);
845 	ajRegSubI(rexp, 1, &reflibstr);
846 
847 	if(ajStrCmpLenS(reflibstr, testlibstr, ilen) ||
848 	   ajStrCmpLenS(seqlibstr, testlibstr, ilen))
849 	    isend = ajTrue;
850 
851 	ajDebug("gcgappent %B test: '%S' seq: '%S' ref: '%S'\n",
852 		isend, testlibstr, seqlibstr, reflibstr);
853     }
854 
855     ajDebug("gcgappent done at seq: '%S' ref: '%S'\n", seqlibstr, reflibstr);
856 
857     ajStrAssignC(libstr,p);
858 
859     ajFileSeek(infr, rpos, 0);
860     ajFileSeek(infs, spos, 0);
861 
862     ajStrDel(&reflibstr);
863     ajStrDel(&seqlibstr);
864     ajStrDel(&testlibstr);
865 
866     return 1L;
867 }
868 
869 
870 
871 
872 /* @funcstatic dbxgcg_ParseEmbl ***********************************************
873 **
874 ** Parse the ID, accession from an EMBL or SWISSPROT entry
875 **
876 ** @param [u] infr [AjPFile] reference file
877 ** @param [w] id [AjPStr*] ID
** @return [AjBool] Always ajFalse; callers in this file ignore the value.
879 ** @@
880 ******************************************************************************/
881 
dbxgcg_ParseEmbl(AjPFile infr,AjPStr * id)882 static AjBool dbxgcg_ParseEmbl(AjPFile infr,
883 			       AjPStr *id)
884 {
885     ajint lineType;
886     ajlong rpos;
887 
888     if(!dbxgcg_embl_typexp)
889 	dbxgcg_embl_typexp = ajRegCompC("^([A-Z][A-Z]) +");
890 
891     if(!dbxgcg_embl_wrdexp)
892 	dbxgcg_embl_wrdexp = ajRegCompC("([A-Za-z0-9_]+)");
893 
894     if(!dbxgcg_embl_verexp)
895 	dbxgcg_embl_verexp = ajRegCompC("([A-Za-z0-9]+[.][0-9]+)");
896 
897     if(!dbxgcg_embl_phrexp)
898 	dbxgcg_embl_phrexp = ajRegCompC(" *([^;.\n\r]+)");
899 
900     if(!dbxgcg_embl_taxexp)
901 	dbxgcg_embl_taxexp = ajRegCompC(" *([^;.\n\r()]+)");
902 
903     if(!dbxgcg_embl_idexp)
904 	dbxgcg_embl_idexp = ajRegCompC("^ID   ([^ \t;]+)");
905 
906     rpos = ajFileResetPos(infr);
907     while(ajReadline(infr, &dbxgcgRefline))
908     {
909 	if(ajStrGetCharFirst(dbxgcgRefline) == '>')
910 	    break;
911 
912         rpos = ajFileResetPos(infr);
913 
914 	if(ajRegExec(dbxgcg_embl_typexp, dbxgcgRefline))
915 	{
916 	    ajRegSubI(dbxgcg_embl_typexp, 1, &dbxgcgTypstr);
917 	    if(ajStrMatchC(dbxgcgTypstr, "ID"))
918 		lineType = GCGTYPE_ID;
919 	    else if(ajStrMatchC(dbxgcgTypstr, "SV"))
920 		lineType = GCGTYPE_VER;
921 	    else if(ajStrMatchC(dbxgcgTypstr, "AC"))
922 		lineType = GCGTYPE_ACC;
923 	    else if(ajStrMatchC(dbxgcgTypstr, "DE"))
924 		lineType = GCGTYPE_DES;
925 	    else if(ajStrMatchC(dbxgcgTypstr, "KW"))
926 		lineType = GCGTYPE_KEY;
927 	    else if(ajStrMatchC(dbxgcgTypstr, "OS"))
928 		lineType = GCGTYPE_TAX;
929 	    else if(ajStrMatchC(dbxgcgTypstr, "OC"))
930 		lineType = GCGTYPE_TAX;
931 	    else
932 		lineType=GCGTYPE_OTHER;
933 
934 	    if(lineType != GCGTYPE_OTHER)
935 		ajRegPost(dbxgcg_embl_typexp, &dbxgcgTmpline);
936 	}
937 	else
938 	    lineType = GCGTYPE_OTHER;
939 
940 	if(lineType == GCGTYPE_ID)
941 	{
942 	    ajRegExec(dbxgcg_embl_idexp, dbxgcgRefline);
943 	    ajRegSubI(dbxgcg_embl_idexp, 1, id);
944 	    ajDebug("++id '%S'\n", *id);
945 	    continue;
946 	}
947 
948 	if(lineType == GCGTYPE_ACC && accfield)
949 	{
950             embBtreeParseField(dbxgcgTmpline, dbxgcg_embl_wrdexp, accfield);
951 	    continue;
952 	}
953 	else if(lineType == GCGTYPE_DES && desfield)
954 	{
955             embBtreeParseField(dbxgcgTmpline, dbxgcg_embl_wrdexp, desfield);
956 	    continue;
957 	}
958 	else if(lineType == GCGTYPE_VER && svfield)
959 	{
960             embBtreeParseField(dbxgcgTmpline, dbxgcg_embl_verexp, svfield);
961 	    continue;
962 	}
963 	else if(lineType == GCGTYPE_KEY && keyfield)
964 	{
965             embBtreeParseFieldTrim(dbxgcgTmpline, dbxgcg_embl_phrexp, keyfield);
966 	    continue;
967 	}
968 	else if(lineType == GCGTYPE_TAX && orgfield)
969 	{
970             embBtreeParseFieldTrim(dbxgcgTmpline, dbxgcg_embl_taxexp, orgfield);
971 	    continue;
972 	}
973     }
974 
975     if(rpos)
976         ajFileSeek(infr, rpos, 0);
977 
978     return ajFalse;
979 }
980 
981 
982 
983 
984 /* @funcstatic dbxgcg_ParseGenbank ********************************************
985 **
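** Parse the ID from a Genbank entry and pass the accession, definition,
** keyword, organism and version lines to the field parsers for whichever
** of those fields are set
**
** @param [u] infr [AjPFile] reference file
** @param [w] id [AjPStr*] ID
** @return [AjBool] Always ajFalse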
991 ** @@
992 ******************************************************************************/
993 
dbxgcg_ParseGenbank(AjPFile infr,AjPStr * id)994 static AjBool dbxgcg_ParseGenbank(AjPFile infr,
995 			       AjPStr *id)
996 {
997     ajlong rpos = 0;
998     ajint lineType=GCGTYPE_OTHER;
999 
1000     if(!dbxgcg_genbank_typexp)
1001 	dbxgcg_genbank_typexp = ajRegCompC("^(  )?([A-Z]+)");
1002 
1003     if(!dbxgcg_genbank_morexp)
1004 	dbxgcg_genbank_morexp = ajRegCompC("^            ");
1005 
1006     if(!dbxgcg_genbank_wrdexp)
1007 	dbxgcg_genbank_wrdexp = ajRegCompC("([A-Za-z0-9_]+)");
1008 
1009     if(!dbxgcg_genbank_phrexp)
1010 	dbxgcg_genbank_phrexp = ajRegCompC(" *([^;.\n\r]+)");
1011 
1012     if(!dbxgcg_genbank_taxexp)
1013 	dbxgcg_genbank_taxexp = ajRegCompC(" *([^;.\n\r()]+)");
1014 
1015     if(!dbxgcg_genbank_verexp)
1016 	dbxgcg_genbank_verexp = ajRegCompC("([A-Za-z0-9]+)( +GI:([0-9]+))?");
1017 
1018     while(ajReadline(infr, &dbxgcgRefline))
1019     {
1020 	if(ajStrGetCharFirst(dbxgcgRefline) == '>')
1021 	    break;
1022 
1023         rpos = ajFileResetPos(infr);
1024 	ajStrAssignS(&dbxgcgTmpstr,dbxgcgRefline);
1025 
1026 	if(ajRegExec(dbxgcg_genbank_typexp, dbxgcgTmpstr))
1027 	{
1028 	    ajRegSubI(dbxgcg_genbank_typexp, 2, &dbxgcgTypstr);
1029 	    if(ajStrMatchC(dbxgcgTypstr, "LOCUS"))
1030 		lineType = GCGTYPE_ID;
1031 	    else if(ajStrMatchC(dbxgcgTypstr, "VERSION"))
1032 		lineType = GCGTYPE_VER;
1033 	    else if(ajStrMatchC(dbxgcgTypstr, "ACCESSION"))
1034 		lineType = GCGTYPE_ACC;
1035 	    else if(ajStrMatchC(dbxgcgTypstr, "DEFINITION"))
1036 		lineType = GCGTYPE_DES;
1037 	    else if(ajStrMatchC(dbxgcgTypstr, "KEYWORDS"))
1038 		lineType = GCGTYPE_KEY;
1039 	    else if(ajStrMatchC(dbxgcgTypstr, "ORGANISM"))
1040 		lineType = GCGTYPE_TAX;
1041 	    else
1042 		lineType=GCGTYPE_OTHER;
1043 
1044 	    if(lineType != GCGTYPE_OTHER)
1045 		ajRegPost(dbxgcg_genbank_typexp, &dbxgcgTmpline);
1046 	    ajDebug("++type line %d\n", lineType);
1047 	}
1048 	else if(lineType != GCGTYPE_OTHER &&
1049                 ajRegExec(dbxgcg_genbank_morexp, dbxgcgRefline))
1050 	{
1051 	    ajRegPost(dbxgcg_genbank_morexp, &dbxgcgTmpline);
1052 	    ajDebug("++more line %d\n", lineType);
1053 	}
1054 	else
1055 	    lineType = GCGTYPE_OTHER;
1056 
1057 	if(lineType == GCGTYPE_ID)
1058 	{
1059 	    ajRegExec(dbxgcg_genbank_wrdexp, dbxgcgTmpline);
1060 	    ajRegSubI(dbxgcg_genbank_wrdexp, 1, id);
1061 	}
1062 	else if(lineType == GCGTYPE_ACC && accfield)
1063 	{
1064             embBtreeParseField(dbxgcgTmpline, dbxgcg_genbank_wrdexp, accfield);
1065 	    continue;
1066 	}
1067 	else if(lineType == GCGTYPE_DES && desfield)
1068 	{
1069             embBtreeParseField(dbxgcgTmpline, dbxgcg_genbank_wrdexp, desfield);
1070 	    continue;
1071 	}
1072 	else if(lineType == GCGTYPE_KEY && keyfield)
1073 	{
1074             embBtreeParseField(dbxgcgTmpline, dbxgcg_genbank_phrexp, keyfield);
1075 	    continue;
1076 	}
1077 	else if(lineType == GCGTYPE_TAX && orgfield)
1078 	{
1079             embBtreeParseField(dbxgcgTmpline, dbxgcg_genbank_taxexp, orgfield);
1080 	    continue;
1081 	}
1082 	else if(lineType == GCGTYPE_VER && svfield)
1083 	{
1084             embBtreeParseFieldThird(dbxgcgTmpline, dbxgcg_genbank_verexp,
1085                                     svfield);
1086 	    continue;
1087 	}
1088 
1089     }
1090 
1091     if(rpos)
1092 	ajFileSeek(infr, rpos, 0);
1093 
1094     return ajFalse;
1095 }
1096 
1097 
1098 
1099 
1100 /* @funcstatic dbxgcg_ParsePir ************************************************
1101 **
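** Pass the first line read, and the accession, keyword and species lines
** of a PIR entry, to the field parsers for whichever of those fields are
** set. The ID is not parsed here: it is only read for debug output.
**
** @param [u] infr [AjPFile] reference file
** @param [w] id [AjPStr*] ID
** @return [AjBool] Always ajFalse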
1107 ** @@
1108 ******************************************************************************/
1109 
1110 
dbxgcg_ParsePir(AjPFile infr,AjPStr * id)1111 static AjBool dbxgcg_ParsePir(AjPFile infr,
1112 			       AjPStr *id)
1113 {
1114     ajlong rpos;
1115 
1116     if(!dbxgcg_pir_wrdexp)
1117 	dbxgcg_pir_wrdexp = ajRegCompC("([A-Za-z0-9_]+)");
1118 
1119     if(!dbxgcg_pir_idexp)
1120 	dbxgcg_pir_idexp = ajRegCompC("^>..;([^;.\n\r]+)");
1121 
1122     if(!dbxgcg_pir_phrexp)				/* allow . for "sp." */
1123 	dbxgcg_pir_phrexp = ajRegCompC(" *([^,;\n\r]+)");
1124 
1125     if(!dbxgcg_pir_tax2exp)				/* allow . for "sp." */
1126 	dbxgcg_pir_tax2exp = ajRegCompC(" *([^,;\n\r()]+)");
1127 
1128     if(!dbxgcg_pir_acexp)
1129 	dbxgcg_pir_acexp = ajRegCompC("^C;Accession:");
1130 
1131     if(!dbxgcg_pir_ac2exp)
1132 	dbxgcg_pir_ac2exp = ajRegCompC("([A-Za-z0-9]+)");
1133 
1134     if(!dbxgcg_pir_taxexp)
1135 	dbxgcg_pir_taxexp = ajRegCompC("^C;Species:");
1136 
1137     if(!dbxgcg_pir_keyexp)
1138 	dbxgcg_pir_keyexp = ajRegCompC("^C;Keywords:");
1139 
1140     rpos = ajFileResetPos(infr);
1141 
1142     ajDebug("++id '%S'\n", *id);
1143 
1144 
1145     ajReadline(infr, &dbxgcgRefline);
1146     ajDebug("line-2 '%S'\n", dbxgcgRefline);
1147 
1148     if(desfield)
1149     {
1150         embBtreeParseField(dbxgcgRefline, dbxgcg_pir_wrdexp, desfield);
1151     }
1152 
1153     while(ajStrGetCharFirst(dbxgcgRefline)!='>')
1154     {
1155         rpos = ajFileResetPos(infr);
1156 	ajStrAssignS(&dbxgcgTmpstr,dbxgcgRefline);
1157 
1158         if(accfield)
1159         {
1160             if(ajRegExec(dbxgcg_pir_acexp, dbxgcgRefline))
1161             {
1162                 ajRegPost(dbxgcg_pir_acexp, &dbxgcgTmpline);
1163                 embBtreeParseField(dbxgcgTmpline, dbxgcg_pir_ac2exp, accfield);
1164             }
1165         }
1166 
1167 	if(keyfield)
1168 	{
1169 	    if(ajRegExec(dbxgcg_pir_keyexp, dbxgcgRefline))
1170 	    {
1171 		ajRegPost(dbxgcg_pir_keyexp, &dbxgcgTmpline);
1172                 embBtreeParseFieldTrim(dbxgcgTmpline, dbxgcg_pir_phrexp,
1173                                        keyfield);
1174 	    }
1175 	}
1176 
1177 	if(orgfield)
1178 	{
1179 	    if(ajRegExec(dbxgcg_pir_taxexp, dbxgcgRefline))
1180 	    {
1181 		ajRegPost(dbxgcg_pir_taxexp, &dbxgcgTmpline);
1182                 embBtreeParseFieldTrim(dbxgcgTmpline, dbxgcg_pir_tax2exp,
1183                                        orgfield);
1184 	    }
1185 	}
1186 
1187 	if(!ajReadline(infr, &dbxgcgRefline))
1188 	{
1189 	    rpos = 0;
1190 	    break;
1191 	}
1192     }
1193 
1194     if(rpos)
1195 	ajFileSeek(infr, rpos, 0);
1196 
1197     return ajFalse;
1198 }
1199