1 /* @Source edamclean application
2 **
3 ** Validate and fix EDAM OBO ontology
4 **
5 ** @author: Copyright (C) Jon Ison (jison@ebi.ac.uk)
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21 **
22 *******************************************************************************
23 **
24 **  EDAMCLEAN documentation
**  See http://www.emboss.org
26 **
27 **  Please cite the authors and EMBOSS.
28 **
29 **  Email jison@ebi.ac.uk.
30 **
31 **
32 **  edamclean reads EDAM (OBO format file), validates the file syntax, writes a
33 **  report from parsing and (optionally) fixes the term numbering and
34 **  relations.
35 **  It has optional modes of operation:
36 **  1. Report only
37 **  2. Renumber terms
38 **  3. Fix relations
39 **  4. Output PURL XML (single file)
40 **  5. Output PURL XML (one file / term)
41 **
42 **  1. Report only
43 **  Write an informative report from parsing but do not change the file.
44 **  The following checks are performed:
45 **  i.    All ids in the file are unique
46 **  ii.   All term names within a namespace are unique
47 **  iii.  All values after namespace: are valid (see below).
48 **  iv.   All field names are valid; either a standard OBO field, a relation
49 **        or a token that must be ignored (see below).
50 **  v.    All terms have the following fields in the order specified
51 **        (optional fields are in parenthesis):
52 **        id, name, namespace, def, (comment), (synonym), is_a
53 **  vi.   Terms in specific namespaces have all mandatory relations defined
54 **        and do not have disallowed relations. See "Rules" below.
55 **  vii.  End-points (term names) of all relations exist. See "Rules" below.
56 **        This includes checking for mismatches between term id and name
57 **        (in comment) in relations lines
58 **  viii. All id: lines have the format:   id: EDAM:0000000
59 **  ix.   All def: lines have the format:  def: "Some text."
60 **        [EDAM:EBI "EMBRACE definition"]
61 **  x.    All relation lines have the format:  RelationName: EDAM:0000000
62 **        ! Term name
63 **  xi.   All comment: values are *not* in quotes ("")
**  xii.  All synonym: lines contain exactly two double quotes ("")
65 **
66 **  2. Renumber terms
67 **  Write a report as above.
68 **  Renumber all terms so that they have unique ids, starting with
69 **  EDAM:0000000
70 **  for the first term in the file and increasing by 1 thereon.
71 **
72 **  3. Fix relations
73 **  Write a report as above.  If no errors reported, correct term ids used
74 **  in all relations fields.
75 **
76 **  4. Output PURL XML (single file)
77 **  Write a report as above. Then write XML output for term submission to PURL.org
78 ** <purls>
79 ** <purl id="/tld/subdomain/testPartial" type="partial">
80 ** -
81 ** <maintainers>
82 ** <uid>jon</uid>
83 ** </maintainers>
84 ** <target url="http://wwwdev.ebi.ac.uk/Tools/dbfetch/dbfetch/edam/0000352"/>
85 ** </purl>
86 ** </purls>
87 **
88 **
89 **  5. Output PURL XML (one file / term)
90 **  As option 4. above, but write a single XML file per term to the specified directory.
91 **
92 **
93 **  edamclean parameters:
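**  Name of OBO format file (input)
**  Name of OBO format file (output)
**  Name of report file (output)
**  Name of PURL XML file (output)
**  Name of directory for per-term PURL XML files (output)
**  Mode of operation (selects one of the modes listed above, i.e. whether
**  to only report, to fix the output file, or to write PURL XML)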
98 **
99 **
100 ** Standard OBO fields
101 **   id:
102 **   name:
103 **   namespace:
104 **   def:
105 **   comment:
106 **   synonym:
107 **   xref:
108 **   is_obsolete:
109 **   consider:
110 **
111 ** Relations
112 **   is_a
113 **   has_part
114 **   is_part_of
115 **   concerns
116 **   is_concern_of
117 **   has_input
118 **   is_input_of
119 **   has_output
120 **   is_output_of
121 **   has_source
122 **   is_source_of
123 **   has_identifier
124 **   is_identifier_of
125 **   has_attribute
126 **   is_attribute_of
127 **   has_format
128 **   is_format_of
129 **
130 ** Namespace
131 **   entity
132 **   topic
133 **   operation
134 **   resource
135 **   data
136 **   format
137 **
138 **
139 ** Tokens to ignore
140 ** Lines beginning with the following tokens are not parsed and are
141 ** preserved as-is in the output:
142 **   !
143 **   format-version
144 **   date
145 **   data-version
146 **   xref
147 **
148 **
149 ** Rules
** Rules for which term types (terms in a namespace) may or must be related
151 ** to which other term types are described under "Rules by term type" in
152 ** the EDAM on-line documentation.
153 ** See http://www.ebi.ac.uk/~jison/edam.html#6.1.
154 **
155 **
156 ** Notes
157 ** Typedef definitions are *not* validated and are preserved as-is in the
158 ** output.
159 **
160 ** Important!
161 ** 1. The program should not be run in modes 2 or 3 (ie. generate an EDAM
162 ** output file) until all  reported problems (from mode 1) have been fixed
163 ** by hand - *except* "Non-unique id" errors!  Results are undefined otherwise.
164 ** 2. All [Term] definitions in the input file *must* appear before the
165 ** first [Typedef] definition - terms appearing after are *not* validated
166 ** fully.
167 **
168 ** Known Issues
169 ** edamclean does not detect the fact that the root term of each branch does
** not need to have an is_a relation.  Disregard the messages in the log file
171 ** to that effect (this could fairly easily be fixed).
172 **
173 ** edamclean will identify (and warn about) identical term names in cases
** where one of the terms has been made obsolete.  Arguably this is the
** correct behaviour.
176 **
177 ** The code that checks for "field in wrong order" does not make all the checks
178 ** it might, e.g. does not check whether other relations appear before is_a.
179 **
180 ** It does not check for multiple (erroneous) comment: lines
181 **
182 ** It does not suppress (irrelevant) error messages for obsolete terms.
183 **
184 ** It does not check for duplicate relationships (where these are not allowed)
185 ** e.g. an exact duplication of a relationship line.
186 ******************************************************************************/
187 
188  #include "emboss.h"
189 
190 
191 
192 
193 
194 /******************************************************************************
195 **
196 ** GLOBAL VARIABLES
197 **
198 ******************************************************************************/
199 
/* Number of entries in FIELDS */
#define NFIELDS 10

/*
** Field tokens recognised as the first word of a line.
** Every token carries its trailing colon.
*/
static const char *FIELDS[NFIELDS] =
{
    "id:",       "name:",        "namespace:",
    "def:",      "comment:",     "synonym:",
    "xref:",     "is_obsolete:", "consider:",
    "relationship:"
};
215 
216 
217 
218 
219 
/* Number of entries in RELATIONS */
#define NRELATIONS 18

/*
** Relation tokens, each with its trailing colon.
** The 'consider' field is treated as a relation, so "consider:" appears
** here as the final entry.
*/
static const char *RELATIONS[NRELATIONS] =
{
    "is_a:",
    "has_part:",        "is_part_of:",
    "concerns:",        "is_concern_of:",
    "has_input:",       "is_input_of:",
    "has_output:",      "is_output_of:",
    "has_source:",      "is_source_of:",
    "has_identifier:",  "is_identifier_of:",
    "has_attribute:",   "is_attribute_of:",
    "has_format:",      "is_format_of:",
    "consider:"
};
244 
245 
246 
247 
/* Number of entries in NAMESPACES */
#define NNAMESPACES 6

/* Names accepted as the value of a namespace: field */
static const char *NAMESPACES[NNAMESPACES] =
{
    "entity", "topic", "operation",
    "resource", "data", "format"
};
259 
260 
261 
262 
/*
** Integer codes for the namespaces.
** The enumerators are listed in the same order as the strings in the
** NAMESPACES table, so each code equals the index of its name there.
*/
enum _namespace
{
    entity = 0,
    topic,
    operation,
    resource,
    data,
    format
};
272 
273 
274 
275 
/* Number of entries in OTHER */
#define NOTHER 12

/*
** Remaining tokens accepted as the first word of a line: comment marker,
** header tags, stanza headers and typedef tags.
*/
static const char *OTHER[NOTHER] =
{
    /* comment marker and header tags */
    "!",
    "format-version:",
    "date:",
    "data-version:",
    "subsetdef:",

    /* stanza headers */
    "[Term]",
    "[Typedef]",

    /* typedef tags */
    "inverse_of:",
    "is_anti_symmetric:",
    "is_cyclic:",
    "is_transitive:",

    /* empty string, so that empty lines are accepted */
    "\0"
};
293 
294 
295 
296 
297 /******************************************************************************
298 **
299 ** DATA STRUCTURES
300 **
301 ******************************************************************************/
302 
303 
304 
305 
306 /* @datastatic PTerm *******************************************************
307 **
308 ** Term object
309 ** Holds name and identifier of a single EDAM term
310 **
311 ** @alias STerm
312 ** @alias OTerm
313 **
314 ** @attr name [AjPStr]  Name of term
315 ** @attr id   [AjPStr]  Id of term
316 ** @attr line [ajint]   Line number of name: field for the term
317 ** @attr Padding [ajint]   Padding to alignment boundary
318 ******************************************************************************/
319 
320 typedef struct STerm
321 {
322     AjPStr  name;
323     AjPStr  id;
324     ajint   line;
325     ajint   Padding;
326 } OTerm;
327  #define PTerm OTerm*
328 
329 
330 
331 
332 /* @datastatic PNamespace *****************************************************
333 **
334 ** Namespace object
335 ** Holds name and array of terms for an EDAM namespace.
336 ** Only one copy of the terms is kept in memory (list holds pointers only)
337 **
338 ** @alias SNamespace
339 ** @alias ONamespace
340 **
341 ** @attr name    [AjPStr]     Name of namespace
342 ** @attr terms   [PTerm*]     Array of terms
343 ** @attr list    [AjPList]    List of terms*
344 ** @attr n       [ajint]      Size of array / list
345 ** @attr Padding [ajint]      Padding to alignment boundary
346 *****************************************************************************/
347 
348 typedef struct SNamespace
349 {
350     AjPStr   name;
351     PTerm    *terms;
352     AjPList  list;
353     ajint    n;
354     ajint    Padding;
355 } ONamespace;
356  #define PNamespace ONamespace*
357 
358 
359 
360 
361 /******************************************************************************
362 **
363 ** PROTOTYPES
364 **
365 ******************************************************************************/
366 
367 static PTerm       ajTermNew(void);
368 static PNamespace  ajNamespaceNew(void);
369 static void        ajTermDel(PTerm *P);
370 static void        ajNamespaceDel(PNamespace *P);
371 static const AjPStr FindTerm(ajint namespace, const AjPStr termname,
372                              PNamespace *namespaces);
373 
374 
375 
376 
377 /******************************************************************************
378 **
379 ** FUNCTIONS
380 **
381 ******************************************************************************/
382 
383 
384 
385 
386 /* @funcstatic ajTermNew ***************************************************
387 **
388 ** Term constructor
389 **
390 ** @return [PTerm] New object
391 ** @@
392 ******************************************************************************/
393 
ajTermNew(void)394 static PTerm ajTermNew(void)
395 {
396     PTerm ret;
397 
398     AJNEW0(ret);
399     ret->name  = ajStrNew();
400     ret->id    = ajStrNew();
401     ret->line  = 0;
402 
403     return ret;
404 }
405 
406 
407 
408 
409 /* @funcstatic ajNamespaceNew ************************************************
410 **
411 ** Namespace constructor
412 ** The array is *not* allocated.
413 **
414 ** @return [PNamespace] New object
415 ** @@
416 ******************************************************************************/
417 
ajNamespaceNew(void)418 static PNamespace ajNamespaceNew(void)
419 {
420     PNamespace ret;
421 
422     AJNEW0(ret);
423     ret->name  = ajStrNew();
424     ret->terms = NULL;
425     ret->list  = ajListstrNew();
426     ret->n     = 0;
427 
428     return ret;
429 }
430 
431 
432 
433 
434 /* @funcstatic ajTermDel ***************************************************
435 **
436 ** Term destructor
437 **
438 ** @param [d] P       [PTerm*]  Term object to delete
439 ** @return [void]
440 ** @@
441 ******************************************************************************/
442 
ajTermDel(PTerm * P)443 static void ajTermDel(PTerm *P)
444 {
445     if(!P)
446         ajFatal("Null arg error 1 in ajTermDel");
447     else if(!(*P))
448         ajFatal("Null arg error 2 in ajTermDel");
449 
450     ajStrDel(&(*P)->name);
451     ajStrDel(&(*P)->id);
452 
453     AJFREE(*P);
454     *P=NULL;
455 
456     return;
457 }
458 
459 
460 
461 
462 /* @funcstatic ajNamespaceDel ************************************************
463 **
464 ** Namespace destructor
465 **
466 ** @param [d] P       [PNamespace*]  Namespace object to delete
467 ** @return [void]
468 ** @@
469 ******************************************************************************/
470 
ajNamespaceDel(PNamespace * P)471 static void ajNamespaceDel(PNamespace *P)
472 {
473     int i;
474 
475     if(!P)
476         ajFatal("Null arg error 1 in ajNamespaceDel");
477     else if(!(*P))
478         ajFatal("Null arg error 2 in ajNamespaceDel");
479 
480     ajStrDel(&(*P)->name);
481 
482     if((*P)->n)
483     {
484         for(i=0;i<(*P)->n;i++)
485             ajTermDel(&(*P)->terms[i]);
486 
487         AJFREE((*P)->terms);
488     }
489 
490     ajListstrFree(&(*P)->list);
491 
492     AJFREE(*P);
493     *P=NULL;
494 
495     return;
496 }
497 
498 
499 
500 
501 /* @funcstatic FindTerm ***************************************************
502 **
503 ** Finds a term within a namespace index and returns its identifier in the
504 ** namespace array.
505 **
506 ** Returns NULL if term is not found
507 **
508 ** @param [r] namespace    [ajint]  Namespace index as integer
509 ** @param [r] termname     [const AjPStr] Name of term
510 ** @param [u] namespaces   [PNamespace*] Array of namespace objects
511 ** @return [const AjPStr] Term identifier
512 ** @@
513 ******************************************************************************/
514 
FindTerm(ajint namespace,const AjPStr termname,PNamespace * namespaces)515 static const AjPStr FindTerm(ajint namespace, const AjPStr termname,
516                        PNamespace *namespaces)
517 {
518     ajint x;
519 
520     if(!termname || !namespaces)
521         ajFatal("Bad args to FindTerm");
522 
523     for(x=0; x<namespaces[namespace]->n; x++)
524         if(ajStrMatchS(termname, namespaces[namespace]->terms[x]->name))
525             return namespaces[namespace]->terms[x]->id;
526 
527     return NULL;
528 }
529 
530 
531 
532 
533 /* @prog edamclean ********************************************************
534 **
535 ** Validate and fix EDAM OBO ontology
536 **
537 *****************************************************************************/
538 
main(ajint argc,char ** argv)539 int main(ajint argc, char **argv)
540 {
541     /* Variable declarations */
542     AjPFile    inf_edam        = NULL;    /* Name of EDAM (input) file      */
543     AjPFile    ouf_edam        = NULL;    /* Name of EDAM (output) file     */
544     AjPFile    ouf_log         = NULL;    /* Name of report (output) file   */
545     AjPFile    ouf_xml         = NULL;    /* Name of XML (output) file   */
546     AjPFile    tmp_xml         = NULL;    /* Temp. XML (output) file   */
547     AjPDirout  xmloutdir       = NULL;    /* XML (output) file directory */
548     AjPStr     mode            = NULL;    /* Mode of operation              */
549     AjPList    list_tmp        = NULL;    /* Temporary list                 */
550     AjPStr    *fields          = NULL;    /* Array of valid tokens for first
551                                              word in line */
552     ajint      nfields         = 0;       /* Size of fields array           */
553     AjPStr    *ids             = NULL;    /* Array of all ids in file       */
554     ajint      nids            = 0;       /* Size of ids                    */
555     const AjPStr id            = NULL;    /* ID of a term                   */
556 
557     AjPStr     line    = NULL;    /* A line from the input file             */
558     ajint      linecnt = 0;       /* Line number of line                    */
559     ajint      termcnt = 0;       /* Count of term definitions              */
560     AjPStr     tok     = NULL;    /* A token from line                      */
561     AjBool     done            = ajFalse; /* Housekeeping  */
562     ajint      x               = 0;       /* Housekeeping  */
563     ajint      y               = 0;       /* Housekeeping  */
564     ajint      z               = 0;       /* Housekeeping  */
565     ajint      idx             = 0;       /* Housekeeping  */
566     AjPStr     name            = NULL;    /* Name of a term */
567     AjPStr     namespace       = NULL;    /* Namespace of a term */
568     AjPStr     relation        = NULL;    /* Relationship name, e.g. "is_a" */
569     AjPStr     tmp_name        = NULL;    /* Temp. name of a term */
570     AjPStr     tmp_id          = NULL;    /* Temp. id of a term */
571     AjPStr     tmp_str         = NULL;    /* Temp. string */
572     PTerm      tmp_term        = NULL;    /* Temp. term pointer */
573     ajint      tmp_line        = 0;       /* Temp. line number */
574     PNamespace namespaces[NNAMESPACES];   /* Array of namespace objects */
575 
576     AjBool    done_first      = ajFalse;  /* Housekeeping ... read first term */
577     AjBool    first           = ajFalse;  /* Housekeeping ... on first term */
578     AjBool    found_id        = ajFalse;
579     AjBool    in_typedef      = ajFalse;  /* In a [Typedef] statement */
580 
581     AjBool    found_name             = ajFalse;
582     AjBool    found_namespace        = ajFalse;
583     AjBool    found_def              = ajFalse;
584     AjBool    found_comment          = ajFalse;
585     AjBool    found_synonym          = ajFalse;
586     AjBool    found_xref             = ajFalse;
587     AjBool    found_is_obsolete      = ajFalse;
588     AjBool    found_consider         = ajFalse;
589     AjBool    found_isa              = ajFalse;
590     AjBool    found_concerns         = ajFalse;
591     AjBool    found_is_concern_of    = ajFalse;
592     AjBool    found_has_input        = ajFalse;
593     AjBool    found_is_input_of      = ajFalse;
594     AjBool    found_has_output       = ajFalse;
595     AjBool    found_is_output_of     = ajFalse;
596     AjBool    found_has_source       = ajFalse;
597     AjBool    found_is_source_of     = ajFalse;
598     AjBool    found_has_identifier   = ajFalse;
599     AjBool    found_is_identifier_of = ajFalse;
600     AjBool    found_has_attribute    = ajFalse;
601     AjBool    found_is_attribute_of  = ajFalse;
602     AjBool    found_has_part         = ajFalse;
603     AjBool    found_is_part_of       = ajFalse;
604     AjBool    found_has_format       = ajFalse;
605     AjBool    found_is_format_of     = ajFalse;
606 
607 
608 
609 
610     /* Read data from acd */
611     embInit("edamclean", argc, argv);
612 
613 
614     /* ACD data handling */
615     inf_edam   = ajAcdGetInfile("edaminfile");
616     ouf_edam   = ajAcdGetOutfile("edamoutfile");
617     ouf_log    = ajAcdGetOutfile("logfile");
618     ouf_xml    = ajAcdGetOutfile("xmlfile");
619     xmloutdir  = ajAcdGetOutdir("xmloutdir");
620     mode       = ajAcdGetSelectSingle("mode");
621 /*    taxdir    = ajAcdGetDirectory("taxdirectory"); */
622 
623     ajFmtPrint("MODE : %S\n", mode);
624 
625     /*
626     ajTaxLoad(taxdir);
627     ajOboParseObofile(inf_edam, "noidorder,nounkid");
628     ajFileSeek(inf_edam, 0, 0);
629     embExit(); */
630 
631     /* Memory allocation */
632     line       = ajStrNew();
633     tok        = ajStrNew();
634     name       = ajStrNew();
635     namespace  = ajStrNew();
636     relation   = ajStrNew();
637     tmp_name = ajStrNew();
638     tmp_id   = ajStrNew();
639 
640     for(x=0; x<NNAMESPACES; x++)
641     {
642         namespaces[x] = ajNamespaceNew();
643         ajStrAssignC(&(namespaces[x]->name), NAMESPACES[x]);
644     }
645 
646 
647 
648     /*  Check for valid first tokens */
649     /* First, write array of valid tokens for first word in line */
650     list_tmp = ajListstrNew();
651 
652     for(x=0; x<NFIELDS; x++)
653     {
654         tmp_str = ajStrNew();
655         ajStrAssignC(&tmp_str, FIELDS[x]);
656         ajListstrPushAppend(list_tmp, tmp_str);
657     }
658 
659     for(x=0; x<NRELATIONS; x++)
660     {
661         tmp_str = ajStrNew();
662         ajStrAssignC(&tmp_str, RELATIONS[x]);
663         ajListstrPushAppend(list_tmp, tmp_str);
664     }
665 
666     for(x=0; x<NOTHER; x++)
667     {
668         tmp_str = ajStrNew();
669         ajStrAssignC(&tmp_str, OTHER[x]);
670         ajListstrPushAppend(list_tmp, tmp_str);
671     }
672 
673     nfields = ajListstrToarray(list_tmp, &fields);
674     ajListstrFree(&list_tmp);
675 
676     ajFmtPrintF(ouf_log, "1. FIRST TOKEN IN LINES\n");
677 
678 
679 
680 
681 
682 
683 
684         /* Output PURL XML */
685     if(ajStrMatchC(mode, "Output PURL XML (single file)") ||
686        ajStrMatchC(mode, "Output PURL XML (one file / term)"))
687     {
688         if(ajStrMatchC(mode, "Output PURL XML (single file)"))
689         {
690             tmp_xml = ouf_xml;
691             ajFmtPrintF(tmp_xml, "<purls>\n");
692         }
693 
694 
695         for(in_typedef=ajFalse; ajReadline(inf_edam, &line); )
696         {
697             if(ajStrPrefixC(line, "[Typedef]"))
698                 in_typedef=ajTrue;
699             else if(ajStrPrefixC(line, "[Term]"))
700                 in_typedef=ajFalse;
701 
702             if(in_typedef)
703                 continue;
704 
705             if(ajStrPrefixC(line, "namespace:"))
706             {
707 
708                 if(ajStrMatchC(mode, "Output PURL XML (one file / term)"))
709                 {
710                     if(!(tmp_xml=ajFileNewOutNameDirS(tmp_id, xmloutdir)))
711                         ajFatal("Could not create file");
712                     else
713                         ajFmtPrintF(tmp_xml, "<purls>\n");
714                 }
715 
716 
717 
718                 ajStrAssignClear(&tok);
719                 ajFmtScanS(line, "%*s %S", &tok);
720                 ajStrRemoveWhite(&tok);
721                 ajFmtPrintF(tmp_xml,
722                             "<purl id=\"/edam/%S/%S\" type=\"partial\">\n"
723                             "<maintainers>\n"
724                             "<uid>jon</uid>\n"
725                             "</maintainers>\n"
726                             "<target url=\"http://wwwdev.ebi.ac.uk/Tools/dbfetch/dbfetch/edam/%S\"/>\n"
727                             "</purl>\n", tok, tmp_id, tmp_id);
728 
729 
730                 if(ajStrMatchC(mode, "Output PURL XML (one file / term)"))
731                 {
732                     ajFmtPrintF(tmp_xml, "</purls>\n");
733                     ajFileClose(&tmp_xml);
734                 }
735 
736             }
737 
738             if(ajStrPrefixC(line, "id:"))
739             {
740                 ajStrParseC(line, ":");
741                 ajStrParseC(NULL, ":");
742                 ajStrAssignS(&tmp_id, ajStrParseC(NULL, ":"));
743                 ajStrRemoveWhite(&tmp_id);
744             }
745         }
746 
747         if(ajStrMatchC(mode, "Output PURL XML (single file)"))
748             ajFmtPrintF(tmp_xml, "</purls>\n");
749     }
750 
751     exit(0);
752 
753 
754 
755 
756 
757     for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
758     {
759         ajStrAssignClear(&tok);
760         ajFmtScanS(line, "%S", &tok);
761 
762         for(x=0, done=ajFalse;x<nfields;x++)
763             if(ajStrMatchS(tok, fields[x]))
764             {
765                 done = ajTrue;
766                 break;
767             }
768 
769         if(!done)
770             ajFmtPrintF(ouf_log, "Line %6d : Invalid 1st token: %S (%S)\n",
771                         linecnt+1, tok, line);
772     }
773 
774     ajFmtPrintF(ouf_log, "\n\n");
775     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
776 
777 
778     /*  Check for valid namespace: values */
779     ajFmtPrintF(ouf_log, "2. NAMESPACE VALUES\n");
780     for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
781     {
782         if(ajStrPrefixC(line, "namespace:"))
783         {
784             ajStrAssignClear(&tok);
785             ajFmtScanS(line, "%*s %S", &tok);
786 
787             for(x=0, done=ajFalse;x<NNAMESPACES;x++)
788                 if(ajStrMatchC(tok, NAMESPACES[x]))
789                 {
790                     done = ajTrue;
791                     break;
792                 }
793 
794             if(!done)
795                 ajFmtPrintF(ouf_log, "Line %6d : Invalid namespace: %S "
796                             "(%S)\n", linecnt+1, tok, line);
797         }
798 
799     }
800 
801 
802     ajFmtPrintF(ouf_log, "\n\n");
803     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
804 
805 
806 
807     /*  Check for valid comment: and synonym: values */
808     ajFmtPrintF(ouf_log, "3. COMMENT / SYNONYM VALUES\n");
809 
810     for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
811     {
812         if(ajStrPrefixC(line, "comment:"))
813             if(ajStrFindAnyK(line, '\"') != -1)
814                 ajFmtPrintF(ouf_log, "Line %6d : Invalid quote in line "
815                             "(%S)\n", linecnt+1, line);
816     }
817     ajFmtPrintF(ouf_log, "\n\n");
818     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
819 
820 
821     for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
822     {
823         if(ajStrPrefixC(line, "synonym:"))
824             if(ajStrCalcCountK(line, '\"') != 2)
825                 ajFmtPrintF(ouf_log, "Line %6d : Wrong number of quotes in line "
826                             "(%S)\n", linecnt+1, line);
827     }
828     ajFmtPrintF(ouf_log, "\n\n");
829     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
830 
831 
832     /*  Check for id: line format (also build list of term ids) */
833     list_tmp = ajListstrNew();
834     ajFmtPrintF(ouf_log, "4. id: LINE FORMAT\n");
835 
836     for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++)
837     {
838         /* Stop checking once first [Typedef] line is found. */
839 /*         if(ajStrPrefixC(line, "[Typedef]"))
840            break; */
841 
842         if(ajStrPrefixC(line, "[Typedef]"))
843             in_typedef=ajTrue;
844         else if(ajStrPrefixC(line, "[Term]"))
845             in_typedef=ajFalse;
846 
847         if(ajStrPrefixC(line, "id:"))
848         {
849             if(in_typedef)
850                 continue;
851 
852             if(ajStrCalcCountC(line, ":")!=2)
853                 ajFmtPrintF(ouf_log, "Line %6d : Invalid id: line format - "
854                             "wrong number of colon (:) (%S) \n",
855                             linecnt+1, line);
856             else
857             {
858                 ajStrParseC(line, ":");
859                 ajStrAssignS(&tok, ajStrParseC(NULL, ":"));
860                 ajStrRemoveWhite(&tok);
861                 if(!ajStrMatchC(tok, "EDAM"))
862                     ajFmtPrintF(ouf_log, "Line %6d : Invalid id: line format "
863                                 "- no 'EDAM' token (%S)\n", linecnt+1, line);
864 
865                 ajStrAssignS(&tok, ajStrParseC(NULL, ":"));
866                 ajStrRemoveWhite(&tok);
867 
868                 if(ajStrGetLen(tok) != 7)
869                     ajFmtPrintF(ouf_log, "Line %6d : Invalid id: line format "
870                                 "- id number wrong (%S)\n", linecnt+1, line);
871 
872                 tmp_str = ajStrNew();
873                 ajStrAssignS(&tmp_str, tok);
874 
875                 ajListstrPushAppend(list_tmp, tmp_str);
876             }
877         }
878     }
879     nids = ajListstrToarray(list_tmp, &ids);
880     ajListstrFree(&list_tmp);
881     ajFmtPrintF(ouf_log, "\n\n");
882     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
883 
884     /*  Check for def: line format */
885     ajFmtPrintF(ouf_log, "5. def: LINE FORMAT\n");
886     for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++)
887     {
888         /* Stop checking once first [Typedef] line is found. */
889 /*        if(ajStrPrefixC(line, "[Typedef]"))
890           break; */
891 
892         if(ajStrPrefixC(line, "[Typedef]"))
893             in_typedef=ajTrue;
894         else if(ajStrPrefixC(line, "[Term]"))
895             in_typedef=ajFalse;
896 
897         if(ajStrPrefixC(line, "def:"))
898         {
899             if(in_typedef)
900                 continue;
901 
902             if(ajStrCalcCountC(line, ":[")!=3)
903                 ajFmtPrintF(ouf_log, "Line %6d : Invalid def: line format - "
904                             "wrong number of colon ':' or open-bracket '[' "
905                             "(%S)\n", linecnt+1, line);
906             else
907             {
908                 ajStrParseC(line, ":[");
909                 ajStrAssignS(&tok, ajStrParseC(NULL, ":["));
910 
911                 /* Check for 2 double quotes */
912                 if(ajStrCalcCountC(tok, "\"")!=2)
913                     ajFmtPrintF(ouf_log, "Line %6d : Invalid def: line format "
914                                 "- wrong number of double quotes (%S)\n",
915                                 linecnt+1, line);
916 
917                 /* Careful - different tokeniser used (no colon) ! */
918                 ajStrAssignClear(&tok);
919                 ajStrAssignS(&tok, ajStrParseC(NULL, "["));
920                 ajStrRemoveLastNewline(&tok);
921                 ajStrRemoveWhiteExcess(&tok);
922 
923 /*                   (!ajStrMatchC(tok, "EDAM:EBI \"EMBRACE definition\"]"))) */
924 
925                 /* Check for line suffix */
926                 if((!ajStrPrefixC(tok, "EDAM:")) ||
927                    (!ajStrSuffixC(tok, "\"EMBRACE definition\"]")))
928                     ajFmtPrintF(ouf_log, "Line %6d : Invalid def: line "
929                                 "format - invalid suffix (%S)\n",
930                                 linecnt+1, line);
931             }
932         }
933     }
934     ajFmtPrintF(ouf_log, "\n\n");
935     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
936 
937 
938     /*  Check for relations line format */
939     ajFmtPrintF(ouf_log, "6. RELATIONS LINE FORMAT\n");
940 
941     for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++)
942     {
943         if(ajStrPrefixC(line, "[Typedef]"))
944             in_typedef=ajTrue;
945         else if(ajStrPrefixC(line, "[Term]"))
946             in_typedef=ajFalse;
947 
948         if(in_typedef)
949             continue;
950 
951         ajStrAssignClear(&relation);
952 
953         if(ajStrPrefixC(line, "relationship:"))
954             ajFmtScanS(line, "%*S %S", &relation);
955         else
956             ajFmtScanS(line, "%S", &relation);
957 
958         for(x=0; x<NRELATIONS; x++)
959         {
960             if(ajStrMatchC(relation, RELATIONS[x]))
961             {
962 
963 
964                 if(((ajStrPrefixC(line, "relationship:")) &&
965                     (ajStrCalcCountC(line, "!:")!=4)) ||
966                    ((!ajStrPrefixC(line, "relationship:")) &&
967                     (ajStrCalcCountC(line, "!:")!=3)))
968                 {
969                     ajFmtPrintF(ouf_log, "Line %6d : Invalid relations line "
970                                 "format1 (%S)\n", linecnt+1, line);
971 /*                    ajFmtPrint("relation: %S  RELATIONS[%d]: %s", relation, x, RELATIONS[x]); */
972                 }
973                 else
974                 {
975                     ajStrAssignS(&tok, ajStrParseC(line, ":! "));
976                     ajStrRemoveWhite(&tok);
977 
978 
979                     /* Discard first "relationship:" token and get next one */
980                     if(ajStrMatchC(tok, "relationship"))
981                         ajStrParseC(NULL, ":! ");
982 
983                     /* Get supposed "EDAM" token */
984                     ajStrAssignS(&tok, ajStrParseC(NULL, ":! "));
985                     ajStrRemoveWhite(&tok);
986 
987                     /* Check for "EDAM" */
988                     if(!ajStrMatchC(tok, "EDAM"))
989                         ajFmtPrintF(ouf_log, "Line %6d : Invalid relations "
990                                     "line format2 (%S)\n", linecnt+1, line);
991 
992                     /* Check for 7 digit number */
993                     ajStrAssignS(&tok, ajStrParseC(NULL, ":! "));
994                     ajStrRemoveWhite(&tok);
995 
996                     if(ajStrGetLen(tok) != 7)
997                         ajFmtPrintF(ouf_log, "Line %6d : Invalid relations "
998                                     "line format3 (%S)\n", linecnt+1, line);
999 
1000                     /* Check for non-NULL terminal comment */
1001                     ajStrAssignS(&tok, ajStrParseC(NULL, ":! "));
1002 
1003                     if(ajStrGetLen(tok) == 0)
1004                         ajFmtPrintF(ouf_log, "Line %6d : Invalid relations "
1005                                     "line format4 (%S)\n", linecnt+1, line);
1006                     break;
1007 
1008                 }
1009             }
1010         }
1011     }
1012     ajFmtPrintF(ouf_log, "\n\n");
1013     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
1014 
1015 
1016 
1017     /*  Check for unique ids */
1018     ajFmtPrintF(ouf_log, "7. UNIQUE IDS\n");
1019     for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
1020     {
1021         /* Stop checking once first [Typedef] line is found. */
1022 /*        if(ajStrPrefixC(line, "[Typedef]"))
1023           break; */
1024 
1025         if(ajStrPrefixC(line, "id:"))
1026         {
1027             ajStrParseC(line, ":");
1028             ajStrParseC(NULL, ":");
1029             ajStrAssignClear(&tok);
1030             ajStrAssignS(&tok, ajStrParseC(NULL, ":"));
1031 
1032             ajStrRemoveWhite(&tok);
1033 
1034             for(x=0, y=0; x<nids; x++)
1035             {
1036                 if(ajStrMatchS(tok, ids[x]))
1037                 {
1038                     y++;
1039                     if(y>1)
1040                     {
1041                         ajFmtPrintF(ouf_log, "Line %6d : Non-unique id: %S "
1042                                     "%S\n", linecnt+1, tok, line);
1043                         break;
1044                     }
1045                 }
1046             }
1047 
1048         }
1049     }
1050 
1051     ajFmtPrintF(ouf_log, "\n\n");
1052     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
1053 
1054     /*  Check for mandatory fields / field order */
1055     ajFmtPrintF(ouf_log, "8. STANDARD MANDATORY FIELDS AND FIELD ORDER\n");
1056 
1057     for(in_typedef=ajFalse, first = ajTrue, done_first=ajFalse, linecnt=0;
1058         ajReadline(inf_edam, &line); linecnt++)
1059     {
1060         /*      id, name, namespace, def, (comment), (synonym), is_a */
1061 
1062         /* Stop checking once first [Typedef] line is found. */
1063 /*        if(ajStrPrefixC(line, "[Typedef]"))
1064           break; */
1065 
1066         if(ajStrPrefixC(line, "[Typedef]"))
1067             in_typedef=ajTrue;
1068 
1069         if(ajStrPrefixC(line, "[Term]"))
1070         {
1071             in_typedef=ajFalse;
1072 
1073             /* Process previous term */
1074             if(done_first)
1075             {
1076                 if(!found_id)
1077                     ajFmtPrintF(ouf_log, "Line %6d : No id: field in "
1078                                 "term\n", tmp_line);
1079                 if(!found_name)
1080                     ajFmtPrintF(ouf_log, "Line %6d : No name: field in "
1081                                 "term\n", tmp_line);
1082                 if(!found_namespace)
1083                     ajFmtPrintF(ouf_log, "Line %6d : No namespace: field in "
1084                                 "term\n", tmp_line);
1085                 if(!found_def)
1086                     ajFmtPrintF(ouf_log, "Line %6d : No def: field in "
1087                                 "term\n", tmp_line);
1088                 /* No is_a needed for first term in file or for obsolete terms*/
1089                 if((!found_isa) && (!first)  && (!found_is_obsolete))
1090                     ajFmtPrintF(ouf_log, "Line %6d : No is_a: field in "
1091                                 "term\n", tmp_line);
1092 
1093                 first = ajFalse;
1094 
1095 
1096                 /* entity */
1097                 if(ajStrMatchC(namespace, NAMESPACES[0]))
1098                 {
1099                     if(found_concerns || found_has_input ||
1100                        found_is_input_of || found_has_output ||
1101                        found_is_output_of || found_has_source ||
1102                        found_is_source_of || found_is_identifier_of ||
1103                        found_is_format_of || found_has_format ||
1104                        found_is_attribute_of)
1105                         ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1106                                     "for term in this namespace\n", tmp_line);
1107 
1108                 }
1109                 /* topic */
1110                 else if(ajStrMatchC(namespace, NAMESPACES[1]))
1111                 {
1112                     if(!found_concerns)
1113                         ajFmtPrintF(ouf_log, "Line %6d : No concerns: relation in term\n", tmp_line);
1114 
1115                     if(found_is_concern_of || found_has_input ||
1116                        found_is_input_of || found_has_output ||
1117                        found_is_output_of || found_has_source ||
1118                        found_is_source_of || found_has_identifier ||
1119                        found_is_identifier_of || found_has_attribute ||
1120                        found_is_attribute_of || found_has_part ||
1121                        found_is_format_of || found_has_format ||
1122                        found_is_part_of)
1123                         ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1124                                     "for term in this namespace\n", tmp_line);
1125                 }
1126                 /* operation */
1127                 else if(ajStrMatchC(namespace, NAMESPACES[2]))
1128                 {
1129                     if(!found_is_concern_of)
1130                         ajFmtPrintF(ouf_log, "Line %6d : No is_concern_of: relation in term\n", tmp_line);
1131 
1132                     if(found_concerns || found_is_input_of ||
1133                        found_is_output_of || found_has_source ||
1134                        found_is_source_of || found_has_identifier ||
1135                        found_is_identifier_of || found_has_attribute ||
1136                        found_is_attribute_of || found_has_part ||
1137                        found_is_format_of || found_has_format ||
1138                        found_is_part_of)
1139                         ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1140                                     "for term in this namespace\n", tmp_line);
1141                 }
1142                 /* resource */
1143                 else if(ajStrMatchC(namespace, NAMESPACES[3]))
1144                 {
1145                     if(!found_is_source_of)
1146                         ajFmtPrintF(ouf_log, "Line %6d : No is_source_of: "
1147                                     "relation in term\n", tmp_line);
1148 
1149                     if(found_concerns || found_has_input ||
1150                        found_is_input_of || found_has_output ||
1151                        found_is_output_of || found_has_source ||
1152                        found_is_identifier_of || found_has_attribute ||
1153                        found_is_format_of || found_has_format ||
1154                        found_is_attribute_of)
1155                         ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1156                                     "for term in this namespace\n", tmp_line);
1157                 }
1158                 /* data */
1159                 else if(ajStrMatchC(namespace, NAMESPACES[4]))
1160                 {
1161                     if(found_concerns || found_is_concern_of ||
1162                        found_has_input || found_has_output ||
1163                        found_is_source_of || found_has_attribute ||
1164                        found_is_format_of)
1165                         ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1166                                     "for term in this namespace\n", tmp_line);
1167                 }
1168 
1169                 /* format */
1170                 else if(ajStrMatchC(namespace, NAMESPACES[5]))
1171                 {
1172                     if(found_concerns         ||
1173                        found_is_concern_of    ||
1174                        found_has_input        ||
1175                        found_is_input_of      ||
1176                        found_has_output       ||
1177                        found_is_output_of     ||
1178                        found_has_source       ||
1179                        found_is_source_of     ||
1180                        found_has_identifier   ||
1181                        found_is_identifier_of ||
1182                        found_has_attribute    ||
1183                        found_is_attribute_of  ||
1184                        found_has_part         ||
1185                        found_is_part_of       ||
1186                        found_has_format  )
1187                         ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1188                                     "for term in this namespace\n", tmp_line);
1189 
1190 
1191 
1192 
1193                 }
1194                 /* edam_identifier */
1195                 /*
1196                 else if(ajStrMatchC(namespace, NAMESPACES[6]))
1197                 {
1198                     if(!found_is_identifier_of)
1199                         ajFmtPrintF(ouf_log, "Line %6d : No is_identifier_of: "
1200                                     "relation in term\n", tmp_line);
1201 
1202                     if(found_concerns || found_is_concern_of ||
1203                        found_has_input || found_has_output ||
1204                        found_is_source_of || found_has_identifier ||
1205                        found_has_attribute || found_is_attribute_of ||
1206                        found_is_format_of || found_has_format ||
1207                        found_has_part || found_is_part_of)
1208                         ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1209                                     "for term in this namespace\n", tmp_line);
1210                 }
1211                 */
1212             }
1213 
1214 
1215 
1216             tmp_line        = linecnt+1;
1217 
1218             found_id               = ajFalse;
1219             found_name             = ajFalse;
1220             found_namespace        = ajFalse;
1221             found_def              = ajFalse;
1222             found_comment          = ajFalse;
1223             found_synonym          = ajFalse;
1224             found_xref             = ajFalse;
1225             found_is_obsolete      = ajFalse;
1226             found_consider         = ajFalse;
1227             found_isa              = ajFalse;
1228             found_concerns         = ajFalse;
1229             found_is_concern_of    = ajFalse;
1230             found_has_input        = ajFalse;
1231             found_is_input_of      = ajFalse;
1232             found_has_output       = ajFalse;
1233             found_is_output_of     = ajFalse;
1234             found_has_source       = ajFalse;
1235             found_is_source_of     = ajFalse;
1236             found_has_identifier   = ajFalse;
1237             found_is_identifier_of = ajFalse;
1238             found_has_attribute    = ajFalse;
1239             found_is_attribute_of  = ajFalse;
1240             found_has_part         = ajFalse;
1241             found_is_part_of       = ajFalse;
1242             found_has_format       = ajFalse;
1243             found_is_format_of     = ajFalse;
1244 
1245 
1246             done_first = ajTrue;
1247         }
1248         else if(in_typedef)
1249             continue;
1250         else if(ajStrPrefixC(line, "id:"))
1251         {
1252             found_id = ajTrue;
1253 
1254             if(found_name || found_namespace || found_def || found_comment ||
1255                found_synonym || found_isa || found_xref || found_is_obsolete
1256                || found_consider)
1257                 ajFmtPrintF(ouf_log, "Line %6d : id: field in wrong order "
1258                             "(%S)\n", linecnt+1, line);
1259         }
1260 
1261         else if(ajStrPrefixC(line, "name:"))
1262         {
1263             found_name = ajTrue;
1264             if( (!found_id) || found_namespace || found_def || found_comment ||
1265                 found_synonym || found_isa || found_xref || found_is_obsolete
1266                 || found_consider)
1267                 ajFmtPrintF(ouf_log, "Line %6d : name: field in wrong order "
1268                             "(%S)\n", linecnt+1, line);
1269         }
1270         else if(ajStrPrefixC(line, "namespace:"))
1271         {
1272             found_namespace = ajTrue;
1273             ajFmtScanS(line, "%*s %S", &namespace);
1274 
1275             if((!found_id) || (!found_name) || found_def || found_comment ||
1276                found_synonym || found_isa || found_xref || found_is_obsolete
1277                || found_consider)
1278                 ajFmtPrintF(ouf_log, "Line %6d : namespace: field in wrong "
1279                             "order (%S)\n", linecnt+1, line);
1280         }
1281         else if(ajStrPrefixC(line, "def:"))
1282         {
1283             found_def = ajTrue;
1284 
1285             if((!found_id) || (!found_name) || (!found_namespace) ||
1286                found_comment || found_synonym || found_isa || found_xref
1287                || found_is_obsolete || found_consider)
1288                 ajFmtPrintF(ouf_log, "Line %6d : def: field in wrong order "
1289                             "(%S)\n", linecnt+1, line);
1290         }
1291         else if(ajStrPrefixC(line, "comment:"))
1292         {
1293             found_comment = ajTrue;
1294 
1295             if((!found_id) || (!found_name) || (!found_namespace) ||
1296                (!found_def) || found_synonym || found_isa || found_xref
1297                || found_is_obsolete || found_consider)
1298                 ajFmtPrintF(ouf_log, "Line %6d : comment: field in wrong "
1299                             "order (%S)\n", linecnt+1, line);
1300         }
1301         else if(ajStrPrefixC(line, "synonym:"))
1302         {
1303             found_synonym = ajTrue;
1304             if((!found_id) || (!found_name) || (!found_namespace) ||
1305                (!found_def) || found_isa || found_xref || found_is_obsolete
1306                || found_consider)
1307                 ajFmtPrintF(ouf_log, "Line %6d : synonym: field in wrong "
1308                             "order (%S)\n", linecnt+1, line);
1309         }
1310         else if(ajStrPrefixC(line, "is_a:"))
1311         {
1312             found_isa = ajTrue;
1313             if((!found_id) || (!found_name) || (!found_namespace) ||
1314                (!found_def) || found_is_obsolete
1315                || found_consider)
1316                 ajFmtPrintF(ouf_log, "Line %6d : is_a: field in wrong order "
1317                             "(%S)\n", linecnt+1, line);
1318         }
1319         else if(ajStrPrefixC(line, "concerns:"))
1320             found_concerns         = ajTrue;
1321         else if(ajStrPrefixC(line, "is_concern_of:"))
1322             found_is_concern_of    = ajTrue;
1323         else if(ajStrPrefixC(line, "has_input:"))
1324             found_has_input        = ajTrue;
1325         else if(ajStrPrefixC(line, "is_input_of:"))
1326             found_is_input_of      = ajTrue;
1327         else if(ajStrPrefixC(line, "has_output:"))
1328             found_has_output       = ajTrue;
1329         else if(ajStrPrefixC(line, "is_output_of:"))
1330             found_is_output_of     = ajTrue;
1331         else if(ajStrPrefixC(line, "has_source:"))
1332             found_has_source       = ajTrue;
1333         else if(ajStrPrefixC(line, "is_source_of:"))
1334             found_is_source_of     = ajTrue;
1335         else if(ajStrPrefixC(line, "has_identifier:"))
1336             found_has_identifier   = ajTrue;
1337         else if(ajStrPrefixC(line, "is_identifier_of:"))
1338             found_is_identifier_of = ajTrue;
1339         else if(ajStrPrefixC(line, "has_attribute:"))
1340             found_has_attribute    = ajTrue;
1341         else if(ajStrPrefixC(line, "is_attribute_of:"))
1342             found_is_attribute_of  = ajTrue;
1343         else if(ajStrPrefixC(line, "has_part:"))
1344             found_has_part         = ajTrue;
1345         else if(ajStrPrefixC(line, "is_part_of:"))
1346             found_is_part_of       = ajTrue;
1347         else if(ajStrPrefixC(line, "has_format:"))
1348             found_has_format       = ajTrue;
1349         else if(ajStrPrefixC(line, "is_format_of:"))
1350             found_is_format_of     = ajTrue;
1351         else if(ajStrPrefixC(line, "xref:"))
1352             found_xref             = ajTrue;
1353         else if(ajStrPrefixC(line, "is_obsolete:"))
1354             found_is_obsolete      = ajTrue;
1355         else if(ajStrPrefixC(line, "consider:"))
1356             found_consider         = ajTrue;
1357     }
1358     ajFmtPrintF(ouf_log, "\n\n");
1359     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
1360 
1361 
1362     /*  Check for unique names within each namespace */
1363     ajFmtPrintF(ouf_log, "9. UNIQUE NAMES WITHIN EACH NAMESPACE\n");
1364 
1365     for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++)
1366     {
1367         if(ajStrPrefixC(line, "[Typedef]"))
1368             in_typedef=ajTrue;
1369 
1370         /* First build the arrays of names in each namespace */
1371         if(ajStrPrefixC(line, "[Term]"))
1372         {
1373             tmp_term = ajTermNew();
1374             in_typedef=ajFalse;
1375         }
1376         else if(in_typedef)
1377             continue;
1378         else if(ajStrPrefixC(line, "name:"))
1379         {
1380             ajStrParseC(line, ":");
1381             ajStrAssignS(&tmp_term->name, ajStrParseC(NULL, ":"));
1382             ajStrRemoveWhiteExcess(&tmp_term->name);
1383             tmp_term->line = linecnt;
1384         }
1385         else if(ajStrPrefixC(line, "id:"))
1386         {
1387             ajStrParseC(line, ":");
1388             ajStrParseC(NULL, ":");
1389             ajStrAssignS(&tmp_term->id, ajStrParseC(NULL, ":"));
1390             ajStrRemoveWhiteExcess(&tmp_term->id);
1391         }
1392         else if(ajStrPrefixC(line, "namespace:"))
1393         {
1394             ajFmtScanS(line, "%*s %S", &namespace);
1395             for(x=0; x<NNAMESPACES; x++)
1396                 if(ajStrMatchC(namespace, NAMESPACES[x]))
1397                     ajListPushAppend(namespaces[x]->list, tmp_term);
1398         }
1399     }
1400 
1401     for(x=0; x<NNAMESPACES; x++)
1402         namespaces[x]->n = ajListToarray(namespaces[x]->list,
1403                                          (void***) &(namespaces[x]->terms));
1404 
1405     for(x=0; x<NNAMESPACES; x++)
1406     {
1407         ajFmtPrintF(ouf_log, "9.%d %s\n", x+1, NAMESPACES[x]);
1408         for(y=0; y<namespaces[x]->n; y++)
1409             for(z=0; z<namespaces[x]->n; z++)
1410                 if((y!=z) && (ajStrMatchS(namespaces[x]->terms[y]->name,
1411                                           namespaces[x]->terms[z]->name)))
1412                 {
1413                     ajFmtPrintF(ouf_log, "Line %6d : Non-unique name: %S "
1414                                 "(First used on line %d)\n",
1415                                 namespaces[x]->terms[y]->line,
1416                                 namespaces[x]->terms[y]->name,
1417                                 namespaces[x]->terms[z]->line);
1418                     break;
1419                 }
1420     }
1421     ajFmtPrintF(ouf_log, "\n\n");
1422     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
1423 
1424 
1425 /*
1426     for(x=0; x<NNAMESPACES; x++)
1427     {
1428         ajFmtPrint("namespaces[%d]->name): %S\n", x, namespaces[x]->name);
1429         fflush(stdout);
1430     }
1431 */
1432 
1433     /*  Check for valid end-points of relations */
1434     ajFmtPrintF(ouf_log, "10. VALID END-POINTS OF RELATIONS\n");
1435 
1436     for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
1437     {
1438         if(ajStrPrefixC(line, "namespace:"))
1439         {
1440 /*          ajFmtPrint("line === %S\n", line);  fflush(stdout); */
1441 
1442 
1443             /* Identify index of this namespace */
1444             ajFmtScanS(line, "%*s %S", &namespace);
1445 
1446 /*          ajFmtPrint("namespace === %S\n", namespace);  fflush(stdout); */
1447 
1448 
1449 /*
1450   for(x=0; x<NNAMESPACES; x++)
1451     {
1452         ajFmtPrint("namespaces[%d]->name): %S\n", x, namespaces[x]->name);
1453         fflush(stdout);
1454     }
1455 */
1456 
1457             for(idx=0; idx<NNAMESPACES; idx++)
1458             {
1459 /*              ajFmtPrint("namespace === %S\n", namespace);  fflush(stdout);
1460                 ajFmtPrint("namespaces[%d]->name === %S++\n", idx, namespaces[idx]->name);  fflush(stdout);
1461 */
1462 
1463                 if(ajStrMatchS(namespace, namespaces[idx]->name))
1464                     break;
1465                 else
1466                     ajFmtPrint("Failing to match %S to %S\n", namespace, namespaces[idx]->name);
1467             }
1468 
1469             if(idx==NNAMESPACES)
1470                 ajFatal("namespace not found - cannot recover\nline: %S\nnamespace: %S\n", line, namespace);
1471 
1472 /*          if(!ajStrMatchC(mode, "Fix relations"))
1473             continue; */
1474         }
1475 
1476         ajStrAssignClear(&tok);
1477 
1478         if(ajStrPrefixC(line, "relationship:"))
1479             ajFmtScanS(line, "%*S %S", &tok);
1480         else
1481             ajFmtPrintS(&tok, "%S", &tok);
1482 
1483 
1484         for(x=0, done=ajFalse; x<NRELATIONS; x++)
1485             if(ajStrMatchC(tok, RELATIONS[x]))
1486             {
1487                 done=ajTrue;
1488                 break;
1489             }
1490 
1491         /* Relation line */
1492         if(done)
1493         {
1494             if(ajStrCalcCountC(line, ":!")!=3)
1495             {
1496                 ajFmtPrintF(ouf_log, "Line %6d : Cannot check end-point "
1497                             "(Invalid relations line format) (%S)\n",
1498                             linecnt+1, line);
1499 
1500                 if(ajStrMatchC(mode, "Fix relations"))
1501                     ajFatal("Unable to recover from invalid relations line "
1502                             "format");
1503             }
1504 
1505             else
1506             {
1507                 ajStrAssignS(&relation, ajStrParseC(line, ":! "));
1508 
1509                 /* Get second token in line if on "relationship:" line */
1510                 if(ajStrMatchC(relation, "relationship"))
1511                     ajStrAssignS(&relation, ajStrParseC(NULL, ":! "));
1512 
1513                 ajStrParseC(NULL, ":! ");
1514                 ajStrParseC(NULL, ":! ");
1515                 ajStrAssignS(&tmp_name, ajStrParseC(NULL, ":! "));
1516                 ajStrRemoveWhiteExcess(&tmp_name);
1517 
1518 /*
1519                 if(ajStrMatchC(tmp_name, "Protein secondary database"))
1520                 ajFmtPrint("Protein secondary database ...\n");
1521 */
1522 
1523 
1524                 if(ajStrPrefixC(line, "relationship: is_a:"))
1525                 {
1526                     if(!(id=FindTerm(idx, tmp_name, namespaces)))
1527 
1528 
1529                         /*
1530                         ** Must also check children of root node where
1531                         ** start/end-points of is_a relation are in
1532                         ** different namespaces
1533                         */
1534                         /*
1535                         if(!(id=FindTerm(edam_term, tmp_name, namespaces)))
1536                         {
1537                         */
1538                             /*
1539                             ** Must also check in "data" namespace
1540                             ** for identifiers
1541                             */
1542                             /*
1543                             if(idx==edam_identifier)
1544                             {
1545                                 if(!(id=FindTerm(data, tmp_name,
1546                                                  namespaces)))
1547                                     ajFmtPrintF(ouf_log,
1548                                                 "Line %6d : End-point term of "
1549                                                 "relation does not exist (%S)"
1550                                                 "\n",
1551                                                 linecnt+1, line);
1552                             }
1553                             else */
1554 
1555                                 ajFmtPrintF(ouf_log,
1556                                             "Line %6d : End-point term of "
1557                                             "relation does not exist (%S)\n",
1558                                             linecnt+1, line);
1559 /*                        } */
1560 
1561 
1562 /*                  if(ajStrMatchC(tmp_name, "Protein secondary database"))
1563                     {
1564                     if(id!=-1)
1565                     ajFmtPrint("... found !!!!\n");
1566                     else
1567                     ajFmtPrint("... not found !!!!\n");
1568                     } */
1569                 }
1570                 else if(ajStrPrefixC(line, "relationship: has_part:") ||
1571                         ajStrPrefixC(line, "relationship: is_part_of:"))
1572                 {
1573                     if(!(id=FindTerm(idx, tmp_name, namespaces)))
1574                         ajFmtPrintF(ouf_log,
1575                                     "Line %6d : End-point term of relation "
1576                                     "does not exist (%S)\n",
1577                                     linecnt+1, line);
1578                 }
1579                 else if (ajStrPrefixC(line, "relationship: concerns:"))
1580                 {
1581                     if(!(id=FindTerm(entity, tmp_name, namespaces)))
1582                         if(!(id=FindTerm(operation, tmp_name, namespaces)))
1583                             if(!(id=FindTerm(resource, tmp_name, namespaces)))
1584                                 ajFmtPrintF(ouf_log,
1585                                             "Line %6d : End-point term of "
1586                                             "relation does not exist (%S)\n",
1587                                             linecnt+1, line);
1588                 }
1589                 else if (ajStrPrefixC(line, "relationship: is_concern_of:"))
1590                 {
1591                     if(!(id=FindTerm(topic, tmp_name, namespaces)))
1592                         ajFmtPrintF(ouf_log,
1593                                     "Line %6d : End-point term of relation "
1594                                     "does not exist (%S)\n",
1595                                     linecnt+1, line);
1596                 }
1597                 else if (ajStrPrefixC(line, "relationship: has_input:") ||
1598                          ajStrPrefixC(line, "relationship: has_output:") ||
1599                          ajStrPrefixC(line, "relationship: has_attribute:") ||
1600                          ajStrPrefixC(line, "relationship: is_source_of:") ||
1601                          ajStrPrefixC(line, "relationship: is_format_of:"))
1602                 {
1603                     if(!(id=FindTerm(data, tmp_name, namespaces)))
1604                         ajFmtPrintF(ouf_log,
1605                                     "Line %6d : End-point term of relation "
1606                                     "does not exist (%S)\n",
1607                                     linecnt+1, line);
1608                 }
1609                 else if (ajStrPrefixC(line, "relationship: is_input_of:") ||
1610                          ajStrPrefixC(line, "relationship: is_output_of:"))
1611                 {
1612                     if(!(id=FindTerm(operation, tmp_name, namespaces)))
1613                         ajFmtPrintF(ouf_log,
1614                                     "Line %6d : End-point term of relation "
1615                                     "does not exist (%S)\n",
1616                                     linecnt+1, line);
1617                 }
1618                 else if (ajStrPrefixC(line, "relationship: has_source:"))
1619                 {
1620                     if(!(id=FindTerm(resource, tmp_name, namespaces)))
1621                             ajFmtPrintF(ouf_log, "Line %6d : End-point term of relation does not exist (%S)\n",
1622                                         linecnt+1, line);
1623                 }
1624                 else if (ajStrPrefixC(line, "relationship: has_identifier:"))
1625                 {
1626                     /* if(!(id=FindTerm(edam_identifier, tmp_name, namespaces))) */
1627                     if(!(id=FindTerm(data, tmp_name, namespaces)))
1628                         ajFmtPrintF(ouf_log,
1629                                     "Line %6d : End-point term of relation "
1630                                     "does not exist (%S)\n",
1631                                     linecnt+1, line);
1632                 }
1633                 else if (ajStrPrefixC(line, "relationship: is_identifier_of:"))
1634                 {
1635                     if(!(id=FindTerm(entity, tmp_name, namespaces)))
1636                         if(!(id=FindTerm(resource, tmp_name, namespaces)))
1637                                 if(!(id=FindTerm(data, tmp_name,
1638                                                  namespaces)))
1639                                     ajFmtPrintF(ouf_log, "Line %6d : End-point "
1640                                                 "term of relation does not "
1641                                                 "exist (%S)\n",
1642                                                 linecnt+1, line);
1643                 }
1644                 else if (ajStrPrefixC(line,  "relationship: is_attribute_of:"))
1645                 {
1646                     if(!(id=FindTerm(entity, tmp_name, namespaces)))
1647                         ajFmtPrintF(ouf_log, "Line %6d : End-point term of "
1648                                     "relation does not exist (%S)\n",
1649                                     linecnt+1, line);
1650                 }
1651                 else if (ajStrPrefixC(line,  "relationship: has_format:"))
1652                 {
1653                     if(!(id=FindTerm(format, tmp_name, namespaces)))
1654                         ajFmtPrintF(ouf_log, "Line %6d : End-point term of "
1655                                     "relation does not exist (%S)\n",
1656                                     linecnt+1, line);
1657                 }
1658                 /* Check all namespaces for 'consider' field */
1659                 else if (ajStrPrefixC(line, "relationship: consider:"))
1660                 {
1661                     if(!(id=FindTerm(entity, tmp_name, namespaces)))
1662                         if(!(id=FindTerm(topic, tmp_name, namespaces)))
1663                             if(!(id=FindTerm(operation, tmp_name, namespaces)))
1664                                 if(!(id=FindTerm(resource, tmp_name, namespaces)))
1665                                         if(!(id=FindTerm(data, tmp_name, namespaces)))
1666                                             if(!(id=FindTerm(format, tmp_name, namespaces)))
1667                                                 ajFmtPrintF(ouf_log,
1668                                                             "Line %6d : End-point term of "
1669                                                             "relation does not exist (%S)\n",
1670                                                             linecnt+1, line);
1671                 }
1672 
1673                 else
1674                     ajFatal("Unknown relation");
1675 
1676                 /*
1677                   if(ajStrMatchC(tmp_name, "Protein secondary database"))
1678                   {
1679                   if(id!=NULL)
1680                   ajFmtPrint("... found !\n");
1681                   else
1682                   ajFmtPrint("... not found !\n");
1683                   }
1684                 */
1685 
1686                 /* Relation line ... write to output file */
1687                 if(ajStrMatchC(mode, "Fix relations"))
1688                 {
1689                     if(id != NULL)
1690                         ajFmtPrintF(ouf_edam, "relationship: %S: EDAM:%S ! %S\n", relation,
1691                                     id, tmp_name);
1692                     else
1693                         ajFatal("Could not find end-point term of relation "
1694                                 "(or recover) : %S", tmp_name);
1695                 }
1696             }
1697         }
1698         /* Not a relation line ... write to output file */
1699         else if(ajStrMatchC(mode, "Fix relations"))
1700             ajFmtPrintF(ouf_edam, "%S", line);
1701 
1702 
1703     }
1704 
1705     ajFmtPrintF(ouf_log, "\n\n");
1706     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
1707 
1708 
1709     /* Renumber term ids */
1710     if(ajStrMatchC(mode, "Renumber terms"))
1711     {
1712 /*
1713         for(termcnt=0, found_typedef = ajFalse; ajReadline(inf_edam, &line); )
1714         {
1715             if(ajStrPrefixC(line, "[Term]"))
1716                 termcnt++;
1717             else if(ajStrPrefixC(line, "[Typedef]"))
1718                 found_typedef = ajTrue;
1719 
1720             if(ajStrPrefixC(line, "id:") && (!found_typedef))
1721                 ajFmtPrintF(ouf_edam, "id: EDAM:%07d\n", termcnt);
1722             else
1723                 ajFmtPrintF(ouf_edam, "%S", line);
1724         }
1725 */
1726 
1727         for(termcnt=0; ajReadline(inf_edam, &line); )
1728         {
1729             if(ajStrPrefixC(line, "[Term]"))
1730                 termcnt++;
1731 
1732             if(ajStrPrefixC(line, "id:"))
1733                 ajFmtPrintF(ouf_edam, "id: EDAM:%07d\n", termcnt);
1734             else
1735                 ajFmtPrintF(ouf_edam, "%S", line);
1736         }
1737     }
1738     ajFileSeek(inf_edam, 0, 0);    /* Rewind file */
1739 
1740 
1741 
1742 
1743 
1744 
1745     /* Clean up and exit */
1746 /*
1747   for(x=0; x<nfields; x++)
1748   ajStrDel(&fields[x]);
1749   AJFREE(fields);
1750 
1751   for(x=0; x<nids; x++)
1752   ajStrDel(&ids[x]);
1753   AJFREE(ids);
1754 */
1755 
1756 
1757     ajFileClose(&inf_edam);
1758     ajFileClose(&ouf_edam);
1759     ajFileClose(&ouf_log);
1760     if(ouf_xml)
1761         ajFileClose(&ouf_xml);
1762     ajStrDel(&line);
1763     ajStrDel(&tok);
1764     ajStrDel(&name);
1765     ajStrDel(&namespace);
1766     ajStrDel(&relation);
1767     ajStrDel(&tmp_name);
1768     ajStrDel(&tmp_id);
1769 
1770 
1771     for(x=0; x<NNAMESPACES; x++)
1772         ajNamespaceDel(&namespaces[x]);
1773     ajExit();
1774     return 0;
1775 
1776     ajDiroutDel(&xmloutdir);
1777 
1778 }
1779