1 /* @Source edamclean application
2 **
3 ** Validate and fix EDAM OBO ontology
4 **
5 ** @author: Copyright (C) Jon Ison (jison@ebi.ac.uk)
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 **
22 *******************************************************************************
23 **
24 ** EDAMCLEAN documentation
** See http://www.emboss.org
26 **
27 ** Please cite the authors and EMBOSS.
28 **
29 ** Email jison@ebi.ac.uk.
30 **
31 **
32 ** edamclean reads EDAM (OBO format file), validates the file syntax, writes a
33 ** report from parsing and (optionally) fixes the term numbering and
34 ** relations.
35 ** It has optional modes of operation:
36 ** 1. Report only
37 ** 2. Renumber terms
38 ** 3. Fix relations
39 ** 4. Output PURL XML (single file)
40 ** 5. Output PURL XML (one file / term)
41 **
42 ** 1. Report only
43 ** Write an informative report from parsing but do not change the file.
44 ** The following checks are performed:
45 ** i. All ids in the file are unique
46 ** ii. All term names within a namespace are unique
47 ** iii. All values after namespace: are valid (see below).
48 ** iv. All field names are valid; either a standard OBO field, a relation
49 ** or a token that must be ignored (see below).
50 ** v. All terms have the following fields in the order specified
51 ** (optional fields are in parenthesis):
52 ** id, name, namespace, def, (comment), (synonym), is_a
53 ** vi. Terms in specific namespaces have all mandatory relations defined
54 ** and do not have disallowed relations. See "Rules" below.
55 ** vii. End-points (term names) of all relations exist. See "Rules" below.
56 ** This includes checking for mismatches between term id and name
57 ** (in comment) in relations lines
58 ** viii. All id: lines have the format: id: EDAM:0000000
59 ** ix. All def: lines have the format: def: "Some text."
60 ** [EDAM:EBI "EMBRACE definition"]
61 ** x. All relation lines have the format: RelationName: EDAM:0000000
62 ** ! Term name
63 ** xi. All comment: values are *not* in quotes ("")
** xii. All synonym: lines contain exactly two double quotes (")
65 **
66 ** 2. Renumber terms
67 ** Write a report as above.
68 ** Renumber all terms so that they have unique ids, starting with
69 ** EDAM:0000000
70 ** for the first term in the file and increasing by 1 thereon.
71 **
72 ** 3. Fix relations
73 ** Write a report as above. If no errors reported, correct term ids used
74 ** in all relations fields.
75 **
76 ** 4. Output PURL XML (single file)
77 ** Write a report as above. Then write XML output for term submission to PURL.org
78 ** <purls>
79 ** <purl id="/tld/subdomain/testPartial" type="partial">
80 ** -
81 ** <maintainers>
82 ** <uid>jon</uid>
83 ** </maintainers>
84 ** <target url="http://wwwdev.ebi.ac.uk/Tools/dbfetch/dbfetch/edam/0000352"/>
85 ** </purl>
86 ** </purls>
87 **
88 **
89 ** 5. Output PURL XML (one file / term)
90 ** As option 4. above, but write a single XML file per term to the specified directory.
91 **
92 **
93 ** edamclean parameters:
94 ** Name of OBO format file (input)
95 ** Name of OBO format file (output)
96 ** Name of report file (output)
97 ** Boolean (whether to fix the output file)
98 **
99 **
100 ** Standard OBO fields
101 ** id:
102 ** name:
103 ** namespace:
104 ** def:
105 ** comment:
106 ** synonym:
107 ** xref:
108 ** is_obsolete:
109 ** consider:
110 **
111 ** Relations
112 ** is_a
113 ** has_part
114 ** is_part_of
115 ** concerns
116 ** is_concern_of
117 ** has_input
118 ** is_input_of
119 ** has_output
120 ** is_output_of
121 ** has_source
122 ** is_source_of
123 ** has_identifier
124 ** is_identifier_of
125 ** has_attribute
126 ** is_attribute_of
127 ** has_format
128 ** is_format_of
129 **
130 ** Namespace
131 ** entity
132 ** topic
133 ** operation
134 ** resource
135 ** data
136 ** format
137 **
138 **
139 ** Tokens to ignore
140 ** Lines beginning with the following tokens are not parsed and are
141 ** preserved as-is in the output:
142 ** !
143 ** format-version
144 ** date
145 ** data-version
146 ** xref
147 **
148 **
149 ** Rules
150 ** Rules for which term types (rules in a namespace) may or must be related
151 ** to which other term types are described under "Rules by term type" in
152 ** the EDAM on-line documentation.
153 ** See http://www.ebi.ac.uk/~jison/edam.html#6.1.
154 **
155 **
156 ** Notes
157 ** Typedef definitions are *not* validated and are preserved as-is in the
158 ** output.
159 **
160 ** Important!
161 ** 1. The program should not be run in modes 2 or 3 (ie. generate an EDAM
162 ** output file) until all reported problems (from mode 1) have been fixed
163 ** by hand - *except* "Non-unique id" errors! Results are undefined otherwise.
164 ** 2. All [Term] definitions in the input file *must* appear before the
165 ** first [Typedef] definition - terms appearing after are *not* validated
166 ** fully.
167 **
168 ** Known Issues
169 ** edamclean does not detect the fact that the root term of each branch does
** not need to have an is_a relation. Disregard the messages in the log file
171 ** to that effect (this could fairly easily be fixed).
172 **
173 ** edamclean will identify (and warn about) identical term names in cases
** where one of the terms has been made obsolete. Arguably this is the
** correct behaviour.
176 **
177 ** The code that checks for "field in wrong order" does not make all the checks
178 ** it might, e.g. does not check whether other relations appear before is_a.
179 **
180 ** It does not check for multiple (erroneous) comment: lines
181 **
182 ** It does not suppress (irrelevant) error messages for obsolete terms.
183 **
184 ** It does not check for duplicate relationships (where these are not allowed)
185 ** e.g. an exact duplication of a relationship line.
186 ******************************************************************************/
187
188 #include "emboss.h"
189
190
191
192
193
194 /******************************************************************************
195 **
196 ** GLOBAL VARIABLES
197 **
198 ******************************************************************************/
199
/* Number of entries in FIELDS[] */
#define NFIELDS 10

/* Field tags accepted as the first token of a line; main() copies these
** into its array of valid first tokens. */
static const char *FIELDS[NFIELDS] =
{
    /* core term description */
    "id:",  "name:",  "namespace:",  "def:",

    /* optional annotation */
    "comment:",  "synonym:",  "xref:",

    /* obsolescence handling */
    "is_obsolete:",  "consider:",

    /* generic relation tag */
    "relationship:"
};
215
216
217
218
219
/* Number of entries in RELATIONS[]; the 'consider' field is treated as a
** relation and is therefore counted here as well. */
#define NRELATIONS 18

/* Relation tags, listed mostly as forward / inverse pairs */
static const char *RELATIONS[NRELATIONS] =
{
    "is_a:",
    "has_part:",        "is_part_of:",
    "concerns:",        "is_concern_of:",
    "has_input:",       "is_input_of:",
    "has_output:",      "is_output_of:",
    "has_source:",      "is_source_of:",
    "has_identifier:",  "is_identifier_of:",
    "has_attribute:",   "is_attribute_of:",
    "has_format:",      "is_format_of:",
    "consider:"
};
244
245
246
247
/* Number of entries in NAMESPACES[] */
#define NNAMESPACES 6

/* Values accepted after "namespace:"; main() creates one namespace object
** per entry, in this order. */
static const char *NAMESPACES[NNAMESPACES] =
{
    "entity",    "topic",  "operation",
    "resource",  "data",   "format"
};
259
260
261
262
/* Integer index of each namespace; the enumerators are declared in the same
** order as the strings in NAMESPACES[]. */
enum _namespace
{
    entity    = 0,
    topic     = 1,
    operation = 2,
    resource  = 3,
    data      = 4,
    format    = 5
};
272
273
274
275
/* Number of entries in OTHER[] */
#define NOTHER 12

/* Remaining first tokens that main() accepts in addition to FIELDS[] and
** RELATIONS[] */
static const char *OTHER[NOTHER] =
{
    /* comment marker and header tags */
    "!",  "format-version:",  "date:",  "data-version:",  "subsetdef:",

    /* stanza openers */
    "[Term]",  "[Typedef]",

    /* tags found in typedef stanzas */
    "inverse_of:",  "is_anti_symmetric:",  "is_cyclic:",  "is_transitive:",

    /* NULL string to allow empty lines */
    "\0"
};
293
294
295
296
297 /******************************************************************************
298 **
299 ** DATA STRUCTURES
300 **
301 ******************************************************************************/
302
303
304
305
306 /* @datastatic PTerm *******************************************************
307 **
308 ** Term object
309 ** Holds name and identifier of a single EDAM term
310 **
311 ** @alias STerm
312 ** @alias OTerm
313 **
314 ** @attr name [AjPStr] Name of term
315 ** @attr id [AjPStr] Id of term
316 ** @attr line [ajint] Line number of name: field for the term
317 ** @attr Padding [ajint] Padding to alignment boundary
318 ******************************************************************************/
319
320 typedef struct STerm
321 {
322 AjPStr name;
323 AjPStr id;
324 ajint line;
325 ajint Padding;
326 } OTerm;
327 #define PTerm OTerm*
328
329
330
331
332 /* @datastatic PNamespace *****************************************************
333 **
334 ** Namespace object
335 ** Holds name and array of terms for an EDAM namespace.
336 ** Only one copy of the terms is kept in memory (list holds pointers only)
337 **
338 ** @alias SNamespace
339 ** @alias ONamespace
340 **
341 ** @attr name [AjPStr] Name of namespace
342 ** @attr terms [PTerm*] Array of terms
343 ** @attr list [AjPList] List of terms*
344 ** @attr n [ajint] Size of array / list
345 ** @attr Padding [ajint] Padding to alignment boundary
346 *****************************************************************************/
347
348 typedef struct SNamespace
349 {
350 AjPStr name;
351 PTerm *terms;
352 AjPList list;
353 ajint n;
354 ajint Padding;
355 } ONamespace;
356 #define PNamespace ONamespace*
357
358
359
360
361 /******************************************************************************
362 **
363 ** PROTOTYPES
364 **
365 ******************************************************************************/
366
367 static PTerm ajTermNew(void);
368 static PNamespace ajNamespaceNew(void);
369 static void ajTermDel(PTerm *P);
370 static void ajNamespaceDel(PNamespace *P);
371 static const AjPStr FindTerm(ajint namespace, const AjPStr termname,
372 PNamespace *namespaces);
373
374
375
376
377 /******************************************************************************
378 **
379 ** FUNCTIONS
380 **
381 ******************************************************************************/
382
383
384
385
386 /* @funcstatic ajTermNew ***************************************************
387 **
388 ** Term constructor
389 **
390 ** @return [PTerm] New object
391 ** @@
392 ******************************************************************************/
393
ajTermNew(void)394 static PTerm ajTermNew(void)
395 {
396 PTerm ret;
397
398 AJNEW0(ret);
399 ret->name = ajStrNew();
400 ret->id = ajStrNew();
401 ret->line = 0;
402
403 return ret;
404 }
405
406
407
408
409 /* @funcstatic ajNamespaceNew ************************************************
410 **
411 ** Namespace constructor
412 ** The array is *not* allocated.
413 **
414 ** @return [PNamespace] New object
415 ** @@
416 ******************************************************************************/
417
ajNamespaceNew(void)418 static PNamespace ajNamespaceNew(void)
419 {
420 PNamespace ret;
421
422 AJNEW0(ret);
423 ret->name = ajStrNew();
424 ret->terms = NULL;
425 ret->list = ajListstrNew();
426 ret->n = 0;
427
428 return ret;
429 }
430
431
432
433
434 /* @funcstatic ajTermDel ***************************************************
435 **
436 ** Term destructor
437 **
438 ** @param [d] P [PTerm*] Term object to delete
439 ** @return [void]
440 ** @@
441 ******************************************************************************/
442
ajTermDel(PTerm * P)443 static void ajTermDel(PTerm *P)
444 {
445 if(!P)
446 ajFatal("Null arg error 1 in ajTermDel");
447 else if(!(*P))
448 ajFatal("Null arg error 2 in ajTermDel");
449
450 ajStrDel(&(*P)->name);
451 ajStrDel(&(*P)->id);
452
453 AJFREE(*P);
454 *P=NULL;
455
456 return;
457 }
458
459
460
461
462 /* @funcstatic ajNamespaceDel ************************************************
463 **
464 ** Namespace destructor
465 **
466 ** @param [d] P [PNamespace*] Namespace object to delete
467 ** @return [void]
468 ** @@
469 ******************************************************************************/
470
ajNamespaceDel(PNamespace * P)471 static void ajNamespaceDel(PNamespace *P)
472 {
473 int i;
474
475 if(!P)
476 ajFatal("Null arg error 1 in ajNamespaceDel");
477 else if(!(*P))
478 ajFatal("Null arg error 2 in ajNamespaceDel");
479
480 ajStrDel(&(*P)->name);
481
482 if((*P)->n)
483 {
484 for(i=0;i<(*P)->n;i++)
485 ajTermDel(&(*P)->terms[i]);
486
487 AJFREE((*P)->terms);
488 }
489
490 ajListstrFree(&(*P)->list);
491
492 AJFREE(*P);
493 *P=NULL;
494
495 return;
496 }
497
498
499
500
501 /* @funcstatic FindTerm ***************************************************
502 **
503 ** Finds a term within a namespace index and returns its identifier in the
504 ** namespace array.
505 **
506 ** Returns NULL if term is not found
507 **
508 ** @param [r] namespace [ajint] Namespace index as integer
509 ** @param [r] termname [const AjPStr] Name of term
510 ** @param [u] namespaces [PNamespace*] Array of namespace objects
511 ** @return [const AjPStr] Term identifier
512 ** @@
513 ******************************************************************************/
514
FindTerm(ajint namespace,const AjPStr termname,PNamespace * namespaces)515 static const AjPStr FindTerm(ajint namespace, const AjPStr termname,
516 PNamespace *namespaces)
517 {
518 ajint x;
519
520 if(!termname || !namespaces)
521 ajFatal("Bad args to FindTerm");
522
523 for(x=0; x<namespaces[namespace]->n; x++)
524 if(ajStrMatchS(termname, namespaces[namespace]->terms[x]->name))
525 return namespaces[namespace]->terms[x]->id;
526
527 return NULL;
528 }
529
530
531
532
533 /* @prog edamclean ********************************************************
534 **
535 ** Validate and fix EDAM OBO ontology
536 **
537 *****************************************************************************/
538
main(ajint argc,char ** argv)539 int main(ajint argc, char **argv)
540 {
541 /* Variable declarations */
542 AjPFile inf_edam = NULL; /* Name of EDAM (input) file */
543 AjPFile ouf_edam = NULL; /* Name of EDAM (output) file */
544 AjPFile ouf_log = NULL; /* Name of report (output) file */
545 AjPFile ouf_xml = NULL; /* Name of XML (output) file */
546 AjPFile tmp_xml = NULL; /* Temp. XML (output) file */
547 AjPDirout xmloutdir = NULL; /* XML (output) file directory */
548 AjPStr mode = NULL; /* Mode of operation */
549 AjPList list_tmp = NULL; /* Temporary list */
550 AjPStr *fields = NULL; /* Array of valid tokens for first
551 word in line */
552 ajint nfields = 0; /* Size of fields array */
553 AjPStr *ids = NULL; /* Array of all ids in file */
554 ajint nids = 0; /* Size of ids */
555 const AjPStr id = NULL; /* ID of a term */
556
557 AjPStr line = NULL; /* A line from the input file */
558 ajint linecnt = 0; /* Line number of line */
559 ajint termcnt = 0; /* Count of term definitions */
560 AjPStr tok = NULL; /* A token from line */
561 AjBool done = ajFalse; /* Housekeeping */
562 ajint x = 0; /* Housekeeping */
563 ajint y = 0; /* Housekeeping */
564 ajint z = 0; /* Housekeeping */
565 ajint idx = 0; /* Housekeeping */
566 AjPStr name = NULL; /* Name of a term */
567 AjPStr namespace = NULL; /* Namespace of a term */
568 AjPStr relation = NULL; /* Relationship name, e.g. "is_a" */
569 AjPStr tmp_name = NULL; /* Temp. name of a term */
570 AjPStr tmp_id = NULL; /* Temp. id of a term */
571 AjPStr tmp_str = NULL; /* Temp. string */
572 PTerm tmp_term = NULL; /* Temp. term pointer */
573 ajint tmp_line = 0; /* Temp. line number */
574 PNamespace namespaces[NNAMESPACES]; /* Array of namespace objects */
575
576 AjBool done_first = ajFalse; /* Housekeeping ... read first term */
577 AjBool first = ajFalse; /* Housekeeping ... on first term */
578 AjBool found_id = ajFalse;
579 AjBool in_typedef = ajFalse; /* In a [Typedef] statement */
580
581 AjBool found_name = ajFalse;
582 AjBool found_namespace = ajFalse;
583 AjBool found_def = ajFalse;
584 AjBool found_comment = ajFalse;
585 AjBool found_synonym = ajFalse;
586 AjBool found_xref = ajFalse;
587 AjBool found_is_obsolete = ajFalse;
588 AjBool found_consider = ajFalse;
589 AjBool found_isa = ajFalse;
590 AjBool found_concerns = ajFalse;
591 AjBool found_is_concern_of = ajFalse;
592 AjBool found_has_input = ajFalse;
593 AjBool found_is_input_of = ajFalse;
594 AjBool found_has_output = ajFalse;
595 AjBool found_is_output_of = ajFalse;
596 AjBool found_has_source = ajFalse;
597 AjBool found_is_source_of = ajFalse;
598 AjBool found_has_identifier = ajFalse;
599 AjBool found_is_identifier_of = ajFalse;
600 AjBool found_has_attribute = ajFalse;
601 AjBool found_is_attribute_of = ajFalse;
602 AjBool found_has_part = ajFalse;
603 AjBool found_is_part_of = ajFalse;
604 AjBool found_has_format = ajFalse;
605 AjBool found_is_format_of = ajFalse;
606
607
608
609
610 /* Read data from acd */
611 embInit("edamclean", argc, argv);
612
613
614 /* ACD data handling */
615 inf_edam = ajAcdGetInfile("edaminfile");
616 ouf_edam = ajAcdGetOutfile("edamoutfile");
617 ouf_log = ajAcdGetOutfile("logfile");
618 ouf_xml = ajAcdGetOutfile("xmlfile");
619 xmloutdir = ajAcdGetOutdir("xmloutdir");
620 mode = ajAcdGetSelectSingle("mode");
621 /* taxdir = ajAcdGetDirectory("taxdirectory"); */
622
623 ajFmtPrint("MODE : %S\n", mode);
624
625 /*
626 ajTaxLoad(taxdir);
627 ajOboParseObofile(inf_edam, "noidorder,nounkid");
628 ajFileSeek(inf_edam, 0, 0);
629 embExit(); */
630
631 /* Memory allocation */
632 line = ajStrNew();
633 tok = ajStrNew();
634 name = ajStrNew();
635 namespace = ajStrNew();
636 relation = ajStrNew();
637 tmp_name = ajStrNew();
638 tmp_id = ajStrNew();
639
640 for(x=0; x<NNAMESPACES; x++)
641 {
642 namespaces[x] = ajNamespaceNew();
643 ajStrAssignC(&(namespaces[x]->name), NAMESPACES[x]);
644 }
645
646
647
648 /* Check for valid first tokens */
649 /* First, write array of valid tokens for first word in line */
650 list_tmp = ajListstrNew();
651
652 for(x=0; x<NFIELDS; x++)
653 {
654 tmp_str = ajStrNew();
655 ajStrAssignC(&tmp_str, FIELDS[x]);
656 ajListstrPushAppend(list_tmp, tmp_str);
657 }
658
659 for(x=0; x<NRELATIONS; x++)
660 {
661 tmp_str = ajStrNew();
662 ajStrAssignC(&tmp_str, RELATIONS[x]);
663 ajListstrPushAppend(list_tmp, tmp_str);
664 }
665
666 for(x=0; x<NOTHER; x++)
667 {
668 tmp_str = ajStrNew();
669 ajStrAssignC(&tmp_str, OTHER[x]);
670 ajListstrPushAppend(list_tmp, tmp_str);
671 }
672
673 nfields = ajListstrToarray(list_tmp, &fields);
674 ajListstrFree(&list_tmp);
675
676 ajFmtPrintF(ouf_log, "1. FIRST TOKEN IN LINES\n");
677
678
679
680
681
682
683
684 /* Output PURL XML */
685 if(ajStrMatchC(mode, "Output PURL XML (single file)") ||
686 ajStrMatchC(mode, "Output PURL XML (one file / term)"))
687 {
688 if(ajStrMatchC(mode, "Output PURL XML (single file)"))
689 {
690 tmp_xml = ouf_xml;
691 ajFmtPrintF(tmp_xml, "<purls>\n");
692 }
693
694
695 for(in_typedef=ajFalse; ajReadline(inf_edam, &line); )
696 {
697 if(ajStrPrefixC(line, "[Typedef]"))
698 in_typedef=ajTrue;
699 else if(ajStrPrefixC(line, "[Term]"))
700 in_typedef=ajFalse;
701
702 if(in_typedef)
703 continue;
704
705 if(ajStrPrefixC(line, "namespace:"))
706 {
707
708 if(ajStrMatchC(mode, "Output PURL XML (one file / term)"))
709 {
710 if(!(tmp_xml=ajFileNewOutNameDirS(tmp_id, xmloutdir)))
711 ajFatal("Could not create file");
712 else
713 ajFmtPrintF(tmp_xml, "<purls>\n");
714 }
715
716
717
718 ajStrAssignClear(&tok);
719 ajFmtScanS(line, "%*s %S", &tok);
720 ajStrRemoveWhite(&tok);
721 ajFmtPrintF(tmp_xml,
722 "<purl id=\"/edam/%S/%S\" type=\"partial\">\n"
723 "<maintainers>\n"
724 "<uid>jon</uid>\n"
725 "</maintainers>\n"
726 "<target url=\"http://wwwdev.ebi.ac.uk/Tools/dbfetch/dbfetch/edam/%S\"/>\n"
727 "</purl>\n", tok, tmp_id, tmp_id);
728
729
730 if(ajStrMatchC(mode, "Output PURL XML (one file / term)"))
731 {
732 ajFmtPrintF(tmp_xml, "</purls>\n");
733 ajFileClose(&tmp_xml);
734 }
735
736 }
737
738 if(ajStrPrefixC(line, "id:"))
739 {
740 ajStrParseC(line, ":");
741 ajStrParseC(NULL, ":");
742 ajStrAssignS(&tmp_id, ajStrParseC(NULL, ":"));
743 ajStrRemoveWhite(&tmp_id);
744 }
745 }
746
747 if(ajStrMatchC(mode, "Output PURL XML (single file)"))
748 ajFmtPrintF(tmp_xml, "</purls>\n");
749 }
750
751 exit(0);
752
753
754
755
756
757 for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
758 {
759 ajStrAssignClear(&tok);
760 ajFmtScanS(line, "%S", &tok);
761
762 for(x=0, done=ajFalse;x<nfields;x++)
763 if(ajStrMatchS(tok, fields[x]))
764 {
765 done = ajTrue;
766 break;
767 }
768
769 if(!done)
770 ajFmtPrintF(ouf_log, "Line %6d : Invalid 1st token: %S (%S)\n",
771 linecnt+1, tok, line);
772 }
773
774 ajFmtPrintF(ouf_log, "\n\n");
775 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
776
777
778 /* Check for valid namespace: values */
779 ajFmtPrintF(ouf_log, "2. NAMESPACE VALUES\n");
780 for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
781 {
782 if(ajStrPrefixC(line, "namespace:"))
783 {
784 ajStrAssignClear(&tok);
785 ajFmtScanS(line, "%*s %S", &tok);
786
787 for(x=0, done=ajFalse;x<NNAMESPACES;x++)
788 if(ajStrMatchC(tok, NAMESPACES[x]))
789 {
790 done = ajTrue;
791 break;
792 }
793
794 if(!done)
795 ajFmtPrintF(ouf_log, "Line %6d : Invalid namespace: %S "
796 "(%S)\n", linecnt+1, tok, line);
797 }
798
799 }
800
801
802 ajFmtPrintF(ouf_log, "\n\n");
803 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
804
805
806
807 /* Check for valid comment: and synonym: values */
808 ajFmtPrintF(ouf_log, "3. COMMENT / SYNONYM VALUES\n");
809
810 for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
811 {
812 if(ajStrPrefixC(line, "comment:"))
813 if(ajStrFindAnyK(line, '\"') != -1)
814 ajFmtPrintF(ouf_log, "Line %6d : Invalid quote in line "
815 "(%S)\n", linecnt+1, line);
816 }
817 ajFmtPrintF(ouf_log, "\n\n");
818 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
819
820
821 for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
822 {
823 if(ajStrPrefixC(line, "synonym:"))
824 if(ajStrCalcCountK(line, '\"') != 2)
825 ajFmtPrintF(ouf_log, "Line %6d : Wrong number of quotes in line "
826 "(%S)\n", linecnt+1, line);
827 }
828 ajFmtPrintF(ouf_log, "\n\n");
829 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
830
831
832 /* Check for id: line format (also build list of term ids) */
833 list_tmp = ajListstrNew();
834 ajFmtPrintF(ouf_log, "4. id: LINE FORMAT\n");
835
836 for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++)
837 {
838 /* Stop checking once first [Typedef] line is found. */
839 /* if(ajStrPrefixC(line, "[Typedef]"))
840 break; */
841
842 if(ajStrPrefixC(line, "[Typedef]"))
843 in_typedef=ajTrue;
844 else if(ajStrPrefixC(line, "[Term]"))
845 in_typedef=ajFalse;
846
847 if(ajStrPrefixC(line, "id:"))
848 {
849 if(in_typedef)
850 continue;
851
852 if(ajStrCalcCountC(line, ":")!=2)
853 ajFmtPrintF(ouf_log, "Line %6d : Invalid id: line format - "
854 "wrong number of colon (:) (%S) \n",
855 linecnt+1, line);
856 else
857 {
858 ajStrParseC(line, ":");
859 ajStrAssignS(&tok, ajStrParseC(NULL, ":"));
860 ajStrRemoveWhite(&tok);
861 if(!ajStrMatchC(tok, "EDAM"))
862 ajFmtPrintF(ouf_log, "Line %6d : Invalid id: line format "
863 "- no 'EDAM' token (%S)\n", linecnt+1, line);
864
865 ajStrAssignS(&tok, ajStrParseC(NULL, ":"));
866 ajStrRemoveWhite(&tok);
867
868 if(ajStrGetLen(tok) != 7)
869 ajFmtPrintF(ouf_log, "Line %6d : Invalid id: line format "
870 "- id number wrong (%S)\n", linecnt+1, line);
871
872 tmp_str = ajStrNew();
873 ajStrAssignS(&tmp_str, tok);
874
875 ajListstrPushAppend(list_tmp, tmp_str);
876 }
877 }
878 }
879 nids = ajListstrToarray(list_tmp, &ids);
880 ajListstrFree(&list_tmp);
881 ajFmtPrintF(ouf_log, "\n\n");
882 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
883
884 /* Check for def: line format */
885 ajFmtPrintF(ouf_log, "5. def: LINE FORMAT\n");
886 for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++)
887 {
888 /* Stop checking once first [Typedef] line is found. */
889 /* if(ajStrPrefixC(line, "[Typedef]"))
890 break; */
891
892 if(ajStrPrefixC(line, "[Typedef]"))
893 in_typedef=ajTrue;
894 else if(ajStrPrefixC(line, "[Term]"))
895 in_typedef=ajFalse;
896
897 if(ajStrPrefixC(line, "def:"))
898 {
899 if(in_typedef)
900 continue;
901
902 if(ajStrCalcCountC(line, ":[")!=3)
903 ajFmtPrintF(ouf_log, "Line %6d : Invalid def: line format - "
904 "wrong number of colon ':' or open-bracket '[' "
905 "(%S)\n", linecnt+1, line);
906 else
907 {
908 ajStrParseC(line, ":[");
909 ajStrAssignS(&tok, ajStrParseC(NULL, ":["));
910
911 /* Check for 2 double quotes */
912 if(ajStrCalcCountC(tok, "\"")!=2)
913 ajFmtPrintF(ouf_log, "Line %6d : Invalid def: line format "
914 "- wrong number of double quotes (%S)\n",
915 linecnt+1, line);
916
917 /* Careful - different tokeniser used (no colon) ! */
918 ajStrAssignClear(&tok);
919 ajStrAssignS(&tok, ajStrParseC(NULL, "["));
920 ajStrRemoveLastNewline(&tok);
921 ajStrRemoveWhiteExcess(&tok);
922
923 /* (!ajStrMatchC(tok, "EDAM:EBI \"EMBRACE definition\"]"))) */
924
925 /* Check for line suffix */
926 if((!ajStrPrefixC(tok, "EDAM:")) ||
927 (!ajStrSuffixC(tok, "\"EMBRACE definition\"]")))
928 ajFmtPrintF(ouf_log, "Line %6d : Invalid def: line "
929 "format - invalid suffix (%S)\n",
930 linecnt+1, line);
931 }
932 }
933 }
934 ajFmtPrintF(ouf_log, "\n\n");
935 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
936
937
938 /* Check for relations line format */
939 ajFmtPrintF(ouf_log, "6. RELATIONS LINE FORMAT\n");
940
941 for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++)
942 {
943 if(ajStrPrefixC(line, "[Typedef]"))
944 in_typedef=ajTrue;
945 else if(ajStrPrefixC(line, "[Term]"))
946 in_typedef=ajFalse;
947
948 if(in_typedef)
949 continue;
950
951 ajStrAssignClear(&relation);
952
953 if(ajStrPrefixC(line, "relationship:"))
954 ajFmtScanS(line, "%*S %S", &relation);
955 else
956 ajFmtScanS(line, "%S", &relation);
957
958 for(x=0; x<NRELATIONS; x++)
959 {
960 if(ajStrMatchC(relation, RELATIONS[x]))
961 {
962
963
964 if(((ajStrPrefixC(line, "relationship:")) &&
965 (ajStrCalcCountC(line, "!:")!=4)) ||
966 ((!ajStrPrefixC(line, "relationship:")) &&
967 (ajStrCalcCountC(line, "!:")!=3)))
968 {
969 ajFmtPrintF(ouf_log, "Line %6d : Invalid relations line "
970 "format1 (%S)\n", linecnt+1, line);
971 /* ajFmtPrint("relation: %S RELATIONS[%d]: %s", relation, x, RELATIONS[x]); */
972 }
973 else
974 {
975 ajStrAssignS(&tok, ajStrParseC(line, ":! "));
976 ajStrRemoveWhite(&tok);
977
978
979 /* Discard first "relationship:" token and get next one */
980 if(ajStrMatchC(tok, "relationship"))
981 ajStrParseC(NULL, ":! ");
982
983 /* Get supposed "EDAM" token */
984 ajStrAssignS(&tok, ajStrParseC(NULL, ":! "));
985 ajStrRemoveWhite(&tok);
986
987 /* Check for "EDAM" */
988 if(!ajStrMatchC(tok, "EDAM"))
989 ajFmtPrintF(ouf_log, "Line %6d : Invalid relations "
990 "line format2 (%S)\n", linecnt+1, line);
991
992 /* Check for 7 digit number */
993 ajStrAssignS(&tok, ajStrParseC(NULL, ":! "));
994 ajStrRemoveWhite(&tok);
995
996 if(ajStrGetLen(tok) != 7)
997 ajFmtPrintF(ouf_log, "Line %6d : Invalid relations "
998 "line format3 (%S)\n", linecnt+1, line);
999
1000 /* Check for non-NULL terminal comment */
1001 ajStrAssignS(&tok, ajStrParseC(NULL, ":! "));
1002
1003 if(ajStrGetLen(tok) == 0)
1004 ajFmtPrintF(ouf_log, "Line %6d : Invalid relations "
1005 "line format4 (%S)\n", linecnt+1, line);
1006 break;
1007
1008 }
1009 }
1010 }
1011 }
1012 ajFmtPrintF(ouf_log, "\n\n");
1013 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
1014
1015
1016
1017 /* Check for unique ids */
1018 ajFmtPrintF(ouf_log, "7. UNIQUE IDS\n");
1019 for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
1020 {
1021 /* Stop checking once first [Typedef] line is found. */
1022 /* if(ajStrPrefixC(line, "[Typedef]"))
1023 break; */
1024
1025 if(ajStrPrefixC(line, "id:"))
1026 {
1027 ajStrParseC(line, ":");
1028 ajStrParseC(NULL, ":");
1029 ajStrAssignClear(&tok);
1030 ajStrAssignS(&tok, ajStrParseC(NULL, ":"));
1031
1032 ajStrRemoveWhite(&tok);
1033
1034 for(x=0, y=0; x<nids; x++)
1035 {
1036 if(ajStrMatchS(tok, ids[x]))
1037 {
1038 y++;
1039 if(y>1)
1040 {
1041 ajFmtPrintF(ouf_log, "Line %6d : Non-unique id: %S "
1042 "%S\n", linecnt+1, tok, line);
1043 break;
1044 }
1045 }
1046 }
1047
1048 }
1049 }
1050
1051 ajFmtPrintF(ouf_log, "\n\n");
1052 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
1053
1054 /* Check for mandatory fields / field order */
1055 ajFmtPrintF(ouf_log, "8. STANDARD MANDATORY FIELDS AND FIELD ORDER\n");
1056
1057 for(in_typedef=ajFalse, first = ajTrue, done_first=ajFalse, linecnt=0;
1058 ajReadline(inf_edam, &line); linecnt++)
1059 {
1060 /* id, name, namespace, def, (comment), (synonym), is_a */
1061
1062 /* Stop checking once first [Typedef] line is found. */
1063 /* if(ajStrPrefixC(line, "[Typedef]"))
1064 break; */
1065
1066 if(ajStrPrefixC(line, "[Typedef]"))
1067 in_typedef=ajTrue;
1068
1069 if(ajStrPrefixC(line, "[Term]"))
1070 {
1071 in_typedef=ajFalse;
1072
1073 /* Process previous term */
1074 if(done_first)
1075 {
1076 if(!found_id)
1077 ajFmtPrintF(ouf_log, "Line %6d : No id: field in "
1078 "term\n", tmp_line);
1079 if(!found_name)
1080 ajFmtPrintF(ouf_log, "Line %6d : No name: field in "
1081 "term\n", tmp_line);
1082 if(!found_namespace)
1083 ajFmtPrintF(ouf_log, "Line %6d : No namespace: field in "
1084 "term\n", tmp_line);
1085 if(!found_def)
1086 ajFmtPrintF(ouf_log, "Line %6d : No def: field in "
1087 "term\n", tmp_line);
1088 /* No is_a needed for first term in file or for obsolete terms*/
1089 if((!found_isa) && (!first) && (!found_is_obsolete))
1090 ajFmtPrintF(ouf_log, "Line %6d : No is_a: field in "
1091 "term\n", tmp_line);
1092
1093 first = ajFalse;
1094
1095
1096 /* entity */
1097 if(ajStrMatchC(namespace, NAMESPACES[0]))
1098 {
1099 if(found_concerns || found_has_input ||
1100 found_is_input_of || found_has_output ||
1101 found_is_output_of || found_has_source ||
1102 found_is_source_of || found_is_identifier_of ||
1103 found_is_format_of || found_has_format ||
1104 found_is_attribute_of)
1105 ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1106 "for term in this namespace\n", tmp_line);
1107
1108 }
1109 /* topic */
1110 else if(ajStrMatchC(namespace, NAMESPACES[1]))
1111 {
1112 if(!found_concerns)
1113 ajFmtPrintF(ouf_log, "Line %6d : No concerns: relation in term\n", tmp_line);
1114
1115 if(found_is_concern_of || found_has_input ||
1116 found_is_input_of || found_has_output ||
1117 found_is_output_of || found_has_source ||
1118 found_is_source_of || found_has_identifier ||
1119 found_is_identifier_of || found_has_attribute ||
1120 found_is_attribute_of || found_has_part ||
1121 found_is_format_of || found_has_format ||
1122 found_is_part_of)
1123 ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1124 "for term in this namespace\n", tmp_line);
1125 }
1126 /* operation */
1127 else if(ajStrMatchC(namespace, NAMESPACES[2]))
1128 {
1129 if(!found_is_concern_of)
1130 ajFmtPrintF(ouf_log, "Line %6d : No is_concern_of: relation in term\n", tmp_line);
1131
1132 if(found_concerns || found_is_input_of ||
1133 found_is_output_of || found_has_source ||
1134 found_is_source_of || found_has_identifier ||
1135 found_is_identifier_of || found_has_attribute ||
1136 found_is_attribute_of || found_has_part ||
1137 found_is_format_of || found_has_format ||
1138 found_is_part_of)
1139 ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1140 "for term in this namespace\n", tmp_line);
1141 }
1142 /* resource */
1143 else if(ajStrMatchC(namespace, NAMESPACES[3]))
1144 {
1145 if(!found_is_source_of)
1146 ajFmtPrintF(ouf_log, "Line %6d : No is_source_of: "
1147 "relation in term\n", tmp_line);
1148
1149 if(found_concerns || found_has_input ||
1150 found_is_input_of || found_has_output ||
1151 found_is_output_of || found_has_source ||
1152 found_is_identifier_of || found_has_attribute ||
1153 found_is_format_of || found_has_format ||
1154 found_is_attribute_of)
1155 ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1156 "for term in this namespace\n", tmp_line);
1157 }
1158 /* data */
1159 else if(ajStrMatchC(namespace, NAMESPACES[4]))
1160 {
1161 if(found_concerns || found_is_concern_of ||
1162 found_has_input || found_has_output ||
1163 found_is_source_of || found_has_attribute ||
1164 found_is_format_of)
1165 ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1166 "for term in this namespace\n", tmp_line);
1167 }
1168
1169 /* format */
1170 else if(ajStrMatchC(namespace, NAMESPACES[5]))
1171 {
1172 if(found_concerns ||
1173 found_is_concern_of ||
1174 found_has_input ||
1175 found_is_input_of ||
1176 found_has_output ||
1177 found_is_output_of ||
1178 found_has_source ||
1179 found_is_source_of ||
1180 found_has_identifier ||
1181 found_is_identifier_of ||
1182 found_has_attribute ||
1183 found_is_attribute_of ||
1184 found_has_part ||
1185 found_is_part_of ||
1186 found_has_format )
1187 ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1188 "for term in this namespace\n", tmp_line);
1189
1190
1191
1192
1193 }
1194 /* edam_identifier */
1195 /*
1196 else if(ajStrMatchC(namespace, NAMESPACES[6]))
1197 {
1198 if(!found_is_identifier_of)
1199 ajFmtPrintF(ouf_log, "Line %6d : No is_identifier_of: "
1200 "relation in term\n", tmp_line);
1201
1202 if(found_concerns || found_is_concern_of ||
1203 found_has_input || found_has_output ||
1204 found_is_source_of || found_has_identifier ||
1205 found_has_attribute || found_is_attribute_of ||
1206 found_is_format_of || found_has_format ||
1207 found_has_part || found_is_part_of)
1208 ajFmtPrintF(ouf_log, "Line %6d : Relation not allowed "
1209 "for term in this namespace\n", tmp_line);
1210 }
1211 */
1212 }
1213
1214
1215
1216 tmp_line = linecnt+1;
1217
1218 found_id = ajFalse;
1219 found_name = ajFalse;
1220 found_namespace = ajFalse;
1221 found_def = ajFalse;
1222 found_comment = ajFalse;
1223 found_synonym = ajFalse;
1224 found_xref = ajFalse;
1225 found_is_obsolete = ajFalse;
1226 found_consider = ajFalse;
1227 found_isa = ajFalse;
1228 found_concerns = ajFalse;
1229 found_is_concern_of = ajFalse;
1230 found_has_input = ajFalse;
1231 found_is_input_of = ajFalse;
1232 found_has_output = ajFalse;
1233 found_is_output_of = ajFalse;
1234 found_has_source = ajFalse;
1235 found_is_source_of = ajFalse;
1236 found_has_identifier = ajFalse;
1237 found_is_identifier_of = ajFalse;
1238 found_has_attribute = ajFalse;
1239 found_is_attribute_of = ajFalse;
1240 found_has_part = ajFalse;
1241 found_is_part_of = ajFalse;
1242 found_has_format = ajFalse;
1243 found_is_format_of = ajFalse;
1244
1245
1246 done_first = ajTrue;
1247 }
1248 else if(in_typedef)
1249 continue;
1250 else if(ajStrPrefixC(line, "id:"))
1251 {
1252 found_id = ajTrue;
1253
1254 if(found_name || found_namespace || found_def || found_comment ||
1255 found_synonym || found_isa || found_xref || found_is_obsolete
1256 || found_consider)
1257 ajFmtPrintF(ouf_log, "Line %6d : id: field in wrong order "
1258 "(%S)\n", linecnt+1, line);
1259 }
1260
1261 else if(ajStrPrefixC(line, "name:"))
1262 {
1263 found_name = ajTrue;
1264 if( (!found_id) || found_namespace || found_def || found_comment ||
1265 found_synonym || found_isa || found_xref || found_is_obsolete
1266 || found_consider)
1267 ajFmtPrintF(ouf_log, "Line %6d : name: field in wrong order "
1268 "(%S)\n", linecnt+1, line);
1269 }
1270 else if(ajStrPrefixC(line, "namespace:"))
1271 {
1272 found_namespace = ajTrue;
1273 ajFmtScanS(line, "%*s %S", &namespace);
1274
1275 if((!found_id) || (!found_name) || found_def || found_comment ||
1276 found_synonym || found_isa || found_xref || found_is_obsolete
1277 || found_consider)
1278 ajFmtPrintF(ouf_log, "Line %6d : namespace: field in wrong "
1279 "order (%S)\n", linecnt+1, line);
1280 }
1281 else if(ajStrPrefixC(line, "def:"))
1282 {
1283 found_def = ajTrue;
1284
1285 if((!found_id) || (!found_name) || (!found_namespace) ||
1286 found_comment || found_synonym || found_isa || found_xref
1287 || found_is_obsolete || found_consider)
1288 ajFmtPrintF(ouf_log, "Line %6d : def: field in wrong order "
1289 "(%S)\n", linecnt+1, line);
1290 }
1291 else if(ajStrPrefixC(line, "comment:"))
1292 {
1293 found_comment = ajTrue;
1294
1295 if((!found_id) || (!found_name) || (!found_namespace) ||
1296 (!found_def) || found_synonym || found_isa || found_xref
1297 || found_is_obsolete || found_consider)
1298 ajFmtPrintF(ouf_log, "Line %6d : comment: field in wrong "
1299 "order (%S)\n", linecnt+1, line);
1300 }
1301 else if(ajStrPrefixC(line, "synonym:"))
1302 {
1303 found_synonym = ajTrue;
1304 if((!found_id) || (!found_name) || (!found_namespace) ||
1305 (!found_def) || found_isa || found_xref || found_is_obsolete
1306 || found_consider)
1307 ajFmtPrintF(ouf_log, "Line %6d : synonym: field in wrong "
1308 "order (%S)\n", linecnt+1, line);
1309 }
1310 else if(ajStrPrefixC(line, "is_a:"))
1311 {
1312 found_isa = ajTrue;
1313 if((!found_id) || (!found_name) || (!found_namespace) ||
1314 (!found_def) || found_is_obsolete
1315 || found_consider)
1316 ajFmtPrintF(ouf_log, "Line %6d : is_a: field in wrong order "
1317 "(%S)\n", linecnt+1, line);
1318 }
1319 else if(ajStrPrefixC(line, "concerns:"))
1320 found_concerns = ajTrue;
1321 else if(ajStrPrefixC(line, "is_concern_of:"))
1322 found_is_concern_of = ajTrue;
1323 else if(ajStrPrefixC(line, "has_input:"))
1324 found_has_input = ajTrue;
1325 else if(ajStrPrefixC(line, "is_input_of:"))
1326 found_is_input_of = ajTrue;
1327 else if(ajStrPrefixC(line, "has_output:"))
1328 found_has_output = ajTrue;
1329 else if(ajStrPrefixC(line, "is_output_of:"))
1330 found_is_output_of = ajTrue;
1331 else if(ajStrPrefixC(line, "has_source:"))
1332 found_has_source = ajTrue;
1333 else if(ajStrPrefixC(line, "is_source_of:"))
1334 found_is_source_of = ajTrue;
1335 else if(ajStrPrefixC(line, "has_identifier:"))
1336 found_has_identifier = ajTrue;
1337 else if(ajStrPrefixC(line, "is_identifier_of:"))
1338 found_is_identifier_of = ajTrue;
1339 else if(ajStrPrefixC(line, "has_attribute:"))
1340 found_has_attribute = ajTrue;
1341 else if(ajStrPrefixC(line, "is_attribute_of:"))
1342 found_is_attribute_of = ajTrue;
1343 else if(ajStrPrefixC(line, "has_part:"))
1344 found_has_part = ajTrue;
1345 else if(ajStrPrefixC(line, "is_part_of:"))
1346 found_is_part_of = ajTrue;
1347 else if(ajStrPrefixC(line, "has_format:"))
1348 found_has_format = ajTrue;
1349 else if(ajStrPrefixC(line, "is_format_of:"))
1350 found_is_format_of = ajTrue;
1351 else if(ajStrPrefixC(line, "xref:"))
1352 found_xref = ajTrue;
1353 else if(ajStrPrefixC(line, "is_obsolete:"))
1354 found_is_obsolete = ajTrue;
1355 else if(ajStrPrefixC(line, "consider:"))
1356 found_consider = ajTrue;
1357 }
1358 ajFmtPrintF(ouf_log, "\n\n");
1359 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
1360
1361
1362 /* Check for unique names within each namespace */
1363 ajFmtPrintF(ouf_log, "9. UNIQUE NAMES WITHIN EACH NAMESPACE\n");
1364
1365 for(in_typedef=ajFalse, linecnt=0; ajReadline(inf_edam, &line); linecnt++)
1366 {
1367 if(ajStrPrefixC(line, "[Typedef]"))
1368 in_typedef=ajTrue;
1369
1370 /* First build the arrays of names in each namespace */
1371 if(ajStrPrefixC(line, "[Term]"))
1372 {
1373 tmp_term = ajTermNew();
1374 in_typedef=ajFalse;
1375 }
1376 else if(in_typedef)
1377 continue;
1378 else if(ajStrPrefixC(line, "name:"))
1379 {
1380 ajStrParseC(line, ":");
1381 ajStrAssignS(&tmp_term->name, ajStrParseC(NULL, ":"));
1382 ajStrRemoveWhiteExcess(&tmp_term->name);
1383 tmp_term->line = linecnt;
1384 }
1385 else if(ajStrPrefixC(line, "id:"))
1386 {
1387 ajStrParseC(line, ":");
1388 ajStrParseC(NULL, ":");
1389 ajStrAssignS(&tmp_term->id, ajStrParseC(NULL, ":"));
1390 ajStrRemoveWhiteExcess(&tmp_term->id);
1391 }
1392 else if(ajStrPrefixC(line, "namespace:"))
1393 {
1394 ajFmtScanS(line, "%*s %S", &namespace);
1395 for(x=0; x<NNAMESPACES; x++)
1396 if(ajStrMatchC(namespace, NAMESPACES[x]))
1397 ajListPushAppend(namespaces[x]->list, tmp_term);
1398 }
1399 }
1400
1401 for(x=0; x<NNAMESPACES; x++)
1402 namespaces[x]->n = ajListToarray(namespaces[x]->list,
1403 (void***) &(namespaces[x]->terms));
1404
1405 for(x=0; x<NNAMESPACES; x++)
1406 {
1407 ajFmtPrintF(ouf_log, "9.%d %s\n", x+1, NAMESPACES[x]);
1408 for(y=0; y<namespaces[x]->n; y++)
1409 for(z=0; z<namespaces[x]->n; z++)
1410 if((y!=z) && (ajStrMatchS(namespaces[x]->terms[y]->name,
1411 namespaces[x]->terms[z]->name)))
1412 {
1413 ajFmtPrintF(ouf_log, "Line %6d : Non-unique name: %S "
1414 "(First used on line %d)\n",
1415 namespaces[x]->terms[y]->line,
1416 namespaces[x]->terms[y]->name,
1417 namespaces[x]->terms[z]->line);
1418 break;
1419 }
1420 }
1421 ajFmtPrintF(ouf_log, "\n\n");
1422 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
1423
1424
1425 /*
1426 for(x=0; x<NNAMESPACES; x++)
1427 {
1428 ajFmtPrint("namespaces[%d]->name): %S\n", x, namespaces[x]->name);
1429 fflush(stdout);
1430 }
1431 */
1432
1433 /* Check for valid end-points of relations */
1434 ajFmtPrintF(ouf_log, "10. VALID END-POINTS OF RELATIONS\n");
1435
1436 for(linecnt=0; ajReadline(inf_edam, &line); linecnt++)
1437 {
1438 if(ajStrPrefixC(line, "namespace:"))
1439 {
1440 /* ajFmtPrint("line === %S\n", line); fflush(stdout); */
1441
1442
1443 /* Identify index of this namespace */
1444 ajFmtScanS(line, "%*s %S", &namespace);
1445
1446 /* ajFmtPrint("namespace === %S\n", namespace); fflush(stdout); */
1447
1448
1449 /*
1450 for(x=0; x<NNAMESPACES; x++)
1451 {
1452 ajFmtPrint("namespaces[%d]->name): %S\n", x, namespaces[x]->name);
1453 fflush(stdout);
1454 }
1455 */
1456
1457 for(idx=0; idx<NNAMESPACES; idx++)
1458 {
1459 /* ajFmtPrint("namespace === %S\n", namespace); fflush(stdout);
1460 ajFmtPrint("namespaces[%d]->name === %S++\n", idx, namespaces[idx]->name); fflush(stdout);
1461 */
1462
1463 if(ajStrMatchS(namespace, namespaces[idx]->name))
1464 break;
1465 else
1466 ajFmtPrint("Failing to match %S to %S\n", namespace, namespaces[idx]->name);
1467 }
1468
1469 if(idx==NNAMESPACES)
1470 ajFatal("namespace not found - cannot recover\nline: %S\nnamespace: %S\n", line, namespace);
1471
1472 /* if(!ajStrMatchC(mode, "Fix relations"))
1473 continue; */
1474 }
1475
1476 ajStrAssignClear(&tok);
1477
1478 if(ajStrPrefixC(line, "relationship:"))
1479 ajFmtScanS(line, "%*S %S", &tok);
1480 else
1481 ajFmtPrintS(&tok, "%S", &tok);
1482
1483
1484 for(x=0, done=ajFalse; x<NRELATIONS; x++)
1485 if(ajStrMatchC(tok, RELATIONS[x]))
1486 {
1487 done=ajTrue;
1488 break;
1489 }
1490
1491 /* Relation line */
1492 if(done)
1493 {
1494 if(ajStrCalcCountC(line, ":!")!=3)
1495 {
1496 ajFmtPrintF(ouf_log, "Line %6d : Cannot check end-point "
1497 "(Invalid relations line format) (%S)\n",
1498 linecnt+1, line);
1499
1500 if(ajStrMatchC(mode, "Fix relations"))
1501 ajFatal("Unable to recover from invalid relations line "
1502 "format");
1503 }
1504
1505 else
1506 {
1507 ajStrAssignS(&relation, ajStrParseC(line, ":! "));
1508
1509 /* Get second token in line if on "relationship:" line */
1510 if(ajStrMatchC(relation, "relationship"))
1511 ajStrAssignS(&relation, ajStrParseC(NULL, ":! "));
1512
1513 ajStrParseC(NULL, ":! ");
1514 ajStrParseC(NULL, ":! ");
1515 ajStrAssignS(&tmp_name, ajStrParseC(NULL, ":! "));
1516 ajStrRemoveWhiteExcess(&tmp_name);
1517
1518 /*
1519 if(ajStrMatchC(tmp_name, "Protein secondary database"))
1520 ajFmtPrint("Protein secondary database ...\n");
1521 */
1522
1523
1524 if(ajStrPrefixC(line, "relationship: is_a:"))
1525 {
1526 if(!(id=FindTerm(idx, tmp_name, namespaces)))
1527
1528
1529 /*
1530 ** Must also check children of root node where
1531 ** start/end-points of is_a relation are in
1532 ** different namespaces
1533 */
1534 /*
1535 if(!(id=FindTerm(edam_term, tmp_name, namespaces)))
1536 {
1537 */
1538 /*
1539 ** Must also check in "data" namespace
1540 ** for identifiers
1541 */
1542 /*
1543 if(idx==edam_identifier)
1544 {
1545 if(!(id=FindTerm(data, tmp_name,
1546 namespaces)))
1547 ajFmtPrintF(ouf_log,
1548 "Line %6d : End-point term of "
1549 "relation does not exist (%S)"
1550 "\n",
1551 linecnt+1, line);
1552 }
1553 else */
1554
1555 ajFmtPrintF(ouf_log,
1556 "Line %6d : End-point term of "
1557 "relation does not exist (%S)\n",
1558 linecnt+1, line);
1559 /* } */
1560
1561
1562 /* if(ajStrMatchC(tmp_name, "Protein secondary database"))
1563 {
1564 if(id!=-1)
1565 ajFmtPrint("... found !!!!\n");
1566 else
1567 ajFmtPrint("... not found !!!!\n");
1568 } */
1569 }
1570 else if(ajStrPrefixC(line, "relationship: has_part:") ||
1571 ajStrPrefixC(line, "relationship: is_part_of:"))
1572 {
1573 if(!(id=FindTerm(idx, tmp_name, namespaces)))
1574 ajFmtPrintF(ouf_log,
1575 "Line %6d : End-point term of relation "
1576 "does not exist (%S)\n",
1577 linecnt+1, line);
1578 }
1579 else if (ajStrPrefixC(line, "relationship: concerns:"))
1580 {
1581 if(!(id=FindTerm(entity, tmp_name, namespaces)))
1582 if(!(id=FindTerm(operation, tmp_name, namespaces)))
1583 if(!(id=FindTerm(resource, tmp_name, namespaces)))
1584 ajFmtPrintF(ouf_log,
1585 "Line %6d : End-point term of "
1586 "relation does not exist (%S)\n",
1587 linecnt+1, line);
1588 }
1589 else if (ajStrPrefixC(line, "relationship: is_concern_of:"))
1590 {
1591 if(!(id=FindTerm(topic, tmp_name, namespaces)))
1592 ajFmtPrintF(ouf_log,
1593 "Line %6d : End-point term of relation "
1594 "does not exist (%S)\n",
1595 linecnt+1, line);
1596 }
1597 else if (ajStrPrefixC(line, "relationship: has_input:") ||
1598 ajStrPrefixC(line, "relationship: has_output:") ||
1599 ajStrPrefixC(line, "relationship: has_attribute:") ||
1600 ajStrPrefixC(line, "relationship: is_source_of:") ||
1601 ajStrPrefixC(line, "relationship: is_format_of:"))
1602 {
1603 if(!(id=FindTerm(data, tmp_name, namespaces)))
1604 ajFmtPrintF(ouf_log,
1605 "Line %6d : End-point term of relation "
1606 "does not exist (%S)\n",
1607 linecnt+1, line);
1608 }
1609 else if (ajStrPrefixC(line, "relationship: is_input_of:") ||
1610 ajStrPrefixC(line, "relationship: is_output_of:"))
1611 {
1612 if(!(id=FindTerm(operation, tmp_name, namespaces)))
1613 ajFmtPrintF(ouf_log,
1614 "Line %6d : End-point term of relation "
1615 "does not exist (%S)\n",
1616 linecnt+1, line);
1617 }
1618 else if (ajStrPrefixC(line, "relationship: has_source:"))
1619 {
1620 if(!(id=FindTerm(resource, tmp_name, namespaces)))
1621 ajFmtPrintF(ouf_log, "Line %6d : End-point term of relation does not exist (%S)\n",
1622 linecnt+1, line);
1623 }
1624 else if (ajStrPrefixC(line, "relationship: has_identifier:"))
1625 {
1626 /* if(!(id=FindTerm(edam_identifier, tmp_name, namespaces))) */
1627 if(!(id=FindTerm(data, tmp_name, namespaces)))
1628 ajFmtPrintF(ouf_log,
1629 "Line %6d : End-point term of relation "
1630 "does not exist (%S)\n",
1631 linecnt+1, line);
1632 }
1633 else if (ajStrPrefixC(line, "relationship: is_identifier_of:"))
1634 {
1635 if(!(id=FindTerm(entity, tmp_name, namespaces)))
1636 if(!(id=FindTerm(resource, tmp_name, namespaces)))
1637 if(!(id=FindTerm(data, tmp_name,
1638 namespaces)))
1639 ajFmtPrintF(ouf_log, "Line %6d : End-point "
1640 "term of relation does not "
1641 "exist (%S)\n",
1642 linecnt+1, line);
1643 }
1644 else if (ajStrPrefixC(line, "relationship: is_attribute_of:"))
1645 {
1646 if(!(id=FindTerm(entity, tmp_name, namespaces)))
1647 ajFmtPrintF(ouf_log, "Line %6d : End-point term of "
1648 "relation does not exist (%S)\n",
1649 linecnt+1, line);
1650 }
1651 else if (ajStrPrefixC(line, "relationship: has_format:"))
1652 {
1653 if(!(id=FindTerm(format, tmp_name, namespaces)))
1654 ajFmtPrintF(ouf_log, "Line %6d : End-point term of "
1655 "relation does not exist (%S)\n",
1656 linecnt+1, line);
1657 }
1658 /* Check all namespaces for 'consider' field */
1659 else if (ajStrPrefixC(line, "relationship: consider:"))
1660 {
1661 if(!(id=FindTerm(entity, tmp_name, namespaces)))
1662 if(!(id=FindTerm(topic, tmp_name, namespaces)))
1663 if(!(id=FindTerm(operation, tmp_name, namespaces)))
1664 if(!(id=FindTerm(resource, tmp_name, namespaces)))
1665 if(!(id=FindTerm(data, tmp_name, namespaces)))
1666 if(!(id=FindTerm(format, tmp_name, namespaces)))
1667 ajFmtPrintF(ouf_log,
1668 "Line %6d : End-point term of "
1669 "relation does not exist (%S)\n",
1670 linecnt+1, line);
1671 }
1672
1673 else
1674 ajFatal("Unknown relation");
1675
1676 /*
1677 if(ajStrMatchC(tmp_name, "Protein secondary database"))
1678 {
1679 if(id!=NULL)
1680 ajFmtPrint("... found !\n");
1681 else
1682 ajFmtPrint("... not found !\n");
1683 }
1684 */
1685
1686 /* Relation line ... write to output file */
1687 if(ajStrMatchC(mode, "Fix relations"))
1688 {
1689 if(id != NULL)
1690 ajFmtPrintF(ouf_edam, "relationship: %S: EDAM:%S ! %S\n", relation,
1691 id, tmp_name);
1692 else
1693 ajFatal("Could not find end-point term of relation "
1694 "(or recover) : %S", tmp_name);
1695 }
1696 }
1697 }
1698 /* Not a relation line ... write to output file */
1699 else if(ajStrMatchC(mode, "Fix relations"))
1700 ajFmtPrintF(ouf_edam, "%S", line);
1701
1702
1703 }
1704
1705 ajFmtPrintF(ouf_log, "\n\n");
1706 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
1707
1708
1709 /* Renumber term ids */
1710 if(ajStrMatchC(mode, "Renumber terms"))
1711 {
1712 /*
1713 for(termcnt=0, found_typedef = ajFalse; ajReadline(inf_edam, &line); )
1714 {
1715 if(ajStrPrefixC(line, "[Term]"))
1716 termcnt++;
1717 else if(ajStrPrefixC(line, "[Typedef]"))
1718 found_typedef = ajTrue;
1719
1720 if(ajStrPrefixC(line, "id:") && (!found_typedef))
1721 ajFmtPrintF(ouf_edam, "id: EDAM:%07d\n", termcnt);
1722 else
1723 ajFmtPrintF(ouf_edam, "%S", line);
1724 }
1725 */
1726
1727 for(termcnt=0; ajReadline(inf_edam, &line); )
1728 {
1729 if(ajStrPrefixC(line, "[Term]"))
1730 termcnt++;
1731
1732 if(ajStrPrefixC(line, "id:"))
1733 ajFmtPrintF(ouf_edam, "id: EDAM:%07d\n", termcnt);
1734 else
1735 ajFmtPrintF(ouf_edam, "%S", line);
1736 }
1737 }
1738 ajFileSeek(inf_edam, 0, 0); /* Rewind file */
1739
1740
1741
1742
1743
1744
1745 /* Clean up and exit */
1746 /*
1747 for(x=0; x<nfields; x++)
1748 ajStrDel(&fields[x]);
1749 AJFREE(fields);
1750
1751 for(x=0; x<nids; x++)
1752 ajStrDel(&ids[x]);
1753 AJFREE(ids);
1754 */
1755
1756
1757 ajFileClose(&inf_edam);
1758 ajFileClose(&ouf_edam);
1759 ajFileClose(&ouf_log);
1760 if(ouf_xml)
1761 ajFileClose(&ouf_xml);
1762 ajStrDel(&line);
1763 ajStrDel(&tok);
1764 ajStrDel(&name);
1765 ajStrDel(&namespace);
1766 ajStrDel(&relation);
1767 ajStrDel(&tmp_name);
1768 ajStrDel(&tmp_id);
1769
1770
1771 for(x=0; x<NNAMESPACES; x++)
1772 ajNamespaceDel(&namespaces[x]);
1773 ajExit();
1774 return 0;
1775
1776 ajDiroutDel(&xmloutdir);
1777
1778 }
1779