1#!/usr/bin/perl -w
2
# Base URL of the EDAM ontology.  parseacd() appends "/<branch>_<id>" to it
# to build the URL written after each term label.
our $edamurl = "http://edamontology.org";

# ACD qualifier types treated as inputs: an EDAM_data relation found on a
# qualifier of one of these types is written as an <Input> element.
our @inputs = qw(
    assembly
    codon
    cpdb
    datafile
    directory
    dirlist
    discretestates
    features
    filelist
    frequencies
    infile
    matrix
    matrixf
    obo
    pattern
    properties
    refseq
    regexp
    resource
    scop
    sequence
    seqall
    seqset
    seqsetall
    taxon
    text
    tree
    url
    variation
    xml
);

# ACD qualifier types treated as outputs: an EDAM_data relation found on a
# qualifier of one of these types is written as an <Output> element.
our @outputs = qw(
    align
    featout
    graph
    outassembly
    outcodon
    outcpdb
    outdata
    outdir
    outdiscrete
    outdistance
    outfile
    outfreq
    outmatrix
    outmatrixf
    outobo
    outproperties
    outrefseq
    outresource
    outscop
    outtaxon
    outtext
    outtree
    outurl
    outvariation
    outxml
    report
    seqout
    seqoutall
    seqoutset
    xygraph
);

# EMBASSY packages whose programs are listed with "wossname -showembassy".
# Entries that are commented out are deliberately skipped.
our @embassylist = (
    "appendixd",
    "cbstools",
    "clustalomega",
    "domainatrix",
    "domalign",
    "domsearch",
    "emnu",
    "esim4",
    # "hmmer",
    "hmmernew",
    "iprscan",
    "meme",
    "memenew",
    "mira",
    "mse",
    "myemboss",		# we avoid documenting these examples
    "myembossdemo",	# we avoid documenting these examples
    # "phylip",
    "phylipnew",
    "signature",
    "structure",
    "topo",
    # "vienna",		# old vienna
    "vienna2",
);

# Lookup tables: ACD type name => 1, used by parseacd() to classify each
# qualifier as an input or an output.
our %inputtype  = map { $_ => 1 } @inputs;
our %outputtype = map { $_ => 1 } @outputs;
98
# parseacd($pname)
#
# Extract the EDAM annotation of one EMBOSS application and write it to the
# already-open OUT filehandle.
#
# Runs "acdpretty $pname", reads the resulting "$pname.acdpretty" from the
# current directory, deletes that file, and prints, in this order:
#   <Function>  one per distinct EDAM_operation on the application block
#   <Topic>     one per distinct EDAM_topic on the application block
#   <Taxon>     always empty
#   <Input>     one per EDAM_data relation on a qualifier whose ACD type is
#               a key of %inputtype (duplicates are kept)
#   <Output>    likewise for %outputtype (duplicates are kept)
# Each element body is "label#url", with the url built from $edamurl.
#
# Dies if acdpretty fails, if its output cannot be opened, or if no
# application block is found.
sub parseacd($){
    my ($pname) = @_;

    # system() returns the wait status, which is 0 on success, so success has
    # to be tested explicitly.  The list form runs acdpretty without a shell.
    system("acdpretty", $pname) == 0
        or die "Failed to find ACD file $pname.acd (acdpretty status $?)";

    open(my $acdfh, '<', "$pname.acdpretty")
        or die "Failed to open $pname.acdpretty: $!";
    my $acdtext = do { local $/; <$acdfh> };
    $acdtext = "" unless defined $acdtext;
    close $acdfh;
    unlink "$pname.acdpretty";

    # Keep the /g here: in scalar context it leaves pos($acdtext) just past
    # the application block, and the qualifier scan below resumes from there.
    my (@function, @topic);
    if ($acdtext =~ m/application: \S+ [\[](.*?)\n\s*[\]]/gs) {
        my $appdoc = $1;
        @function = _edam_terms($appdoc, "operation");
        @topic    = _edam_terms($appdoc, "topic");
    }
    else {
        die "$pname.acd failed to find application definition";
    }

    # Every remaining "type: name [ ... ]" block is a qualifier (or section);
    # only those whose type is a known input/output type contribute terms.
    my (@input, @output);
    while ($acdtext =~ /\n\s*([a-z]+): \S+ [\[](.*?)\n\s*[\]]/gs) {
        my ($acdtype, $acdrest) = ($1, $2);
        if (defined $inputtype{$acdtype}) {
            push @input, _edam_terms($acdrest, "data");
        }
        if (defined $outputtype{$acdtype}) {
            push @output, _edam_terms($acdrest, "data");
        }
    }

    print OUT "<Function>$_</Function>\n" foreach _unique_terms(@function);
    print OUT "<Topic>$_</Topic>\n"       foreach _unique_terms(@topic);
    print OUT "<Taxon></Taxon>\n";
    # Inputs and outputs are intentionally not de-duplicated: one element is
    # written per annotated qualifier.
    print OUT "<Input>$_</Input>\n"       foreach @input;
    print OUT "<Output>$_</Output>\n"     foreach @output;
    return;
}

# _edam_terms($text, $branch)
#
# Scan $text for relations: "EDAM_<branch>:<id> <label>" attributes, where
# $branch is "operation", "topic" or "data".  Returns one
# "<label>#$edamurl/<branch>_<id>" string per match, in order of appearance.
sub _edam_terms {
    my ($text, $branch) = @_;
    my @terms;
    # No /o modifier: the pattern interpolates $branch, which varies per call.
    while ($text =~ /relations:\s+\"EDAM_${branch}:(\d+) ([^\"]+)\"/gs) {
        my ($id, $label) = ($1, $2);
        $label =~ s/\n\s*/ /g;	# rejoin a label wrapped over several lines
        push @terms, "$label#$edamurl/${branch}_$id";
    }
    return @terms;
}

# _unique_terms(@terms)
#
# Return @terms with repeated values removed, keeping first occurrences in
# their original order.
sub _unique_terms {
    my %seen;
    return grep { !$seen{$_}++ } @_;
}
190
# _xml_escape($text)
#
# Return $text with the characters that may not appear literally in XML
# character data (&, <, >) replaced by their predefined entities.
sub _xml_escape {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;	# must run first so later entities survive
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# _each_program($command, $label, $handler)
#
# Run $command (a wossname invocation) and call $handler->($name, $desc) for
# every "name  description" line it prints.  The "ALPHABETIC LIST" heading
# and blank lines are skipped.  Dies if the command cannot be started or if
# a line does not have the expected shape; $label names the command in that
# error message.
sub _each_program {
    my ($command, $label, $handler) = @_;

    open(my $progs, '-|', $command) or die "Cannot run wossname";
    while (my $prog = <$progs>) {
        next if $prog =~ /^ALPHABETIC LIST/;
        next if $prog =~ /^\n/;
        if ($prog =~ /^(\S+) +(.*)/) {
            # Copy the captures before calling out: $1/$2 are overwritten by
            # the next successful match anywhere.
            my ($name, $desc) = ($1, $2);
            $handler->($name, $desc);
        }
        else {
            die "Bad record in $label output: $prog";
        }
    }
    close $progs;
    return;
}

open(OUT, '>', "acdtoelixir.xml")
    or die "Unable to open 'acdtoelixir.xml': $!";

print OUT "<Tools>\n";

my $toolid = 0;		# running id, shared by core and EMBASSY tools

# Fixed field values for the core EMBOSS applications.
my $plic    = "GPL";
my $iformat = "";
my $oformat = "";
my $pauth   = "EMBOSS";

#
# Core EMBOSS applications, as listed by wossname
#

_each_program("wossname -noembassy -alpha -auto", "wossname", sub {
    my ($pname, $pdesc) = @_;
    my $xname = _xml_escape($pname);
    my $xdesc = _xml_escape($pdesc);

    $toolid++;
    print OUT "<Tool toolid=\"$toolid\">\n";
    print OUT "<Name>$xname</Name>\n";
    parseacd($pname);		# writes Function/Topic/Taxon/Input/Output
    print OUT
        "<Provider>EMBOSS</Provider>\n",
        "<Developer>$pauth</Developer>\n",
        "<Home>http://emboss.open-bio.org/rel/rel6/apps/$xname.html</Home>\n",
        "<Contact>mailto:emboss-bug\@emboss.open-bio.org</Contact>\n",
        "<Type>Command-line tool</Type>\n",
        "<Description>$xdesc</Description>\n",
        "<Collection>EMBOSS</Collection>\n",
        "<InputFormat>$iformat</InputFormat>\n",
        "<OutputFormat>$oformat</OutputFormat>\n",
        "<WorkPackage></WorkPackage>\n",
        "<Infrastructure>ELIXIR</Infrastructure>\n",
        "<License>$plic</License>\n",
        "<TermsOfUse>http://emboss.open-bio.org/html/dev/ch01s01.html</TermsOfUse>\n",
        "<Downloads></Downloads>\n",
        "<Available></Available>\n",
        "</Tool>\n";
});

#
# EMBASSY applications, one wossname run per package
#
# NOTE(review): these records use a different, shorter layout than the core
# ones above (<Desc>/<Package> instead of <Description>/<Collection>, a
# different <Type> text, no Developer/License/... elements).  That layout is
# kept exactly as it was; confirm whether it should be aligned with the core
# records and, if so, which developer and licence values apply per package.
#

foreach my $e (@embassylist) {
    _each_program("wossname -showembassy $e -alpha -auto",
                  "wossname -showembassy $e", sub {
        my ($pname, $pdesc) = @_;
        my $xname = _xml_escape($pname);
        my $xdesc = _xml_escape($pdesc);

        $toolid++;
        print OUT "<Tool toolid=\"$toolid\">\n";
        print OUT "<Name>$xname</Name>\n";
        parseacd($pname);
        print OUT
            "<Provider>EMBOSS</Provider>\n",
            "<Home>http://emboss.open-bio.org/rel/rel6/embassy/$e/$xname.html</Home>\n",
            "<Contact>mailto:emboss-bug\@emboss.open-bio.org</Contact>\n",
            "<Type>Command-line tools </Type>\n",
            "<Desc>$xdesc</Desc>\n",
            "<Package>EMBOSS</Package>\n",
            "</Tool>\n";
    });
}


print OUT "</Tools>\n";
# Buffered write errors only surface when the handle is closed.
close OUT or die "Failed to write 'acdtoelixir.xml': $!";
272