1// ===========================================================================
2//
3//                            PUBLIC DOMAIN NOTICE
4//            National Center for Biotechnology Information (NCBI)
5//
6//  This software/database is a "United States Government Work" under the
7//  terms of the United States Copyright Act. It was written as part of
8//  the author's official duties as a United States Government employee and
9//  thus cannot be copyrighted. This software/database is freely available
10//  to the public for use. The National Library of Medicine and the U.S.
11//  Government do not place any restriction on its use or reproduction.
12//  We would, however, appreciate having the NCBI and the author cited in
13//  any work or product based on this material.
14//
15//  Although all reasonable efforts have been taken to ensure the accuracy
16//  and reliability of the software and data, the NLM and the U.S.
17//  Government do not and cannot warrant the performance or results that
18//  may be obtained by using this software or data. The NLM and the U.S.
19//  Government disclaim all warranties, express or implied, including
20//  warranties of performance, merchantability or fitness for any particular
21//  purpose.
22//
23// ===========================================================================
24//
25// File Name:  transmute.go
26//
27// Author:  Jonathan Kans
28//
29// ==========================================================================
30
31package main
32
33import (
34	"encoding/base64"
35	"eutils"
36	"fmt"
37	"html"
38	"io"
39	"io/ioutil"
40	"net/url"
41	"os"
42	"runtime"
43	"runtime/debug"
44	"runtime/pprof"
45	"strconv"
46	"strings"
47	"sync"
48	"unicode"
49)
50
51// TRANSMUTE HELP MESSAGE TEXT
52
// transmuteHelp is the main usage text for the transmute command. It lists
// the supported operations grouped by purpose (pretty-printing, data
// conversion, sequence comparison/editing/processing, variation processing,
// string transformations, XML reformatting/modification/normalization) and
// ends with worked shell examples. The text is a raw string, so backslashes
// and dollar signs in the shell examples are emitted exactly as written.
const transmuteHelp = `
Pretty-Printing

 Reformat XML

  -x2p

 Reformat JSON

  -j2p

 Table column alignment

  -align

    -a    Column alignment codes:

            l left
            c center
            r right
            n numeric align on decimal point
            N trailing zero-pad decimals
            z leading zero-pad integers

    -g    Spacing between columns
    -h    Indent before columns

Data Conversion

 JSON stream to XML

  -j2x

    -set setWrapper
    -rec recordWrapper
    -nest [flat|recurse|plural|depth]

 ASN.1 stream to XML

  -a2x

    -set setWrapper
    -rec recordWrapper

 Tab-delimited table to XML

  -t2x

    -set setWrapper
    -rec recordWrapper
    -skip linesToSkip
    -header
    -lower | -upper
    -indent | -flush

      XML object names per column

 Comma-separated values file to XML

  -c2x

    -set setWrapper
    -rec recordWrapper
    -skip linesToSkip
    -header
    -lower | -upper
    -indent | -flush

      XML object names per column

 GenBank/GenPept flatfile to INSDSeq XML

  -g2x

Sequence Comparison

  -diff        Compare two aligned files for point differences

Sequence Editing

  -revcomp     Reverse complement nucleotide sequence

  -remove      Trim at ends of sequence

    -first       Delete first N bases
    -last        Delete last N bases

  -retain      Save either end of sequence

    -leading     Keep first N bases
    -trailing    Keep last N bases

  -replace     Apply base or residue substitution

    -offset      Skip ahead by 0-based count (SPDI), or
    -column      Move just before 1-based position (HGVS)

    -delete      Delete N bases
    -insert      Insert given sequence

    -lower       Lower-case original sequence

  -extract     Use xtract -insd feat_location instructions

    -lower       Lower-case extracted sequence

Sequence Processing

  -cds2prot    Translate coding region into protein

    -code        Genetic code
    -frame       Offset in sequence
    -stop        Include stop residue
    -trim        Remove trailing Xs
    -part5       CDS partial at 5' end
    -part3       CDS extends past 3' end
    -every       Translate all codons

  -molwt       Calculate molecular weight of peptide

    -met         Do not cleave leading methionine

Variation Processing

  -hgvs        Convert HGVS variation format to XML

String Transformations

 XML

  -encodeXML
  -decodeXML

  -plainXML

 URL

  -encodeURL
  -decodeURL

 Base64

  -encode64
  -decode64

 Protein

  -aa1to3
  -aa3to1

Customized XML Reformatting

  -format [compact|flush|indent|expand]

    -xml
    -doctype
    -comment
    -cdata
    -separate
    -self
    -unicode [fuse|space|period|brackets|markdown|slash|tag]
    -script [brackets|markdown]
    -mathml [terse]

XML Modification

  -filter Object
            [retain|remove|encode|decode|shrink|expand|accent]
              [content|cdata|comment|object|attributes|container]

EFetch XML Normalization

  -normalize [database]

Examples

  -j2x -set - -rec GeneRec

  -t2x -set Set -rec Rec -skip 1 Code Name

  -filter ExpXml decode content

  -filter LocationHist remove object

  -normalize pubmed

  -wrp PubmedArticleSet -pattern PubmedArticle -format

Sequence Substitution

  echo ATGAAACCCGGGTTTTAG |
  transmute -replace -offset 5 -delete 1 -insert G

Protein Translation

  echo "CTAAAACCCGGGTTTCAT" |
  transmute -revcomp |
  transmute -cds2prot

Variation Extraction

  echo "NP_000504.1:p.Glu41Lys,NP_000504.1:p.P43Leu,NP_000504.1:p.Trp142Ter" |
  transmute -hgvs | transmute -format

Sequence Comparison

  transmute -diff <( echo "MKPGSQPVIY" ) <( echo "-KPGFQ*VIY" )

Translation of Coding Regions

  efetch -db nuccore -id U54469 -format gb |
  transmute -g2x |
  xtract -insd CDS sub_sequence |
  cut -f 2 |
  while read seq
  do
    echo "$seq" |
    transmute -cds2prot
    echo ""
  done

Mitochondrial Mistranslation

  efetch -db nuccore -id NC_012920 -format gb |
  transmute -g2x |
  xtract -insd CDS gene product protein_id translation sub_sequence |
  while IFS=$'\t' read acc gene prod prid prot seq
  do
    mito=$( echo "$seq" | transmute -cds2prot -code 2 -stop )
    norm=$( echo "$seq" | transmute -cds2prot -code 1 -stop )
    if [ "$mito" != "$norm" ]
    then
      echo ">$acc $gene $prid $prod"
      transmute -diff <( echo "$mito" ) <( echo "$norm" )
      echo ""
    fi
  done

Systematic Mutations

  echo ATGAAACCCGGGTTTTAG |
  while read seq
  do
    for (( i=0; i<${#seq}; i++ ))
    do
      ch="${seq:$i:1}"
      for sub in A C G T
      do
        echo "$seq" |
        transmute -replace -offset "$i" -delete "$ch" -insert "$sub"
      done
    done
  done |
  while read seq
  do
    tns=$( echo "$seq" | transmute -cds2prot )
    mwt=$( echo "$tns" | transmute -molwt )
    echo -e "${seq}\t${tns}\t${mwt}"
  done
`
313
// transmuteExtra is an additional worked example: a shell pipeline that
// fetches the genomic region for a gene, extracts each CDS with its annotated
// translation, re-translates the coding sequence with transmute -cds2prot,
// and records any differences with transmute -diff. The text is a raw string,
// so the shell line continuations and escapes are emitted exactly as written.
const transmuteExtra = `
Mismatch Detection (RefSeq Proteins with 3 Residue Differences from RefSeq Genome)

  esearch -db gene -query "DMD [GENE] AND human [ORGN]" |
  efetch -format docsum |
  xtract -pattern DocumentSummary -block GenomicInfoType \
    -tab "\n" -element ChrAccVer,ChrStart,ChrStop |
  xargs -n 3 sh -c 'efetch -db nuccore -format gbc \
    -id "$0" -chr_start "$1" -chr_stop "$2"' > dystrophin.xml

  cat dystrophin.xml |
  xtract -insd CDS gene product translation sub_sequence > dystrophin.txt

  cat dystrophin.txt |
  while IFS=$'\t' read acc gene prod prot seq
  do
    trans=$( echo "$seq" | transmute -cds2prot )
    if [ "$prot" != "$trans" ]
    then
      echo ">$acc $gene $prod"
      transmute -diff <( echo "$prot" ) <( echo "$trans" )
      echo ""
    fi
  done > failures.txt
`
339
340// XML FORMATTING FUNCTIONS
341
342// createFormatters does concurrent reformatting, using flush-left to remove leading spaces
343func createFormatters(parent string, format string, inp <-chan eutils.XMLRecord) <-chan eutils.XMLRecord {
344
345	if inp == nil {
346		return nil
347	}
348
349	out := make(chan eutils.XMLRecord, eutils.ChanDepth())
350	if out == nil {
351		fmt.Fprintf(os.Stderr, "\nERROR: Unable to create formatter channel\n")
352		os.Exit(1)
353	}
354
355	if format == "" {
356		format = "flush"
357	}
358
359	// xmlFormatter reads partitioned XML from channel and formats on a per-record basis
360	xmlFormatter := func(wg *sync.WaitGroup, parent string, inp <-chan eutils.XMLRecord, out chan<- eutils.XMLRecord) {
361
362		// report when this formatter has no more records to process
363		defer wg.Done()
364
365		// read partitioned XML from producer channel
366		for ext := range inp {
367
368			idx := ext.Index
369			text := ext.Text
370
371			if text == "" {
372				// should never see empty input data
373				out <- eutils.XMLRecord{Index: idx, Text: text}
374				continue
375			}
376
377			// str := doFormat(text[:], parent)
378
379			frm := eutils.FormatRecord(text, parent, eutils.FormatArgs{Format: format})
380			str := eutils.ChanToString(frm)
381
382			// send even if empty to get all record counts for reordering
383			out <- eutils.XMLRecord{Index: idx, Text: str}
384		}
385	}
386
387	var wg sync.WaitGroup
388
389	// launch multiple formatter goroutines
390	for i := 0; i < eutils.NumServe(); i++ {
391		wg.Add(1)
392		go xmlFormatter(&wg, parent, inp, out)
393	}
394
395	// launch separate anonymous goroutine to wait until all formatters are done
396	go func() {
397		wg.Wait()
398		close(out)
399	}()
400
401	return out
402}
403
404// processFormat reformats XML for ease of reading
405func processFormat(rdr <-chan eutils.XMLBlock, args []string) {
406
407	if rdr == nil || args == nil {
408		return
409	}
410
411	// skip past command name
412	args = args[1:]
413
414	format := ""
415	xml := ""
416	doctype := ""
417
418	doSeparate := true
419	doSelf := false
420	doComment := false
421	doCdata := false
422
423	if len(args) > 0 {
424		// look for [compact|flush|indent|expand] specification
425		format = args[0]
426		if strings.HasPrefix(format, "-") {
427			// ran into next argument, default to indent
428			format = "indent"
429		} else {
430			// skip past first argument
431			args = args[1:]
432		}
433	} else {
434		format = "indent"
435	}
436
437	// look for remaining arguments
438	for len(args) > 0 {
439
440		switch args[0] {
441		case "-xml":
442			args = args[1:]
443			// -xml argument must be followed by value to use in xml line
444			if len(args) < 1 || strings.HasPrefix(args[0], "-") {
445				fmt.Fprintf(os.Stderr, "\nERROR: -xml argument is missing\n")
446				os.Exit(1)
447			}
448			xml = args[0]
449			args = args[1:]
450		case "-doctype":
451			args = args[1:]
452			if len(args) > 0 {
453				// if -doctype argument followed by value, use instead of DOCTYPE line
454				doctype = args[0]
455				args = args[1:]
456			}
457		/*
458			// allow setting of unicode, script, and mathml flags within -format
459			case "-unicode":
460				if len(args) < 2 {
461					fmt.Fprintf(os.Stderr, "\nERROR: Unicode argument is missing\n")
462					os.Exit(1)
463				}
464				// unicodePolicy = args[1]
465				args = args[2:]
466			case "-script":
467				if len(args) < 2 {
468					fmt.Fprintf(os.Stderr, "\nERROR: Script argument is missing\n")
469					os.Exit(1)
470				}
471				// scriptPolicy = args[1]
472				args = args[2:]
473			case "-mathml":
474				if len(args) < 2 {
475					fmt.Fprintf(os.Stderr, "\nERROR: MathML argument is missing\n")
476					os.Exit(1)
477				}
478				// mathmlPolicy = args[1]
479				args = args[2:]
480		*/
481
482		// also allow setting additional processing flags within -format (undocumented)
483		case "-separate", "-separated":
484			doSeparate = false
485			args = args[1:]
486		case "-self", "-self-closing":
487			doSelf = true
488			args = args[1:]
489		case "-comment":
490			doComment = true
491			args = args[1:]
492		case "-cdata":
493			doCdata = true
494			args = args[1:]
495		default:
496			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -format command\n")
497			os.Exit(1)
498		}
499	}
500
501	tknq := eutils.CreateTokenizer(rdr)
502
503	frgs := eutils.FormatArgs{
504		Format: format, XML: xml, Doctype: doctype,
505		Separate: doSeparate, Self: doSelf,
506		Comment: doComment, Cdata: doCdata}
507
508	frm := eutils.FormatTokens(tknq, frgs)
509
510	eutils.ChanToStdout(frm)
511}
512
513// processTokens shows individual tokens in stream (undocumented)
514func processTokens(rdr <-chan eutils.XMLBlock) {
515
516	if rdr == nil {
517		return
518	}
519
520	tknq := eutils.CreateTokenizer(rdr)
521
522	if tknq == nil {
523		fmt.Fprintf(os.Stderr, "\nERROR: Unable to create debug tokenizer\n")
524		os.Exit(1)
525	}
526
527	var buffer strings.Builder
528
529	count := 0
530	indent := 0
531
532	for tkn := range tknq {
533
534		tag := tkn.Tag
535		name := tkn.Name
536		attr := tkn.Attr
537
538		switch tag {
539		case eutils.STARTTAG:
540			buffer.WriteString("ST: ")
541			for i := 0; i < indent; i++ {
542				buffer.WriteString("  ")
543			}
544			buffer.WriteString(name)
545			buffer.WriteString("\n")
546			if attr != "" {
547				buffer.WriteString("AT: ")
548				for i := 0; i < indent; i++ {
549					buffer.WriteString("  ")
550				}
551				buffer.WriteString(attr)
552				buffer.WriteString("\n")
553			}
554			indent++
555		case eutils.SELFTAG:
556			buffer.WriteString("SL: ")
557			for i := 0; i < indent; i++ {
558				buffer.WriteString("  ")
559			}
560			buffer.WriteString(name)
561			buffer.WriteString("/")
562			buffer.WriteString("\n")
563			if attr != "" {
564				buffer.WriteString("AT: ")
565				for i := 0; i < indent; i++ {
566					buffer.WriteString("  ")
567				}
568				buffer.WriteString(attr)
569				buffer.WriteString("\n")
570			}
571		case eutils.STOPTAG:
572			indent--
573			buffer.WriteString("SP: ")
574			for i := 0; i < indent; i++ {
575				buffer.WriteString("  ")
576			}
577			buffer.WriteString(name)
578			buffer.WriteString("/")
579			buffer.WriteString("\n")
580		case eutils.CONTENTTAG:
581			ctype := tkn.Cont
582			if (ctype & eutils.LFTSPACE) != 0 {
583				if (ctype & eutils.RGTSPACE) != 0 {
584					buffer.WriteString("FL: ")
585				} else {
586					buffer.WriteString("LF: ")
587				}
588			} else if (ctype & eutils.RGTSPACE) != 0 {
589				buffer.WriteString("RT: ")
590			} else {
591				buffer.WriteString("VL: ")
592			}
593			for i := 0; i < indent; i++ {
594				buffer.WriteString("  ")
595			}
596			buffer.WriteString(name)
597			buffer.WriteString("\n")
598		case eutils.CDATATAG:
599			buffer.WriteString("CD: ")
600			for i := 0; i < indent; i++ {
601				buffer.WriteString("  ")
602			}
603			buffer.WriteString(name)
604			buffer.WriteString("\n")
605		case eutils.COMMENTTAG:
606			buffer.WriteString("CO: ")
607			for i := 0; i < indent; i++ {
608				buffer.WriteString("  ")
609			}
610			buffer.WriteString(name)
611			buffer.WriteString("\n")
612		case eutils.DOCTYPETAG:
613			buffer.WriteString("DC: ")
614			for i := 0; i < indent; i++ {
615				buffer.WriteString("  ")
616			}
617			buffer.WriteString(name)
618			buffer.WriteString("\n")
619		case eutils.NOTAG:
620			buffer.WriteString("NO:")
621			if indent != 0 {
622				buffer.WriteString(" (indent ")
623				buffer.WriteString(strconv.Itoa(indent))
624				buffer.WriteString(")")
625			}
626			buffer.WriteString("\n")
627		case eutils.ISCLOSED:
628			buffer.WriteString("CL:")
629			if indent != 0 {
630				buffer.WriteString(" (indent ")
631				buffer.WriteString(strconv.Itoa(indent))
632				buffer.WriteString(")")
633			}
634			buffer.WriteString("\n")
635			txt := buffer.String()
636			if txt != "" {
637				// print final buffer
638				fmt.Fprintf(os.Stdout, "%s", txt)
639			}
640			return
641		default:
642			buffer.WriteString("UNKONWN:")
643			if indent != 0 {
644				buffer.WriteString(" (indent ")
645				buffer.WriteString(strconv.Itoa(indent))
646				buffer.WriteString(")")
647			}
648			buffer.WriteString("\n")
649		}
650
651		count++
652		if count > 1000 {
653			count = 0
654			txt := buffer.String()
655			if txt != "" {
656				// print current buffered output
657				fmt.Fprintf(os.Stdout, "%s", txt)
658			}
659			buffer.Reset()
660		}
661	}
662}
663
664// processOutline displays outline of XML structure
665func processOutline(rdr <-chan eutils.XMLBlock) {
666
667	if rdr == nil {
668		return
669	}
670
671	tknq := eutils.CreateTokenizer(rdr)
672
673	if tknq == nil {
674		fmt.Fprintf(os.Stderr, "\nERROR: Unable to create outline tokenizer\n")
675		os.Exit(1)
676	}
677
678	var buffer strings.Builder
679
680	count := 0
681	indent := 0
682
683	for tkn := range tknq {
684
685		tag := tkn.Tag
686		name := tkn.Name
687
688		switch tag {
689		case eutils.STARTTAG:
690			if name == "eSummaryResult" ||
691				name == "eLinkResult" ||
692				name == "eInfoResult" ||
693				name == "PubmedArticleSet" ||
694				name == "DocumentSummarySet" ||
695				name == "INSDSet" ||
696				name == "Entrezgene-Set" ||
697				name == "TaxaSet" {
698				break
699			}
700			for i := 0; i < indent; i++ {
701				buffer.WriteString("  ")
702			}
703			buffer.WriteString(name)
704			buffer.WriteString("\n")
705			indent++
706		case eutils.SELFTAG:
707			for i := 0; i < indent; i++ {
708				buffer.WriteString("  ")
709			}
710			buffer.WriteString(name)
711			buffer.WriteString("\n")
712		case eutils.STOPTAG:
713			indent--
714		case eutils.DOCTYPETAG:
715		case eutils.NOTAG:
716		case eutils.ISCLOSED:
717			txt := buffer.String()
718			if txt != "" {
719				// print final buffer
720				fmt.Fprintf(os.Stdout, "%s", txt)
721			}
722			return
723		default:
724		}
725
726		count++
727		if count > 1000 {
728			count = 0
729			txt := buffer.String()
730			if txt != "" {
731				// print current buffered output
732				fmt.Fprintf(os.Stdout, "%s", txt)
733			}
734			buffer.Reset()
735		}
736	}
737}
738
739// processSynopsis displays paths to XML elements
740func processSynopsis(rdr <-chan eutils.XMLBlock, leaf bool, delim string) {
741
742	if rdr == nil {
743		return
744	}
745
746	tknq := eutils.CreateTokenizer(rdr)
747
748	if tknq == nil {
749		fmt.Fprintf(os.Stderr, "\nERROR: Unable to create synopsis tokenizer\n")
750		os.Exit(1)
751	}
752
753	var buffer strings.Builder
754	count := 0
755
756	// synopsisLevel recursive definition
757	var synopsisLevel func(string) bool
758
759	synopsisLevel = func(parent string) bool {
760
761		for tkn := range tknq {
762
763			tag := tkn.Tag
764			name := tkn.Name
765
766			switch tag {
767			case eutils.STARTTAG:
768				if name == "eSummaryResult" ||
769					name == "eLinkResult" ||
770					name == "eInfoResult" ||
771					name == "PubmedArticleSet" ||
772					name == "DocumentSummarySet" ||
773					name == "INSDSet" ||
774					name == "Entrezgene-Set" ||
775					name == "TaxaSet" {
776					break
777				}
778				if leaf {
779					if name == "root" ||
780						name == "opt" ||
781						name == "anon" {
782						break
783					}
784				}
785				if !leaf {
786					// show all paths, including container objects
787					if parent != "" {
788						buffer.WriteString(parent)
789						buffer.WriteString(delim)
790					}
791					buffer.WriteString(name)
792					buffer.WriteString("\n")
793				}
794				path := parent
795				if path != "" {
796					path += delim
797				}
798				path += name
799				if synopsisLevel(path) {
800					return true
801				}
802			case eutils.SELFTAG:
803				if parent != "" {
804					buffer.WriteString(parent)
805					buffer.WriteString(delim)
806				}
807				buffer.WriteString(name)
808				buffer.WriteString("\n")
809			case eutils.STOPTAG:
810				// break recursion
811				return false
812			case eutils.CONTENTTAG:
813				if leaf {
814					// only show endpoint paths
815					if parent != "" {
816						buffer.WriteString(parent)
817						buffer.WriteString("\n")
818					}
819				}
820			case eutils.DOCTYPETAG:
821			case eutils.NOTAG:
822			case eutils.ISCLOSED:
823				txt := buffer.String()
824				if txt != "" {
825					// print final buffer
826					fmt.Fprintf(os.Stdout, "%s", txt)
827				}
828				return true
829			default:
830			}
831
832			count++
833			if count > 1000 {
834				count = 0
835				txt := buffer.String()
836				if txt != "" {
837					// print current buffered output
838					fmt.Fprintf(os.Stdout, "%s", txt)
839				}
840				buffer.Reset()
841			}
842		}
843		return true
844	}
845
846	for {
847		// may have concatenated XMLs, loop through all
848		if synopsisLevel("") {
849			return
850		}
851	}
852}
853
854// processFilter modifies XML content, comments, or CDATA
855func processFilter(rdr <-chan eutils.XMLBlock, args []string) {
856
857	if rdr == nil || args == nil {
858		return
859	}
860
861	tknq := eutils.CreateTokenizer(rdr)
862
863	if tknq == nil {
864		fmt.Fprintf(os.Stderr, "\nERROR: Unable to create filter tokenizer\n")
865		os.Exit(1)
866	}
867
868	var buffer strings.Builder
869
870	count := 0
871
872	// skip past command name
873	args = args[1:]
874
875	max := len(args)
876	if max < 1 {
877		fmt.Fprintf(os.Stderr, "\nERROR: Insufficient command-line arguments supplied to transmute -filter\n")
878		os.Exit(1)
879	}
880
881	pttrn := args[0]
882
883	args = args[1:]
884	max--
885
886	if max < 2 {
887		fmt.Fprintf(os.Stderr, "\nERROR: No object name supplied to transmute -filter\n")
888		os.Exit(1)
889	}
890
891	type ActionType int
892
893	const (
894		NOACTION ActionType = iota
895		DORETAIN
896		DOREMOVE
897		DOENCODE
898		DODECODE
899		DOSHRINK
900		DOEXPAND
901		DOACCENT
902	)
903
904	action := args[0]
905
906	what := NOACTION
907	switch action {
908	case "retain":
909		what = DORETAIN
910	case "remove":
911		what = DOREMOVE
912	case "encode":
913		what = DOENCODE
914	case "decode":
915		what = DODECODE
916	case "shrink":
917		what = DOSHRINK
918	case "expand":
919		what = DOEXPAND
920	case "accent":
921		what = DOACCENT
922	default:
923		fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized action '%s' supplied to transmute -filter\n", action)
924		os.Exit(1)
925	}
926
927	trget := args[1]
928
929	which := eutils.NOTAG
930	switch trget {
931	case "attribute", "attributes":
932		which = eutils.ATTRIBTAG
933	case "content", "contents":
934		which = eutils.CONTENTTAG
935	case "cdata", "CDATA":
936		which = eutils.CDATATAG
937	case "comment", "comments":
938		which = eutils.COMMENTTAG
939	case "object":
940		// object normally retained
941		which = eutils.OBJECTTAG
942	case "container":
943		which = eutils.CONTAINERTAG
944	default:
945		fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized target '%s' supplied to transmute -filter\n", trget)
946		os.Exit(1)
947	}
948
949	inPattern := false
950	prevName := ""
951
952	for tkn := range tknq {
953
954		tag := tkn.Tag
955		name := tkn.Name
956		attr := tkn.Attr
957
958		switch tag {
959		case eutils.STARTTAG:
960			prevName = name
961			if name == pttrn {
962				inPattern = true
963				if which == eutils.CONTAINERTAG && what == DOREMOVE {
964					continue
965				}
966			}
967			if inPattern && which == eutils.OBJECTTAG && what == DOREMOVE {
968				continue
969			}
970			buffer.WriteString("<")
971			buffer.WriteString(name)
972			if attr != "" {
973				if which != eutils.ATTRIBTAG || what != DOREMOVE {
974					attr = strings.TrimSpace(attr)
975					attr = eutils.CompressRunsOfSpaces(attr)
976					buffer.WriteString(" ")
977					buffer.WriteString(attr)
978				}
979			}
980			buffer.WriteString(">\n")
981		case eutils.SELFTAG:
982			if inPattern && which == eutils.OBJECTTAG && what == DOREMOVE {
983				continue
984			}
985			buffer.WriteString("<")
986			buffer.WriteString(name)
987			if attr != "" {
988				if which != eutils.ATTRIBTAG || what != DOREMOVE {
989					attr = strings.TrimSpace(attr)
990					attr = eutils.CompressRunsOfSpaces(attr)
991					buffer.WriteString(" ")
992					buffer.WriteString(attr)
993				}
994			}
995			buffer.WriteString("/>\n")
996		case eutils.STOPTAG:
997			if name == pttrn {
998				inPattern = false
999				if which == eutils.OBJECTTAG && what == DOREMOVE {
1000					continue
1001				}
1002				if which == eutils.CONTAINERTAG && what == DOREMOVE {
1003					continue
1004				}
1005			}
1006			if inPattern && which == eutils.OBJECTTAG && what == DOREMOVE {
1007				continue
1008			}
1009			buffer.WriteString("</")
1010			buffer.WriteString(name)
1011			buffer.WriteString(">\n")
1012		case eutils.CONTENTTAG:
1013			if inPattern && which == eutils.OBJECTTAG && what == DOREMOVE {
1014				continue
1015			}
1016			if inPattern && which == eutils.CONTENTTAG && what == DOEXPAND {
1017				var words []string
1018				if strings.Contains(name, "|") {
1019					words = strings.FieldsFunc(name, func(c rune) bool {
1020						return c == '|'
1021					})
1022				} else if strings.Contains(name, ",") {
1023					words = strings.FieldsFunc(name, func(c rune) bool {
1024						return c == ','
1025					})
1026				} else {
1027					words = strings.Fields(name)
1028				}
1029				between := ""
1030				for _, item := range words {
1031					max := len(item)
1032					for max > 1 {
1033						ch := item[max-1]
1034						if ch != '.' && ch != ',' && ch != ':' && ch != ';' {
1035							break
1036						}
1037						// trim trailing punctuation
1038						item = item[:max-1]
1039						// continue checking for runs of punctuation at end
1040						max--
1041					}
1042					if eutils.HasFlankingSpace(item) {
1043						item = strings.TrimSpace(item)
1044					}
1045					if item != "" {
1046						if between != "" {
1047							buffer.WriteString(between)
1048						}
1049						buffer.WriteString(item)
1050						buffer.WriteString("\n")
1051						between = "</" + prevName + ">\n<" + prevName + ">\n"
1052					}
1053				}
1054				continue
1055			}
1056			if inPattern && which == tag {
1057				switch what {
1058				case DORETAIN:
1059					// default behavior for content - can use -filter X retain content as a no-op
1060				case DOREMOVE:
1061					continue
1062				case DOENCODE:
1063					name = html.EscapeString(name)
1064				case DODECODE:
1065					name = html.UnescapeString(name)
1066				case DOSHRINK:
1067					name = eutils.CompressRunsOfSpaces(name)
1068				case DOACCENT:
1069					if eutils.IsNotASCII(name) {
1070						name = eutils.DoAccentTransform(name)
1071					}
1072				default:
1073					continue
1074				}
1075			}
1076			// content normally printed
1077			if eutils.HasFlankingSpace(name) {
1078				name = strings.TrimSpace(name)
1079			}
1080			buffer.WriteString(name)
1081			buffer.WriteString("\n")
1082		case eutils.CDATATAG:
1083			if inPattern && which == eutils.OBJECTTAG && what == DOREMOVE {
1084				continue
1085			}
1086			if inPattern && which == tag {
1087				switch what {
1088				case DORETAIN:
1089					// cdata requires explicit retain command
1090				case DOREMOVE:
1091					continue
1092				case DOENCODE:
1093					name = html.EscapeString(name)
1094				case DODECODE:
1095					name = html.UnescapeString(name)
1096				case DOSHRINK:
1097					name = eutils.CompressRunsOfSpaces(name)
1098				case DOACCENT:
1099					if eutils.IsNotASCII(name) {
1100						name = eutils.DoAccentTransform(name)
1101					}
1102				default:
1103					continue
1104				}
1105				// cdata normally removed
1106				if eutils.HasFlankingSpace(name) {
1107					name = strings.TrimSpace(name)
1108				}
1109				buffer.WriteString(name)
1110				buffer.WriteString("\n")
1111			}
1112		case eutils.COMMENTTAG:
1113			if inPattern && which == eutils.OBJECTTAG && what == DOREMOVE {
1114				continue
1115			}
1116			if inPattern && which == tag {
1117				switch what {
1118				case DORETAIN:
1119					// comment requires explicit retain command
1120				case DOREMOVE:
1121					continue
1122				case DOENCODE:
1123					name = html.EscapeString(name)
1124				case DODECODE:
1125					name = html.UnescapeString(name)
1126				case DOSHRINK:
1127					name = eutils.CompressRunsOfSpaces(name)
1128				case DOACCENT:
1129					if eutils.IsNotASCII(name) {
1130						name = eutils.DoAccentTransform(name)
1131					}
1132				default:
1133					continue
1134				}
1135				// comment normally removed
1136				if eutils.HasFlankingSpace(name) {
1137					name = strings.TrimSpace(name)
1138				}
1139				buffer.WriteString(name)
1140				buffer.WriteString("\n")
1141			}
1142		case eutils.DOCTYPETAG:
1143		case eutils.NOTAG:
1144		case eutils.ISCLOSED:
1145			txt := buffer.String()
1146			if txt != "" {
1147				// print final buffer
1148				fmt.Fprintf(os.Stdout, "%s", txt)
1149			}
1150			return
1151		default:
1152		}
1153
1154		count++
1155		if count > 1000 {
1156			count = 0
1157			txt := buffer.String()
1158			if txt != "" {
1159				// print current buffered output
1160				fmt.Fprintf(os.Stdout, "%s", txt)
1161			}
1162			buffer.Reset()
1163		}
1164	}
1165}
1166
1167// STRING CONVERTERS
1168
// encodeURL reads all of inp, strips one trailing newline, and writes the
// query-escaped form (url.QueryEscape) to standard output followed by a
// newline. A read failure is reported on standard error and the program
// exits with status 1. A nil inp does nothing.
func encodeURL(inp io.Reader) {

	if inp == nil {
		return
	}

	data, err := ioutil.ReadAll(inp)
	if err != nil {
		fmt.Fprintf(os.Stderr, "\nERROR: Unable to read input for -encodeURL: %s\n", err.Error())
		os.Exit(1)
	}

	txt := strings.TrimSuffix(string(data), "\n")

	str := url.QueryEscape(txt)

	// escaped text never contains a raw newline, so one is always appended
	os.Stdout.WriteString(str)
	os.Stdout.WriteString("\n")
}
1186
// decodeURL reads all of inp, strips one trailing newline, and writes the
// query-unescaped form (url.QueryUnescape) to standard output, adding a
// newline unless the decoded text already ends with one. A read failure or
// a malformed escape sequence is reported on standard error and the program
// exits with status 1. A nil inp does nothing.
func decodeURL(inp io.Reader) {

	if inp == nil {
		return
	}

	byt, err := ioutil.ReadAll(inp)
	if err != nil {
		fmt.Fprintf(os.Stderr, "\nERROR: Unable to read input for -decodeURL: %s\n", err.Error())
		os.Exit(1)
	}

	txt := strings.TrimSuffix(string(byt), "\n")

	str, err := url.QueryUnescape(txt)
	if err != nil {
		// previously a bad escape silently produced an empty line
		fmt.Fprintf(os.Stderr, "\nERROR: Unable to decode URL-encoded input: %s\n", err.Error())
		os.Exit(1)
	}

	os.Stdout.WriteString(str)
	if !strings.HasSuffix(str, "\n") {
		os.Stdout.WriteString("\n")
	}
}
1204
// encodeB64 reads all of inp and writes its standard base64 encoding to
// standard output followed by a newline. The input is encoded exactly as
// read, including any trailing newline. A read failure is reported on
// standard error and the program exits with status 1. A nil inp does nothing.
func encodeB64(inp io.Reader) {

	if inp == nil {
		return
	}

	data, err := ioutil.ReadAll(inp)
	if err != nil {
		fmt.Fprintf(os.Stderr, "\nERROR: Unable to read input for -encode64: %s\n", err.Error())
		os.Exit(1)
	}

	str := base64.StdEncoding.EncodeToString(data)

	// encoded text never contains a newline, so one is always appended
	os.Stdout.WriteString(str)
	os.Stdout.WriteString("\n")
}
1220
// decodeB64 reads all of inp, decodes it as standard base64, and writes the
// decoded bytes to standard output, adding a newline unless the decoded data
// already ends with one. A read failure or invalid base64 input is reported
// on standard error and the program exits with status 1. A nil inp does
// nothing.
func decodeB64(inp io.Reader) {

	if inp == nil {
		return
	}

	byt, err := ioutil.ReadAll(inp)
	if err != nil {
		fmt.Fprintf(os.Stderr, "\nERROR: Unable to read input for -decode64: %s\n", err.Error())
		os.Exit(1)
	}

	data, err := base64.StdEncoding.DecodeString(string(byt))
	if err != nil {
		// previously corrupt input silently produced truncated output
		fmt.Fprintf(os.Stderr, "\nERROR: Unable to decode base64 input: %s\n", err.Error())
		os.Exit(1)
	}
	str := string(data)

	os.Stdout.WriteString(str)
	if !strings.HasSuffix(str, "\n") {
		os.Stdout.WriteString("\n")
	}
}
1237
1238func decodeHGVS(inp io.Reader) {
1239
1240	if inp == nil {
1241		return
1242	}
1243
1244	byt, _ := ioutil.ReadAll(inp)
1245	txt := string(byt)
1246
1247	os.Stdout.WriteString("<HGVS>\n")
1248
1249	str := eutils.ParseHGVS(txt)
1250
1251	os.Stdout.WriteString(str)
1252	if !strings.HasSuffix(str, "\n") {
1253		os.Stdout.WriteString("\n")
1254	}
1255
1256	os.Stdout.WriteString("</HGVS>\n")
1257}
1258
1259// COLUMN ALIGNMENT FORMATTER
1260
1261// processAlign aligns a tab-delimited table by individual column widths
1262func processAlign(inp io.Reader, args []string) {
1263
1264	// tab-delimited-table to padded-by-spaces alignment inspired by
1265	// Steve Kinzler's align script - see http://kinzler.com/me/align/
1266
1267	if inp == nil {
1268		return
1269	}
1270
1271	mrg := 0
1272	pdg := 0
1273	aln := ""
1274
1275	// skip past command name
1276	args = args[1:]
1277
1278	for len(args) > 0 {
1279
1280		switch args[0] {
1281		case "-g":
1282			pdg = eutils.GetNumericArg(args, "-g spacing between columns", 0, 1, 30)
1283			args = args[2:]
1284		case "-h":
1285			mrg = eutils.GetNumericArg(args, "-i indent before columns", 0, 1, 30)
1286			args = args[2:]
1287		case "-a":
1288			aln = eutils.GetStringArg(args, "-a column alignment code string")
1289			args = args[2:]
1290		default:
1291			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -align command\n")
1292			os.Exit(1)
1293		}
1294	}
1295
1296	algn := eutils.AlignColumns(inp, mrg, pdg, aln)
1297
1298	if algn == nil {
1299		fmt.Fprintf(os.Stderr, "\nERROR: Unable to create alignment function\n")
1300		os.Exit(1)
1301	}
1302
1303	eutils.ChanToStdout(algn)
1304
1305	return
1306}
1307
1308// SEQUENCE EDITING
1309
1310func sequenceRemove(inp io.Reader, args []string) {
1311
1312	if inp == nil {
1313		return
1314	}
1315
1316	first := ""
1317	last := ""
1318
1319	// skip past command name
1320	args = args[1:]
1321
1322	for len(args) > 0 {
1323
1324		switch args[0] {
1325		case "-first":
1326			first = eutils.GetStringArg(args, "Bases to delete at beginning")
1327			first = strings.ToUpper(first)
1328			args = args[2:]
1329		case "-last":
1330			last = eutils.GetStringArg(args, "Bases to delete at end")
1331			last = strings.ToUpper(last)
1332			args = args[2:]
1333		default:
1334			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -remove command\n")
1335			os.Exit(1)
1336		}
1337	}
1338
1339	str := eutils.ReadAllIntoSequence(inp)
1340
1341	str = eutils.SequenceRemove(str, first, last)
1342
1343	os.Stdout.WriteString(str)
1344	if !strings.HasSuffix(str, "\n") {
1345		os.Stdout.WriteString("\n")
1346	}
1347}
1348
1349func sequenceRetain(inp io.Reader, args []string) {
1350
1351	if inp == nil {
1352		return
1353	}
1354
1355	lead := 0
1356	trail := 0
1357
1358	// skip past command name
1359	args = args[1:]
1360
1361	for len(args) > 0 {
1362
1363		switch args[0] {
1364		case "-leading":
1365			lead = eutils.GetNumericArg(args, "Bases to keep at beginning", 0, -1, -1)
1366			args = args[2:]
1367		case "-trailing":
1368			trail = eutils.GetNumericArg(args, "Bases to keep at end", 0, -1, -1)
1369			args = args[2:]
1370		default:
1371			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -retain command\n")
1372			os.Exit(1)
1373		}
1374	}
1375
1376	str := eutils.ReadAllIntoSequence(inp)
1377
1378	str = eutils.SequenceRetain(str, lead, trail)
1379
1380	os.Stdout.WriteString(str)
1381	if !strings.HasSuffix(str, "\n") {
1382		os.Stdout.WriteString("\n")
1383	}
1384}
1385
1386func sequenceReplace(inp io.Reader, args []string) {
1387
1388	if inp == nil {
1389		return
1390	}
1391
1392	pos := 0
1393	del := ""
1394	ins := ""
1395	lower := false
1396
1397	// skip past command name
1398	args = args[1:]
1399
1400	for len(args) > 0 {
1401
1402		switch args[0] {
1403		case "-offset":
1404			pos = eutils.GetNumericArg(args, "0-based position", 0, -1, -1)
1405			args = args[2:]
1406		case "-column":
1407			val := eutils.GetNumericArg(args, "1-based position", 1, -1, -1)
1408			pos = val - 1
1409			args = args[2:]
1410		case "-delete":
1411			del = eutils.GetStringArg(args, "Number to delete")
1412			del = strings.ToUpper(del)
1413			args = args[2:]
1414		case "-insert":
1415			ins = eutils.GetStringArg(args, "Bases to insert")
1416			ins = strings.ToUpper(ins)
1417			args = args[2:]
1418		case "-lower":
1419			lower = true
1420			args = args[1:]
1421		default:
1422			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -replace command\n")
1423			os.Exit(1)
1424		}
1425	}
1426
1427	str := eutils.ReadAllIntoSequence(inp)
1428
1429	if lower {
1430		str = strings.ToLower(str)
1431	}
1432
1433	str = eutils.SequenceReplace(str, pos, del, ins)
1434
1435	os.Stdout.WriteString(str)
1436	if !strings.HasSuffix(str, "\n") {
1437		os.Stdout.WriteString("\n")
1438	}
1439}
1440
1441func sequenceExtract(inp io.Reader, args []string) {
1442
1443	if inp == nil {
1444		return
1445	}
1446
1447	featLoc := ""
1448	lower := false
1449
1450	// skip past command name
1451	args = args[1:]
1452
1453	for len(args) > 0 {
1454
1455		switch args[0] {
1456		case "-lower":
1457			lower = true
1458			args = args[1:]
1459		default:
1460			// read output of xtract -insd feat_location qualifier
1461			featLoc = args[0]
1462			args = args[1:]
1463		}
1464	}
1465
1466	if featLoc == "" {
1467		fmt.Fprintf(os.Stderr, "\nERROR: Missing argument after -extract command\n")
1468		os.Exit(1)
1469	}
1470
1471	str := eutils.ReadAllIntoSequence(inp)
1472
1473	str = eutils.SequenceExtract(str, featLoc)
1474
1475	if lower {
1476		str = strings.ToLower(str)
1477	}
1478
1479	os.Stdout.WriteString(str)
1480	if !strings.HasSuffix(str, "\n") {
1481		os.Stdout.WriteString("\n")
1482	}
1483}
1484
1485// REVERSE SEQUENCE
1486
1487// seqFlip reverses without complementing - e.g., minus strand proteins translated in reverse order
1488func seqFlip(inp io.Reader) {
1489
1490	if inp == nil {
1491		return
1492	}
1493
1494	str := eutils.ReadAllIntoSequence(inp)
1495
1496	str = eutils.SequenceReverse(str)
1497
1498	os.Stdout.WriteString(str)
1499	if !strings.HasSuffix(str, "\n") {
1500		os.Stdout.WriteString("\n")
1501	}
1502}
1503
1504// REVERSE COMPLEMENT
1505
1506func nucRevComp(inp io.Reader) {
1507
1508	if inp == nil {
1509		return
1510	}
1511
1512	str := eutils.ReadAllIntoSequence(inp)
1513
1514	str = eutils.ReverseComplement(str)
1515
1516	os.Stdout.WriteString(str)
1517	if !strings.HasSuffix(str, "\n") {
1518		os.Stdout.WriteString("\n")
1519	}
1520}
1521
1522// FASTA DIFFERENCES
1523
// printFastaPairs prints two sequences position by position as pairs of
// lines, 50 positions per pair.
//
// Both inputs are lower-cased and compared byte by byte. Where they agree,
// the first line shows the character and the second line shows a space.
// Where they differ, both lines show their character in upper case; a
// sequence that has run out contributes a space. The last pair is padded
// with spaces to the full width. Each first line is followed by a space and
// a right-aligned six-column number: the starting offset of the pair plus the
// length of that first line with trailing spaces trimmed.
func printFastaPairs(frst, scnd string) {

	const rowLen = 50

	seqA := strings.ToLower(frst)
	seqB := strings.ToLower(scnd)

	// at gives the byte at idx widened to a rune, or a space once idx is
	// beyond the end of seq
	at := func(seq string, idx int) rune {
		if idx >= len(seq) {
			return ' '
		}
		return rune(seq[idx])
	}

	var topRow, btmRow []rune

	// walk both sequences in step, stopping when each yields a space
	for idx := 0; ; idx++ {

		a, b := at(seqA, idx), at(seqB, idx)
		if a == ' ' && b == ' ' {
			break
		}

		if a != b {
			// mismatches stand out in upper case on both rows
			a, b = unicode.ToUpper(a), unicode.ToUpper(b)
		} else {
			// agreement is shown on the first row only
			b = ' '
		}

		topRow = append(topRow, a)
		btmRow = append(btmRow, b)
	}

	// fill out a partial final row with spaces
	if extra := len(topRow) % rowLen; extra > 0 {
		for ; extra < rowLen; extra++ {
			topRow = append(topRow, ' ')
			btmRow = append(btmRow, ' ')
		}
	}

	// emit one pair of lines per row
	total := len(topRow)
	for start := 0; start < total; start += rowLen {

		stop := start + rowLen
		if stop > total {
			stop = total
		}

		top := string(topRow[start:stop])
		btm := string(btmRow[start:stop])
		shown := len(strings.TrimRight(top, " "))

		fmt.Fprintf(os.Stdout, "%s %6d\n%s\n", top, start+shown, btm)
	}
}
1603
1604func fastaDiff(inp io.Reader, args []string) {
1605
1606	if inp == nil {
1607		return
1608	}
1609
1610	// skip past command name
1611	args = args[1:]
1612
1613	if len(args) != 2 {
1614		fmt.Fprintf(os.Stderr, "\nERROR: Two files required by -diff command\n")
1615		os.Exit(1)
1616	}
1617
1618	frst := args[0]
1619	scnd := args[1]
1620
1621	frstFasta := eutils.ReadFromFileIntoSequence(frst)
1622	scndFasta := eutils.ReadFromFileIntoSequence(scnd)
1623
1624	if frstFasta == scndFasta {
1625		return
1626	}
1627
1628	// sequences are assumed to be aligned, this code highlight mismatches
1629	printFastaPairs(frstFasta, scndFasta)
1630}
1631
1632// PROTEIN WEIGHT
1633
1634func protWeight(inp io.Reader, args []string) {
1635
1636	if inp == nil {
1637		return
1638	}
1639
1640	trimLeadingMet := true
1641
1642	// skip past command name
1643	args = args[1:]
1644
1645	for len(args) > 0 {
1646
1647		switch args[0] {
1648		case "-met":
1649			trimLeadingMet = false
1650			args = args[1:]
1651		default:
1652			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -molwt command\n")
1653			os.Exit(1)
1654		}
1655	}
1656
1657	str := eutils.ReadAllIntoSequence(inp)
1658
1659	str = eutils.ProteinWeight(str, trimLeadingMet)
1660
1661	os.Stdout.WriteString(str)
1662	if !strings.HasSuffix(str, "\n") {
1663		os.Stdout.WriteString("\n")
1664	}
1665}
1666
1667// cdRegionToProtein reads all of stdin as sequence data
1668func cdRegionToProtein(inp io.Reader, args []string) {
1669
1670	if inp == nil {
1671		return
1672	}
1673
1674	genCode := 1
1675	frame := 0
1676	includeStop := false
1677	doEveryCodon := false
1678	removeTrailingX := false
1679	is5primeComplete := true
1680	is3primeComplete := true
1681
1682	repeat := 1
1683
1684	// skip past command name
1685	args = args[1:]
1686
1687	for len(args) > 0 {
1688
1689		switch args[0] {
1690		case "-code", "-gencode":
1691			genCode = eutils.GetNumericArg(args, "genetic code number", 0, 1, 30)
1692			args = args[2:]
1693		case "-frame":
1694			frame = eutils.GetNumericArg(args, "offset into coding sequence", 0, 1, 30)
1695			args = args[2:]
1696		case "-stop", "-stops":
1697			includeStop = true
1698			args = args[1:]
1699		case "-every", "-all":
1700			doEveryCodon = true
1701			args = args[1:]
1702		case "-trim", "-trailing":
1703			removeTrailingX = true
1704			args = args[1:]
1705		case "-part5", "-partial5", "-lt5":
1706			is5primeComplete = false
1707			args = args[1:]
1708		case "-part3", "-partial3", "-gt3":
1709			is3primeComplete = false
1710			args = args[1:]
1711		case "-repeat":
1712			repeat = eutils.GetNumericArg(args, "number of repetitions for testing", 1, 1, 100)
1713			args = args[2:]
1714		default:
1715			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized option after -cds2prot command\n")
1716			os.Exit(1)
1717		}
1718	}
1719
1720	txt := eutils.ReadAllIntoSequence(inp)
1721
1722	for i := 0; i < repeat; i++ {
1723
1724		// repeat multiple times for performance testing (undocumented)
1725		str := eutils.TranslateCdRegion(txt, genCode, frame, includeStop, doEveryCodon, removeTrailingX, is5primeComplete, is3primeComplete)
1726
1727		os.Stdout.WriteString(str)
1728		if !strings.HasSuffix(str, "\n") {
1729			os.Stdout.WriteString("\n")
1730		}
1731	}
1732}
1733
1734// MAIN FUNCTION
1735
1736func main() {
1737
1738	// skip past executable name
1739	args := os.Args[1:]
1740
1741	if len(args) < 1 {
1742		fmt.Fprintf(os.Stderr, "\nERROR: No command-line arguments supplied to transmute\n")
1743		os.Exit(1)
1744	}
1745
1746	// performance arguments
1747	chanDepth := 0
1748	farmSize := 0
1749	heapSize := 0
1750	numServe := 0
1751	goGc := 0
1752
1753	// processing option arguments
1754	doCompress := false
1755	doCleanup := false
1756	doStrict := false
1757	doMixed := false
1758	deAccent := false
1759	doASCII := false
1760
1761	/*
1762		doUnicode := false
1763		doScript := false
1764		doMathML := false
1765	*/
1766
1767	// CONCURRENCY, CLEANUP, AND DEBUGGING FLAGS
1768
1769	// do these first because -defcpu and -maxcpu can be sent from wrapper before other arguments
1770
1771	ncpu := runtime.NumCPU()
1772	if ncpu < 1 {
1773		ncpu = 1
1774	}
1775
1776	// wrapper can limit maximum number of processors to use (undocumented)
1777	maxProcs := 0
1778	defProcs := 0
1779
1780	// concurrent performance tuning parameters, can be overridden by -proc and -cons
1781	numProcs := 0
1782	serverRatio := 4
1783
1784	// -flag sets -strict or -mixed cleanup flags from argument
1785	flgs := ""
1786
1787	/*
1788		unicodePolicy := ""
1789		scriptPolicy := ""
1790		mathmlPolicy := ""
1791	*/
1792
1793	// read data from file instead of stdin
1794	fileName := ""
1795
1796	// debugging
1797	stts := false
1798	timr := false
1799
1800	// profiling
1801	prfl := false
1802
1803	inSwitch := true
1804
1805	// get concurrency, cleanup, and debugging flags in any order
1806	for {
1807
1808		inSwitch = true
1809
1810		switch args[0] {
1811
1812		// concurrency override arguments can be passed in by local wrapper script (undocumented)
1813		case "-maxcpu":
1814			maxProcs = eutils.GetNumericArg(args, "Maximum number of processors", 1, 1, ncpu)
1815			args = args[1:]
1816		case "-defcpu":
1817			defProcs = eutils.GetNumericArg(args, "Default number of processors", ncpu, 1, ncpu)
1818			args = args[1:]
1819		// performance tuning flags
1820		case "-proc":
1821			numProcs = eutils.GetNumericArg(args, "Number of processors", ncpu, 1, ncpu)
1822			args = args[1:]
1823		case "-cons":
1824			serverRatio = eutils.GetNumericArg(args, "Parser to processor ratio", 4, 1, 32)
1825			args = args[1:]
1826		case "-serv":
1827			numServe = eutils.GetNumericArg(args, "Concurrent parser count", 0, 1, 128)
1828			args = args[1:]
1829		case "-chan":
1830			chanDepth = eutils.GetNumericArg(args, "Communication channel depth", 0, ncpu, 128)
1831			args = args[1:]
1832		case "-heap":
1833			heapSize = eutils.GetNumericArg(args, "Unshuffler heap size", 8, 8, 64)
1834			args = args[1:]
1835		case "-farm":
1836			farmSize = eutils.GetNumericArg(args, "Node buffer length", 4, 4, 2048)
1837			args = args[1:]
1838		case "-gogc":
1839			goGc = eutils.GetNumericArg(args, "Garbage collection percentage", 0, 50, 1000)
1840			args = args[1:]
1841
1842		// read data from file
1843		case "-input":
1844			if len(args) < 2 {
1845				fmt.Fprintf(os.Stderr, "\nERROR: Input file name is missing\n")
1846				os.Exit(1)
1847			}
1848			fileName = args[1]
1849			// skip past first of two arguments
1850			args = args[1:]
1851
1852		// data cleanup flags
1853		case "-compress", "-compressed":
1854			doCompress = true
1855		case "-spaces", "-cleanup":
1856			doCleanup = true
1857		case "-strict":
1858			doStrict = true
1859		case "-mixed":
1860			doMixed = true
1861		case "-accent":
1862			deAccent = true
1863		case "-ascii":
1864			doASCII = true
1865
1866		// previously visible processing flags (undocumented)
1867		case "-stems", "-stem":
1868			// ignore
1869		case "-stops", "-stop":
1870			// ignore
1871
1872		// allow setting of unicode, script, and mathml flags (undocumented)
1873		case "-unicode":
1874			if len(args) < 2 {
1875				fmt.Fprintf(os.Stderr, "\nERROR: -unicode argument is missing\n")
1876				os.Exit(1)
1877			}
1878			// unicodePolicy = eutils.GetStringArg(args, "Unicode argument")
1879			args = args[1:]
1880		case "-script":
1881			if len(args) < 2 {
1882				fmt.Fprintf(os.Stderr, "\nERROR: -script argument is missing\n")
1883				os.Exit(1)
1884			}
1885			// scriptPolicy = eutils.GetStringArg(args, "Script argument")
1886			args = args[1:]
1887		case "-mathml":
1888			if len(args) < 2 {
1889				fmt.Fprintf(os.Stderr, "\nERROR: -mathml argument is missing\n")
1890				os.Exit(1)
1891			}
1892			// mathmlPolicy = eutils.GetStringArg(args, "MathML argument")
1893			args = args[1:]
1894
1895		case "-flag", "-flags":
1896			if len(args) < 2 {
1897				fmt.Fprintf(os.Stderr, "\nERROR: -flags argument is missing\n")
1898				os.Exit(1)
1899			}
1900			flgs = eutils.GetStringArg(args, "Flags argument")
1901			args = args[1:]
1902
1903		// debugging flags
1904		case "-stats", "-stat":
1905			stts = true
1906		case "-timer":
1907			timr = true
1908		case "-profile":
1909			prfl = true
1910
1911		default:
1912			// if not any of the controls, set flag to break out of for loop
1913			inSwitch = false
1914		}
1915
1916		if !inSwitch {
1917			break
1918		}
1919
1920		// skip past argument
1921		args = args[1:]
1922
1923		if len(args) < 1 {
1924			break
1925		}
1926	}
1927
1928	// -flag allows script to set -strict or -mixed (or -stems, or -stops) from argument
1929	switch flgs {
1930	case "strict":
1931		doStrict = true
1932	case "mixed":
1933		doMixed = true
1934	case "stems", "stem":
1935		// ignore
1936	case "stops", "stop":
1937		// ignore
1938	case "none", "default":
1939	default:
1940		if flgs != "" {
1941			fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized -flag value '%s'\n", flgs)
1942			os.Exit(1)
1943		}
1944	}
1945
1946	/*
1947		UnicodeFix = ParseMarkup(unicodePolicy, "-unicode")
1948		ScriptFix = ParseMarkup(scriptPolicy, "-script")
1949		MathMLFix = ParseMarkup(mathmlPolicy, "-mathml")
1950
1951		if UnicodeFix != NOMARKUP {
1952			doUnicode = true
1953		}
1954
1955		if ScriptFix != NOMARKUP {
1956			doScript = true
1957		}
1958
1959		if MathMLFix != NOMARKUP {
1960			doMathML = true
1961		}
1962	*/
1963
1964	if numProcs == 0 {
1965		if defProcs > 0 {
1966			numProcs = defProcs
1967		} else if maxProcs > 0 {
1968			numProcs = maxProcs
1969		}
1970	}
1971	if numProcs > ncpu {
1972		numProcs = ncpu
1973	}
1974	if numProcs > maxProcs {
1975		numProcs = maxProcs
1976	}
1977
1978	eutils.SetTunings(numProcs, numServe, serverRatio, chanDepth, farmSize, heapSize, goGc)
1979
1980	eutils.SetOptions(doStrict, doMixed, deAccent, doASCII, doCompress, doCleanup)
1981
1982	// -stats prints number of CPUs and performance tuning values if no other arguments (undocumented)
1983	if stts && len(args) < 1 {
1984
1985		eutils.PrintStats()
1986
1987		return
1988	}
1989
1990	if len(args) < 1 {
1991		fmt.Fprintf(os.Stderr, "\nERROR: Insufficient command-line arguments supplied to transmute\n")
1992		os.Exit(1)
1993	}
1994
1995	// DOCUMENTATION COMMANDS
1996
1997	inSwitch = true
1998
1999	switch args[0] {
2000	case "-version":
2001		fmt.Printf("%s\n", eutils.EDirectVersion)
2002	case "-help":
2003		fmt.Printf("transmute %s\n%s\n", eutils.EDirectVersion, transmuteHelp)
2004	case "-extra", "-extras":
2005		fmt.Printf("transmute %s\n%s\n", eutils.EDirectVersion, transmuteExtra)
2006	case "-degenerate":
2007		// generate new genetic code data tables (undocumented)
2008		eutils.GenerateGeneticCodeMaps()
2009	default:
2010		// if not any of the documentation commands, keep going
2011		inSwitch = false
2012	}
2013
2014	if inSwitch {
2015		return
2016	}
2017
2018	// FILE NAME CAN BE SUPPLIED WITH -input COMMAND
2019
2020	in := os.Stdin
2021
2022	// check for data being piped into stdin
2023	isPipe := false
2024	fi, err := os.Stdin.Stat()
2025	if err == nil {
2026		isPipe = bool((fi.Mode() & os.ModeNamedPipe) != 0)
2027	}
2028
2029	usingFile := false
2030
2031	if fileName != "" {
2032
2033		inFile, err := os.Open(fileName)
2034		if err != nil {
2035			fmt.Fprintf(os.Stderr, "\nERROR: Unable to open input file '%s'\n", fileName)
2036			os.Exit(1)
2037		}
2038
2039		defer inFile.Close()
2040
2041		// use indicated file instead of stdin
2042		in = inFile
2043		usingFile = true
2044
2045		if isPipe && runtime.GOOS != "windows" {
2046			mode := fi.Mode().String()
2047			fmt.Fprintf(os.Stderr, "\nERROR: Input data from both stdin and file '%s', mode is '%s'\n", fileName, mode)
2048			os.Exit(1)
2049		}
2050	}
2051
2052	// check for -input command after extraction arguments
2053	for _, str := range args {
2054		if str == "-input" {
2055			fmt.Fprintf(os.Stderr, "\nERROR: Misplaced -input command\n")
2056			os.Exit(1)
2057		}
2058	}
2059
2060	// START PROFILING IF REQUESTED
2061
2062	if prfl {
2063
2064		f, err := os.Create("cpu.pprof")
2065		if err != nil {
2066			fmt.Fprintf(os.Stderr, "\nERROR: Unable to create profile output file\n")
2067			os.Exit(1)
2068		}
2069
2070		pprof.StartCPUProfile(f)
2071
2072		defer pprof.StopCPUProfile()
2073	}
2074
2075	// INITIALIZE RECORD COUNT
2076
2077	recordCount := 0
2078	byteCount := 0
2079
2080	// print processing rate and program duration
2081	printDuration := func(name string) {
2082
2083		eutils.PrintDuration(name, recordCount, byteCount)
2084	}
2085
2086	nextArg := func() (string, bool) {
2087
2088		if len(args) < 1 {
2089			return "", false
2090		}
2091
2092		// remove next token from slice
2093		nxt := args[0]
2094		args = args[1:]
2095
2096		return nxt, true
2097	}
2098
2099	// The several converter functions that follow must be called
2100	// before CreateXMLStreamer starts draining stdin
2101
2102	// JSON TO XML CONVERTER
2103
2104	if args[0] == "-j2x" || args[0] == "-json2xml" {
2105
2106		// skip past command name
2107		args = args[1:]
2108
2109		set := "root"
2110		rec := ""
2111		nest := ""
2112
2113		// look for optional arguments
2114		for {
2115			arg, ok := nextArg()
2116			if !ok {
2117				break
2118			}
2119
2120			switch arg {
2121			case "-set":
2122				// override set wrapper
2123				set, ok = nextArg()
2124				if ok && set == "-" {
2125					set = ""
2126				}
2127			case "-rec":
2128				// override record wrapper
2129				rec, ok = nextArg()
2130				if ok && rec == "-" {
2131					rec = ""
2132				}
2133			case "-nest":
2134				// specify nested array naming policy
2135				nest, ok = nextArg()
2136				if !ok {
2137					fmt.Fprintf(os.Stderr, "Nested array naming policy is missing\n")
2138					os.Exit(1)
2139				}
2140				if ok && nest == "-" {
2141					nest = "flat"
2142				}
2143				switch nest {
2144				case "flat", "plural", "name", "recurse", "recursive", "same", "depth", "deep", "level":
2145				default:
2146					fmt.Fprintf(os.Stderr, "Unrecognized nested array naming policy\n")
2147					os.Exit(1)
2148				}
2149			default:
2150				// alternative form uses positional arguments to override set and rec
2151				set = arg
2152				if set == "-" {
2153					set = ""
2154				}
2155				rec, ok = nextArg()
2156				if ok && rec == "-" {
2157					rec = ""
2158				}
2159			}
2160		}
2161
2162		// use output channel of tokenizer as input channel of converter
2163		jcnv := eutils.JSONConverter(in, set, rec, nest)
2164
2165		if jcnv == nil {
2166			fmt.Fprintf(os.Stderr, "\nERROR: Unable to create JSON to XML converter\n")
2167			os.Exit(1)
2168		}
2169
2170		// drain output of channel
2171		for str := range jcnv {
2172
2173			if str == "" {
2174				continue
2175			}
2176
2177			// send result to output
2178			os.Stdout.WriteString(str)
2179			if !strings.HasSuffix(str, "\n") {
2180				os.Stdout.WriteString("\n")
2181			}
2182
2183			recordCount++
2184			runtime.Gosched()
2185		}
2186
2187		debug.FreeOSMemory()
2188
2189		if timr {
2190			printDuration("blocks")
2191		}
2192
2193		return
2194	}
2195
2196	// ASN.1 TO XML CONVERTER
2197
2198	if args[0] == "-a2x" || args[0] == "-asn2xml" {
2199
2200		// skip past command name
2201		args = args[1:]
2202
2203		set := ""
2204		rec := ""
2205
2206		// look for optional arguments
2207		for {
2208			arg, ok := nextArg()
2209			if !ok {
2210				break
2211			}
2212
2213			switch arg {
2214			case "-set":
2215				// override set wrapper
2216				set, ok = nextArg()
2217				if ok && set == "-" {
2218					set = ""
2219				}
2220			case "-rec":
2221				// override record wrapper
2222				rec, ok = nextArg()
2223				if ok && rec == "-" {
2224					rec = ""
2225				}
2226			}
2227		}
2228
2229		acnv := eutils.ASN1Converter(in, set, rec)
2230
2231		if acnv == nil {
2232			fmt.Fprintf(os.Stderr, "\nERROR: Unable to create ASN.1 to XML converter\n")
2233			os.Exit(1)
2234		}
2235
2236		// drain output of channel
2237		for str := range acnv {
2238
2239			if str == "" {
2240				continue
2241			}
2242
2243			// send result to output
2244			os.Stdout.WriteString(str)
2245			if !strings.HasSuffix(str, "\n") {
2246				os.Stdout.WriteString("\n")
2247			}
2248
2249			recordCount++
2250			runtime.Gosched()
2251		}
2252
2253		debug.FreeOSMemory()
2254
2255		if timr {
2256			printDuration("blocks")
2257		}
2258
2259		return
2260	}
2261
2262	// READ TAB-DELIMITED FILE AND WRAP IN XML FIELDS
2263
2264	doTable := func(delim string) {
2265
2266		// skip past command name
2267		args = args[1:]
2268
2269		set := ""
2270		rec := ""
2271
2272		skip := 0
2273		header := false
2274		lower := false
2275		upper := false
2276		indent := true
2277
2278		var fields []string
2279		numFlds := 0
2280
2281		for len(args) > 0 {
2282			str := args[0]
2283			switch str {
2284			case "-set":
2285				args = args[1:]
2286				if len(args) < 1 {
2287					fmt.Fprintf(os.Stderr, "\nERROR: No argument after -set\n")
2288					os.Exit(1)
2289				}
2290				set = args[0]
2291				args = args[1:]
2292			case "-rec":
2293				args = args[1:]
2294				if len(args) < 1 {
2295					fmt.Fprintf(os.Stderr, "\nERROR: No argument after -rec\n")
2296					os.Exit(1)
2297				}
2298				rec = args[0]
2299				args = args[1:]
2300			case "-skip":
2301				args = args[1:]
2302				if len(args) < 1 {
2303					fmt.Fprintf(os.Stderr, "\nERROR: No argument after -skip\n")
2304					os.Exit(1)
2305				}
2306				tmp := args[0]
2307				val, err := strconv.Atoi(tmp)
2308				if err != nil {
2309					fmt.Fprintf(os.Stderr, "\nERROR: -skip argument (%s) is not an integer\n", tmp)
2310					os.Exit(1)
2311				}
2312				skip = val
2313				args = args[1:]
2314			case "-header", "-headers", "-heading":
2315				header = true
2316				args = args[1:]
2317			case "-lower":
2318				lower = true
2319				args = args[1:]
2320			case "-upper":
2321				upper = true
2322				args = args[1:]
2323			case "-indent":
2324				indent = true
2325				args = args[1:]
2326			case "-flush":
2327				indent = false
2328				args = args[1:]
2329			default:
2330				// remaining arguments are names for columns
2331				if str != "" && str != "*" {
2332					fields = append(fields, str)
2333					numFlds++
2334				}
2335				args = args[1:]
2336			}
2337		}
2338
2339		if numFlds < 1 && !header {
2340			fmt.Fprintf(os.Stderr, "\nERROR: Insufficient arguments for table converter\n")
2341			os.Exit(1)
2342		}
2343
2344		tble := eutils.TableConverter(in, delim, set, rec, skip, header, lower, upper, indent, fields)
2345
2346		if tble == nil {
2347			fmt.Fprintf(os.Stderr, "\nERROR: Unable to create table to XML converter\n")
2348			os.Exit(1)
2349		}
2350
2351		// drain output of channel
2352		for str := range tble {
2353
2354			if str == "" {
2355				continue
2356			}
2357
2358			// send result to output
2359			os.Stdout.WriteString(str)
2360			if !strings.HasSuffix(str, "\n") {
2361				os.Stdout.WriteString("\n")
2362			}
2363
2364			recordCount++
2365			runtime.Gosched()
2366		}
2367
2368		debug.FreeOSMemory()
2369
2370		if timr {
2371			printDuration("lines")
2372		}
2373	}
2374
2375	if len(args) > 1 && args[0] == "-t2x" {
2376
2377		doTable("\t")
2378		return
2379	}
2380
2381	if len(args) > 1 && args[0] == "-c2x" {
2382
2383		doTable(",")
2384		return
2385	}
2386
2387	// READ GENBANK FLATFILE AND TRANSLATE TO INSDSEQ XML
2388
2389	if len(args) > 0 && args[0] == "-g2x" {
2390
2391		gbk := eutils.GenBankConverter(in)
2392
2393		if gbk == nil {
2394			fmt.Fprintf(os.Stderr, "Unable to create GenBank to XML converter\n")
2395			os.Exit(1)
2396		}
2397
2398		head := `<?xml version="1.0" encoding="UTF-8" ?>
2399<!DOCTYPE INSDSet PUBLIC "-//NCBI//INSD INSDSeq/EN" "https://www.ncbi.nlm.nih.gov/dtd/INSD_INSDSeq.dtd">
2400<INSDSet>
2401`
2402		tail := ""
2403
2404		// drain output of last channel in service chain
2405		for str := range gbk {
2406
2407			if str == "" {
2408				continue
2409			}
2410
2411			if head != "" {
2412				os.Stdout.WriteString(head)
2413				head = ""
2414				tail = `</INSDSet>
2415`
2416			}
2417
2418			// send result to stdout
2419			os.Stdout.WriteString(str)
2420			if !strings.HasSuffix(str, "\n") {
2421				os.Stdout.WriteString("\n")
2422			}
2423
2424			recordCount++
2425
2426			runtime.Gosched()
2427		}
2428
2429		if tail != "" {
2430			os.Stdout.WriteString(tail)
2431		}
2432
2433		debug.FreeOSMemory()
2434
2435		if timr {
2436			printDuration("records")
2437		}
2438
2439		return
2440	}
2441
2442	// STRING CONVERSION COMMANDS
2443
2444	inSwitch = true
2445
2446	switch args[0] {
2447	case "-encodeURL":
2448		encodeURL(in)
2449	case "-decodeURL":
2450		decodeURL(in)
2451	case "-encode64", "-encodeB64", "-encodeBase64":
2452		encodeB64(in)
2453	case "-decode64", "-decodeB64", "-decodeBase64":
2454		decodeB64(in)
2455	case "-hgvs":
2456		decodeHGVS(in)
2457	case "-align":
2458		processAlign(in, args)
2459	case "-remove":
2460		sequenceRemove(in, args)
2461	case "-retain":
2462		sequenceRetain(in, args)
2463	case "-replace":
2464		sequenceReplace(in, args)
2465	case "-extract":
2466		sequenceExtract(in, args)
2467	case "-revcomp":
2468		nucRevComp(in)
2469	case "-reverse":
2470		seqFlip(in)
2471	case "-molwt":
2472		protWeight(in, args)
2473	case "-cds2prot":
2474		cdRegionToProtein(in, args)
2475	case "-diff":
2476		fastaDiff(in, args)
2477	default:
2478		// if not any of the conversion commands, keep going
2479		inSwitch = false
2480	}
2481
2482	if inSwitch {
2483
2484		debug.FreeOSMemory()
2485
2486		return
2487	}
2488
2489	// CREATE XML BLOCK READER FROM STDIN OR FILE
2490
2491	rdr := eutils.CreateXMLStreamer(in)
2492	if rdr == nil {
2493		fmt.Fprintf(os.Stderr, "\nERROR: Unable to create XML Block Reader\n")
2494		os.Exit(1)
2495	}
2496
2497	// CONFIRM INPUT DATA AVAILABILITY AFTER RUNNING COMMAND GENERATORS
2498
2499	if fileName == "" && runtime.GOOS != "windows" {
2500
2501		fromStdin := bool((fi.Mode() & os.ModeCharDevice) == 0)
2502		if !isPipe || !fromStdin {
2503			mode := fi.Mode().String()
2504			fmt.Fprintf(os.Stderr, "\nERROR: No data supplied to transmute from stdin or file, mode is '%s'\n", mode)
2505			os.Exit(1)
2506		}
2507	}
2508
2509	if !usingFile && !isPipe {
2510
2511		fmt.Fprintf(os.Stderr, "\nERROR: No XML input data supplied to transmute\n")
2512		os.Exit(1)
2513	}
2514
2515	// SPECIAL FORMATTING COMMANDS
2516
2517	inSwitch = true
2518	leaf := false
2519
2520	switch args[0] {
2521	case "-format":
2522		processFormat(rdr, args)
2523	case "-filter":
2524		processFilter(rdr, args)
2525	case "-normalize", "-normal":
2526		if len(args) < 2 {
2527			fmt.Fprintf(os.Stderr, "\nERROR: No database supplied to -normalize\n")
2528			os.Exit(1)
2529		}
2530		db := args[1]
2531		nrm := eutils.NormalizeXML(rdr, db)
2532		eutils.ChanToStdout(nrm)
2533	case "-outline":
2534		processOutline(rdr)
2535	case "-contour":
2536		leaf = true
2537		fallthrough
2538	case "-synopsis":
2539		args = args[1:]
2540		delim := "/"
2541		if len(args) > 0 {
2542			delim = args[0]
2543			if len(delim) > 3 {
2544				delim = "/"
2545			}
2546		}
2547		processSynopsis(rdr, leaf, delim)
2548	case "-tokens":
2549		processTokens(rdr)
2550	default:
2551		// if not any of the formatting commands, keep going
2552		inSwitch = false
2553	}
2554
2555	if inSwitch {
2556
2557		debug.FreeOSMemory()
2558
2559		// suppress printing of lines if not properly counted
2560		if recordCount == 1 {
2561			recordCount = 0
2562		}
2563
2564		if timr {
2565			printDuration("lines")
2566		}
2567
2568		return
2569	}
2570
2571	// SPECIFY STRINGS TO GO BEFORE AND AFTER ENTIRE OUTPUT OR EACH RECORD
2572
2573	head := ""
2574	tail := ""
2575
2576	hd := ""
2577	tl := ""
2578
2579	for {
2580
2581		inSwitch = true
2582
2583		switch args[0] {
2584		case "-head":
2585			if len(args) < 2 {
2586				fmt.Fprintf(os.Stderr, "\nERROR: Pattern missing after -head command\n")
2587				os.Exit(1)
2588			}
2589			head = eutils.ConvertSlash(args[1])
2590		case "-tail":
2591			if len(args) < 2 {
2592				fmt.Fprintf(os.Stderr, "\nERROR: Pattern missing after -tail command\n")
2593				os.Exit(1)
2594			}
2595			tail = eutils.ConvertSlash(args[1])
2596		case "-hd":
2597			if len(args) < 2 {
2598				fmt.Fprintf(os.Stderr, "\nERROR: Pattern missing after -hd command\n")
2599				os.Exit(1)
2600			}
2601			hd = eutils.ConvertSlash(args[1])
2602		case "-tl":
2603			if len(args) < 2 {
2604				fmt.Fprintf(os.Stderr, "\nERROR: Pattern missing after -tl command\n")
2605				os.Exit(1)
2606			}
2607			tl = eutils.ConvertSlash(args[1])
2608		case "-wrp":
2609			// shortcut to wrap records in XML tags
2610			if len(args) < 2 {
2611				fmt.Fprintf(os.Stderr, "\nERROR: Pattern missing after -wrp command\n")
2612				os.Exit(1)
2613			}
2614			tmp := eutils.ConvertSlash(args[1])
2615			lft, rgt := eutils.SplitInTwoLeft(tmp, ",")
2616			if lft != "" {
2617				head = "<" + lft + ">"
2618				tail = "</" + lft + ">"
2619			}
2620			if rgt != "" {
2621				hd = "<" + rgt + ">"
2622				tl = "</" + rgt + ">"
2623			}
2624		case "-set":
2625			if len(args) < 2 {
2626				fmt.Fprintf(os.Stderr, "\nERROR: Pattern missing after -set command\n")
2627				os.Exit(1)
2628			}
2629			tmp := eutils.ConvertSlash(args[1])
2630			if tmp != "" {
2631				head = "<" + tmp + ">"
2632				tail = "</" + tmp + ">"
2633			}
2634		case "-rec":
2635			if len(args) < 2 {
2636				fmt.Fprintf(os.Stderr, "\nERROR: Pattern missing after -rec command\n")
2637				os.Exit(1)
2638			}
2639			tmp := eutils.ConvertSlash(args[1])
2640			if tmp != "" {
2641				hd = "<" + tmp + ">"
2642				tl = "</" + tmp + ">"
2643			}
2644		default:
2645			// if not any of the controls, set flag to break out of for loop
2646			inSwitch = false
2647		}
2648
2649		if !inSwitch {
2650			break
2651		}
2652
2653		// skip past arguments
2654		args = args[2:]
2655
2656		if len(args) < 1 {
2657			fmt.Fprintf(os.Stderr, "\nERROR: Insufficient command-line arguments supplied to transmute\n")
2658			os.Exit(1)
2659		}
2660	}
2661
2662	// ENSURE PRESENCE OF PATTERN ARGUMENT
2663
2664	if len(args) < 1 {
2665		fmt.Fprintf(os.Stderr, "\nERROR: Insufficient command-line arguments supplied to transmute\n")
2666		os.Exit(1)
2667	}
2668
2669	// allow -record as synonym of -pattern (undocumented)
2670	if args[0] == "-record" || args[0] == "-Record" {
2671		args[0] = "-pattern"
2672	}
2673
2674	// make sure top-level -pattern command is next
2675	if args[0] != "-pattern" && args[0] != "-Pattern" {
2676		fmt.Fprintf(os.Stderr, "\nERROR: No -pattern in command-line arguments\n")
2677		os.Exit(1)
2678	}
2679	if len(args) < 2 {
2680		fmt.Fprintf(os.Stderr, "\nERROR: Item missing after -pattern command\n")
2681		os.Exit(1)
2682	}
2683
2684	topPat := args[1]
2685	if topPat == "" {
2686		fmt.Fprintf(os.Stderr, "\nERROR: Item missing after -pattern command\n")
2687		os.Exit(1)
2688	}
2689	if strings.HasPrefix(topPat, "-") {
2690		fmt.Fprintf(os.Stderr, "\nERROR: Misplaced %s command\n", topPat)
2691		os.Exit(1)
2692	}
2693
2694	// look for -pattern Parent/* construct for heterogeneous data, e.g., -pattern PubmedArticleSet/*
2695	topPattern, star := eutils.SplitInTwoLeft(topPat, "/")
2696	if topPattern == "" {
2697		return
2698	}
2699
2700	// CONCURRENT REFORMATTING OF PARSED XML RECORDS
2701
2702	// -pattern plus -format does concurrent flush-left reformatting
2703	if len(args) > 2 && args[2] == "-format" {
2704
2705		format := "flush"
2706		if len(args) > 3 {
2707			format = args[3]
2708			if strings.HasPrefix(format, "-") {
2709				format = "flush"
2710			}
2711		}
2712
2713		xmlq := eutils.CreateXMLProducer(topPattern, star, rdr)
2714		fchq := createFormatters(topPattern, format, xmlq)
2715		unsq := eutils.CreateXMLUnshuffler(fchq)
2716
2717		if xmlq == nil || fchq == nil || unsq == nil {
2718			fmt.Fprintf(os.Stderr, "\nERROR: Unable to create formatter\n")
2719			os.Exit(1)
2720		}
2721
2722		if head != "" {
2723			os.Stdout.WriteString(head)
2724			os.Stdout.WriteString("\n")
2725		}
2726
2727		// drain output channel
2728		for curr := range unsq {
2729
2730			str := curr.Text
2731
2732			if str == "" {
2733				continue
2734			}
2735
2736			if hd != "" {
2737				os.Stdout.WriteString(hd)
2738				os.Stdout.WriteString("\n")
2739			}
2740
2741			// send result to output
2742			os.Stdout.WriteString(str)
2743			if !strings.HasSuffix(str, "\n") {
2744				os.Stdout.WriteString("\n")
2745			}
2746
2747			if tl != "" {
2748				os.Stdout.WriteString(tl)
2749				os.Stdout.WriteString("\n")
2750			}
2751
2752			recordCount++
2753			runtime.Gosched()
2754		}
2755
2756		if tail != "" {
2757			os.Stdout.WriteString(tail)
2758			os.Stdout.WriteString("\n")
2759		}
2760
2761		debug.FreeOSMemory()
2762
2763		if timr {
2764			printDuration("records")
2765		}
2766
2767		return
2768	}
2769
2770	// REPORT UNRECOGNIZED COMMAND
2771
2772	fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized transmute command\n")
2773	os.Exit(1)
2774}
2775