1#!/bin/bash
2
# Default I/O endpoints; the positional arguments handled further down may
# replace them.
INPUT_FILE=/dev/stdin
OUTPUT_FILE=/dev/stdout

# Scratch location for temporary files: keep the caller's TMPDIR when it is
# set and non-empty, otherwise use /tmp.
: "${TMPDIR:=/tmp}"
7
8
# Print the command-line synopsis on stdout and terminate the script with
# exit status 1.  Callers that want the text on stderr redirect the call.
message ()
{
  local self
  self=$(basename "$0")

  printf '%s\n' \
    "USAGE: $self [-f format] [in [out]]" \
    " -f format        one of: txt (default), html, rtf, odt, docx, wxml, xlsx, pptx" \
    " in               input file (stdin by default)" \
    " out              output file (stdout by default)"

  exit 1
}
17
# Select an installed UTF-8 locale for character classification.
# Globals:  LC_CTYPE (assigned and exported)
# Outputs:  an error message on stderr when no UTF-8 locale is listed
# Exits:    with status 1 in that case
locale_utf8 ()
{
  # Depending on the platform, `locale -a` spells the codeset "utf8" or
  # "UTF-8"; the bracket expression accepts an optional '-' (or '.') between
  # "utf" and "8" so both spellings are recognised.  -m1 keeps the first hit.
  LC_CTYPE=$(locale -a | grep -i -m1 'utf[.-]*8')
  export LC_CTYPE

  if [ -z "$LC_CTYPE" ]; then
    # stdout may be carrying the converted document, so report on stderr.
    echo "Error: Install an UTF-8 locale in your system" >&2
    exit 1
  fi
}
27
# Verify that both `zip` and `unzip` can be found by the shell.
# Outputs:  an error message on stderr naming the first missing command
# Exits:    with status 1 when either command is missing
test_zip ()
{
  local tool

  for tool in zip unzip; do
    if ! command -v "$tool" &>/dev/null; then
      # stdout may be carrying the converted document, so report on stderr.
      echo "Error: Install '$tool' command in your system" >&2
      exit 1
    fi
  done
}
40
# Verify that `gawk` can be found by the shell.
# Outputs:  an error message on stderr when it is missing
# Exits:    with status 1 when it is missing
test_gawk ()
{
  if ! command -v gawk &>/dev/null; then
    # stdout may be carrying the converted document, so report on stderr.
    echo "Error: Install 'gawk' in your system" >&2
    exit 1
  fi
}
48
# Deformat a LaTeX document through the apertium-prelatex,
# apertium-utils-fixlatex and apertium-deslatex pipeline.
# Globals:  FICHERO (input path; when empty, stdin is spooled to a temporary
#           file), SALIDA (output path), TMPDIR, APERTIUM_PATH
# Exits:    with status 1 when the temporary file cannot be created
unformat_latex()
{
  # gawk is not invoked directly here; presumably one of the LaTeX helper
  # programs needs it -- TODO confirm.
  test_gawk

  local src=$FICHERO
  # Non-empty only when this function created the file itself.  Tracking it
  # locally means a stray BORRAFICHERO=true inherited from the environment
  # can never cause the caller's own input file to be deleted.
  local scratch=""

  if [ -z "$src" ]; then
    # Quote the template so a TMPDIR containing whitespace is not split.
    scratch=$(mktemp "$TMPDIR/apertium.XXXXXXXX") || exit 1
    cat > "$scratch"
    src=$scratch
  fi

  "$APERTIUM_PATH/apertium-prelatex" "$src" | \
  "$APERTIUM_PATH/apertium-utils-fixlatex" | \
  "$APERTIUM_PATH/apertium-deslatex" >"$SALIDA"

  if [ -n "$scratch" ]; then
    rm -f -- "$scratch"
  fi
}
67
68
# Deformat an OpenDocument text file: unpack the archive, wrap every
# extracted file whose path matches "content.xml" in a <file name="..."/>
# record (the file's lines joined by single spaces) and feed the records to
# apertium-desodt.
# Globals:  FICHERO (input archive), SALIDA (output path), TMPDIR,
#           APERTIUM_PATH, INPUT_TMPDIR (assigned)
# Exits:    with status 1 when the temporary directory cannot be created
unformat_odt ()
{
  # Run the prerequisite checks first: they may exit, and doing so after
  # mktemp would leave a stray temporary directory behind.
  locale_utf8
  test_zip

  # Quote the template so a TMPDIR containing whitespace is not split.
  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX") || exit 1

  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # awk notes:
  #  - the path is passed as a printf argument, never as the format string,
  #    so a '%' in a path is printed literally;
  #  - getline returns -1 on a read error, hence the explicit "> 0" test
  #    (a bare while(getline < f) would spin forever);
  #  - close() releases each part before the next one is opened.
  find "$INPUT_TMPDIR" -type f | grep 'content\.xml' |\
  awk '{
    part = $0
    printf "<file name=\"%s\"/>", part
    while ((getline line < part) > 0) printf " %s", line
    close(part)
    printf "\n"
  }' |\
  "$APERTIUM_PATH/apertium-desodt" >"$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
82
# Deformat a Word (docx) document: unpack the archive, run every embedded
# spreadsheet (file name ending in "xlsx") through `apertium -f xlsx` in
# place, then wrap the remaining XML parts in <file name="..."/> records
# (each part's lines joined by single spaces) and feed them to
# apertium-deswxml.
# Globals:  FICHERO (input archive), SALIDA (output path), TMPDIR,
#           APERTIUM_PATH, INPUT_TMPDIR (assigned);
#           DIRECTORY, OPCIONU, PREFIJO are forwarded to `apertium` -- they
#           are not assigned anywhere in this script, presumably they come
#           from the environment (TODO confirm).
# Exits:    with status 1 when a temporary file or directory cannot be created
unformat_docx ()
{
  local embedded scratch

  # Run the prerequisite checks first: they may exit, and doing so after
  # mktemp would leave a stray temporary directory behind.
  locale_utf8
  test_zip

  # Quote the template so a TMPDIR containing whitespace is not split.
  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX") || exit 1

  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # NUL-delimited read: paths are never word-split or glob-expanded.
  while IFS= read -r -d '' embedded; do
    scratch=$(mktemp "$TMPDIR/apertium.XXXXXXXX") || { rm -Rf "$INPUT_TMPDIR"; exit 1; }
    "$APERTIUM_PATH/apertium" -f xlsx -d "$DIRECTORY" "$OPCIONU" "$PREFIJO" <"$embedded" >"$scratch"
    cp "$scratch" "$embedded"
    rm "$scratch"
  done < <(find "$INPUT_TMPDIR" -type f -name '*xlsx' -print0)

  # Part selection happens inside awk on the path *relative to the
  # extraction directory*: a temporary prefix that happens to contain "xml",
  # "font", "rels", ... must not influence which parts are kept.
  #  - keep paths containing "xml" (case-sensitive);
  #  - drop paths containing settings/theme/styles/font/rels/docProps
  #    (case-insensitive).
  # The path is passed to printf as an argument (a '%' stays literal),
  # getline is tested with "> 0" (it returns -1 on error) and every part is
  # close()d after reading.
  find "$INPUT_TMPDIR" -type f |\
  INPUT_TMPDIR="$INPUT_TMPDIR" awk '
    BEGIN { skip = length(ENVIRON["INPUT_TMPDIR"]) }
    {
      part = $0
      rel = substr(part, skip + 1)
      if (rel !~ /xml/) next
      if (tolower(rel) ~ /settings|theme|styles|font|rels|docprops/) next
      printf "<file name=\"%s\"/>", part
      while ((getline line < part) > 0) printf " %s", line
      close(part)
      printf "\n"
    }' |\
  "$APERTIUM_PATH/apertium-deswxml" >"$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
105
# Deformat a PowerPoint (pptx) document: unpack the archive, run every
# embedded spreadsheet (file name ending in "xlsx") through
# `apertium -f xlsx` in place, then wrap each slide part in a
# <file name="..."/> record (the part's lines joined by single spaces) and
# feed the records to apertium-despptx.
# Globals:  FICHERO (input archive), SALIDA (output path), TMPDIR,
#           APERTIUM_PATH, INPUT_TMPDIR (assigned);
#           DIRECTORY, OPCIONU, PREFIJO are forwarded to `apertium` -- they
#           are not assigned anywhere in this script, presumably they come
#           from the environment (TODO confirm).
# Exits:    with status 1 when a temporary file or directory cannot be created
unformat_pptx ()
{
  local embedded scratch

  # Run the prerequisite checks first: they may exit, and doing so after
  # mktemp would leave a stray temporary directory behind.
  locale_utf8
  test_zip

  # Quote the template so a TMPDIR containing whitespace is not split.
  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX") || exit 1

  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # NUL-delimited read: paths are never word-split or glob-expanded.
  while IFS= read -r -d '' embedded; do
    scratch=$(mktemp "$TMPDIR/apertium.XXXXXXXX") || { rm -Rf "$INPUT_TMPDIR"; exit 1; }
    "$APERTIUM_PATH/apertium" -f xlsx -d "$DIRECTORY" "$OPCIONU" "$PREFIJO" <"$embedded" >"$scratch"
    cp "$scratch" "$embedded"
    rm "$scratch"
  done < <(find "$INPUT_TMPDIR" -type f -name '*xlsx' -print0)

  # The slides live in the extraction directory, so that is where the search
  # must start (searching "." would look at the caller's working directory).
  # The path is passed to printf as an argument (a '%' stays literal),
  # getline is tested with "> 0" (it returns -1 on error) and every part is
  # close()d after reading.
  find "$INPUT_TMPDIR" -type f -path '*/slides/slide*.xml' |\
  awk '{
    part = $0
    printf "<file name=\"%s\"/>", part
    while ((getline line < part) > 0) printf " %s", line
    close(part)
    printf "\n"
  }' |\
  "$APERTIUM_PATH/apertium-despptx" >"$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
127
128
# Deformat an Excel (xlsx) workbook: unpack the archive, wrap every
# extracted file whose path matches "sharedStrings.xml" in a
# <file name="..."/> record (the file's lines joined by single spaces) and
# feed the records to apertium-desxlsx.
# Globals:  FICHERO (input archive), SALIDA (output path), TMPDIR,
#           APERTIUM_PATH, INPUT_TMPDIR (assigned)
# Exits:    with status 1 when the temporary directory cannot be created
unformat_xlsx ()
{
  # Run the prerequisite checks first: they may exit, and doing so after
  # mktemp would leave a stray temporary directory behind.
  locale_utf8
  test_zip

  # Quote the template so a TMPDIR containing whitespace is not split.
  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX") || exit 1

  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # The path is passed to printf as an argument (a '%' stays literal),
  # getline is tested with "> 0" (it returns -1 on error) and every part is
  # close()d after reading.
  find "$INPUT_TMPDIR" -type f | grep "sharedStrings.xml" |\
  awk '{
    part = $0
    printf "<file name=\"%s\"/>", part
    while ((getline line < part) > 0) printf " %s", line
    close(part)
    printf "\n"
  }' |\
  "$APERTIUM_PATH/apertium-desxlsx" >"$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
143
144
# Parse the command-line options.  The leading ':' in the option string puts
# getopts into silent mode: the offending option letter is then stored in
# OPTARG and a missing option argument is reported as ':', which is what the
# two error arms below rely on.
while getopts ":f:" opt; do
    case "$opt" in
        f) FORMAT=$OPTARG ;;
        \?) echo "ERROR: Unknown option $OPTARG" >&2; message >&2 ;;
        :) echo "ERROR: $OPTARG requires an argument" >&2; message >&2 ;;
    esac
done

# Drop the parsed options so that only the positional arguments remain.
shift "$((OPTIND-1))"
154
# Interpret the positional arguments: [in [out]].
#   0 arguments -> keep the current INPUT_FILE / OUTPUT_FILE values
#   1 argument  -> input file
#   2 arguments -> input file and output file
#   more        -> usage error
# Diagnostics and the usage text go to stderr (as in the option-parsing
# errors), because stdout may be carrying the converted document.
case "$#" in
     0)
       ;;
     1|2)
       INPUT_FILE=$1
       if [ "$#" -eq 2 ]; then
         OUTPUT_FILE=$2
       fi
       if [ ! -e "$INPUT_FILE" ]; then
         echo "Error: file '$INPUT_FILE' not found." >&2
         message >&2
       fi
       ;;
     *)
       message >&2
       ;;
esac
177
# Fall back to plain text when no format was selected.
: "${FORMAT:=txt}"

# Working copies used by the format handlers and the final deformatter call.
FICHERO=$INPUT_FILE
SALIDA=$OUTPUT_FILE
FORMATADOR=$FORMAT
183
184
# Dispatch on the requested format.  Formats that have a dedicated
# unformat_* function are handled there and the script ends; every other
# format falls through to the matching apertium-des<format> program below.
case "$FORMATADOR" in
  rtf)
    # Pick the first installed locale that is neither UTF-8 nor the plain
    # C/POSIX locale.  "^C$" is anchored on both sides: combined with -i, an
    # unanchored "^C" would also reject every locale whose name merely
    # starts with "c" (ca_ES, cs_CZ, ...).
    MILOCALE=$(locale -a | grep -E -i -v -m1 'utf|^C$|^POSIX$')
    if [ -z "$MILOCALE" ]; then
      # stdout may be carrying the converted document, so report on stderr.
      echo "Error: Install a ISO-8859-1 compatible locale in your system" >&2
      exit 1
    fi
    export LC_CTYPE=$MILOCALE
    ;;

  html-noent)
    # Handled by the regular html deformatter.
    FORMATADOR="html"
    ;;

  latex|odt|docx|xlsx|pptx)
    "unformat_$FORMATADOR"
    exit 0
    ;;

  wxml)
    locale_utf8
    ;;

  *)
    ;;
esac

"$APERTIUM_PATH/apertium-des$FORMATADOR" "$FICHERO" >"$SALIDA"
229