#!/bin/bash
#
# Strips the formatting from a document by dispatching to the matching
# "$APERTIUM_PATH/apertium-des<format>" tool.  Container formats (odt, docx,
# pptx, xlsx) are unzipped into a temporary directory first and the relevant
# XML members are concatenated into one stream for the deformatter.
#
# Usage: see message().
#
# Variables read but never assigned in this file: APERTIUM_PATH, DIRECTORY,
# OPCIONU, PREFIJO.
# NOTE(review): presumably these are provided by the environment or by a
# header prepended at build time -- confirm.

INPUT_FILE="/dev/stdin"
OUTPUT_FILE="/dev/stdout"

[ -z "$TMPDIR" ] && TMPDIR=/tmp


# Prints the usage text and terminates the script with status 1.
# The text goes to stderr because stdout is the default data channel.
message ()
{
  {
    echo "USAGE: $(basename "$0") [-f format] [in [out]]"
    echo " -f format        one of: txt (default), html, rtf, odt, docx, wxml, xlsx, pptx"
    echo " in               input file (stdin by default)"
    echo " out              output file (stdout by default)"
  } >&2
  exit 1
}

# Exports LC_CTYPE as the first UTF-8 locale reported by `locale -a`;
# exits with status 1 when there is none.
locale_utf8 ()
{
  # Accept both the "utf8" and the "UTF-8" spelling of the codeset name.
  LC_CTYPE=$(locale -a | grep -i "utf[-.]*8" | head -1)
  export LC_CTYPE
  if [ -z "$LC_CTYPE" ]; then
    echo "Error: Install an UTF-8 locale in your system" >&2
    exit 1
  fi
}

# Exits with status 1 unless both `zip` and `unzip` are available.
test_zip ()
{
  if ! command -v zip &>/dev/null; then
    echo "Error: Install 'zip' command in your system" >&2
    exit 1
  fi

  if ! command -v unzip &>/dev/null; then
    echo "Error: Install 'unzip' command in your system" >&2
    exit 1
  fi
}

# Exits with status 1 unless `gawk` is available.
test_gawk ()
{
  if ! command -v gawk &>/dev/null; then
    echo "Error: Install 'gawk' in your system" >&2
    exit 1
  fi
}

# Removes the directory named by INPUT_TMPDIR, if any.  Safe to call twice.
remove_input_tmpdir ()
{
  if [ -n "$INPUT_TMPDIR" ]; then
    rm -Rf -- "$INPUT_TMPDIR"
  fi
  INPUT_TMPDIR=""
}

# Unzips "$FICHERO" into a fresh temporary directory whose path is stored in
# INPUT_TMPDIR.  The tool checks run first so that a failed check cannot leave
# a directory behind; the EXIT trap covers every later abnormal exit.
unpack_input ()
{
  locale_utf8
  test_zip

  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX") || exit 1
  trap remove_input_tmpdir EXIT

  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"
}

# Reads file paths from stdin, one per line.  For each path, writes a single
# line: a <file name="PATH"/> marker followed by every line of that file,
# each preceded by one space.
concat_with_file_markers ()
{
  # The path is passed as data (%s), never as the printf format, and the
  # getline result is compared with 0 so an unreadable path (getline == -1)
  # ends the loop instead of spinning forever.
  awk '{
    path = $0
    printf "<file name=\"%s\"/>", path
    while ((getline line < path) > 0) printf(" %s", line)
    close(path)
    printf("\n")
  }'
}

# Runs every regular file under directory $1 whose name ends in "xlsx"
# through "$APERTIUM_PATH/apertium -f xlsx" and overwrites it with the result.
translate_embedded_xlsx ()
{
  local root=$1
  local sheet translated

  # NUL-delimited so paths containing whitespace survive intact.
  while IFS= read -r -d '' sheet; do
    translated=$(mktemp "$TMPDIR/apertium.XXXXXXXX") || exit 1
    "$APERTIUM_PATH/apertium" -f xlsx -d "$DIRECTORY" "$OPCIONU" "$PREFIJO" <"$sheet" >"$translated"
    cp "$translated" "$sheet"
    rm "$translated"
  done < <(find "$root" -type f -name '*xlsx' -print0)
}

# Deformats LaTeX: prelatex | fixlatex | deslatex, written to "$SALIDA".
# When FICHERO is empty, stdin is spooled to a temporary file first.
unformat_latex()
{
  test_gawk

  if [ "$FICHERO" = "" ]; then
    FICHERO=$(mktemp "$TMPDIR/apertium.XXXXXXXX") || exit 1
    cat > "$FICHERO"
    BORRAFICHERO="true"
  fi

  "$APERTIUM_PATH/apertium-prelatex" "$FICHERO" | \
    "$APERTIUM_PATH/apertium-utils-fixlatex" | \
    "$APERTIUM_PATH/apertium-deslatex" >"$SALIDA"

  if [ "$BORRAFICHERO" = "true" ]; then
    rm -Rf "$FICHERO"
  fi
}


# Deformats an ODT archive: feeds every member whose path contains
# "content.xml" to apertium-desodt.
unformat_odt ()
{
  unpack_input

  find "$INPUT_TMPDIR" -type f | grep 'content\.xml' |\
    concat_with_file_markers |\
    "$APERTIUM_PATH/apertium-desodt" >"$SALIDA"

  remove_input_tmpdir
}

# Deformats a DOCX archive: translates embedded spreadsheets in place, then
# feeds the XML members (minus settings/theme/styles/font/rels/docProps) to
# apertium-deswxml.
unformat_docx ()
{
  unpack_input

  translate_embedded_xlsx "$INPUT_TMPDIR"

  find "$INPUT_TMPDIR" -type f | grep "xml" |\
    grep -v -i '\(settings\|theme\|styles\|font\|rels\|docProps\)' |\
    concat_with_file_markers |\
    "$APERTIUM_PATH/apertium-deswxml" >"$SALIDA"

  remove_input_tmpdir
}

# Deformats a PPTX archive: translates embedded spreadsheets in place, then
# feeds every slides/slide*.xml member to apertium-despptx.
unformat_pptx ()
{
  unpack_input

  translate_embedded_xlsx "$INPUT_TMPDIR"

  # Search the unpacked archive, not the caller's working directory.
  find "$INPUT_TMPDIR" -type f -path '*/slides/slide*.xml' |\
    concat_with_file_markers |\
    "$APERTIUM_PATH/apertium-despptx" >"$SALIDA"

  remove_input_tmpdir
}


# Deformats an XLSX archive: feeds the sharedStrings.xml member(s) to
# apertium-desxlsx.
unformat_xlsx ()
{
  unpack_input

  find "$INPUT_TMPDIR" -type f | grep "sharedStrings.xml" |\
    concat_with_file_markers |\
    "$APERTIUM_PATH/apertium-desxlsx" >"$SALIDA"

  remove_input_tmpdir
}


# Leading ':' selects getopts' silent mode: OPTARG then carries the offending
# option letter and the ':' arm is reachable for a missing argument.
while getopts ":f:" opt; do
  case "$opt" in
    f) FORMAT=$OPTARG ;;
    \?) echo "ERROR: Unknown option $OPTARG" >&2; message >&2 ;;
    :) echo "ERROR: $OPTARG requires an argument" >&2; message >&2 ;;
  esac
done

shift "$((OPTIND-1))"

# Positional arguments: [in [out]]
case "$#" in
  0)
    ;;
  1|2)
    INPUT_FILE=$1
    if [ "$#" -eq 2 ]; then
      OUTPUT_FILE=$2
    fi
    if [ ! -e "$INPUT_FILE" ]; then
      echo "Error: file '$INPUT_FILE' not found." >&2
      message
    fi
    ;;
  *)
    message
    ;;
esac

if [ -z "$FORMAT" ]; then FORMAT="txt"; fi

FORMATADOR=$FORMAT
FICHERO=$INPUT_FILE
SALIDA=$OUTPUT_FILE


case "$FORMATADOR" in
  rtf)
    # First locale that is not UTF-8 and does not match ^C / ^POSIX$.
    # NOTE(review): with -i, '^C' also rejects locales such as ca_ES or
    # cs_CZ -- confirm whether '^C$' was intended.
    MILOCALE=$(locale -a | grep -E -i -v -m1 'utf|^C|^POSIX$')
    if [ "$MILOCALE" = "" ]; then
      echo "Error: Install a ISO-8859-1 compatible locale in your system" >&2
      exit 1
    fi
    export LC_CTYPE=$MILOCALE
    ;;
  html-noent)
    FORMATADOR="html"
    ;;

  latex)
    unformat_latex
    exit 0
    ;;

  odt)
    unformat_odt
    exit 0
    ;;
  docx)
    unformat_docx
    exit 0
    ;;
  xlsx)
    unformat_xlsx
    exit 0
    ;;
  pptx)
    unformat_pptx
    exit 0
    ;;

  wxml)
    locale_utf8
    ;;
  *)
    ;;

esac

# Every remaining format maps directly onto a single deformatter binary.
"$APERTIUM_PATH/apertium-des$FORMATADOR" "$FICHERO" >"$SALIDA"