#!/bin/sh

#
# By Aleksey Cheusov <vle@gmx.net>
#
# Reads tab-separated index lines, reverses the first field (the headword)
# of every line and pipes the result through `dictfmt -I` and `uniq`.
# With --utf8 the headword is reversed character by character instead of
# byte by byte.
#

# Print the command-line help text to stdout.
usage (){
	cat <<'EOF'
Converts .index file from DICTD database to the index file .suffix
usage: dictfmt_index2suffix [OPTIONS] [files...]
OPTIONS:
   --help    display this screen
   --utf8    for creating utf8 .index file
   all other -X and --XXX options are passed to dictfmt -I
EOF
}

# Force the C locale so that awk length()/substr() and the octal ranges
# in the regular expressions below operate on single bytes.
LC_ALL=C
export LC_ALL

# Start from known values so nothing leaks in from the environment.
utf8_mode=0
args=

# Processing arguments: every leading option is forwarded to dictfmt,
# the first non-option argument ends option processing.
while [ $# != 0 ]; do
	case $1 in
		--help)
			usage
			exit 0;;
		--utf8)
			utf8_mode=1
			args="$args $1";;
		-*)
			args="$args $1";;
		*)
			break;;
	esac
	shift
done

# Command evaluated right after the pipeline to compute the final status.
# bash: succeed only if every pipeline stage exited with 0.
# other shells: only the status of the last stage is available.
if [ -n "${BASH_VERSION:-}" ]; then
	exit_="echo \${PIPESTATUS[@]} | grep -E '^0( 0)*\$' >/dev/null"
else
	exit_='exit $?'
fi

# awk program that prints every input line with its first field reversed.
# NOTE: the program is single-quoted, so it must not contain apostrophes.
reverse_awk_prog='
# Return the byte length of the UTF-8 character that starts str,
# 0 for an empty string, or -1 if the first byte cannot start a character.
function charlen_utf8 (str){
	if (str == "")             return 0
	if (str ~ /^[\1-\177]/)    return 1
	if (str ~ /^[\200-\277]/)  return -1   # continuation byte
	if (str ~ /^[\300-\337]/)  return 2
	if (str ~ /^[\340-\357]/)  return 3
	if (str ~ /^[\360-\367]/)  return 4
	if (str ~ /^[\370-\373]/)  return 5
	if (str ~ /^[\374-\375]/)  return 6
	return -1
}

BEGIN {
	FS = OFS = "\t"
}

{
	len = length($1)

	if (!utf8_mode){
		# byte-wise reversal
		for (i = len; i >= 1; --i){
			printf "%s", substr($1, i, 1)
		}
	}else{
		# Split the headword into characters first, then emit them
		# backwards.  The loop must include the last byte (i == len),
		# otherwise a trailing single-byte character is lost.
		count = 0
		i = 1
		while (i <= len){
			rest = substr($1, i)
			char_len = charlen_utf8(rest)
			# reject bad lead bytes and sequences cut off by the field end
			if (char_len < 0 || i + char_len - 1 > len){
				print "invalid UTF-8 input: `" rest "`" > "/dev/stderr"
				exit 1
			}
			inverse_char [++count] = substr($1, i, char_len)
			i += char_len
		}
		for (k = count; k >= 1; --k){
			printf "%s", inverse_char [k]
		}
	}

	# Blank the original headword: $0 then starts with the separator,
	# which lands right after the reversed text printed above.
	$1 = ""
	print $0
}'

# $args is intentionally unquoted: it is a space-separated option list.
# shellcheck disable=SC2086
@AWK@ -v "utf8_mode=$utf8_mode" "$reverse_awk_prog" "$@" | dictfmt -I $args | uniq

eval "$exit_"