1 /* (C) Copyright 2005, 2006, 2007, 2008, 2009 Stijn van Dongen 2 * 3 * This file is part of tingea. You can redistribute and/or modify tingea under 4 * the terms of the GNU General Public License; either version 3 of the License 5 * or (at your option) any later version. You should have received a copy of 6 * the GPL along with tingea, in the file COPYING. 7 */ 8 9 #ifndef tingea_tr 10 #define tingea_tr 11 12 #include <string.h> 13 14 #include "ting.h" 15 #include "types.h" 16 #include "inttypes.h" 17 18 19 /* 20 * README 21 * This interface is not POSIX compliant. It might evolve to 22 * optionally be indeed. 23 * However, given some of the braindeadliness of POSIX tr compliance, 24 * I don't think the worlds needs another tr implementation. 25 * My gripe is mainly about derailed syntax such as '[:alpha:0'. 26 * It should go down in a ball of flames, not happily parse. 27 * To be honest, I don't know for sure whether this is a POSIX 28 * lack of requirement or an implementation choice. 29 * 30 * I did choose to follow most of the POSIX syntax. It is probably 31 * a sign of weakness. 32 * This interface should be able to do everything a POSIX interface can, 33 * possibly more. 34 * 35 * - It allows separate specification of src, dst, del and squash sets. 36 * - Provisionally we accept "^spec" to indicate complement, 37 * for any of src dst del squash sets. 38 * - It uses [*c*20] to denote repeats, rather than [c*20]. 39 * rationale: do not slam door shut on new syntax. 40 * - It does not recognize '[a-z]' ranges, only 'a-z'. 41 * rationale: none. If ever, notation will be [-a-z] or similar. 42 * - The magic repeat operator [*c#] stops on boundaries 43 * rationale: I like it. 44 * A boundary is introduced by stop/start of ranges and classes. 45 * - The magic repeat operator [*c*] does not stop on boundaries. 46 * - For now, the interface does 1) deletion, 2) translation, 3) squashing. 47 * in the future it may provide a custom order of doing things. 48 * 49 * 50 * Apart from the fact that you cannot have '\0' in C strings, everything 51 * here should work for '\0' as well - specifically the mcxTrTable structure. 52 * However, the current interface uses C strings for dst and src and C strings 53 * for data. 54 * 55 * More documentation to follow. 56 * 57 */ 58 59 extern const char* mcx_tr_err; 60 extern mcxbool mcx_tr_debug; 61 62 63 typedef struct 64 { u32 tlt[256] 65 ; mcxbits modes 66 ; 67 } mcxTR ; 68 69 70 #define MCX_TR_DEFAULT 0 71 #define MCX_TR_TRANSLATE 1 << 1 72 73 #define MCX_TR_SOURCE 1 << 2 74 #define MCX_TR_DEST 1 << 3 75 #define MCX_TR_SQUASH 1 << 4 76 #define MCX_TR_DELETE 1 << 5 77 78 #define MCX_TR_SOURCE_C 1 << 6 79 #define MCX_TR_DEST_C 1 << 7 80 #define MCX_TR_DELETE_C 1 << 8 81 #define MCX_TR_SQUASH_C 1 << 9 82 83 84 #define MCX_TR_COMPLEMENT 1 << 10 85 86 87 mcxstatus mcxTRloadTable 88 ( mcxTR* tr 89 , const char* src 90 , const char* dst 91 , const char* set_delete 92 , const char* set_squash 93 , mcxbits modes 94 ) ; 95 96 97 /* returns new length of string. 98 * fixme: document map/squash semantics. 99 */ 100 ofs mcxTRtranslate 101 ( char* src 102 , mcxTR* tr 103 ) ; 104 105 106 ofs mcxTingTranslate 107 ( mcxTing* src 108 , mcxTR* tr 109 ) ; 110 111 ofs mcxTingTr 112 ( mcxTing* txt 113 , const char* src 114 , const char* dst 115 , const char* set_delete 116 , const char* set_squash 117 , mcxbits flags 118 ) ; 119 120 121 /* Accepts e.g. \012 and sets *value to 10. 122 * idem \xa0 and \n (\t, \r, \b etc) 123 * Does *not* yet accept \0xa0 124 * 125 * Returns next parsable character. 126 * 127 * This interface should be moved to ding. 128 */ 129 130 char* mcxStrEscapedValue 131 ( const char* p 132 , const char* z 133 , int *value 134 ) ; 135 136 137 138 /* 139 * returns a ting containing all the characters according to bits. 140 * bits accept 141 * MCX_TR_SOURCE 142 * MCX_TR_SOURCE_C 143 * MCX_TR_SQUASH 144 * MCX_TR_SQUASH_C 145 * MCX_TR_DELETE 146 * MCX_TR_DELETE_C 147 * 148 * NOTE 149 * MCX_TR_DEST 150 * MCX_TR_DEST_C 151 * are not yet implemented. 152 * 153 * NOTE DANGER SIGN 154 * tr no longer contains information on complements that were 155 * used in constructing it. 156 * The complements that bits refer to is simply the information 157 * present in tr. 158 * So a source of "^a-z" given to mcxTRloadTable 159 * and MCX_TR_SOURCE_C given to mcxTRsplash 160 * result in a string containing all of a-z. 161 */ 162 163 mcxTing* mcxTRsplash 164 ( mcxTR* tr 165 , mcxbits bits 166 ) ; 167 168 169 #endif 170 171