1 /* 2 * $LynxId: HTParse.h,v 1.26 2021/07/05 20:56:50 tom Exp $ 3 * HTParse: URL parsing in the WWW Library 4 * HTPARSE 5 * 6 * This module of the WWW library contains code to parse URLs and various 7 * related things. 8 * Implemented by HTParse.c . 9 */ 10 #ifndef HTPARSE_H 11 #define HTPARSE_H 12 13 #ifndef HTUTILS_H 14 #include <HTUtils.h> 15 #endif 16 17 #ifdef __cplusplus 18 extern "C" { 19 #endif 20 21 #define RFC_3986_UNRESERVED(c) (isalnum(UCH(c)) || strchr("-._~", UCH(c)) != 0) 22 #define RFC_3986_GEN_DELIMS(c) ((c) != 0 && strchr(":/?#[]@", UCH(c)) != 0) 23 #define RFC_3986_SUB_DELIMS(c) ((c) != 0 && strchr("!$&'()*+,;=", UCH(c)) != 0) 24 25 /* 26 * The following are flag bits which may be ORed together to form 27 * a number to give the 'wanted' argument to HTParse. 28 */ 29 #define PARSE_ACCESS 16 30 #define PARSE_HOST 8 31 #define PARSE_PATH 4 32 #define PARSE_ANCHOR 2 33 #define PARSE_PUNCTUATION 1 34 #define PARSE_ALL 31 35 #define PARSE_ALL_WITHOUT_ANCHOR (PARSE_ALL ^ PARSE_ANCHOR) 36 /* 37 * Additional flag bits for more details on components already 38 * covered by the above. The PARSE_PATH above doesn't really 39 * strictly refer to the path component in the sense of the URI 40 * specs only, but rather to that combined with a possible query 41 * component. - kw 42 */ 43 #define PARSE_STRICTPATH 32 44 #define PARSE_QUERY 64 45 /* 46 * The following are valid mask values. The terms are the BNF names 47 * in the URL document. 48 */ 49 #define URL_XALPHAS UCH(1) 50 #define URL_XPALPHAS UCH(2) 51 #define URL_PATH UCH(4) 52 53 #ifdef USE_IDN2 54 typedef enum { 55 LYidna2003 = 1, 56 LYidna2008, 57 LYidnaTR46, 58 LYidnaCompat 59 } HTIdnaModes; 60 61 extern int LYidnaMode; 62 #endif 63 64 /* Strip white space off a string. HTStrip() 65 * ------------------------------- 66 * 67 * On exit, 68 * Return value points to first non-white character, or to 0 if none. 69 * All trailing white space is OVERWRITTEN with zero. 70 */ 71 extern char *HTStrip(char *s); 72 73 /* 74 * Parse a port number 75 * ------------------- 76 * 77 * On entry, 78 * host A pointer to hostname possibly followed by port 79 * 80 * On exit, 81 * returns A pointer to the ":" before the port 82 * sets the port number via the pointer portp. 83 */ 84 extern char *HTParsePort(char *host, int *portp); 85 86 /* Parse a Name relative to another name. HTParse() 87 * -------------------------------------- 88 * 89 * This returns those parts of a name which are given (and requested) 90 * substituting bits from the related name where necessary. 91 * 92 * On entry, 93 * aName A filename given 94 * relatedName A name relative to which aName is to be parsed 95 * wanted A mask for the bits which are wanted. 96 * 97 * On exit, 98 * returns A pointer to a malloc'd string which MUST BE FREED 99 */ 100 extern char *HTParse(const char *aName, 101 const char *relatedName, 102 int wanted); 103 104 /* HTParseAnchor(), fast HTParse() specialization 105 * ---------------------------------------------- 106 * 107 * On exit, 108 * returns A pointer within input string (probably to its end '\0') 109 */ 110 extern const char *HTParseAnchor(const char *aName); 111 112 /* Simplify a filename. HTSimplify() 113 * -------------------- 114 * 115 * A unix-style file is allowed to contain the sequence xxx/../ which may 116 * be replaced by "" , and the sequence "/./" which may be replaced by "/". 117 * Simplification helps us recognize duplicate filenames. 118 */ 119 extern void HTSimplify(char *filename, BOOL absolute); 120 121 /* Make Relative Name. HTRelative() 122 * ------------------- 123 * 124 * This function creates and returns a string which gives an expression of 125 * one address as related to another. Where there is no relation, an absolute 126 * address is returned. 127 * 128 * On entry, 129 * Both names must be absolute, fully qualified names of nodes 130 * (no anchor bits) 131 * 132 * On exit, 133 * The return result points to a newly allocated name which, if 134 * parsed by HTParse relative to relatedName, will yield aName. 135 * The caller is responsible for freeing the resulting name later. 136 * 137 */ 138 extern char *HTRelative(const char *aName, 139 const char *relatedName); 140 141 /* Escape undesirable characters using % HTEscape() 142 * ------------------------------------- 143 * 144 * This function takes a pointer to a string in which 145 * some characters may be unacceptable are unescaped. 146 * It returns a string which has these characters 147 * represented by a '%' character followed by two hex digits. 148 * 149 * Unlike HTUnEscape(), this routine returns a malloc'd string. 150 */ 151 extern char *HTEscape(const char *str, 152 unsigned mask); 153 154 /* Escape unsafe characters using % HTEscapeUnsafe() 155 * -------------------------------- 156 * 157 * This function takes a pointer to a string in which 158 * some characters may be that may be unsafe are unescaped. 159 * It returns a string which has these characters 160 * represented by a '%' character followed by two hex digits. 161 * 162 * Unlike HTUnEscape(), this routine returns a malloc'd string. 163 */ 164 extern char *HTEscapeUnsafe(const char *str); 165 166 /* Escape undesirable characters using % but space to +. HTEscapeSP() 167 * ----------------------------------------------------- 168 * 169 * This function takes a pointer to a string in which 170 * some characters may be unacceptable are unescaped. 171 * It returns a string which has these characters 172 * represented by a '%' character followed by two hex digits, 173 * except that spaces are converted to '+' instead of %2B. 174 * 175 * Unlike HTUnEscape(), this routine returns a malloc'd string. 176 */ 177 extern char *HTEscapeSP(const char *str, 178 unsigned mask); 179 180 /* Decode %xx escaped characters. HTUnEscape() 181 * ------------------------------ 182 * 183 * This function takes a pointer to a string in which some 184 * characters may have been encoded in %xy form, where xy is 185 * the acsii hex code for character 16x+y. 186 * The string is converted in place, as it will never grow. 187 */ 188 extern char *HTUnEscape(char *str); 189 190 /* Decode some %xx escaped characters. HTUnEscapeSome() 191 * ----------------------------------- Klaus Weide 192 * (kweide@tezcat.com) 193 * This function takes a pointer to a string in which some 194 * characters may have been encoded in %xy form, where xy is 195 * the acsii hex code for character 16x+y, and a pointer to 196 * a second string containing one or more characters which 197 * should be unescaped if escaped in the first string. 198 * The first string is converted in place, as it will never grow. 199 */ 200 extern char *HTUnEscapeSome(char *str, 201 const char *do_trans); 202 203 /* 204 * Turn a string which is not a RFC 822 token into a quoted-string. - KW 205 */ 206 extern void HTMake822Word(char **str, 207 int quoted); 208 209 #ifdef __cplusplus 210 } 211 #endif 212 #endif /* HTPARSE_H */ 213