1 // crm114_structs.h - structures for CRM114 2 3 // Copyright 2009 William S. Yerazunis. 4 // This file is under GPLv3, as described in COPYING. 5 6 #ifndef __CRM114_STRUCTS_H__ 7 #define __CRM114_STRUCTS_H__ 8 9 // These are systemwide globals. Sure, they should go into a global 10 // struct, but that realization only occured to me in 2008. Sorry. 11 12 extern long vht_size; 13 extern long max_pgmsize; 14 extern long user_trace; 15 extern long internal_trace; 16 extern long debug_countdown; 17 extern long cmdline_break; 18 extern long cycle_counter; 19 extern long ignore_environment_vars; 20 extern long data_window_size; 21 22 // Number of hash table buckets. Set from command line, read (only) 23 // by classifier learns. 24 extern long sparse_spectrum_file_length; 25 26 extern long microgroom_chain_length ; 27 extern long microgroom_stop_after; 28 29 extern float min_pmax_pmin_ratio; 30 extern long profile_execution; 31 32 extern int dontcare; 33 34 extern long prettyprint_listing; // 0= none, 1 = basic, 2 = expanded, 3 = parsecode 35 36 extern long engine_exit_base; // All internal errors will use this number or higher; 37 // the user programs can use lower numbers freely. 38 39 40 // how should math be handled? 41 // = 0 no extended (non-EVAL) math, use algebraic notation 42 // = 1 no extended (non-EVAL) math, use RPN 43 // = 2 extended (everywhere) math, use algebraic notation 44 // = 3 extended (everywhere) math, use RPN 45 extern long q_expansion_mode; 46 47 48 // structure of a vht cell 49 // note - each file gets an entry, with the name of the file 50 // being the name of the variable - no colons! 51 // 52 // also note that there's no "next" pointer in a vht cell; this is because 53 // we do in-table overflowing (if a table entry is in use, we use the next 54 // available table entry, wrapping around. It's easy to change in any case. 
55 // 56 typedef struct mythical_vht_cell { 57 char *filename; // file where defined (or NULL) 58 int filedesc; // filedesc of defining file (or NULL) 59 char *nametxt; // block of text that hosts the variable name 60 long nstart; // index into nametxt to start of varname 61 long nlen; // length of name 62 char *valtxt; // text block that hosts the captured value 63 // vstart, vlen, mstart, and mlen are all measured 64 // from the _start_ of valtxt, mstart relative to 65 // vstart, etc!!! 66 long vstart; // zero-base index of start of variable (inclusive) 67 long vlen; // length of captured value : this plus vstart is where 68 // you could put a NULL if you wanted to. 69 long mstart; // zero-base start of most recent match of this var 70 long mlen; // length of most recent match against this var; this 71 // plus mstart is where you could put a NULL if you 72 // wanted to. 73 long linenumber; // linenumber of this variable (if known, else -1) 74 long lazy_redirects; // how many lazy redirects are allowed (0 by default); 75 } VHT_CELL; 76 77 // The argparse block is filled in at run time, though at least in 78 // principle it could be done at microcompile time, but var-expansion 79 // needs to be done at statement execution time.. so we don't fill it 80 // in till we have to, then we cache the result. 81 // 82 83 84 typedef struct mythical_argparse_block { 85 char *a1start; 86 long a1len; 87 char *p1start; 88 long p1len; 89 char *p2start; 90 long p2len; 91 char *p3start; 92 long p3len; 93 char *b1start; 94 long b1len; 95 char *s1start; 96 long s1len; 97 char *s2start; 98 long s2len; 99 unsigned long long sflags; 100 } ARGPARSE_BLOCK; 101 102 103 104 // structure of a microcompile table cell (one such per statement) 105 // 106 // These table entries get filled in during microcompile operation. 107 // 108 typedef struct mythical_mct_cell { 109 char *hosttxt; // text file this statement lives in. 
110 ARGPARSE_BLOCK *apb; // the argparse block for this statement 111 long start; // zero-base index of start of statement (inclusive) 112 long fchar; // zero-base index of non-blank stmt (for prettyprint) 113 long achar; // zero-base index of start of args; 114 long stmt_utime; // user time spent in this statement line; 115 long stmt_stime; // system time spent in this statement line; 116 int stmt_type; // statement type of this line 117 int nest_level; // nesting level of this statement 118 int fail_index; // if this statement failed, where would we go? 119 int liaf_index; // if this statement liafed, where would we go? 120 int trap_index; // if this statement faults, where would we go? 121 int stmt_break; // 1 if "break" on this stmt, 0 otherwise. 122 } MCT_CELL; 123 124 // structure of a control stack level cell. 125 // Nota Bene: CSL cells are used to both retain toplevel data about 126 // any particular file being executed as well as being used to retain 127 // data on any file that is data! If a file is executable, then the 128 // mct pointer is a pointer to the compiled MCT table, else the mct 129 // pointer is a NULL and the file is not executable. 130 // 131 typedef struct mythical_csl_cell { 132 char *filename; //filename if any 133 long rdwr; // 0=readonly, 1=rdwr 134 long filedes; // file descriptor it's open on (if any) 135 char *filetext; // text buffer 136 long nchars; // characters of data we have 137 unsigned int hash; // hash of this data (if done) 138 MCT_CELL **mct; // microcompile (if compiled) 139 long nstmts; // how many statements in the microcompile 140 long preload_window; // do we preload the window or not? 
141 long cstmt; // current executing statement of this file 142 void *caller; // pointer to this file's caller (if any) 143 long return_vht_cell; // index into the VHT to stick the return value 144 long calldepth; // how many calls deep is this stack frame 145 long aliusstk[MAX_BRACKETDEPTH]; // the status stack for ALIUS 146 } CSL_CELL; 147 148 // A 1024-byte standardized header for our statistical files (well, the 149 // new standard. Old file types don't have this. Forward migration 150 // shall take place. :-) 151 152 typedef struct { 153 uint32_t start; 154 uint32_t length; 155 uint32_t tag; 156 } STATISTICS_FILE_CHUNK; 157 158 typedef struct { 159 uint8_t file_ident_string [ STATISTICS_FILE_IDENT_STRING_MAX ]; 160 // Text description of this file. This should 161 // always start with "CRM114 Classdata " and then 162 // the classifier name etc. Embed versioning 163 // information here (and get it back with strtod) 164 // Please pad unused space with NULLs; don't 165 // change the length (to make file-magic easier). 166 // This is always chunks[0]. 167 // 168 STATISTICS_FILE_CHUNK chunks [ STATISTICS_FILE_NCHUNKS ]; 169 // The byte indexed chunks of data in this file, 170 // by start, length, and tag. 171 // chunks[1] points to this array itself. 172 // A -1 length means "to the 173 // end of the file" 174 // 175 //////////////////////////// 176 // Following in the file are more data chunks. Note that there's 177 // plenty of space here for pre-solves (such as an SVM might generate) 178 // but probably NOT enough space for individual examples to get their 179 // own chunks, unless you change the default number of chunks upward 180 // from 1024. 
181 //////////////////////////// 182 } STATISTICS_FILE_HEADER_STRUCT; 183 184 typedef struct { 185 unsigned int hash; 186 unsigned int key; 187 unsigned int value; 188 } FEATUREBUCKET_STRUCT; 189 190 191 typedef struct { 192 unsigned char version[4]; 193 unsigned long flags; 194 unsigned long skip_to; 195 } FEATURE_HEADER_STRUCT; 196 197 198 typedef struct { 199 unsigned int hash; 200 unsigned int key; 201 float value; 202 } WINNOW_FEATUREBUCKET_STRUCT; 203 204 #define ENTROPY_RESERVED_HEADER_LEN 1024 205 typedef struct { 206 long firlatstart; 207 long firlatlen; 208 long nodestart; 209 long nodeslen; 210 long long totalbits; 211 } ENTROPY_HEADER_STRUCT; 212 213 typedef struct mythical_entropy_alphabet_slot { 214 long count; 215 long nextcell; 216 } ENTROPY_ALPHABET_SLOT; 217 218 // 28 byte header, 24 bytes alph (52 tot). Pare: 16 header, 16 alph (36 tot) 219 typedef struct mythical_entropy_cell { 220 double fir_prior; 221 long fir_larger; 222 long fir_smaller; 223 long firlat_slot; 224 // long total_count; 225 ENTROPY_ALPHABET_SLOT abet[ENTROPY_ALPHABET_SIZE]; 226 } ENTROPY_FEATUREBUCKET_STRUCT; 227 228 229 // TMS struct - used for measurng process time. 
//   NOTE(review): the member names match those of POSIX struct tms;
//   presumably this is a portable stand-in for it - confirm.
typedef struct mythical_tms_struct {
  clock_t tms_utime;    // user time
  clock_t tms_stime;    // system time
  clock_t tms_cutime;   // user time of children
  clock_t tms_cstime;   // system time of children
} TMS_STRUCT;


//   Statement types for microcompile (these are the values stored
//   in a statement's stmt_type).
//
#define CRM_BOGUS            0
#define CRM_NOOP             1
#define CRM_EXIT             2
#define CRM_OPENBRACKET      3
#define CRM_CLOSEBRACKET     4
#define CRM_LABEL            5
#define CRM_GOTO             6
#define CRM_MATCH            7
#define CRM_FAIL             8
#define CRM_LIAF             9
#define CRM_ACCEPT          10
#define CRM_TRAP            11
#define CRM_FAULT           12
#define CRM_INPUT           13
#define CRM_OUTPUT          14
#define CRM_WINDOW          15
#define CRM_ALTER           16
#define CRM_CALL            17
#define CRM_ROUTINE         18
#define CRM_RETURN          19
#define CRM_SYSCALL         20
#define CRM_LEARN           21
#define CRM_CLASSIFY        22
#define CRM_ISOLATE         23
#define CRM_HASH            24
#define CRM_INTERSECT       25
#define CRM_UNION           26
#define CRM_EVAL            27
#define CRM_ALIUS           28
#define CRM_TRANSLATE       29
#define CRM_DEBUG           30
#define CRM_CLUMP           31   // make clusters out of tokens
#define CRM_PMULC           32   // pmulc translates tokens to cluster names
#define CRM_LAZY            33   // makes a "lazy" variable.
#define CRM_UNIMPLEMENTED   34


//      FLAGS FLAGS FLAGS
//   All of the valid CRM114 flags are listed here.
//
//   GROT GROT GROT - You must keep this in synchrony with the
//   definitions of the keywords in crm_stmt_parser!!!  Yes, I'd
//   love to define it in one place and one place only, but I haven't
//   figured out a way to do that well.

//   Every flag is a single bit in an unsigned long long.

//     match searchstart flags
#define CRM_FROMSTART     (1ULL << 0)
#define CRM_FROMNEXT      (1ULL << 1)
#define CRM_FROMEND       (1ULL << 2)
#define CRM_NEWEND        (1ULL << 3)
#define CRM_FROMCURRENT   (1ULL << 4)
//     match control flags
#define CRM_NOCASE        (1ULL << 5)
#define CRM_ABSENT        (1ULL << 6)
#define CRM_BASIC         (1ULL << 7)
#define CRM_BACKWARDS     (1ULL << 8)
#define CRM_LITERAL       (1ULL << 9)
#define CRM_NOMULTILINE   (1ULL << 10)
//     input/output/window flags
#define CRM_BYLINE        CRM_NOMULTILINE
#define CRM_BYCHAR        (1ULL << 11)
#define CRM_STRING        CRM_BYCHAR     // string is bychar.  I think...
#define CRM_BYCHUNK       (1ULL << 12)
#define CRM_BYEOF         (1ULL << 13)
#define CRM_EOFACCEPTS    (1ULL << 14)
#define CRM_EOFRETRY      (1ULL << 15)
#define CRM_APPEND        (1ULL << 16)
//     process control flags
#define CRM_KEEP          (1ULL << 17)
#define CRM_ASYNC         (1ULL << 18)
//     learn and classify
#define CRM_REFUTE        (1ULL << 19)
#define CRM_MICROGROOM    (1ULL << 20)
#define CRM_MARKOVIAN     (1ULL << 21)
#define CRM_OSB_BAYES     (1ULL << 22)   // synonym with OSB feature gen
#define CRM_OSB           CRM_OSB_BAYES
#define CRM_CORRELATE     (1ULL << 23)
#define CRM_OSB_WINNOW    (1ULL << 24)   // synonym to Winnow feature combiner
#define CRM_WINNOW        CRM_OSB_WINNOW
#define CRM_CHI2          (1ULL << 25)
#define CRM_UNIQUE        (1ULL << 26)
#define CRM_ENTROPY       (1ULL << 27)
#define CRM_OSBF          (1ULL << 28)   // synonym with OSBF local rule
#define CRM_OSBF_BAYES    CRM_OSBF
#define CRM_HYPERSPACE    (1ULL << 29)
#define CRM_UNIGRAM       (1ULL << 30)
#define CRM_CROSSLINK     (1ULL << 31)
//
//     Flags that need to be sorted back in
//           input
#define CRM_READLINE      (1ULL << 32)
//           isolate flags
#define CRM_DEFAULT       (1ULL << 33)
//           SKS classifier
#define CRM_SKS           (1ULL << 34)
//           SVM classifier
#define CRM_SVM           (1ULL << 35)
//           FSCM classifier
#define CRM_FSCM          (1ULL << 36)
//           Neural Net classifier
#define CRM_NEURAL_NET    (1ULL << 37)
//
#define CRM_ERASE         (1ULL << 38)
//           PCA classifier
#define CRM_PCA           (1ULL << 39)

//     ... and a struct to put them in: a flag's text paired with its
//     bit value.
typedef struct
{
  char *string;
  unsigned long long value;
} FLAG_DEF;


//*****************************************************************
//
//     The following table describes the statements allowed in CRM114.
//
//      Each entry is one line of STMT_TABLE_TYPE, and gives the text
//      representation of the command, the internal dispatch code,
//      whether the statement is "executable" or not, what the minimum
//      and maximum number of slash-groups, paren-groups, and box-groups
//      are for the statement to make sense, and what flags are allowed
//      for that statement.
//

typedef struct
{
  char *stmt_name;       // text representation of the command
  int stmt_code;         // internal dispatch code
  int namelen;
  int is_executable;     // whether the statement is "executable"
  int minslashes;        // min / max number of slash-groups
  int maxslashes;
  int minparens;         // min / max number of paren-groups
  int maxparens;
  int minboxes;          // min / max number of box-groups
  int maxboxes;
  //   Flags allowed for this statement.  This has to be as wide as the
  //   flag values themselves: they are unsigned long long and reach
  //   bit 39, which a 32-bit long would silently truncate.
  unsigned long long flags_allowed_mask;
} STMT_TABLE_TYPE;


//   The compiler file actually contains this "for real", the
//   extern here is merely a reference to it.
//
#ifndef BASE_COMPILER_TABLE_HERE
extern STMT_TABLE_TYPE stmt_table[];
#endif


//    These defines are for arg type... note that they must remain
//    synched IN THIS ORDER with the start chars and end chars in
//    crm_statement_parse
//
#define CRM_ANGLES   0
#define CRM_PARENS   1
#define CRM_BOXES    2
#define CRM_SLASHES  3



//    The possible exit codes
#define CRM_EXIT_OK          0
#define CRM_EXIT_ERROR       1
#define CRM_EXIT_FATAL       2
#define CRM_EXIT_APOCALYPSE  666


//    The ORable exec codes for crm_zexpandvar; OR together the ones
//    you want to enable for zexpandvar.  Nexpandvar is
//    ansi|stringvar|redirect, and qexpandvar is "all of them".
//    :)
//
//    One bit per exec code, so that they can be ORed together.
#define CRM_EVAL_ANSI       0x01
#define CRM_EVAL_STRINGVAR  0x02
#define CRM_EVAL_REDIRECT   0x04
#define CRM_EVAL_STRINGLEN  0x08
#define CRM_EVAL_MATH       0x10



//    The possible cache actions
#define CRM_MMAP_CACHE_UNUSED   0
//    active makes it really mapped (or reactivates a released mmap)
#define CRM_MMAP_CACHE_ACTIVE   1
//    release marks the slot reusable, but doesn't unmap (yet)
#define CRM_MMAP_CACHE_RELEASE  2
//    drop really unmaps
#define CRM_MMAP_CACHE_DROP     3


#endif  // !__CRM114_STRUCTS_H__