1 /* 2 Copyright (c) by respective owners including Yahoo!, Microsoft, and 3 individual contributors. All rights reserved. Released under a BSD 4 license as described in the file LICENSE. 5 */ 6 #pragma once 7 #include "io_buf.h" 8 #include "parse_primitives.h" 9 #include "example.h" 10 11 #include <boost/program_options.hpp> 12 namespace po = boost::program_options; 13 14 struct vw; 15 16 struct parser { 17 v_array<substring> channels;//helper(s) for text parsing 18 v_array<substring> words; 19 v_array<substring> name; 20 21 io_buf* input; //Input source(s) 22 int (*reader)(void*, example* ae); 23 hash_func_t hasher; 24 bool resettable; //Whether or not the input can be reset. 25 io_buf* output; //Where to output the cache. 26 bool write_cache; 27 bool sort_features; 28 bool sorted_cache; 29 30 size_t ring_size; 31 uint64_t begin_parsed_examples; // The index of the beginning parsed example. 32 uint64_t end_parsed_examples; // The index of the fully parsed example. 33 uint64_t local_example_number; 34 uint32_t in_pass_counter; 35 example* examples; 36 uint64_t used_index; 37 bool emptylines_separate_examples; // true if you want to have holdout computed on a per-block basis rather than a per-line basis 38 MUTEX examples_lock; 39 CV example_available; 40 CV example_unused; 41 MUTEX output_lock; 42 CV output_done; 43 44 bool done; 45 v_array<size_t> gram_mask; 46 47 v_array<size_t> ids; //unique ids for sources 48 v_array<size_t> counts; //partial examples received from sources 49 size_t finished_count;//the number of finished examples; 50 int label_sock; 51 int bound_sock; 52 int max_fd; 53 54 v_array<substring> parse_name; 55 56 label_parser lp; // moved from vw 57 }; 58 59 parser* new_parser(); 60 61 void enable_sources(vw& all, bool quiet, size_t passes); 62 63 bool examples_to_finish(); 64 65 //only call these from the library form: 66 void initialize_parser_datastructures(vw& all); 67 void release_parser_datastructures(vw& all); 68 void adjust_used_index(vw& all); 69 70 //parser control 71 72 void make_example_available(); 73 bool parser_done(parser* p); 74 void set_done(vw& all); 75 76 //source control functions 77 bool inconsistent_cache(size_t numbits, io_buf& cache); 78 void reset_source(vw& all, size_t numbits); 79 void finalize_source(parser* source); 80 void set_compressed(parser* par); 81 void initialize_examples(vw& all); 82 void free_parser(vw& all); 83 bool parse_atomic_example(vw& all, example* ae, bool do_read); 84