1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved.  Released under a BSD
4 license as described in the file LICENSE.
5  */
6 #pragma once
7 #include "io_buf.h"
8 #include "parse_primitives.h"
9 #include "example.h"
10 
11 #include <boost/program_options.hpp>
// Short alias for boost::program_options. It is not referenced in the
// visible part of this header.
12 namespace po = boost::program_options;
13 
// Forward declaration: the prototypes below only take `vw` by reference,
// so the full definition is not required in this header.
14 struct vw;
15 
// Parser state: helpers for text parsing, the input source and the cache
// output, a pointer to examples with parse-progress indices, MUTEX/CV
// members, and per-source bookkeeping.
// NOTE(review): comments marked "presumably" are inferred from member names
// only and must be confirmed against the implementation file.
16 struct parser {
17   v_array<substring> channels;//helper(s) for text parsing
18   v_array<substring> words; // presumably another text-parsing helper -- TODO confirm
19   v_array<substring> name; // presumably another text-parsing helper -- TODO confirm
20 
21   io_buf* input; //Input source(s)
22   int (*reader)(void*, example* ae); // callback taking an opaque pointer and an example; meaning of the int result is not visible here
23   hash_func_t hasher; // hash function; hash_func_t is declared elsewhere
24   bool resettable; //Whether or not the input can be reset.
25   io_buf* output; //Where to output the cache.
26   bool write_cache; // presumably whether a cache is written to `output` -- TODO confirm
27   bool sort_features;
28   bool sorted_cache;
29 
30   size_t ring_size; // presumably the capacity of the `examples` storage -- TODO confirm
31   uint64_t begin_parsed_examples; // The index of the beginning parsed example.
32   uint64_t end_parsed_examples; // The index of the fully parsed example.
33   uint64_t local_example_number;
34   uint32_t in_pass_counter; // presumably counts examples within the current pass -- TODO confirm
35   example* examples; // pointer to example storage; allocation and ownership are not visible here
36   uint64_t used_index; // see adjust_used_index() declared in this header
37   bool emptylines_separate_examples; // true if you want to have holdout computed on a per-block basis rather than a per-line basis
38   MUTEX examples_lock; // MUTEX and CV are declared elsewhere; what each one guards is not visible here
39   CV example_available;
40   CV example_unused;
41   MUTEX output_lock;
42   CV output_done;
43 
44   bool done; // presumably the flag behind parser_done()/set_done() declared in this header -- TODO confirm
45   v_array<size_t> gram_mask;
46 
47   v_array<size_t> ids; //unique ids for sources
48   v_array<size_t> counts; //partial examples received from sources
49   size_t finished_count;//the number of finished examples
50   int label_sock; // presumably a socket descriptor -- TODO confirm
51   int bound_sock; // presumably a socket descriptor -- TODO confirm
52   int max_fd; // presumably the highest file descriptor in use -- TODO confirm
53 
54   v_array<substring> parse_name;
55 
56   label_parser lp;  // moved from vw; label_parser is declared elsewhere
57 };
58 
// Returns a pointer to a parser. Presumably the object is newly allocated;
// ownership and the matching cleanup routine are not visible here -- TODO confirm.
59 parser* new_parser();
60 
// Presumably sets up the input source(s) for `all`. The meaning of `quiet`
// and `passes` is defined by the implementation -- TODO confirm.
61 void enable_sources(vw& all, bool quiet, size_t passes);
62 
// Takes no arguments and returns a bool.
// NOTE(review): with no parameter this cannot reach a particular vw/parser
// instance through its arguments -- confirm this declaration is still in use.
63 bool examples_to_finish();
64 
65 //only call these from the library form:
// Each of the three functions below takes the vw instance by non-const
// reference and returns nothing. Their effects are defined in the
// implementation file and are not visible from this header.
// Presumably initialize_* and release_* form a setup/teardown pair -- TODO confirm.
66 void initialize_parser_datastructures(vw& all);
67 void release_parser_datastructures(vw& all);
68 void adjust_used_index(vw& all);
69 
70 //parser control
71 
// Takes no arguments and returns nothing.
// NOTE(review): with no parameter this cannot reach a particular vw/parser
// instance through its arguments -- confirm this declaration is still in use.
72 void make_example_available();
// Returns a bool for the given parser; presumably reports whether parsing
// has finished -- TODO confirm against the implementation.
73 bool parser_done(parser* p);
// Takes the vw instance by non-const reference; presumably marks parsing
// as finished -- TODO confirm against the implementation.
74 void set_done(vw& all);
75 
76 //source control functions
// NOTE(review): the descriptions below are inferred from names and signatures
// only; the implementations are not visible from this header.
// Returns a bool given a bit count and a cache buffer; presumably true when
// the cache does not match `numbits` -- TODO confirm.
77 bool inconsistent_cache(size_t numbits, io_buf& cache);
// Presumably rewinds or reopens the input of `all` -- TODO confirm.
78 void reset_source(vw& all, size_t numbits);
// Presumably closes or cleans up the sources held by `source` -- TODO confirm.
79 void finalize_source(parser* source);
// Presumably switches `par` to compressed input/output -- TODO confirm.
80 void set_compressed(parser* par);
81 void initialize_examples(vw& all);
// Presumably releases parser resources owned by `all` -- TODO confirm.
82 void free_parser(vw& all);
// Returns a bool for the example `ae`; the meaning of the result and of
// `do_read` is defined by the implementation -- TODO confirm.
83 bool parse_atomic_example(vw& all, example* ae, bool do_read);
84