1 // Author:  Bruce Allen
2 // Created: 2/25/2013
3 //
4 // The software provided here is released by the Naval Postgraduate
5 // School, an agency of the U.S. Department of Navy.  The software
6 // bears no warranty, either expressed or implied. NPS does not assume
7 // legal liability nor responsibility for a User's use of the software
8 // or the results of such use.
9 //
10 // Please note that within the United States, copyright protection,
11 // under Section 105 of the United States Code, Title 17, is not
12 // available for any work of the United States Government and/or for
13 // any works created by United States Government employees. User
14 // acknowledges that this software contains work which was created by
15 // NPS government employees and is therefore in the public domain and
16 // not subject to copyright.
17 //
18 // Released into the public domain on February 25, 2013 by Bruce Allen.
19 
20 /**
21  * \file
22  * Export data in JSON format.  Lines are one of:
23  *   source data, block hash data, or comment.
24  */
25 
26 #include <config.h>
27 // this process of getting WIN32 defined was inspired
28 // from i686-w64-mingw32/sys-root/mingw/include/windows.h.
29 // All this to include winsock2.h before windows.h to avoid a warning.
30 #if defined(__MINGW64__) && defined(__cplusplus)
31 #  ifndef WIN32
32 #    define WIN32
33 #  endif
34 #endif
35 #ifdef WIN32
36   // including winsock2.h now keeps an included header somewhere from
37   // including windows.h first, resulting in a warning.
38   #include <winsock2.h>
39 #endif
40 
41 #include <iostream>
42 #include <cassert>
43 #include "../src_libhashdb/hashdb.hpp"
44 #include "progress_tracker.hpp"
45 
export_json_sources(const hashdb::scan_manager_t & manager,std::ostream & os)46 void export_json_sources(const hashdb::scan_manager_t& manager,
47                          std::ostream& os) {
48 
49   std::string file_hash = manager.first_source();
50   while (file_hash.size() != 0) {
51 
52     // get source data
53     std::string json_source_string = manager.export_source_json(file_hash);
54 
55     // program error
56     if (json_source_string.size() == 0) {
57       assert(0);
58     }
59 
60     os << json_source_string << "\n";
61 
62     // next
63     file_hash = manager.next_source(file_hash);
64   }
65 }
66 
export_json_hashes(const hashdb::scan_manager_t & manager,progress_tracker_t & progress_tracker,std::ostream & os)67 void export_json_hashes(const hashdb::scan_manager_t& manager,
68                         progress_tracker_t& progress_tracker,
69                         std::ostream& os) {
70 
71   // space for variables in order to use the tracker
72   std::string block_hash;
73   uint64_t k_entropy;
74   std::string block_label;
75   uint64_t count;
76   hashdb::source_sub_counts_t source_sub_counts;
77 
78   block_hash = manager.first_hash();
79   while (block_hash.size() != 0) {
80 
81     // get hash data
82     std::string json_hash_string = manager.export_hash_json(block_hash);
83 
84     // program error
85     if (json_hash_string.size() == 0) {
86       assert(0);
87     }
88 
89     // emit the JSON
90     os << json_hash_string << "\n";
91 
92     // update the progress tracker, this accurate approach is expensive
93     manager.find_hash(block_hash, k_entropy, block_label,
94                       count, source_sub_counts);
95     progress_tracker.track_hash_data(source_sub_counts.size());
96 
97     // next
98     block_hash = manager.next_hash(block_hash);
99   }
100 }
101 
export_json_range(const hashdb::scan_manager_t & manager,const std::string & begin_block_hash,const std::string & end_block_hash,progress_tracker_t & progress_tracker,std::ostream & os)102 void export_json_range(const hashdb::scan_manager_t& manager,
103                        const std::string& begin_block_hash,
104                        const std::string& end_block_hash,
105                        progress_tracker_t& progress_tracker,
106                        std::ostream& os) {
107 
108   // the subset of cited sources to export
109   std::set<std::string> source_hashes;
110 
111   // space for variables in order to use the tracker
112   std::string block_hash;
113   uint64_t k_entropy;
114   std::string block_label;
115   uint64_t count;
116   hashdb::source_sub_counts_t source_sub_counts;
117 
118   // export the block hashes that are in range
119   block_hash = manager.first_hash();
120   while (block_hash.size() != 0) {
121 
122     manager.find_hash(block_hash, k_entropy, block_label,
123                       count, source_sub_counts);
124 
125     if (block_hash >= begin_block_hash && block_hash <= end_block_hash) {
126       // process the block hash since it is in range
127 
128       // get JSON hash data
129       std::string json_hash_string = manager.export_hash_json(block_hash);
130 
131       // program error
132       if (json_hash_string.size() == 0) {
133         assert(0);
134       }
135 
136       // emit the JSON
137       os << json_hash_string << "\n";
138 
139       // note the sources involved
140       for (hashdb::source_sub_counts_t::const_iterator it =
141            source_sub_counts.begin(); it != source_sub_counts.end(); ++it) {
142         source_hashes.insert(it->file_hash);
143       }
144     }
145 
146     // update the progress tracker
147     progress_tracker.track_hash_data(source_sub_counts.size());
148 
149     // next
150     block_hash = manager.next_hash(block_hash);
151   }
152 
153   // export the cited sources
154   for (std::set<std::string>::const_iterator it2 = source_hashes.begin();
155        it2 != source_hashes.end(); ++it2) {
156 
157     // get source data
158     std::string json_source_string = manager.export_source_json(*it2);
159 
160     // program error
161     if (json_source_string.size() == 0) {
162       assert(0);
163     }
164 
165     os << json_source_string << "\n";
166   }
167 }
168 
169