1 #pragma once
2 
3 #ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON
4 
5 #include "partial_tweets.h"
6 
7 namespace partial_tweets {
8 
9 using json = nlohmann::json;
10 
11 struct nlohmann_json_sax {
12     using StringType=std::string;
13 
14     struct Handler : json::json_sax_t
15     {
16         // 8 keys to parse for each tweet (in order of appearance): "created_at", "id", "text", "in_reply_status_id", "id"(user),
17         // "screen_name"(user), "retweet_count" and "favorite_count".
18         // Assume that the first valid key encountered will be the correct key to parse.
19         // Assume that each tweet/retweet start with a key "metadata" and has a key "retweeted" towards the end
20         // The previous assumption will be used to check for the beginning of a new tweet and the end of a retweet
21         enum state {    // Bitset to store state of search
22             key_date = (1<<0),
23             key_id = (1<<1),
24             key_text = (1<<2),
25             key_reply = (1<<3),
26             key_userid = (1<<4),
27             key_screenname = (1<<5),
28             key_rt = (1<<6),
29             key_fav = (1<<7),
30             found_date = (1<<8),
31             found_id = (1<<9),
32             found_text = (1<<10),
33             found_reply = (1<<11),
34             found_userid = (1<<12),
35             found_screenname = (1<<13),
36             found_rt = (1<<14),
37             found_fav = (1<<15)
38         };
39         int values = state::key_date;
40         bool userobject_id = false; // If in a user object (to find user.id)
41         bool userobject_screen_name = false;    // If in a user object (to find user.screen_name)
42         bool inretweet = false; // If in a retweet (all keys irrelevant in retweet object)
43         // Fields to store partial tweet info
44         uint64_t user_id;
45         uint64_t id;
46         uint64_t rt;
47         uint64_t fav;
48         uint64_t reply_status;
49         string_t screen_name;
50         string_t date;
51         string_t text;
52         std::vector<tweet<std::string>>& result;
53 
Handlernlohmann_json_sax::Handler54         Handler(std::vector<tweet<std::string>> &r) : result(r) { }
55 
keynlohmann_json_sax::Handler56         bool key(string_t& val) override {
57             if (!inretweet) {   // If not in a retweet object, find relevant keys
58                 if (val.compare("retweeted_status") == 0) { inretweet = true; }   // Check if entering retweet
59                 else if (val.compare("metadata") == 0) { values = 0; }  // Reset
60                 // Check if key has been found and if key matches a valid key
61                 else if (!(values & found_date) && (val.compare("created_at") == 0)) { values |= (key_date); }
62                 // Must also check if not in a user object
63                 else if (!(values & found_id) && !userobject_id && (val.compare("id") == 0)) { values |= (key_id); }
64                 else if (!(values & found_text) && (val.compare("text") == 0)) { values |= (key_text); }
65                 else if (!(values & found_reply) && (val.compare("in_reply_to_status_id") == 0)) { values |= (key_reply); }
66                 // Check if entering user object
67                 else if ((val.compare("user") == 0)) { userobject_id = userobject_screen_name = true; }
68                 // Must also check if in a user object
69                 else if (!(values & found_userid) && userobject_id && (val.compare("id") == 0)) { values |= (key_userid); }
70                 // Must also check if in a user object
71                 else if (!(values & found_screenname) && userobject_screen_name && (val.compare("screen_name") == 0)) { values |= (key_screenname); }
72                 else if (!(values & found_rt) && (val.compare("retweet_count") == 0)) { values |= (key_rt); }
73                 else if (!(values & found_fav) && (val.compare("favorite_count") == 0)) { values |= (key_fav); }
74             }
75             else if (val.compare("retweeted") == 0) { inretweet = false; }  // Check if end of retweet
76             return true;
77         }
number_unsignednlohmann_json_sax::Handler78         bool number_unsigned(number_unsigned_t val) override {
79             if (values & key_id && !(values & found_id)) {    // id
80                 id = val;
81                 values &= ~(key_id);
82                 values |= (found_id);
83             }
84             else if (values & key_reply && !(values & found_reply)) {   // in_reply_status_id
85                 reply_status = val;
86                 values &= ~(key_reply);
87                 values |= (found_reply);
88             }
89             else if (values & key_userid && !(values & found_userid)) {    // user.id
90                 user_id = val;
91                 userobject_id = false;
92                 values &= ~(key_userid);
93                 values |= (found_userid);
94             }
95             else if (values & key_rt && !(values & found_rt)) {   // retweet_count
96                 rt = val;
97                 values &= ~(key_rt);
98                 values |= (found_rt);
99             }
100             else if (values & key_fav && !(values & found_fav)) {   // favorite_count
101                 fav = val;
102                 values &= ~(key_fav);
103                 values |= (found_fav);
104                 // Assume that this is last key required, so add the partial_tweet to result
105                 result.emplace_back(partial_tweets::tweet<std::string>{
106                 date,id,text,reply_status,{user_id,screen_name},rt,fav});
107             }
108             return true;
109         }
stringnlohmann_json_sax::Handler110         bool string(string_t& val) override {
111             if (values & key_date && !(values & found_date)) {   //  created_at
112                 date = val;
113                 values &= ~(key_date);
114                 values |= (found_date);
115             }
116             else if (values & key_text && !(values & found_text)) {   // text
117                 text = val;
118                 values &= ~(key_text);
119                 values |= (found_text);
120             }
121             else if (values & key_screenname && !(values & found_screenname)) {    // user.screen_name
122                 screen_name = val;
123                 userobject_screen_name = false;
124                 values &= ~(key_screenname);
125                 values |= (found_screenname);
126             }
127             return true;
128         }
nullnlohmann_json_sax::Handler129         bool null() override {
130             if (values & key_reply && !(values & found_reply)) {    // in_reply_status (null case)
131                 reply_status = 0;
132                 values &= ~(key_reply);
133                 values |= (found_reply);
134             }
135             return true;
136         }
137         // Irrelevant events
booleannlohmann_json_sax::Handler138         bool boolean(bool val) override { return true; }
number_floatnlohmann_json_sax::Handler139         bool number_float(number_float_t val, const string_t& s) override { return true; }
number_integernlohmann_json_sax::Handler140         bool number_integer(number_integer_t val) override { return true; }
start_objectnlohmann_json_sax::Handler141         bool start_object(std::size_t elements) override { return true; }
end_objectnlohmann_json_sax::Handler142         bool end_object() override { return true; }
start_arraynlohmann_json_sax::Handler143         bool start_array(std::size_t elements) override { return true; }
end_arraynlohmann_json_sax::Handler144         bool end_array() override { return true; }
binarynlohmann_json_sax::Handler145         bool binary(json::binary_t& val) override { return true; }
parse_errornlohmann_json_sax::Handler146         bool parse_error(std::size_t position, const std::string& last_token, const json::exception& ex) override { return false; }
147     }; // Handler
148 
runnlohmann_json_sax149     bool run(simdjson::padded_string &json, std::vector<tweet<std::string>> &result) {
150         Handler handler(result);
151         json::sax_parse(json.data(), &handler);
152 
153         return true;
154     }
155 }; // nlohmann_json_sax
156 BENCHMARK_TEMPLATE(partial_tweets, nlohmann_json_sax)->UseManualTime();
157 } // namespace partial_tweets
158 
159 #endif // SIMDJSON_COMPETITION_NLOHMANN_JSON