#pragma once

#ifdef SIMDJSON_COMPETITION_NLOHMANN_JSON

#include "partial_tweets.h"

namespace partial_tweets {

using json = nlohmann::json;

/**
 * partial_tweets benchmark implementation built on nlohmann::json's SAX interface.
 *
 * The parser pushes events into Handler, which keeps a small bitset state
 * machine and appends one tweet<std::string> to the output vector each time
 * the last required field ("favorite_count") of a tweet has been read.
 */
struct nlohmann_json_sax {
  using StringType = std::string;

  /**
   * SAX event sink that extracts eight fields per tweet.
   *
   * Keys looked for (in order of appearance): "created_at", "id", "text",
   * "in_reply_to_status_id", "id" (user), "screen_name" (user),
   * "retweet_count" and "favorite_count".
   *
   * Assumptions baked into the state machine:
   *  - the first matching key encountered is the correct one to parse;
   *  - each tweet/retweet starts with a key "metadata" and has a key
   *    "retweeted" towards its end. "metadata" is used to detect the start of
   *    a new tweet, "retweeted" to detect the end of a retweet.
   */
  struct Handler : json::json_sax_t {
    /**
     * Search state, stored as a bitset in `values`.
     * key_*   : the matching key was just seen, its value is still pending.
     * found_* : the value has been stored for the current tweet.
     */
    enum state {
      key_date         = (1 << 0),
      key_id           = (1 << 1),
      key_text         = (1 << 2),
      key_reply        = (1 << 3),
      key_userid       = (1 << 4),
      key_screenname   = (1 << 5),
      key_rt           = (1 << 6),
      key_fav          = (1 << 7),
      found_date       = (1 << 8),
      found_id         = (1 << 9),
      found_text       = (1 << 10),
      found_reply      = (1 << 11),
      found_userid     = (1 << 12),
      found_screenname = (1 << 13),
      found_rt         = (1 << 14),
      found_fav        = (1 << 15)
    };

    // Current search state. It is reset to 0 whenever a "metadata" key is seen.
    int values = state::key_date;
    bool userobject_id = false;          // In a user object and user.id not read yet
    bool userobject_screen_name = false; // In a user object and user.screen_name not read yet
    bool inretweet = false;              // In a retweet (all keys are irrelevant there)

    // Fields of the tweet currently being assembled. The numeric ones are
    // value-initialized so that a tweet emitted before every field was seen
    // never copies an indeterminate value.
    uint64_t user_id{};
    uint64_t id{};
    uint64_t rt{};
    uint64_t fav{};
    uint64_t reply_status{};
    string_t screen_name;
    string_t date;
    string_t text;

    // Output vector owned by the caller; one element is appended per tweet.
    std::vector<tweet<std::string>>& result;

    /** @param r vector that receives the extracted tweets (stored by reference). */
    explicit Handler(std::vector<tweet<std::string>> &r) : result(r) { }

    /** True when the key for a field was seen and its value has not been stored yet. */
    bool is_pending(int key_bit, int found_bit) const {
      return (values & key_bit) && !(values & found_bit);
    }

    /** Clears the "value pending" bit of a field and records it as found. */
    void mark_found(int key_bit, int found_bit) {
      values &= ~key_bit;
      values |= found_bit;
    }

    /**
     * Object key event: arms the matching key_* bit for the next value event.
     * The order of the checks matters ("id" is matched twice, once outside and
     * once inside a user object), so the chain must stay in this order.
     */
    bool key(string_t& val) override {
      if (!inretweet) { // Not in a retweet object: look for relevant keys
        if (val == "retweeted_status") { inretweet = true; }  // Entering a retweet
        else if (val == "metadata") { values = 0; }           // New tweet: reset the search state
        // Check that the field has not been found yet and that the key matches
        else if (!(values & found_date) && (val == "created_at")) { values |= key_date; }
        // Tweet id: must not be inside a user object
        else if (!(values & found_id) && !userobject_id && (val == "id")) { values |= key_id; }
        else if (!(values & found_text) && (val == "text")) { values |= key_text; }
        else if (!(values & found_reply) && (val == "in_reply_to_status_id")) { values |= key_reply; }
        // Entering a user object
        else if (val == "user") { userobject_id = userobject_screen_name = true; }
        // User id: must be inside a user object
        else if (!(values & found_userid) && userobject_id && (val == "id")) { values |= key_userid; }
        // User screen name: must be inside a user object
        else if (!(values & found_screenname) && userobject_screen_name && (val == "screen_name")) { values |= key_screenname; }
        else if (!(values & found_rt) && (val == "retweet_count")) { values |= key_rt; }
        else if (!(values & found_fav) && (val == "favorite_count")) { values |= key_fav; }
      }
      else if (val == "retweeted") { inretweet = false; } // End of the retweet
      return true;
    }

    /**
     * Unsigned number event: stores the value into the first pending numeric
     * field. Reading "favorite_count" completes the tweet and appends it to
     * `result`.
     */
    bool number_unsigned(number_unsigned_t val) override {
      if (is_pending(key_id, found_id)) {                 // id
        id = val;
        mark_found(key_id, found_id);
      }
      else if (is_pending(key_reply, found_reply)) {      // in_reply_to_status_id
        reply_status = val;
        mark_found(key_reply, found_reply);
      }
      else if (is_pending(key_userid, found_userid)) {    // user.id
        user_id = val;
        userobject_id = false;
        mark_found(key_userid, found_userid);
      }
      else if (is_pending(key_rt, found_rt)) {            // retweet_count
        rt = val;
        mark_found(key_rt, found_rt);
      }
      else if (is_pending(key_fav, found_fav)) {          // favorite_count
        fav = val;
        mark_found(key_fav, found_fav);
        // Assume this is the last required key, so add the partial tweet to the result
        result.emplace_back(partial_tweets::tweet<std::string>{
          date, id, text, reply_status, {user_id, screen_name}, rt, fav});
      }
      return true;
    }

    /** String event: stores the value into the first pending string field. */
    bool string(string_t& val) override {
      if (is_pending(key_date, found_date)) {                   // created_at
        date = val;
        mark_found(key_date, found_date);
      }
      else if (is_pending(key_text, found_text)) {              // text
        text = val;
        mark_found(key_text, found_text);
      }
      else if (is_pending(key_screenname, found_screenname)) {  // user.screen_name
        screen_name = val;
        userobject_screen_name = false;
        mark_found(key_screenname, found_screenname);
      }
      return true;
    }

    /** Null event: a null "in_reply_to_status_id" is recorded as 0. */
    bool null() override {
      if (is_pending(key_reply, found_reply)) { // in_reply_to_status_id (null case)
        reply_status = 0;
        mark_found(key_reply, found_reply);
      }
      return true;
    }

    // Irrelevant events: accepted and ignored. Parameters are left unnamed to
    // avoid unused-parameter warnings.
    bool boolean(bool /*val*/) override { return true; }
    bool number_float(number_float_t /*val*/, const string_t& /*s*/) override { return true; }
    bool number_integer(number_integer_t /*val*/) override { return true; }
    bool start_object(std::size_t /*elements*/) override { return true; }
    bool end_object() override { return true; }
    bool start_array(std::size_t /*elements*/) override { return true; }
    bool end_array() override { return true; }
    bool binary(json::binary_t& /*val*/) override { return true; }

    /** Parse error event: returns false so the parser reports the failure. */
    bool parse_error(std::size_t /*position*/, const std::string& /*last_token*/, const json::exception& /*ex*/) override { return false; }
  }; // Handler

  /**
   * Parses `json` and appends the extracted tweets to `result`.
   *
   * @param json   document to parse
   * @param result receives one entry per tweet found
   * @return the value returned by json::sax_parse, so a parse failure is
   *         reported instead of being silently treated as success
   */
  bool run(simdjson::padded_string &json, std::vector<tweet<std::string>> &result) {
    Handler handler(result);
    // `json::` names the nlohmann alias even though the parameter is also
    // called `json`: lookup of a name followed by `::` ignores variables.
    // NOTE(review): passing only json.data() relies on the buffer being
    // null-terminated - confirm against padded_string.
    return json::sax_parse(json.data(), &handler);
  }
}; // nlohmann_json_sax

BENCHMARK_TEMPLATE(partial_tweets, nlohmann_json_sax)->UseManualTime();

} // namespace partial_tweets

#endif // SIMDJSON_COMPETITION_NLOHMANN_JSON