1 /*  $Id: timing.cpp 629837 2021-04-22 12:47:49Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Sergey Satskiy
27  *
28  * File Description: PSG server timing
29  *
30  */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 
34 #include "timing.hpp"
35 #include "pubseq_gateway_utils.hpp"
36 #include "pubseq_gateway.hpp"
37 
// JSON keys under which the boundaries of the covered time range are stored.
// They are only ever read, so they are declared const.
static const string     kTimeRangeStart("TimeRangeStart");
static const string     kTimeRangeEnd("TimeRangeEnd");

// 8 GB. The factors are unsigned so that the product is never evaluated in
// signed arithmetic (a signed overflow would be undefined behaviour where
// long is 32 bits wide).
const unsigned long     kMaxBlobSize = 1024UL*1024UL*1024UL*8UL;
42 
43 
SerializeHistogram(const TOnePSGTiming & histogram,const string & name,const string & description)44 CJsonNode SerializeHistogram(const TOnePSGTiming &  histogram,
45                              const string  &  name,
46                              const string  &  description)
47 {
48     static string   kBins("Bins");
49     static string   kStart("Start");
50     static string   kEnd("End");
51     static string   kCount("Count");
52     static string   kLowerAnomaly("LowerAnomaly");
53     static string   kUpperAnomaly("UpperAnomaly");
54     static string   kTotalCount("TotalCount");
55     static string   kValueSum("ValueSum");
56     static string   kName("name");
57     static string   kDescription("description");
58 
59     CJsonNode       ret(CJsonNode::NewObjectNode());
60     ret.SetString(kName, name);
61     ret.SetString(kDescription, description);
62 
63 
64     CJsonNode       bins(CJsonNode::NewArrayNode());
65 
66     size_t          bin_count =  histogram.GetNumberOfBins();
67     size_t          last_bin_index = bin_count - 1;
68     auto            starts = histogram.GetBinStartsPtr();
69     auto            counters = histogram.GetBinCountersPtr();
70     for (size_t  k = 0; k < bin_count; ++k) {
71         CJsonNode   bin(CJsonNode::NewObjectNode());
72         bin.SetInteger(kStart, starts[k]);
73         if (k >= last_bin_index)
74             bin.SetInteger(kEnd, histogram.GetMax());
75         else
76             bin.SetInteger(kEnd, starts[k+1] - 1);
77         bin.SetInteger(kCount, counters[k]);
78 
79         bins.Append(bin);
80     }
81     ret.SetByKey(kBins, bins);
82 
83     // GetCount() does not include anomalies!
84     auto    lower_anomalies = histogram.GetLowerAnomalyCount();
85     auto    upper_anomalies = histogram.GetUpperAnomalyCount();
86     ret.SetInteger(kLowerAnomaly, lower_anomalies);
87     ret.SetInteger(kUpperAnomaly, upper_anomalies);
88     ret.SetInteger(kTotalCount, histogram.GetCount() +
89                                 lower_anomalies + upper_anomalies);
90     ret.SetInteger(kValueSum, histogram.GetSum());
91     return ret;
92 }
93 
94 
95 
SerializeSeries(int most_ancient_time,int most_recent_time,unsigned long tick_span,const string & name,const string & description) const96 CJsonNode CPSGTimingBase::SerializeSeries(int  most_ancient_time,
97                                           int  most_recent_time,
98                                           unsigned long  tick_span,
99                                           const string &  name,
100                                           const string &  description) const
101 {
102     CJsonNode                   ret(CJsonNode::NewArrayNode());
103     TPSGTiming::TTimeBins       bins = m_PSGTiming->GetHistograms();
104 
105     int64_t         histogram_start_time = 0;
106 
107     for (auto &  bin : bins) {
108         int64_t     histogram_cover = tick_span * bin.n_ticks;
109         int64_t     histogram_end_time = histogram_start_time + histogram_cover - 1;
110 
111         if (most_recent_time >= 0) {
112             // Most recent time defined
113             if (most_recent_time > histogram_end_time) {
114                 // It is out of the requested range
115                 histogram_start_time = histogram_end_time + 1;
116                 continue;
117             }
118         }
119 
120         if (most_ancient_time >= 0) {
121             // Most ancient time defined
122             if (most_ancient_time < histogram_start_time) {
123                 // It is out of the requested range
124                 histogram_start_time = histogram_end_time + 1;
125                 continue;
126             }
127         }
128 
129         // Histogram is within the range. Take the counters.
130         CJsonNode   slice = SerializeHistogram(bin.histogram, name, description);
131         slice.SetInteger(kTimeRangeStart, histogram_start_time);
132         slice.SetInteger(kTimeRangeEnd, histogram_end_time);
133         ret.Append(slice);
134 
135         histogram_start_time = histogram_end_time + 1;
136     }
137 
138     return ret;
139 }
140 
141 
SerializeCombined(int most_ancient_time,int most_recent_time,unsigned long tick_span,const string & name,const string & description) const142 CJsonNode CPSGTimingBase::SerializeCombined(int  most_ancient_time,
143                                             int  most_recent_time,
144                                             unsigned long  tick_span,
145                                             const string &  name,
146                                             const string &  description) const
147 {
148     TPSGTiming::TTimeBins       bins = m_PSGTiming->GetHistograms();
149     TOnePSGTiming               combined_histogram = bins.front().
150                                     histogram.Clone(TOnePSGTiming::eCloneStructureOnly);
151 
152     int64_t         histogram_start_time = 0;
153     int64_t         actual_recent_time = -1;    // undefined so far
154     int64_t         actual_ancient_time = -1;   // undefined so far
155 
156     for (auto &  bin : bins) {
157         int64_t     histogram_cover = tick_span * bin.n_ticks;
158         int64_t     histogram_end_time = histogram_start_time + histogram_cover - 1;
159 
160         if (most_recent_time >= 0) {
161             // Most recent time defined
162             if (most_recent_time > histogram_end_time) {
163                 // It is out of the requested range
164                 histogram_start_time = histogram_end_time + 1;
165                 continue;
166             }
167         }
168 
169         if (most_ancient_time >= 0) {
170             // Most ancient time defined
171             if (most_ancient_time < histogram_start_time) {
172                 // It is out of the requested range
173                 histogram_start_time = histogram_end_time + 1;
174                 continue;
175             }
176         }
177 
178         // Histogram is within the range. Take the counters.
179         combined_histogram.AddCountersFrom(bin.histogram);
180 
181         // Update actual covered range if needed
182         if (actual_recent_time == -1)
183             actual_recent_time = histogram_start_time;
184         actual_ancient_time = histogram_end_time;
185 
186         histogram_start_time = histogram_end_time + 1;
187     }
188 
189     // The histograms were combined. Serialize them.
190     if (actual_recent_time == -1 && actual_ancient_time == -1) {
191         // Nothing fit the selected time range
192         return CJsonNode::NewObjectNode();
193     }
194 
195     CJsonNode   ret = SerializeHistogram(combined_histogram, name, description);
196     ret.SetInteger(kTimeRangeStart, actual_recent_time);
197     ret.SetInteger(kTimeRangeEnd, actual_ancient_time);
198     return ret;
199 }
200 
201 
CLmdbCacheTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)202 CLmdbCacheTiming::CLmdbCacheTiming(unsigned long  min_stat_value,
203                                    unsigned long  max_stat_value,
204                                    unsigned long  n_bins,
205                                    TOnePSGTiming::EScaleType  stat_type,
206                                    bool &  reset_to_default)
207 {
208     reset_to_default = false;
209 
210     try {
211         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
212                                             n_bins, stat_type);
213         m_PSGTiming.reset(new TPSGTiming(model_histogram));
214     } catch (...) {
215         reset_to_default = true;
216         TOnePSGTiming       model_histogram(kMinStatValue,
217                                             kMaxStatValue,
218                                             kNStatBins,
219                                             TOnePSGTiming::eLog2);
220         m_PSGTiming.reset(new TPSGTiming(model_histogram));
221     }
222 }
223 
224 
CLmdbResolutionTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)225 CLmdbResolutionTiming::CLmdbResolutionTiming(unsigned long  min_stat_value,
226                                              unsigned long  max_stat_value,
227                                              unsigned long  n_bins,
228                                              TOnePSGTiming::EScaleType  stat_type,
229                                              bool &  reset_to_default)
230 {
231     reset_to_default = false;
232 
233     try {
234         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
235                                             n_bins, stat_type);
236         m_PSGTiming.reset(new TPSGTiming(model_histogram));
237     } catch (...) {
238         reset_to_default = true;
239         TOnePSGTiming       model_histogram(kMinStatValue,
240                                             kMaxStatValue,
241                                             kNStatBins,
242                                             TOnePSGTiming::eLog2);
243         m_PSGTiming.reset(new TPSGTiming(model_histogram));
244     }
245 }
246 
247 
CCassTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)248 CCassTiming::CCassTiming(unsigned long  min_stat_value,
249                          unsigned long  max_stat_value,
250                          unsigned long  n_bins,
251                          TOnePSGTiming::EScaleType  stat_type,
252                          bool &  reset_to_default)
253 {
254     reset_to_default = false;
255 
256     try {
257         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
258                                             n_bins, stat_type);
259         m_PSGTiming.reset(new TPSGTiming(model_histogram));
260     } catch (...) {
261         reset_to_default = true;
262         TOnePSGTiming       model_histogram(kMinStatValue,
263                                             kMaxStatValue,
264                                             kNStatBins,
265                                             TOnePSGTiming::eLog2);
266         m_PSGTiming.reset(new TPSGTiming(model_histogram));
267     }
268 }
269 
270 
CCassResolutionTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)271 CCassResolutionTiming::CCassResolutionTiming(unsigned long  min_stat_value,
272                                              unsigned long  max_stat_value,
273                                              unsigned long  n_bins,
274                                              TOnePSGTiming::EScaleType  stat_type,
275                                              bool &  reset_to_default)
276 {
277     reset_to_default = false;
278 
279     try {
280         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
281                                             n_bins, stat_type);
282         m_PSGTiming.reset(new TPSGTiming(model_histogram));
283     } catch (...) {
284         reset_to_default = true;
285         TOnePSGTiming       model_histogram(kMinStatValue,
286                                             kMaxStatValue,
287                                             kNStatBins,
288                                             TOnePSGTiming::eLog2);
289         m_PSGTiming.reset(new TPSGTiming(model_histogram));
290     }
291 }
292 
293 
CBlobRetrieveTiming(size_t min_blob_size,size_t max_blob_size,unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)294 CBlobRetrieveTiming::CBlobRetrieveTiming(size_t  min_blob_size,
295                                          size_t  max_blob_size,
296                                          unsigned long  min_stat_value,
297                                          unsigned long  max_stat_value,
298                                          unsigned long  n_bins,
299                                          TOnePSGTiming::EScaleType  stat_type,
300                                          bool &  reset_to_default) :
301     m_MinBlobSize(min_blob_size), m_MaxBlobSize(max_blob_size)
302 {
303     reset_to_default = false;
304 
305     try {
306         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
307                                             n_bins, stat_type);
308         m_PSGTiming.reset(new TPSGTiming(model_histogram));
309     } catch (...) {
310         reset_to_default = true;
311         TOnePSGTiming       model_histogram(kMinStatValue,
312                                             kMaxStatValue,
313                                             kNStatBins,
314                                             TOnePSGTiming::eLog2);
315         m_PSGTiming.reset(new TPSGTiming(model_histogram));
316     }
317 }
318 
319 
// JSON keys under which the blob size range boundaries are stored.
// They are only ever read, so they are declared const.
static const string     kStartBlobSize("MinBlobSize");
static const string     kEndBlobSize("MaxBlobSize");
322 
SerializeCombined(int most_ancient_time,int most_recent_time,unsigned long tick_span,const string & name,const string & description) const323 CJsonNode CBlobRetrieveTiming::SerializeCombined(int  most_ancient_time,
324                                                  int  most_recent_time,
325                                                  unsigned long  tick_span,
326                                                  const string &  name,
327                                                  const string &  description) const
328 {
329     CJsonNode       timing = CPSGTimingBase::SerializeCombined(most_ancient_time,
330                                                                most_recent_time,
331                                                                tick_span,
332                                                                name,
333                                                                description);
334     timing.SetInteger(kStartBlobSize, m_MinBlobSize);
335     timing.SetInteger(kEndBlobSize, m_MaxBlobSize);
336     return timing;
337 }
338 
339 
SerializeSeries(int most_ancient_time,int most_recent_time,unsigned long tick_span,const string & name,const string & description) const340 CJsonNode CBlobRetrieveTiming::SerializeSeries(int  most_ancient_time,
341                                                int  most_recent_time,
342                                                unsigned long  tick_span,
343                                                const string &  name,
344                                                const string &  description) const
345 {
346     static string   kBins("Bins");
347 
348     CJsonNode   ret(CJsonNode::NewObjectNode());
349     ret.SetByKey(kBins, CPSGTimingBase::SerializeSeries(most_ancient_time,
350                                                         most_recent_time,
351                                                         tick_span,
352                                                         name,
353                                                         description));
354     ret.SetInteger(kStartBlobSize, m_MinBlobSize);
355     ret.SetInteger(kEndBlobSize, m_MaxBlobSize);
356     return ret;
357 }
358 
359 
CHugeBlobRetrieveTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)360 CHugeBlobRetrieveTiming::CHugeBlobRetrieveTiming(
361         unsigned long  min_stat_value,
362         unsigned long  max_stat_value,
363         unsigned long  n_bins,
364         TOnePSGTiming::EScaleType  stat_type,
365         bool &  reset_to_default)
366 {
367     reset_to_default = false;
368 
369     try {
370         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
371                                             n_bins, stat_type);
372         m_PSGTiming.reset(new TPSGTiming(model_histogram));
373     } catch (...) {
374         reset_to_default = true;
375         TOnePSGTiming       model_histogram(kMinStatValue,
376                                             kMaxStatValue,
377                                             kNStatBins,
378                                             TOnePSGTiming::eLog2);
379         m_PSGTiming.reset(new TPSGTiming(model_histogram));
380     }
381 }
382 
383 
CNotFoundBlobRetrieveTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)384 CNotFoundBlobRetrieveTiming::CNotFoundBlobRetrieveTiming(
385         unsigned long  min_stat_value,
386         unsigned long  max_stat_value,
387         unsigned long  n_bins,
388         TOnePSGTiming::EScaleType  stat_type,
389         bool &  reset_to_default)
390 {
391     reset_to_default = false;
392 
393     try {
394         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
395                                             n_bins, stat_type);
396         m_PSGTiming.reset(new TPSGTiming(model_histogram));
397     } catch (...) {
398         reset_to_default = true;
399         TOnePSGTiming       model_histogram(kMinStatValue,
400                                             kMaxStatValue,
401                                             kNStatBins,
402                                             TOnePSGTiming::eLog2);
403         m_PSGTiming.reset(new TPSGTiming(model_histogram));
404     }
405 }
406 
407 
CNARetrieveTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)408 CNARetrieveTiming::CNARetrieveTiming(unsigned long  min_stat_value,
409                                      unsigned long  max_stat_value,
410                                      unsigned long  n_bins,
411                                      TOnePSGTiming::EScaleType  stat_type,
412                                      bool &  reset_to_default)
413 {
414     reset_to_default = false;
415 
416     try {
417         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
418                                             n_bins, stat_type);
419         m_PSGTiming.reset(new TPSGTiming(model_histogram));
420     } catch (...) {
421         reset_to_default = true;
422         TOnePSGTiming       model_histogram(kMinStatValue,
423                                             kMaxStatValue,
424                                             kNStatBins,
425                                             TOnePSGTiming::eLog2);
426         m_PSGTiming.reset(new TPSGTiming(model_histogram));
427     }
428 }
429 
430 
CSplitHistoryRetrieveTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)431 CSplitHistoryRetrieveTiming::CSplitHistoryRetrieveTiming(unsigned long  min_stat_value,
432                                                          unsigned long  max_stat_value,
433                                                          unsigned long  n_bins,
434                                                          TOnePSGTiming::EScaleType  stat_type,
435                                                          bool &  reset_to_default)
436 {
437     reset_to_default = false;
438 
439     try {
440         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
441                                             n_bins, stat_type);
442         m_PSGTiming.reset(new TPSGTiming(model_histogram));
443     } catch (...) {
444         reset_to_default = true;
445         TOnePSGTiming       model_histogram(kMinStatValue,
446                                             kMaxStatValue,
447                                             kNStatBins,
448                                             TOnePSGTiming::eLog2);
449         m_PSGTiming.reset(new TPSGTiming(model_histogram));
450     }
451 }
452 
453 
CPublicCommentRetrieveTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)454 CPublicCommentRetrieveTiming::CPublicCommentRetrieveTiming(unsigned long  min_stat_value,
455                                                            unsigned long  max_stat_value,
456                                                            unsigned long  n_bins,
457                                                            TOnePSGTiming::EScaleType  stat_type,
458                                                            bool &  reset_to_default)
459 {
460     reset_to_default = false;
461 
462     try {
463         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
464                                             n_bins, stat_type);
465         m_PSGTiming.reset(new TPSGTiming(model_histogram));
466     } catch (...) {
467         reset_to_default = true;
468         TOnePSGTiming       model_histogram(kMinStatValue,
469                                             kMaxStatValue,
470                                             kNStatBins,
471                                             TOnePSGTiming::eLog2);
472         m_PSGTiming.reset(new TPSGTiming(model_histogram));
473     }
474 }
475 
476 
CResolutionTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,bool & reset_to_default)477 CResolutionTiming::CResolutionTiming(unsigned long  min_stat_value,
478                                      unsigned long  max_stat_value,
479                                      unsigned long  n_bins,
480                                      TOnePSGTiming::EScaleType  stat_type,
481                                      bool &  reset_to_default)
482 {
483     reset_to_default = false;
484 
485     try {
486         TOnePSGTiming       model_histogram(min_stat_value, max_stat_value,
487                                             n_bins, stat_type);
488         m_PSGTiming.reset(new TPSGTiming(model_histogram));
489     } catch (...) {
490         reset_to_default = true;
491         TOnePSGTiming       model_histogram(kMinStatValue,
492                                             kMaxStatValue,
493                                             kNStatBins,
494                                             TOnePSGTiming::eLog2);
495         m_PSGTiming.reset(new TPSGTiming(model_histogram));
496     }
497 }
498 
499 
500 
COperationTiming(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,const string & stat_type,unsigned long small_blob_size)501 COperationTiming::COperationTiming(unsigned long  min_stat_value,
502                                    unsigned long  max_stat_value,
503                                    unsigned long  n_bins,
504                                    const string &  stat_type,
505                                    unsigned long  small_blob_size) :
506     m_HugeBlobByteCounter(0)
507 {
508     auto        scale_type = TOnePSGTiming::eLog2;
509     if (NStr::CompareNocase(stat_type, "linear") == 0)
510         scale_type = TOnePSGTiming::eLinear;
511 
512     bool    reset_to_default = false;
513     for (size_t  index = 0; index <= 1; ++index) {
514         m_LookupLmdbSi2csiTiming.push_back(
515             unique_ptr<CLmdbCacheTiming>(
516                 new CLmdbCacheTiming(min_stat_value, max_stat_value,
517                                      n_bins, scale_type, reset_to_default)));
518         m_LookupLmdbBioseqInfoTiming.push_back(
519             unique_ptr<CLmdbCacheTiming>(
520                 new CLmdbCacheTiming(min_stat_value, max_stat_value,
521                                      n_bins, scale_type, reset_to_default)));
522         m_LookupLmdbBlobPropTiming.push_back(
523             unique_ptr<CLmdbCacheTiming>(
524                 new CLmdbCacheTiming(min_stat_value, max_stat_value,
525                                      n_bins, scale_type, reset_to_default)));
526         m_LookupCassSi2csiTiming.push_back(
527             unique_ptr<CCassTiming>(
528                 new CCassTiming(min_stat_value, max_stat_value,
529                                 n_bins, scale_type, reset_to_default)));
530         m_LookupCassBioseqInfoTiming.push_back(
531             unique_ptr<CCassTiming>(
532                 new CCassTiming(min_stat_value, max_stat_value,
533                                 n_bins, scale_type, reset_to_default)));
534         m_LookupCassBlobPropTiming.push_back(
535             unique_ptr<CCassTiming>(
536                 new CCassTiming(min_stat_value, max_stat_value,
537                                 n_bins, scale_type, reset_to_default)));
538 
539         m_ResolutionLmdbTiming.push_back(
540             unique_ptr<CLmdbResolutionTiming>(
541                 new CLmdbResolutionTiming(min_stat_value, max_stat_value,
542                                           n_bins, scale_type, reset_to_default)));
543         m_ResolutionCassTiming.push_back(
544             unique_ptr<CCassResolutionTiming>(
545                 new CCassResolutionTiming(min_stat_value, max_stat_value,
546                                           n_bins, scale_type, reset_to_default)));
547 
548         m_NARetrieveTiming.push_back(
549             unique_ptr<CNARetrieveTiming>(
550                 new CNARetrieveTiming(min_stat_value, max_stat_value,
551                                       n_bins, scale_type, reset_to_default)));
552 
553         m_SplitHistoryRetrieveTiming.push_back(
554             unique_ptr<CSplitHistoryRetrieveTiming>(
555                 new CSplitHistoryRetrieveTiming(min_stat_value, max_stat_value,
556                                                 n_bins, scale_type, reset_to_default)));
557 
558         m_PublicCommentRetrieveTiming.push_back(
559             unique_ptr<CPublicCommentRetrieveTiming>(
560                 new CPublicCommentRetrieveTiming(min_stat_value, max_stat_value,
561                                                  n_bins, scale_type, reset_to_default)));
562     }
563 
564     m_HugeBlobRetrievalTiming.reset(
565         new CHugeBlobRetrieveTiming(min_stat_value, max_stat_value,
566                                     n_bins, scale_type, reset_to_default));
567     m_NotFoundBlobRetrievalTiming.reset(
568         new CNotFoundBlobRetrieveTiming(min_stat_value, max_stat_value,
569                                         n_bins, scale_type, reset_to_default));
570 
571     // Resolution timing
572     m_ResolutionErrorTiming.reset(
573         new CResolutionTiming(min_stat_value, max_stat_value,
574                               n_bins, scale_type, reset_to_default));
575     m_ResolutionNotFoundTiming.reset(
576         new CResolutionTiming(min_stat_value, max_stat_value,
577                               n_bins, scale_type, reset_to_default));
578     m_ResolutionFoundTiming.reset(
579         new CResolutionTiming(min_stat_value, max_stat_value,
580                               n_bins, scale_type, reset_to_default));
581     // 1, 2, 3, 4, 5+ trips to cassandra
582     for (size_t  index = 0; index < 5; ++index) {
583         m_ResolutionFoundCassandraTiming.push_back(
584             unique_ptr<CResolutionTiming>(
585                 new CResolutionTiming(min_stat_value, max_stat_value,
586                                       n_bins, scale_type, reset_to_default)));
587     }
588 
589 
590     reset_to_default |= x_SetupBlobSizeBins(min_stat_value, max_stat_value,
591                                             n_bins, scale_type, small_blob_size);
592 
593 
594     if (reset_to_default)
595         ERR_POST("Invalid statistics parameters detected. Default parameters "
596                  "were used");
597 
598     // fill the map between the histogram name and where it is stored
599     m_NamesMap = {
600         { "LookupLmdbSi2csiFound",
601           SInfo(m_LookupLmdbSi2csiTiming[0].get(),
602                 "si2csi LMDB cache found",
603                 "The timing of si2csi LMDB cache lookup "
604                 "when a record was found"
605                )
606         },
607         { "LookupLmdbSi2csiNotFound",
608           SInfo(m_LookupLmdbSi2csiTiming[1].get(),
609                 "si2csi LMDB cache not found",
610                 "The timing of si2csi LMDB cache lookup "
611                 "when there was no record found"
612                )
613         },
614         { "LookupLmdbBioseqInfoFound",
615           SInfo(m_LookupLmdbBioseqInfoTiming[0].get(),
616                 "bioseq info LMDB cache found",
617                 "The timing of bioseq info LMDB cache lookup "
618                 "when a record was found"
619                )
620         },
621         { "LookupLmdbBioseqInfoNotFound",
622           SInfo(m_LookupLmdbBioseqInfoTiming[1].get(),
623                 "bioseq info LMDB cache not found",
624                 "The timing of bioseq info LMDB cache lookup "
625                 "when there was no record found"
626                )
627         },
628         { "LookupLmdbBlobPropFound",
629           SInfo(m_LookupLmdbBlobPropTiming[0].get(),
630                 "blob properties LMDB cache found",
631                 "The timing of blob properties LMDB cache lookup "
632                 "when a record was found"
633                )
634         },
635         { "LookupLmdbBlobPropNotFound",
636           SInfo(m_LookupLmdbBlobPropTiming[1].get(),
637                 "blob properties LMDB cache not found",
638                 "The timing of blob properties LMDB cache lookup "
639                 "when there was no record found"
640                )
641         },
642         { "LookupCassSi2csiFound",
643           SInfo(m_LookupCassSi2csiTiming[0].get(),
644                 "si2csi Cassandra found",
645                 "The timing of si2csi Cassandra lookup "
646                 "when a record was found"
647                )
648         },
649         { "LookupCassSi2csiNotFound",
650           SInfo(m_LookupCassSi2csiTiming[1].get(),
651                 "si2csi Cassandra not found",
652                 "The timing of si2csi Cassandra lookup "
653                 "when there was no record found"
654                )
655         },
656         { "LookupCassBioseqInfoFound",
657           SInfo(m_LookupCassBioseqInfoTiming[0].get(),
658                 "bioseq info Cassandra found",
659                 "The timing of bioseq info Cassandra lookup "
660                 "when a record was found"
661                )
662         },
663         { "LookupCassBioseqInfoNotFound",
664           SInfo(m_LookupCassBioseqInfoTiming[1].get(),
665                 "bioseq info Cassandra not found",
666                 "The timing of bioseq info Cassandra lookup "
667                 "when there was no record found"
668                )
669         },
670         { "LookupCassBlobPropFound",
671           SInfo(m_LookupCassBlobPropTiming[0].get(),
672                 "blob properties Cassandra found",
673                 "The timing of blob properties Cassandra lookup "
674                 "when a record was found"
675                )
676         },
677         { "LookupCassBlobPropNotFound",
678           SInfo(m_LookupCassBlobPropTiming[1].get(),
679                 "blob properties Cassandra not found",
680                 "The timing of blob properties Cassandra lookup "
681                 "when there was no record found"
682                )
683         },
684         { "ResolutionLmdbFound",
685           SInfo(m_ResolutionLmdbTiming[0].get(),
686                 "LMDB resolution succeeded",
687                 "The timing of a seq id successful resolution "
688                 "in LMDB cache (start: request is received)"
689                )
690         },
691         { "ResolutionLmdbNotFound",
692           SInfo(m_ResolutionLmdbTiming[1].get(),
693                 "LMDB resolution not found",
694                 "The timing of a seq id unsuccessful resolution "
695                 "when all the tries in LMDB cache led to nothing "
696                 "(start: request is received)"
697                )
698         },
699         { "ResolutionCassFound",
700           SInfo(m_ResolutionCassTiming[0].get(),
701                 "Cassandra resolution succeeded",
702                 "The timing of a seq id successful resolution "
703                 "in Cassandra regardless how many queries were "
704                 "made to Cassandra (start: first Cassandra query)"
705                )
706         },
707         { "ResolutionCassNotFound",
708           SInfo(m_ResolutionCassTiming[1].get(),
709                 "Cassandra resolution not found",
710                 "The timing of a seq id unsuccessful resolution "
711                 "when all the tries in Cassandra led to nothing "
712                 "start: first Cassandra query)"
713                )
714         },
715         { "NARetrieveFound",
716           SInfo(m_NARetrieveTiming[0].get(),
717                 "Named annotations found",
718                 "The timing of named annotations successful retrieval"
719                )
720         },
721         { "NARetrieveNotFound",
722           SInfo(m_NARetrieveTiming[1].get(),
723                 "Named annotations not found",
724                 "The timing of named annotations retrieval "
725                 "when nothing was found"
726                )
727         },
728         { "SplitHistoryRetrieveFound",
729           SInfo(m_SplitHistoryRetrieveTiming[0].get(),
730                 "Split history found",
731                 "The timing of split history successful retrieval"
732                )
733         },
734         { "SplitHistoryRetrieveNotFound",
735           SInfo(m_SplitHistoryRetrieveTiming[1].get(),
736                 "Split history not found",
737                 "The timing of split history retrieval "
738                 "when nothing was found"
739                )
740         },
741         { "PublicCommentRetrieveFound",
742           SInfo(m_PublicCommentRetrieveTiming[0].get(),
743                 "Public comment found",
744                 "The timing of a public comment successful retrieval"
745                )
746         },
747         { "PublicCommentRetrieveNotFound",
748           SInfo(m_PublicCommentRetrieveTiming[1].get(),
749                 "Public comment not found",
750                 "The timing of public comment retrieval "
751                 "when nothing was found"
752                )
753         },
754         { "HugeBlobRetrieval",
755           SInfo(m_HugeBlobRetrievalTiming.get(),
756                 "Huge blob retrieval",
757                 "The timing of the very large blob retrieval",
758                 &m_HugeBlobByteCounter,
759                 "HugeBlobByteCounter",
760                 "Huge blob bytes counter",
761                 "The number of bytes transferred to the user as very large blobs"
762                )
763         },
764         { "BlobRetrievalNotFound",
765           SInfo(m_NotFoundBlobRetrievalTiming.get(),
766                 "Blob retrieval not found",
767                 "The timing of blob retrieval when a blob was not found"
768                )
769         },
770         { "ResolutionError",
771           SInfo(m_ResolutionErrorTiming.get(),
772                 "Resolution error",
773                 "The timing of a case when an error was detected while "
774                 "resolving seq id regardless it was cache, Cassandra or both "
775                 "(start: request is received)"
776                )
777         },
778         { "ResolutionNotFound",
779           SInfo(m_ResolutionNotFoundTiming.get(),
780                 "Resolution not found",
781                 "The timing of a case when resolution of a seq id did not succeed "
782                 "regardless it was cache, Cassandra or both "
783                 "(start: request is received)"
784                )
785         },
786         { "ResolutionFound",
787           SInfo(m_ResolutionFoundTiming.get(),
788                 "Resolution succeeded",
789                 "The timing of a seq id successful resolution regardless it "
790                 "was cache, Cassandra or both "
791                 "(start: request is received)"
792                )
793         },
794         { "ResolutionFoundCassandraIn1Try",
795           SInfo(m_ResolutionFoundCassandraTiming[0].get(),
796                 "Resolution succeeded via Cassandra (1 try)",
797                 "The timing of a seq id resolution in Cassandra when "
798                 "1 try was required (start: first Cassandra query)"
799                )
800         },
801         { "ResolutionFoundCassandraIn2Tries",
802           SInfo(m_ResolutionFoundCassandraTiming[1].get(),
803                 "Resolution succeeded via Cassandra (2 tries)",
804                 "The timing of a seq id resolution in Cassandra when "
805                 "2 tries were required (start: first Cassandra query)"
806                )
807         },
808         { "ResolutionFoundCassandraIn3Tries",
809           SInfo(m_ResolutionFoundCassandraTiming[2].get(),
810                 "Resolution succeeded via Cassandra (3 tries)",
811                 "The timing of a seq id resolution in Cassandra when "
812                 "3 tries were required (start: first Cassandra query)"
813                )
814         },
815         { "ResolutionFoundCassandraIn4Tries",
816           SInfo(m_ResolutionFoundCassandraTiming[3].get(),
817                 "Resolution succeeded via Cassandra (4 tries)",
818                 "The timing of a seq id resolution in Cassandra when "
819                 "4 tries were required (start: first Cassandra query)"
820                )
821         },
822         { "ResolutionFoundCassandraIn5OrMoreTries",
823           SInfo(m_ResolutionFoundCassandraTiming[4].get(),
824                 "Resolution succeeded via Cassandra (5 tries or more)",
825                 "The timing of a seq id resolution in Cassandra when "
826                 "5 or more tries were required (start: first Cassandra query)"
827                )
828         }
829     };
830 
831     size_t      index = 0;
832     for (auto & retieve_timing : m_BlobRetrieveTiming) {
833         string      min_size_str = to_string(retieve_timing->GetMinBlobSize());
834         string      max_size_str = to_string(retieve_timing->GetMaxBlobSize());
835         string      id = "BlobRetrievalFrom" + min_size_str + "To" + max_size_str;
836         string      name = "Blob retrieval (size: " +
837                         min_size_str + " to " + max_size_str + ")";
838         string      description = "The timing of a blob retrieval when "
839                         "the blob size is between " + min_size_str +
840                         " and " + max_size_str + " bytes";
841 
842         string      counter_id = "BlobByteCounterFrom" + min_size_str +
843                                  "To" + max_size_str;
844         string      counter_name = "Blob byte counter (blob size: " +
845                                    min_size_str + " to " + max_size_str + ")";
846         string      counter_description = "The number of bytes transferred to "
847                                           "the user as blobs size between " +
848                                           min_size_str + " and " +
849                                           max_size_str;
850         m_NamesMap[id] = SInfo(retieve_timing.get(), name, description,
851                                &m_BlobByteCounters[index],
852                                counter_id, counter_name, counter_description);
853         ++index;
854     }
855 
856     // Overwrite the default names and descriptions with what came from
857     // the configuration
858     auto        app = CPubseqGatewayApp::GetInstance();
859     auto        id_to_name_and_desc = app->GetIdToNameAndDescriptionMap();
860 
861     for (const auto &  item : id_to_name_and_desc) {
862         if (m_NamesMap.find(item.first) != m_NamesMap.end()) {
863             m_NamesMap[item.first].m_Name = get<0>(item.second);
864             m_NamesMap[item.first].m_Description = get<1>(item.second);
865         } else {
866             // May need to overwrite the associated counters names and
867             // descriptions
868             for (auto &  info : m_NamesMap) {
869                 if (info.second.m_CounterId == item.first) {
870                     info.second.m_CounterName = get<0>(item.second);
871                     info.second.m_CounterDescription = get<1>(item.second);
872                     break;
873                 }
874             }
875         }
876     }
877 }
878 
879 
x_SetupBlobSizeBins(unsigned long min_stat_value,unsigned long max_stat_value,unsigned long n_bins,TOnePSGTiming::EScaleType stat_type,unsigned long small_blob_size)880 bool COperationTiming::x_SetupBlobSizeBins(unsigned long  min_stat_value,
881                                            unsigned long  max_stat_value,
882                                            unsigned long  n_bins,
883                                            TOnePSGTiming::EScaleType  stat_type,
884                                            unsigned long  small_blob_size)
885 {
886     bool    reset_to_default = false;
887 
888     m_BlobRetrieveTiming.push_back(
889             unique_ptr<CBlobRetrieveTiming>(
890                 new CBlobRetrieveTiming(0, small_blob_size,
891                                         min_stat_value, max_stat_value,
892                                         n_bins, stat_type,
893                                         reset_to_default)));
894     m_BlobByteCounters.push_back(0);
895     m_Ends.push_back(small_blob_size);
896 
897     unsigned long   range_end = small_blob_size;
898     unsigned long   range_start = range_end + 1;
899     size_t          k = 0;
900 
901     for (;;) {
902         range_end = (size_t(1) << k) - 1;
903         if (range_end <= small_blob_size) {
904             ++k;
905             continue;
906         }
907 
908         m_BlobRetrieveTiming.push_back(
909                 unique_ptr<CBlobRetrieveTiming>(
910                     new CBlobRetrieveTiming(range_start, range_end,
911                                             min_stat_value, max_stat_value,
912                                             n_bins, stat_type,
913                                             reset_to_default)));
914         m_BlobByteCounters.push_back(0);
915         m_Ends.push_back(range_end);
916 
917         range_start = range_end + 1;
918         if (range_start >= kMaxBlobSize)
919             break;
920         ++k;
921     }
922 
923     return reset_to_default;
924 }
925 
926 
x_GetBlobRetrievalBinIndex(unsigned long blob_size)927 ssize_t COperationTiming::x_GetBlobRetrievalBinIndex(unsigned long  blob_size)
928 {
929     if (blob_size >= kMaxBlobSize)
930         return -1;
931 
932     for (size_t  index = 0; ; ++index) {
933         if (m_Ends[index] >= blob_size)
934             return index;
935     }
936     return -1;
937 }
938 
939 
Register(EPSGOperation operation,EPSGOperationStatus status,const TPSGS_HighResolutionTimePoint & op_begin_ts,unsigned long blob_size)940 void COperationTiming::Register(EPSGOperation  operation,
941                                 EPSGOperationStatus  status,
942                                 const TPSGS_HighResolutionTimePoint &  op_begin_ts,
943                                 unsigned long  blob_size)
944 {
945     auto            now = chrono::high_resolution_clock::now();
946     uint64_t        mks = chrono::duration_cast<chrono::microseconds>(now - op_begin_ts).count();
947 
948     size_t          index = 0;
949     if (status == eOpStatusNotFound)
950         index = 1;
951 
952     switch (operation) {
953         case eLookupLmdbSi2csi:
954             m_LookupLmdbSi2csiTiming[index]->Add(mks);
955             break;
956         case eLookupLmdbBioseqInfo:
957             m_LookupLmdbBioseqInfoTiming[index]->Add(mks);
958             break;
959         case eLookupLmdbBlobProp:
960             m_LookupLmdbBlobPropTiming[index]->Add(mks);
961             break;
962         case eLookupCassSi2csi:
963             m_LookupCassSi2csiTiming[index]->Add(mks);
964             break;
965         case eLookupCassBioseqInfo:
966             m_LookupCassBioseqInfoTiming[index]->Add(mks);
967             break;
968         case eLookupCassBlobProp:
969             m_LookupCassBlobPropTiming[index]->Add(mks);
970             break;
971         case eResolutionLmdb:
972             m_ResolutionLmdbTiming[index]->Add(mks);
973             break;
974         case eResolutionCass:
975             m_ResolutionCassTiming[index]->Add(mks);
976             break;
977         case eBlobRetrieve:
978             if (status == eOpStatusNotFound)
979                 m_NotFoundBlobRetrievalTiming->Add(mks);
980             else {
981                 ssize_t     bin_index = x_GetBlobRetrievalBinIndex(blob_size);
982                 if (bin_index < 0) {
983                     m_HugeBlobByteCounter += blob_size;
984                     m_HugeBlobRetrievalTiming->Add(mks);
985                 } else {
986                     m_BlobByteCounters[bin_index] += blob_size;
987                     m_BlobRetrieveTiming[bin_index]->Add(mks);
988                 }
989             }
990             break;
991         case eNARetrieve:
992             m_NARetrieveTiming[index]->Add(mks);
993             break;
994         case eSplitHistoryRetrieve:
995             m_SplitHistoryRetrieveTiming[index]->Add(mks);
996             break;
997         case ePublicCommentRetrieve:
998             m_PublicCommentRetrieveTiming[index]->Add(mks);
999             break;
1000         case eResolutionError:
1001             m_ResolutionErrorTiming->Add(mks);
1002             break;
1003         case eResolutionNotFound:
1004             m_ResolutionNotFoundTiming->Add(mks);
1005             break;
1006         case eResolutionFound:
1007             m_ResolutionFoundTiming->Add(mks);
1008             break;
1009         case eResolutionFoundInCassandra:
1010             // The blob_size here is the number of queries of Cassandra
1011             index = blob_size - 1;
1012             if (index > 4)
1013                 index = 4;
1014             m_ResolutionFoundCassandraTiming[index]->Add(mks);
1015             break;
1016     }
1017 }
1018 
1019 
Rotate(void)1020 void COperationTiming::Rotate(void)
1021 {
1022     lock_guard<mutex>   guard(m_Lock);
1023 
1024     for (size_t  k = 0; k <= 1; ++k) {
1025         m_LookupLmdbSi2csiTiming[k]->Rotate();
1026         m_LookupLmdbBioseqInfoTiming[k]->Rotate();
1027         m_LookupLmdbBlobPropTiming[k]->Rotate();
1028         m_LookupCassSi2csiTiming[k]->Rotate();
1029         m_LookupCassBioseqInfoTiming[k]->Rotate();
1030         m_LookupCassBlobPropTiming[k]->Rotate();
1031         m_ResolutionLmdbTiming[k]->Rotate();
1032         m_ResolutionCassTiming[k]->Rotate();
1033         m_NARetrieveTiming[k]->Rotate();
1034         m_SplitHistoryRetrieveTiming[k]->Rotate();
1035         m_PublicCommentRetrieveTiming[k]->Rotate();
1036     }
1037 
1038     m_HugeBlobRetrievalTiming->Rotate();
1039     m_NotFoundBlobRetrievalTiming->Rotate();
1040 
1041     m_ResolutionErrorTiming->Rotate();
1042     m_ResolutionNotFoundTiming->Rotate();
1043     m_ResolutionFoundTiming->Rotate();
1044     for (auto &  item : m_ResolutionFoundCassandraTiming)
1045         item->Rotate();
1046 
1047     for (auto &  item : m_BlobRetrieveTiming)
1048         item->Rotate();
1049 }
1050 
1051 
Reset(void)1052 void COperationTiming::Reset(void)
1053 {
1054     lock_guard<mutex>   guard(m_Lock);
1055 
1056     for (size_t  k = 0; k <= 1; ++k) {
1057         m_LookupLmdbSi2csiTiming[k]->Reset();
1058         m_LookupLmdbBioseqInfoTiming[k]->Reset();
1059         m_LookupLmdbBlobPropTiming[k]->Reset();
1060         m_LookupCassSi2csiTiming[k]->Reset();
1061         m_LookupCassBioseqInfoTiming[k]->Reset();
1062         m_LookupCassBlobPropTiming[k]->Reset();
1063         m_ResolutionLmdbTiming[k]->Reset();
1064         m_ResolutionCassTiming[k]->Reset();
1065         m_NARetrieveTiming[k]->Reset();
1066         m_SplitHistoryRetrieveTiming[k]->Reset();
1067         m_PublicCommentRetrieveTiming[k]->Reset();
1068     }
1069 
1070     m_HugeBlobRetrievalTiming->Reset();
1071     m_NotFoundBlobRetrievalTiming->Reset();
1072 
1073     m_ResolutionErrorTiming->Reset();
1074     m_ResolutionNotFoundTiming->Reset();
1075     m_ResolutionFoundTiming->Reset();
1076     for (auto &  item : m_ResolutionFoundCassandraTiming)
1077         item->Reset();
1078 
1079     for (auto &  item : m_BlobRetrieveTiming)
1080         item->Reset();
1081 
1082     for (auto &  item : m_BlobByteCounters)
1083         item = 0;
1084     m_HugeBlobByteCounter = 0;
1085 }
1086 
1087 
1088 CJsonNode
Serialize(int most_ancient_time,int most_recent_time,const vector<CTempString> & histogram_names,unsigned long tick_span) const1089 COperationTiming::Serialize(int  most_ancient_time,
1090                             int  most_recent_time,
1091                             const vector<CTempString> &  histogram_names,
1092                             unsigned long  tick_span) const
1093 {
1094     static string   kSecondsCovered("SecondsCovered");
1095 
1096     lock_guard<mutex>       guard(m_Lock);
1097 
1098     CJsonNode       ret(CJsonNode::NewObjectNode());
1099 
1100     // All the histograms have the same number of covered ticks
1101     ret.SetInteger(kSecondsCovered,
1102                    tick_span * m_ResolutionFoundTiming->GetNumberOfCoveredTicks());
1103 
1104     if (histogram_names.empty()) {
1105         for (const auto &  name_to_histogram : m_NamesMap) {
1106             ret.SetByKey(name_to_histogram.first,
1107                          name_to_histogram.second.m_Timing->SerializeCombined(
1108                              most_ancient_time,
1109                              most_recent_time,
1110                              tick_span,
1111                              name_to_histogram.second.m_Name,
1112                              name_to_histogram.second.m_Description));
1113             if (name_to_histogram.second.m_Counter != nullptr) {
1114                 CJsonNode       bytes_counter(CJsonNode::NewObjectNode());
1115                 bytes_counter.SetString("name",
1116                                         name_to_histogram.second.m_CounterName);
1117                 bytes_counter.SetString("description",
1118                                         name_to_histogram.second.m_CounterDescription);
1119                 bytes_counter.SetInteger("bytes",
1120                                          *name_to_histogram.second.m_Counter);
1121                 ret.SetByKey(name_to_histogram.second.m_CounterId,
1122                              bytes_counter);
1123             }
1124         }
1125     } else {
1126         for (const auto &  name : histogram_names) {
1127             string      histogram_name(name.data(), name.size());
1128             const auto  iter = m_NamesMap.find(histogram_name);
1129             if (iter != m_NamesMap.end()) {
1130                 ret.SetByKey(
1131                     histogram_name,
1132                     iter->second.m_Timing->SerializeSeries(most_ancient_time,
1133                                                            most_recent_time,
1134                                                            tick_span,
1135                                                            iter->second.m_Name,
1136                                                            iter->second.m_Description));
1137                 if (iter->second.m_Counter != nullptr) {
1138                     CJsonNode       bytes_counter(CJsonNode::NewObjectNode());
1139                     bytes_counter.SetString("name",
1140                                             iter->second.m_CounterName);
1141                     bytes_counter.SetString("description",
1142                                             iter->second.m_CounterDescription);
1143                     bytes_counter.SetInteger("bytes",
1144                                              *(iter->second.m_Counter));
1145                     ret.SetByKey(iter->second.m_CounterId,
1146                                  bytes_counter);
1147                 }
1148             }
1149         }
1150     }
1151 
1152     return ret;
1153 }
1154