1 /*  $Id: id2info.cpp 629837 2021-04-22 12:47:49Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Sergey Satskiy
27  *
28  * File Description:
29  *   PSG ID2 info wrapper
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 USING_NCBI_SCOPE;
36 
37 #include "id2info.hpp"
38 #include "pubseq_gateway_exception.hpp"
39 #include "pubseq_gateway.hpp"
40 
41 
CPSGS_SatInfoChunksVerFlavorId2Info(const string & id2_info,bool count_errors)42 CPSGS_SatInfoChunksVerFlavorId2Info::CPSGS_SatInfoChunksVerFlavorId2Info(
43                                                     const string &  id2_info,
44                                                     bool  count_errors) :
45     m_Sat(0), m_Info(0), m_Chunks(0),
46     m_SplitVersion(0), m_SplitVersionPresent(false)
47 {
48     auto    app = CPubseqGatewayApp::GetInstance();
49 
50     // id2_info: "sat.info.nchunks[.splitversion]"
51     vector<string>          parts;
52     NStr::Split(id2_info, ".", parts);
53 
54     if (parts.size() < 3) {
55         if (count_errors)
56             app->GetCounters().Increment(CPSGSCounters::ePSGS_InvalidId2InfoError);
57         NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
58                    "Invalid id2 info '" + id2_info +
59                    "'. Expected 3 or more parts, found " +
60                    to_string(parts.size()) + " parts.");
61     }
62 
63     try {
64         m_Sat = NStr::StringToInt(parts[0]);
65         m_Info = NStr::StringToInt(parts[1]);
66         m_Chunks = NStr::StringToInt(parts[2]);
67 
68         if (parts.size() >= 4) {
69             m_SplitVersion = NStr::StringToInt(parts[3]);
70             m_SplitVersionPresent = true;
71         }
72     } catch (...) {
73         if (count_errors)
74             app->GetCounters().Increment(CPSGSCounters::ePSGS_InvalidId2InfoError);
75         NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
76                    "Invalid id2 info '" + id2_info +
77                    "'. Cannot convert parts into integers.");
78     }
79 
80 
81     // Validate
82     string      validate_message;
83     if (m_Sat <= 0)
84         validate_message = "Invalid id2 info SAT value. "
85             "Expected to be > 0. Received: " +
86             to_string(m_Sat) + ".";
87 
88     if (m_Info <= 0) {
89         if (!validate_message.empty())
90             validate_message += " ";
91         validate_message += "Invalid id2 info INFO value. "
92             "Expected to be > 0. Received: " +
93             to_string(m_Info) + ".";
94     }
95     if (m_Chunks <= 0) {
96         if (!validate_message.empty())
97             validate_message += " ";
98         validate_message += "Invalid id2 info NCHUNKS value. "
99             "Expected to be > 0. Received: " +
100             to_string(m_Chunks) + ".";
101     }
102 
103     if (!validate_message.empty()) {
104         if (count_errors)
105             app->GetCounters().Increment(CPSGSCounters::ePSGS_InvalidId2InfoError);
106         NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
107                    validate_message);
108     }
109 }
110 
111 
Serialize(void) const112 string CPSGS_SatInfoChunksVerFlavorId2Info::Serialize(void) const
113 {
114     if (m_SplitVersionPresent)
115         return to_string(m_Sat) + "." +
116                to_string(m_Info) + "." +
117                to_string(m_Chunks) + "." +
118                to_string(m_SplitVersion);
119 
120     return to_string(m_Sat) + "." +
121            to_string(m_Info) + "." +
122            to_string(m_Chunks);
123 }
124 
125 
126 
// Building blocks of the id2 info flavor which looks as follows:
// "psg~~tse_id-4.1234[~~tse_last_modified-98765][~~tse_split_version-888]"
// They are used both for parsing and for serializing and must never change
// at run time, so they are declared const.
static const string     kSeparator = "~~";
static const string     kPrefix = "psg";
static const string     kTseId = "tse_id-";
static const string     kTseLastModified = "tse_last_modified-";
static const string     kTseSplitVersion = "tse_split_version-";
132 
CPSGS_IdModifiedVerFlavorId2Info(const string & id2_info)133 CPSGS_IdModifiedVerFlavorId2Info::CPSGS_IdModifiedVerFlavorId2Info(
134                                                     const string &  id2_info) :
135     m_LastModified(0), m_SplitVersion(0),
136     m_LastModifiedPresent(false), m_SplitVersionPresent(false)
137 {
138     // id2_info:
139     // "psg~~tse_id-4.1234[~~tse_last_modified-98765][~~tse_split_version-888]"
140     list<string>            parts;
141     NStr::Split(id2_info, kSeparator, parts, NStr::fSplit_ByPattern);
142 
143     if (parts.size() < 2) {
144         NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
145                    "Invalid id2_info parameter value '" + id2_info +
146                    "'. Expected 2 or more (" + kSeparator +
147                    " separated) parts, found " +
148                    to_string(parts.size()) + " parts.");
149     }
150 
151     if (parts.front() != kPrefix) {
152         NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
153                    "Invalid id2_info parameter value '" + id2_info +
154                    "'. It has to start with '" + kPrefix + kSeparator + "'.");
155     }
156 
157     // Remove the prefix
158     parts.pop_front();
159 
160     bool    tse_id_found = false;
161     for (const auto &  part : parts) {
162         if (part.find(kTseId) == 0) {
163             m_TSEId = SCass_BlobId(part.substr(kTseId.size()));
164             if (!m_TSEId.IsValid()) {
165                 NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
166                            "Invalid id2 info part '" + part +
167                            "'. Cannot convert the part value "
168                            "into a pair '<sat>.<sat_key>'.");
169             }
170             tse_id_found = true;
171             continue;
172         }
173 
174         if (part.find(kTseLastModified) == 0) {
175             try {
176                 m_LastModified = NStr::StringToInt(part.substr(kTseLastModified.size()));
177                 m_LastModifiedPresent = true;
178             } catch (...) {
179                 NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
180                            "Invalid id2 info part '" + part +
181                            "'. Cannot convert the part value into an integer.");
182             }
183             continue;
184         }
185 
186         if (part.find(kTseSplitVersion) == 0) {
187             try {
188                 m_SplitVersion = NStr::StringToInt(part.substr(kTseSplitVersion.size()));
189                 m_SplitVersionPresent = true;
190             } catch (...) {
191                 NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
192                            "Invalid id2 info part '" + part +
193                            "'. Cannot convert the part value into an integer.");
194             }
195             continue;
196         }
197 
198         NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
199                    "Invalid id2_info parameter value '" + id2_info +
200                    "'. The part '" + part + "' is not recognized.");
201     }
202 
203     if (tse_id_found == false) {
204         NCBI_THROW(CPubseqGatewayException, eInvalidId2Info,
205                    "Invalid id2_info parameter value '" + id2_info +
206                    "'. The mandatory part '" + kTseId + "' is not found.");
207     }
208 }
209 
210 
211 
Serialize(void) const212 string CPSGS_IdModifiedVerFlavorId2Info::Serialize(void) const
213 {
214     string      ret = kPrefix + kSeparator + kTseId + m_TSEId.ToString();
215     if (m_LastModifiedPresent)
216         ret += kSeparator + kTseLastModified + to_string(m_LastModified);
217     if (m_SplitVersionPresent)
218         ret += kSeparator + kTseSplitVersion + to_string(m_SplitVersion);
219     return ret;
220 }
221 
222