#ifndef OBJTOOLS_WRITERS_WRITEDB__WRITEDB_IMPL_HPP
#define OBJTOOLS_WRITERS_WRITEDB__WRITEDB_IMPL_HPP

/*  $Id: writedb_impl.hpp 610974 2020-06-26 12:59:33Z grichenk $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Kevin Bealer
 *
 */

/// @file writedb_impl.hpp
/// Defines implementation class of WriteDB.
///
/// Defines classes:
///     CWriteDB_Impl
///
/// Implemented for: UNIX, MS-Windows

#include <objects/seq/seq__.hpp>
#include <objects/blastdb/blastdb__.hpp>
#include <objects/blastdb/defline_extra.hpp>
#include <objtools/blast/seqdb_writer/writedb.hpp>
#include <objtools/blast/seqdb_reader/seqdbcommon.hpp>
#include <objtools/blast/seqdb_writer/writedb_lmdb.hpp>
#include "writedb_volume.hpp"
#include "writedb_gimask.hpp"
#include "mask_info_registry.hpp"

#include <objmgr/bioseq_handle.hpp>
#include <objmgr/seq_vector.hpp>

BEGIN_NCBI_SCOPE

/// Import definitions from the objects namespace.
USING_SCOPE(objects);

/// CWriteDB_Impl class
///
/// Implementation class of WriteDB.  It accepts sequence data, header
/// data (deflines), masking data and user-defined column blobs through
/// the methods below, and keeps track of the database volumes that the
/// accumulated data is published to.

class CWriteDB_Impl {
public:
    /// Whether and what kind of indices to build.
    typedef CWriteDB::EIndexType EIndexType;

    // Setup and control

    /// Constructor.
    /// @param dbname Name of the database to create.
    /// @param protein True for protein, false for nucleotide.
    /// @param title Title string for volumes and alias file.
    /// @param indices Type of indexing to do for string IDs.
    /// @param parse_ids If true generate ISAM files
    /// @param long_ids If true, assume long sequence ids (database|accession)
    /// when parsing strings ids
    /// @param use_gi_mask If true generate GI-based mask files.
    /// @param dbver BLASTDB version (defaults to eBDB_Version4).
    /// @param limit_defline NOTE(review): not documented in the original
    /// header; semantics are not visible here (see m_limitDefline) --
    /// confirm against the implementation.
    CWriteDB_Impl(const string & dbname,
                  bool           protein,
                  const string & title,
                  EIndexType     indices,
                  bool           parse_ids,
                  bool           long_ids,
                  bool           use_gi_mask,
                  EBlastDbVersion dbver = eBDB_Version4,
                  bool           limit_defline = false);

    /// Destructor.
    ~CWriteDB_Impl();

    /// Close the file and flush any remaining data to disk.
    void Close();

    // Sequence Data

    /// Add a new sequence as raw sequence and ambiguity data.
    ///
    /// A new sequence record is started, and data from any previous
    /// sequence is combined and written to disk.  Each sequence needs
    /// sequence data and header data.  This method takes sequence
    /// data in the form of separated sequence data and compressed
    /// ambiguities packed in the blast database disk format.  It is
    /// intended for efficiently copying sequences from sources that
    /// provide this format, such as CSeqDBExpert().  If this method
    /// is used for protein data, the ambiguities string should be
    /// empty.  If this method is used, header data must also be
    /// specified with a call to SetDeflines().
    ///
    /// @param sequence Sequence data in blast db disk format.
    /// @param ambiguities Ambiguity data in blast db disk format.
    void AddSequence(const CTempString & sequence,
                     const CTempString & ambiguities);

    /// Add a new sequence as a CBioseq.
    ///
    /// A new sequence record is started, and data from any previous
    /// sequence is combined and written to disk.  Each sequence needs
    /// sequence data and header data.  This method can extract both
    /// from the provided CBioseq.  If other header data is preferred,
    /// SetDeflines() can be called after this method to replace the
    /// header data from the CBioseq.  Note that CBioseqs from some
    /// sources are not guaranteed to contain sequence data; if this
    /// might be the case, consider the versions of AddSequence that
    /// take either CBioseq_Handle or CBioseq and CSeqVector.  In
    /// order to use this method, sequence data should be accessible
    /// from bs.GetInst().GetSeq_data().  (Note: objects provided to
    /// WriteDB will be kept alive until the next AddSequence call.)
    ///
    /// @param bs Bioseq containing sequence and header data.
    void AddSequence(const CBioseq & bs);

    /// Add a new sequence as a CBioseq_Handle.
    ///
    /// A new sequence record is started, and data from any previous
    /// sequence is combined and written to disk.  Each sequence needs
    /// sequence data and header data.  This method can extract both
    /// from the provided CBioseq_Handle.  If other header data is
    /// preferred, SetDeflines() can be called after this method to
    /// replace the header data from the CBioseq.  (Note: objects
    /// provided to WriteDB will be kept alive until the next
    /// AddSequence call.)
    ///
    /// @param bsh Bioseq_Handle for sequence to add.
    void AddSequence(const CBioseq_Handle & bsh);

    /// Add a new sequence as a CBioseq plus a CSeqVector.
    ///
    /// A new sequence record is started, and data from any previous
    /// sequence is combined and written to disk.  Each sequence needs
    /// sequence data and header data.  This method will extract
    /// header data from the provided CBioseq.  If the CBioseq
    /// contains sequence data, it will be used; otherwise sequence
    /// data will be fetched from the provided CSeqVector.  If other
    /// header data is preferred, SetDeflines() can be called after
    /// this method.  (Note: objects provided to WriteDB will be kept
    /// alive until the next AddSequence call.)
    ///
    /// @param bs Bioseq for header data and, if present, sequence data.
    /// @param sv CSeqVector for sequence data.
    void AddSequence(const CBioseq & bs, CSeqVector & sv);

    /// Set the header data (deflines) for the current sequence.
    ///
    /// This method replaces any stored header data for the current
    /// sequence with the provided CBlast_def_line_set.  Header data
    /// can be constructed directly by the caller, or extracted from
    /// an existing CBioseq using ExtractBioseqDeflines (see below).
    /// Once it is in the correct form, it can be attached to the
    /// sequence with this method.  (Note: objects provided to WriteDB
    /// will be kept alive until the next AddSequence call.)
    ///
    /// @param deflines Header data for the most recent sequence.
    void SetDeflines(const CBlast_def_line_set & deflines);

    /// Set the PIG identifier of this sequence.
    ///
    /// For protein sequences, this sets the PIG identifier.  PIG ids
    /// are per-sequence, so it will only be attached to the first
    /// defline in the set.
    ///
    /// @param pig PIG identifier as an integer.
    void SetPig(int pig);

    // Options

    /// Set the maximum size for any file in the database.
    ///
    /// This method sets the maximum size for any file in a database
    /// volume.  If adding a sequence would cause any file in the
    /// generated database to exceed this size, the current volume is
    /// ended and a new volume is started.  This is not a strict
    /// limit, inasmuch as it always puts at least one sequence in
    /// each volume regardless of that sequence's size.
    ///
    /// @param sz Maximum file size (in bytes).
    void SetMaxFileSize(Uint8 sz);

    /// Set the maximum letters in one volume.
    ///
    /// This method sets the maximum number of sequence letters per
    /// database volume.  If adding a sequence would cause the volume
    /// to have more than this many letters, the current volume is
    /// ended and a new volume is started.  This is not a strict
    /// limit, inasmuch as it always puts at least one sequence in
    /// each volume regardless of that sequence's size.
    ///
    /// @param sz Maximum sequence letters per volume.
    void SetMaxVolumeLetters(Uint8 sz);

    /// Extract deflines from a CBioseq.
    ///
    /// Given a CBioseq, this method extracts and returns header info
    /// as a defline set.  The deflines will not be applied to the
    /// current sequence unless passed to SetDeflines.  The expected
    /// use of this method is in cases where the caller has a CBioseq
    /// or CBioseq_Handle but wishes to examine and/or change the
    /// deflines before passing them to CWriteDB.  Some elements of
    /// the CBioseq may be shared by the returned defline set, notably
    /// the Seq-ids.
    ///
    /// @param bs Bioseq from which to construct the defline set.
    /// @param parse_ids If we should parse seq_ids.
    /// @param long_seqids If true use long sequence ids (database|accession)
    /// @return The blast defline set.
    static CRef<CBlast_def_line_set>
    ExtractBioseqDeflines(const CBioseq & bs, bool parse_ids, bool long_seqids);

    /// Set bases that should not be used in sequences.
    ///
    /// This method specifies nucleotide or protein bases that should
    /// not be used in the resulting database.  The bases in question
    /// will be replaced with N (for nucleotide) or X (for protein).
    /// The input data is expected to be specified in the appropriate
    /// 'alphabetic' encoding (either IUPACAA or IUPACNA).
    ///
    /// @param masked The letters to mask, in the alphabetic encoding
    /// described above.
    void SetMaskedLetters(const string & masked);

    /// List Volumes
    ///
    /// Returns the base names of all volumes constructed by this
    /// class; the returned list may not be complete until Close() has
    /// been called.
    ///
    /// @param vols
    ///   The set of volumes produced by this class. [out]
    void ListVolumes(vector<string> & vols);

    /// List Filenames
    ///
    /// Returns a list of the files constructed by this class; the
    /// returned list may not be complete until Close() has been
    /// called.
    ///
    /// @param files
    ///   The set of resolved database path names. [out]
    void ListFiles(vector<string> & files);

    /// Register a type of filtering data found in this database.
    ///
    /// The BlastDb format supports storage of masking data (lists of
    /// masked ranges) for each database sequence, as well as an
    /// indication of the source (or sources) of this masking data (e.g.:
    /// masking algorithm used to create them).
    /// This method stores a description of one of these masking data
    /// sources in this database, including which basic algorithm was
    /// used, as well as the options passed to that algorithm.  Each
    /// description is associated with a numeric `algorithm id' (return value
    /// of this method), which identifies that data source when adding data
    /// with SetMaskData.
    ///
    /// @return algorithm ID for the filtering data.
    /// @param program Program used to produce this masking data. [in]
    /// @param options Algorithm options provided to the program. [in]
    /// @param name Name of a GI-based mask [in]
    int RegisterMaskAlgorithm(EBlast_filter_program program,
                              const string        & options,
                              const string        & name = "");

    /// Register a type of filtering data found in this database.
    ///
    /// Same contract as the overload above, except that the masking
    /// data source is identified by a caller-supplied string id and a
    /// free-form description instead of an EBlast_filter_program value.
    ///
    /// The BlastDb format supports storage of masking data (lists of
    /// masked ranges) for each database sequence, as well as an
    /// indication of the source (or sources) of this masking data (e.g.:
    /// masking algorithm used to create them).
    /// This method stores a description of one of these masking data
    /// sources in this database, including which basic algorithm was
    /// used, as well as the options passed to that algorithm.  Each
    /// description is associated with a numeric `algorithm id' (return value
    /// of this method), which identifies that data source when adding data
    /// with SetMaskData.
    ///
    /// @return algorithm ID for the filtering data.
    /// @param id A string to identify this masking data. [in]
    /// @param description Details about the masking data. [in]
    /// @param options Algorithm options provided to the program. [in]
    int RegisterMaskAlgorithm(const string & id,
                              const string & description,
                              const string & options);

    /// Set filtering data for a sequence.
    ///
    /// This method specifies filtered regions for the sequence.  Each
    /// sequence can have filtering data from various algorithms.
    ///
    /// @param ranges Filtered ranges for this sequence and algorithm.
    /// @param gis The GIs associated with this sequence
    void SetMaskData(const CMaskedRangesVector & ranges,
                     const vector <TGi>        & gis);

    /// Set up a generic CWriteDB metadata column.
    ///
    /// This method creates a column with the specified name (title).
    /// The name must be unique among names provided to this database.
    /// An integer column descriptor is returned, which must be used
    /// to identify this column when applying blob data.  This call
    /// will fail with an exception if too many user defined columns
    /// have already been created for this database (this limit is due
    /// to BlastDb file naming conventions).  The title identifies
    /// this column and is also used to access the column with SeqDB.
    ///
    /// @param title Name identifying this column.
    /// @param mbo NOTE(review): not documented in the original header and
    /// its meaning is not visible here -- confirm against the
    /// implementation.  Defaults to false.
    /// @return Column identifier (a positive integer).
    int CreateColumn(const string & title, bool mbo=false);

    /// Find an existing column.
    ///
    /// This looks for an existing column with the specified title and
    /// returns the column ID if found.
    ///
    /// @param title The column title to look for.
    /// @return The column ID if this column title is already defined.
    int FindColumn(const string & title) const;

    /// Add meta data to a column.
    ///
    /// In addition to normal blob data, database columns can store a
    /// `dictionary' of user-defined metadata in key/value form.  This
    /// method adds one such key/value pair to the column.  Specifying
    /// a key a second time causes replacement of the previous value.
    /// Using this mechanism to store large amounts of data may have a
    /// negative impact on performance.
    ///
    /// @param col_id Specifies the column to add this metadata to.
    /// @param key A unique key string.
    /// @param value A value string.
    void AddColumnMetaData(int            col_id,
                           const string & key,
                           const string & value);

    /// Get a blob to use for a given column letter.
    ///
    /// To add data for a `blob' type column, this method should be
    /// called to get a reference to a CBlastDbBlob object.  Add the
    /// user-defined blob data to this object.  It is not correct to
    /// call this more than once for the same sequence and column.
    /// Reading, writing, or otherwise using this object after the
    /// current sequence is published is an error and has undefined
    /// consequences.  ('Publishing' of a sequence usually occurs
    /// during the following AddSequence(*) call or during Close().)
    ///
    /// @param col_id Indicates the column receiving the blob data.
    /// @return The user data should be stored in this blob.
    CBlastDbBlob & SetBlobData(int col_id);

private:
    // Configuration

    string        m_Dbname;           ///< Database base name.
    bool          m_Protein;          ///< True if DB is protein.
    string        m_Title;            ///< Title field of database.
    string        m_Date;             ///< Time stamp (for all volumes.)
    Uint8         m_MaxFileSize;      ///< Maximum size of any file.
    Uint8         m_MaxVolumeLetters; ///< Max letters per volume.
    EIndexType    m_Indices;          ///< Indexing mode.
    bool          m_Closed;           ///< True if database has been closed.
    string        m_MaskedLetters;    ///< Masked protein letters (IUPAC).
    string        m_MaskByte;         ///< Byte that replaces masked letters.
    vector<char>  m_MaskLookup;       ///< Is (blast-aa) byte masked?
    int           m_MaskDataColumn;   ///< Column ID for masking data column.
    map<int, int> m_MaskAlgoMap;      ///< Mapping from algo_id to gi-mask id
    bool          m_ParseIDs;         ///< Generate ISAM files
    bool          m_UseGiMask;        ///< Generate GI-based mask files
    EBlastDbVersion m_DbVersion;      ///< BLASTDB version

    /// Column titles.
    vector<string> m_ColumnTitles;

    // The column-metadata and GI-mask members below are compiled out for
    // the WorkShop compiler at version 550 or lower and for MIPSpro.
#if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
     (!defined(NCBI_COMPILER_MIPSPRO)) )
    /// Per-column metadata.
    typedef CWriteDB_Column::TColumnMeta TColumnMeta;

    /// Meta data for all columns.
    vector< TColumnMeta > m_ColumnMetas;

    /// Gi-based masks
    vector< CRef<CWriteDB_GiMask> > m_GiMasks;
#endif

    // Functions

    /// Flush accumulated sequence data to volume.
    void x_Publish();

    /// Compute name of alias file produced.
    string x_MakeAliasName();

    /// Produce the alias file.
    ///
    /// NOTE(review): the original comment here duplicated the one on
    /// x_Publish(); presumably this writes the alias file named by
    /// x_MakeAliasName() -- confirm against the implementation.
    void x_MakeAlias();

    /// Clear sequence data from last sequence.
    void x_ResetSequenceData();

    /// Convert and compute final data formats.
    void x_CookData();

    /// Convert header data into usable forms.
    void x_CookHeader();

    /// Collect ids for ISAM files.
    void x_CookIds();

    /// Compute the length of the current sequence.
    int x_ComputeSeqLength();

    /// Convert sequence data into usable forms.
    void x_CookSequence();

    /// Prepare column data to be appended to disk.
    void x_CookColumns();

    /// Replace masked input letters with m_MaskByte value.
    void x_MaskSequence();

    /// Get binary version of deflines from 'user' data in Bioseq.
    ///
    /// Some CBioseq objects (e.g. those from CSeqDB) have an ASN.1
    /// octet array containing a binary ASN.1 version of the blast
    /// defline set for the sequence.  This method looks for that data
    /// and returns it if found.  If not found, it returns an empty
    /// string.
    ///
    /// @param bioseq Bioseq from which to fetch header. [in]
    /// @param binhdr Header data as binary ASN.1. [out]
    static void x_GetBioseqBinaryHeader(const CBioseq & bioseq,
                                        string        & binhdr);

    /// Construct deflines from a CBioseq and other meta-data.
    ///
    /// This method builds deflines from various data found in the
    /// Bioseq, along with other meta data (like the PIG and
    /// membership and linkout lists.)
    ///
    /// @param bioseq Defline data will be built from this. [in]
    /// @param deflines A defline set will be returned here. [out]
    /// @param membits Membership bits for each defline. [in]
    /// @param linkout Linkout bits for each defline. [in]
    /// @param pig PIG to attach to a protein sequence. [in]
    static void
    x_BuildDeflinesFromBioseq(const CBioseq                  & bioseq,
                              CConstRef<CBlast_def_line_set> & deflines,
                              const vector< vector<int> >    & membits,
                              const vector< vector<int> >    & linkout,
                              int                              pig);

    /// Extract a defline set from a binary ASN.1 blob.
    /// @param bin_hdr Binary ASN.1 encoding of defline set. [in]
    /// @param deflines Defline set. [out]
    static void
    x_SetDeflinesFromBinary(const string                   & bin_hdr,
                            CConstRef<CBlast_def_line_set> & deflines);

    /// Extract a defline set from a CFastaReader generated CBioseq.
    ///
    /// CBioseq objects produced by CFastaReader have an internal
    /// 'user' field that contains the original FASTA, which can be
    /// used to build blast deflines.  If the original FASTA deflines
    /// were delimited with control-A characters, then those will be
    /// found here too.  If the caller wishes to accept '>' as an
    /// alternate delimiter, then accept_gt should be specified.
    ///
    /// @param bioseq Bioseq object produced by CFastaReader. [in]
    /// @param deflines Defline set. [out]
    /// @param membits Membership bits for each defline. [in]
    /// @param linkout Linkout bits for each defline. [in]
    /// @param pig PIG to attach to a protein sequence. [in]
    /// @param accept_gt Whether greater-than is a delimiter. [in]
    /// @param parse_ids Controls parsing of seq_ids. [in]
    ///   NOTE(review): the original text read "Whether seq_id should not
    ///   be parsed", which disagrees with ExtractBioseqDeflines ("If we
    ///   should parse seq_ids") -- confirm the polarity.
    /// @param long_seqids If true, use long sequence ids (database|accession)
    /// [in]
    static void
    x_GetFastaReaderDeflines(const CBioseq                  & bioseq,
                             CConstRef<CBlast_def_line_set> & deflines,
                             const vector< vector<int> >    & membits,
                             const vector< vector<int> >    & linkout,
                             int                              pig,
                             bool                             accept_gt,
                             bool                             parse_ids,
                             bool                             long_seqids);

    /// Returns true if we have unwritten sequence data.
    bool x_HaveSequence() const;

    /// Records that we now have unwritten sequence data.
    void x_SetHaveSequence();

    /// Records that we no longer have unwritten sequence data.
    void x_ClearHaveSequence();

    /// Get deflines from a CBioseq and other meta-data.
    ///
    /// This method extracts binary ASN.1 deflines from a CBioseq if
    /// possible, and otherwise builds deflines from various data
    /// found in the Bioseq, along with other meta data (like the PIG
    /// and membership and linkout lists.)  It returns the result as
    /// a blast defline set.  If a binary version of the headers is
    /// computed during this method, it will be returned in bin_hdr.
    ///
    /// @param bioseq Defline data will be built from this. [in]
    /// @param deflines A defline set will be returned here. [out]
    /// @param bin_hdr Binary header data may be returned here. [out]
    /// @param membbits Membership bits for each defline. [in]
    /// @param linkouts Linkout bits for each defline. [in]
    /// @param pig PIG to attach to a protein sequence. [in]
    /// @param tax_ids Set of taxonomy ids, passed by non-const reference.
    ///   NOTE(review): undocumented in the original; presumably filled or
    ///   extended by this method -- confirm.
    /// @param OID the current OID for local id. [in]
    /// @param parse_ids Controls parsing of ids; defaults to true. [in]
    ///   NOTE(review): the original text read "whether we should not
    ///   parse id" -- confirm the polarity.
    /// @param long_seqid NOTE(review): undocumented in the original;
    ///   presumably selects long sequence ids (database|accession) as in
    ///   ExtractBioseqDeflines -- confirm. [in]
    /// @param limit_defline NOTE(review): undocumented in the original;
    ///   semantics not visible in this header -- confirm. [in]
    static void x_ExtractDeflines(CConstRef<CBioseq>             & bioseq,
                                  CConstRef<CBlast_def_line_set> & deflines,
                                  string                         & bin_hdr,
                                  const vector< vector<int> >    & membbits,
                                  const vector< vector<int> >    & linkouts,
                                  int                              pig,
                                  set<TTaxId>                    & tax_ids,
                                  int                              OID=-1,
                                  bool                             parse_ids=true,
                                  bool                             long_seqid=false,
                                  bool                             limit_defline = false);

    /// Compute the hash of a (raw) sequence.
    ///
    /// The hash of the provided sequence will be computed and
    /// assigned to the m_Hash member.  The sequence and optional
    /// ambiguities are 'raw', meaning they are packed just as
    /// sequences are packed in nsq and psq files.
    ///
    /// @param sequence The sequence data. [in]
    /// @param ambiguities Nucleotide ambiguities are provided here. [in]
    void x_ComputeHash(const CTempString & sequence,
                       const CTempString & ambiguities);

    /// Compute the hash of a (Bioseq) sequence.
    ///
    /// The hash of the provided sequence will be computed and
    /// assigned to the m_Hash member.  The sequence is packed as a
    /// CBioseq.
    ///
    /// @param sequence The sequence as a CBioseq. [in]
    void x_ComputeHash(const CBioseq & sequence);

    /// Get the mask data column id.
    ///
    /// The mask data column is created if it does not exist, and its
    /// column ID number is returned.
    ///
    /// @return The column ID for the mask data column.
    int x_GetMaskDataColumnId();

    //
    // Accumulated sequence data.
    //

    /// Bioseq object for next sequence to write.
    CConstRef<CBioseq> m_Bioseq;

    /// SeqVector for next sequence to write.
    CSeqVector m_SeqVector;

    /// Deflines to write as header.
    CConstRef<CBlast_def_line_set> m_Deflines;

    /// Ids for next sequence to write, for use during ISAM construction.
    vector< CRef<CSeq_id> > m_Ids;

    /// Linkout bits - outer vector is per-defline, inner is bits.
    vector< vector<int> > m_Linkouts;

    /// Membership bits - outer vector is per-defline, inner is bits.
    vector< vector<int> > m_Memberships;

    /// PIG to attach to headers for protein sequences.
    int m_Pig;

    /// Sequence hash for this sequence.
    int m_Hash;

    /// When a sequence is added, this will be populated with the length of that sequence.
    int m_SeqLength;

    /// True if we have a sequence to write.
    bool m_HaveSequence;

    // Cooked

    /// Sequence data in format that will be written to disk.
    string m_Sequence;

    /// Ambiguities in format that will be written to disk.
    string m_Ambig;

    /// Binary header in format that will be written to disk.
    string m_BinHdr;

    /// Set of taxonomy ids.  NOTE(review): undocumented in the original;
    /// presumably the ids gathered for the sequence being written (see
    /// the tax_ids argument of x_ExtractDeflines) -- confirm.
    set<TTaxId> m_TaxIds;

    // Volumes

    /// This volume is currently accepting sequences.
    CRef<CWriteDB_Volume> m_Volume;

    /// List of all volumes so far, up to and including m_Volume.
    vector< CRef<CWriteDB_Volume> > m_VolumeList;

    /// Blob data for the current sequence, indexed by letter.
    vector< CRef<CBlastDbBlob> > m_Blobs;

    /// List of blob columns that are active for this sequence.
    vector<int> m_HaveBlob;

    /// Registry for masking algorithms in this database.
    CMaskInfoRegistry m_MaskAlgoRegistry;

    /// Write lmdb handle
    CRef <CWriteDB_LMDB> m_Lmdbdb;

    /// Write tax info handle
    CRef <CWriteDB_TaxID> m_Taxdb;

    /// If true, use long sequence id format (database|accession) for all
    /// accessions
    bool m_LongSeqId;

    /// Current oid to use for lmdb
    int m_LmdbOid;

    /// NOTE(review): undocumented in the original; presumably holds the
    /// limit_defline constructor argument -- confirm.
    bool m_limitDefline;
};

END_NCBI_SCOPE


#endif // OBJTOOLS_WRITERS_WRITEDB__WRITEDB_IMPL_HPP


