1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #include <vdb/extern.h>
27 #include <insdc/insdc.h>
28 #include <klib/defs.h>
29 #include <klib/rc.h>
30 #include <vdb/table.h>
31 #include <vdb/xform.h>
32 #include <vdb/schema.h>
33 #include <kdb/meta.h>
34 #include <klib/data-buffer.h>
35 #include <bitstr.h>
36 #include <sysalloc.h>
37
38 #include <stdint.h>
39 #include <stdlib.h>
40 #include <assert.h>
41 #include <string.h>
42 #include <stdio.h>
43
44 /****************************** tokenize_var_id *******************************/
45 /* typedef uint16_t text_token [ 3 ]; */
46
47 static
tokenize_var_id(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])48 rc_t CC tokenize_var_id ( void *data, const VXformInfo *info, int64_t row_id,
49 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
50 {
51 rc_t rc = 0;
52 int pos = 0;
53 unsigned const var_id_len = argv[0].u.data.elem_count;
54 struct
55 {
56 uint16_t token_type;
57 uint16_t position;
58 uint16_t length;
59 } *dst;
60 const char *var_id = argv[0].u.data.base;
61 var_id += argv[0].u.data.first_elem;
62
63 rslt->data->elem_bits = sizeof(dst[0]) * 8;
64 rc = KDataBufferResize( rslt -> data, 2 );
65 if ( rc != 0 ) return rc;
66 rslt -> elem_count = 2;
67 dst = rslt -> data -> base;
68 memset(dst, 0, 2 * sizeof *dst);
69
70 /* ([A-Za-z]*)(\d*) */
71 if (var_id_len > 0) {
72 for (pos = var_id_len - 1; pos >= 0; --pos) {
73 if (var_id[pos] < '0' || var_id[pos] > '9') {
74 ++pos;
75 break;
76 }
77 if (pos == 0) { /* all numbers */
78 break;
79 }
80 }
81 }
82 dst [ 1 ] . position = pos;
83 dst [ 1 ] . length = var_id_len - pos;
84 dst [ 0 ] . length = var_id_len - dst [ 1 ] . length;
85
86 return rc;
87 }
88
89 /*
90 * tokenize_var_id
91 * splits into 2 tokens
92 * 0 - prefix
93 * 1 - suffix
94 *
95 * extern function
96 * text:token NCBI:var:tokenize_var_id #1 ( ascii var_id );
97 */
98 VTRANSFACT_IMPL ( NCBI_var_tokenize_var_id, 1, 0, 0 ) ( const void *Self,
99 const VXfactInfo *info, VFuncDesc *rslt,
100 const VFactoryParams *cp, const VFunctionParams *dp )
101 {
102 rslt->u.rf = tokenize_var_id;
103 rslt->variant = vftRow;
104 return 0;
105 }
106