1 /*===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 #include <vdb/extern.h>
27 #include <insdc/insdc.h>
28 #include <klib/defs.h>
29 #include <klib/rc.h>
30 #include <vdb/table.h>
31 #include <vdb/xform.h>
32 #include <vdb/schema.h>
33 #include <kdb/meta.h>
34 #include <klib/data-buffer.h>
35 #include <bitstr.h>
36 #include <sysalloc.h>
37 
38 #include <stdint.h>
39 #include <stdlib.h>
40 #include <assert.h>
41 #include <string.h>
42 #include <stdio.h>
43 
44 /****************************** tokenize_var_id *******************************/
45 /* typedef uint16_t text_token [ 3 ]; */
46 
47 static
tokenize_var_id(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])48 rc_t CC tokenize_var_id ( void *data, const VXformInfo *info, int64_t row_id,
49     VRowResult *rslt, uint32_t argc, const VRowData argv [] )
50 {
51     rc_t rc = 0;
52     int pos = 0;
53     unsigned const var_id_len = argv[0].u.data.elem_count;
54     struct
55     {
56         uint16_t token_type;
57         uint16_t position;
58         uint16_t length;
59     } *dst;
60     const char *var_id	= argv[0].u.data.base;
61     var_id += argv[0].u.data.first_elem;
62 
63     rslt->data->elem_bits = sizeof(dst[0]) * 8;
64     rc = KDataBufferResize( rslt -> data, 2 );
65     if ( rc != 0 ) return rc;
66     rslt -> elem_count = 2;
67     dst = rslt -> data -> base;
68     memset(dst, 0, 2 * sizeof *dst);
69 
70     /* ([A-Za-z]*)(\d*) */
71     if (var_id_len > 0) {
72         for (pos = var_id_len - 1; pos >= 0; --pos) {
73             if (var_id[pos] < '0' || var_id[pos] > '9') {
74                 ++pos;
75                 break;
76             }
77             if (pos == 0) { /* all numbers */
78                 break;
79             }
80         }
81     }
82     dst [ 1 ] . position = pos;
83     dst [ 1 ] . length = var_id_len - pos;
84     dst [ 0 ] . length = var_id_len - dst [ 1 ] . length;
85 
86     return rc;
87 }
88 
89 /*
90  * tokenize_var_id
91  *   splits into 2 tokens
92  *   0 - prefix
93  *   1 - suffix
94  *
95  * extern function
96  * text:token NCBI:var:tokenize_var_id #1 ( ascii var_id );
97  */
98 VTRANSFACT_IMPL ( NCBI_var_tokenize_var_id, 1, 0, 0 ) ( const void *Self,
99     const VXfactInfo *info, VFuncDesc *rslt,
100     const VFactoryParams *cp, const VFunctionParams *dp )
101 {
102     rslt->u.rf = tokenize_var_id;
103     rslt->variant = vftRow;
104     return 0;
105 }
106