1 /*
2 Redistribution and use in source and binary forms, with or without
3 modification, are permitted provided that the following conditions are met:
4 
5     * Redistributions of source code must retain the above copyright
6     notice, this list of conditions and the following disclaimer.
7 
8     * Redistributions in binary form must reproduce the above copyright
9     notice, this list of conditions and the following disclaimer in the
10     documentation and/or other materials provided with the distribution.
11 
12     * Neither the name of "The Computer Language Benchmarks Game" nor the
13     name of "The Computer Language Shootout Benchmarks" nor the names of
14     its contributors may be used to endorse or promote products derived
15     from this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdlib.h>
34 #include <glib.h>
35 
36 typedef struct stat_s stat_t;
37 struct stat_s
38 {
39    const gchar *key;
40    long stat;
41 };
42 
43 #define MAX_ELM (8192 / sizeof (stat_t))
44 
45 static int
generate_frequencies(int fl,char * buffer,long buflen,GHashTable * ht,GTrashStack ** ts,GPtrArray * roots,GStringChunk * sc)46 generate_frequencies (int fl, char *buffer, long buflen,
47 		      GHashTable *ht, GTrashStack **ts, GPtrArray *roots, GStringChunk *sc)
48 {
49    gchar *key;
50    long i;
51 
52    if (fl > buflen) return 0;
53    if (fl == 0) return 0;
54 
55    for (i = 0; i < buflen - fl + 1; ++i)
56      {
57 	char nulled;
58 	stat_t *stat;
59 
60 	nulled = buffer[i + fl];
61 	buffer[i + fl] = '\0';
62 
63 	key = g_string_chunk_insert_const(sc, buffer + i);
64 
65 	stat = g_hash_table_lookup(ht, key);
66 	if (!stat)
67 	  {
68 	     stat = g_trash_stack_pop(ts);
69 	     if (!stat)
70 	       {
71 		  int j;
72 
73 		  stat = malloc(sizeof (stat_t) * MAX_ELM);
74 		  g_ptr_array_add(roots, stat);
75 
76 		  for (j = 1; j < MAX_ELM; ++j)
77 		    g_trash_stack_push(ts, stat + j);
78 	       }
79 	     stat->stat = 1;
80 	     stat->key = key;
81 
82 	     g_hash_table_insert(ht, key, stat);
83 	  }
84 	else
85 	  stat->stat++;
86 
87 	buffer[i + fl] = nulled;
88      }
89 
90    return buflen - fl + 1;
91 }
92 
93 static int
cmp_func(gconstpointer a,gconstpointer b)94 cmp_func(gconstpointer a, gconstpointer b)
95 {
96    const stat_t *left = a;
97    const stat_t *right = b;
98 
99    return right->stat - left->stat;
100 }
101 
102 static void
sorted_list(gpointer key,gpointer value,gpointer user_data)103 sorted_list(gpointer key, gpointer value, gpointer user_data)
104 {
105    stat_t *data = value;
106    GList **lst = user_data;
107 
108    *lst = g_list_insert_sorted(*lst, data, cmp_func);
109 }
110 
111 static void
display_stat(gpointer data,gpointer user_data)112 display_stat(gpointer data, gpointer user_data)
113 {
114    long *total = user_data;
115    stat_t *st = data;
116 
117    printf("%s %.3f\n", st->key, 100 * (float) st->stat / *total);
118 }
119 
120 void
write_frequencies(int fl,char * buffer,long buflen,GTrashStack ** ts,GPtrArray * roots)121 write_frequencies (int fl, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
122 {
123    GStringChunk *sc;
124    GHashTable *ht;
125    GList *lst;
126    long total;
127 
128    ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
129    sc = g_string_chunk_new(buflen);
130    lst = NULL;
131 
132    total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
133 
134    if (!total) goto on_error;
135 
136    g_hash_table_foreach(ht, sorted_list, &lst);
137    g_list_foreach(lst, display_stat, &total);
138    g_list_free(lst);
139 
140  on_error:
141    g_hash_table_destroy(ht);
142    g_string_chunk_free(sc);
143 }
144 
145 void
write_count(char * searchFor,char * buffer,long buflen,GTrashStack ** ts,GPtrArray * roots)146 write_count (char *searchFor, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
147 {
148    GStringChunk *sc;
149    GHashTable *ht;
150    stat_t *result;
151    GList *lst;
152    long total;
153    long fl;
154 
155    fl = strlen(searchFor);
156 
157    ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
158    sc = g_string_chunk_new(buflen);
159    lst = NULL;
160    result = NULL;
161 
162    total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
163 
164    if (!total) goto on_error;
165 
166    result = g_hash_table_lookup(ht, searchFor);
167 
168  on_error:
169    printf("%ld\t%s\n", result ? result->stat : 0, searchFor);
170 
171    g_hash_table_destroy(ht);
172    g_string_chunk_free(sc);
173 }
174 
175 int
main()176 main ()
177 {
178    char buffer[4096];
179    GTrashStack *ts;
180    GPtrArray *roots;
181    GString *stuff;
182    gchar *s;
183    int len;
184 
185    roots = g_ptr_array_new();
186    ts = NULL;
187 
188    while (fgets(buffer, sizeof (buffer), stdin))
189      if (strncmp(buffer, ">THREE", 6) == 0)
190        break;
191 
192    stuff = g_string_new(NULL);
193 
194    while (fgets(buffer, sizeof (buffer), stdin))
195      {
196 	size_t sz;
197 
198 	if (buffer[0] == '>')
199 	  break;
200 
201 	sz = strlen(buffer);
202 	if (buffer[sz - 1] == '\n')
203 	  --sz;
204 
205 	stuff = g_string_append_len(stuff, buffer, sz);
206      }
207 
208    stuff = g_string_ascii_up(stuff);
209    len = stuff->len;
210    s = g_string_free(stuff, FALSE);
211 
212    write_frequencies(1, s, len, &ts, roots);
213    printf("\n");
214    write_frequencies(2, s, len, &ts, roots);
215    printf("\n");
216    write_count("GGT", s, len, &ts, roots);
217    write_count("GGTA", s, len, &ts, roots);
218    write_count("GGTATT", s, len, &ts, roots);
219    write_count("GGTATTTTAATT", s, len, &ts, roots);
220    write_count("GGTATTTTAATTTATAGT", s, len, &ts, roots);
221 
222    free(s);
223 
224    g_ptr_array_foreach(roots, (GFunc)free, NULL);
225    g_ptr_array_free(roots, TRUE);
226 
227    return 0;
228 }
229