1 /*
2 Redistribution and use in source and binary forms, with or without
3 modification, are permitted provided that the following conditions are met:
4
5 * Redistributions of source code must retain the above copyright
6 notice, this list of conditions and the following disclaimer.
7
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11
12 * Neither the name of "The Computer Language Benchmarks Game" nor the
13 name of "The Computer Language Shootout Benchmarks" nor the names of
14 its contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdlib.h>
34 #include <glib.h>
35
36 typedef struct stat_s stat_t;
37 struct stat_s
38 {
39 const gchar *key;
40 long stat;
41 };
42
43 #define MAX_ELM (8192 / sizeof (stat_t))
44
45 static int
generate_frequencies(int fl,char * buffer,long buflen,GHashTable * ht,GTrashStack ** ts,GPtrArray * roots,GStringChunk * sc)46 generate_frequencies (int fl, char *buffer, long buflen,
47 GHashTable *ht, GTrashStack **ts, GPtrArray *roots, GStringChunk *sc)
48 {
49 gchar *key;
50 long i;
51
52 if (fl > buflen) return 0;
53 if (fl == 0) return 0;
54
55 for (i = 0; i < buflen - fl + 1; ++i)
56 {
57 char nulled;
58 stat_t *stat;
59
60 nulled = buffer[i + fl];
61 buffer[i + fl] = '\0';
62
63 key = g_string_chunk_insert_const(sc, buffer + i);
64
65 stat = g_hash_table_lookup(ht, key);
66 if (!stat)
67 {
68 stat = g_trash_stack_pop(ts);
69 if (!stat)
70 {
71 int j;
72
73 stat = malloc(sizeof (stat_t) * MAX_ELM);
74 g_ptr_array_add(roots, stat);
75
76 for (j = 1; j < MAX_ELM; ++j)
77 g_trash_stack_push(ts, stat + j);
78 }
79 stat->stat = 1;
80 stat->key = key;
81
82 g_hash_table_insert(ht, key, stat);
83 }
84 else
85 stat->stat++;
86
87 buffer[i + fl] = nulled;
88 }
89
90 return buflen - fl + 1;
91 }
92
93 static int
cmp_func(gconstpointer a,gconstpointer b)94 cmp_func(gconstpointer a, gconstpointer b)
95 {
96 const stat_t *left = a;
97 const stat_t *right = b;
98
99 return right->stat - left->stat;
100 }
101
102 static void
sorted_list(gpointer key,gpointer value,gpointer user_data)103 sorted_list(gpointer key, gpointer value, gpointer user_data)
104 {
105 stat_t *data = value;
106 GList **lst = user_data;
107
108 *lst = g_list_insert_sorted(*lst, data, cmp_func);
109 }
110
111 static void
display_stat(gpointer data,gpointer user_data)112 display_stat(gpointer data, gpointer user_data)
113 {
114 long *total = user_data;
115 stat_t *st = data;
116
117 printf("%s %.3f\n", st->key, 100 * (float) st->stat / *total);
118 }
119
120 void
write_frequencies(int fl,char * buffer,long buflen,GTrashStack ** ts,GPtrArray * roots)121 write_frequencies (int fl, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
122 {
123 GStringChunk *sc;
124 GHashTable *ht;
125 GList *lst;
126 long total;
127
128 ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
129 sc = g_string_chunk_new(buflen);
130 lst = NULL;
131
132 total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
133
134 if (!total) goto on_error;
135
136 g_hash_table_foreach(ht, sorted_list, &lst);
137 g_list_foreach(lst, display_stat, &total);
138 g_list_free(lst);
139
140 on_error:
141 g_hash_table_destroy(ht);
142 g_string_chunk_free(sc);
143 }
144
145 void
write_count(char * searchFor,char * buffer,long buflen,GTrashStack ** ts,GPtrArray * roots)146 write_count (char *searchFor, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
147 {
148 GStringChunk *sc;
149 GHashTable *ht;
150 stat_t *result;
151 GList *lst;
152 long total;
153 long fl;
154
155 fl = strlen(searchFor);
156
157 ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
158 sc = g_string_chunk_new(buflen);
159 lst = NULL;
160 result = NULL;
161
162 total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
163
164 if (!total) goto on_error;
165
166 result = g_hash_table_lookup(ht, searchFor);
167
168 on_error:
169 printf("%ld\t%s\n", result ? result->stat : 0, searchFor);
170
171 g_hash_table_destroy(ht);
172 g_string_chunk_free(sc);
173 }
174
175 int
main()176 main ()
177 {
178 char buffer[4096];
179 GTrashStack *ts;
180 GPtrArray *roots;
181 GString *stuff;
182 gchar *s;
183 int len;
184
185 roots = g_ptr_array_new();
186 ts = NULL;
187
188 while (fgets(buffer, sizeof (buffer), stdin))
189 if (strncmp(buffer, ">THREE", 6) == 0)
190 break;
191
192 stuff = g_string_new(NULL);
193
194 while (fgets(buffer, sizeof (buffer), stdin))
195 {
196 size_t sz;
197
198 if (buffer[0] == '>')
199 break;
200
201 sz = strlen(buffer);
202 if (buffer[sz - 1] == '\n')
203 --sz;
204
205 stuff = g_string_append_len(stuff, buffer, sz);
206 }
207
208 stuff = g_string_ascii_up(stuff);
209 len = stuff->len;
210 s = g_string_free(stuff, FALSE);
211
212 write_frequencies(1, s, len, &ts, roots);
213 printf("\n");
214 write_frequencies(2, s, len, &ts, roots);
215 printf("\n");
216 write_count("GGT", s, len, &ts, roots);
217 write_count("GGTA", s, len, &ts, roots);
218 write_count("GGTATT", s, len, &ts, roots);
219 write_count("GGTATTTTAATT", s, len, &ts, roots);
220 write_count("GGTATTTTAATTTATAGT", s, len, &ts, roots);
221
222 free(s);
223
224 g_ptr_array_foreach(roots, (GFunc)free, NULL);
225 g_ptr_array_free(roots, TRUE);
226
227 return 0;
228 }
229