1 // +build ignore
2 
3 /*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6 
7     * Redistributions of source code must retain the above copyright
8     notice, this list of conditions and the following disclaimer.
9 
10     * Redistributions in binary form must reproduce the above copyright
11     notice, this list of conditions and the following disclaimer in the
12     documentation and/or other materials provided with the distribution.
13 
14     * Neither the name of "The Computer Language Benchmarks Game" nor the
15     name of "The Computer Language Shootout Benchmarks" nor the names of
16     its contributors may be used to endorse or promote products derived
17     from this software without specific prior written permission.
18 
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 POSSIBILITY OF SUCH DAMAGE.
30 */
31 
32 /*
33  * http://shootout.alioth.debian.org/u32/program.php?test=fasta&lang=gcc&id=3
34  */
35 
36 /*  The Computer Language Benchmarks Game
37  *  http://shootout.alioth.debian.org/
38  *
39  *  contributed by Petr Prokhorenkov
40  */
41 
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 
46 #ifndef fwrite_unlocked
47 // not available on OS X
48 #define fwrite_unlocked fwrite
49 #define fputc_unlocked fputc
50 #define fputs_unlocked fputs
51 #endif
52 
53 #define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
54 #define unlikely(x) __builtin_expect((x), 0)
55 
56 #define IM 139968
57 #define IA 3877
58 #define IC 29573
59 
60 #define LINE_LEN 60
61 #define LOOKUP_SIZE 4096
62 #define LOOKUP_SCALE ((float)(LOOKUP_SIZE - 1))
63 
64 typedef unsigned random_t;
65 
66 void
random_init(random_t * random)67 random_init(random_t *random) {
68     *random = 42;
69 }
70 
71 // Special version with result rescaled to LOOKUP_SCALE.
72 static inline
73 float
random_next_lookup(random_t * random)74 random_next_lookup(random_t *random) {
75     *random = (*random*IA + IC)%IM;
76 
77     return (*random)*(LOOKUP_SCALE/IM);
78 }
79 
80 struct amino_acid {
81    char sym;
82    float prob;
83    float cprob_lookup;
84 };
85 
86 void
repeat(const char * alu,const char * title,int n)87 repeat(const char *alu, const char *title, int n) {
88     int len = strlen(alu);
89     char buffer[len + LINE_LEN];
90     int pos = 0;
91 
92     memcpy(buffer, alu, len);
93     memcpy(buffer + len, alu, LINE_LEN);
94 
95     fputs_unlocked(title, stdout);
96     while (n > 0) {
97         int bytes = n > LINE_LEN ? LINE_LEN : n;
98 
99         fwrite_unlocked(buffer + pos, bytes, 1, stdout);
100         pos += bytes;
101         if (pos > len) {
102             pos -= len;
103         }
104         fputc_unlocked('\n', stdout);
105         n -= bytes;
106     }
107 }
108 
109 /*
110  * Lookup table contains mapping from real values to cumulative
111  * probabilities. Careful selection of table size allows lookup
112  * virtually in constant time.
113  *
114  * All cumulative probabilities are rescaled to LOOKUP_SCALE,
115  * this allows to save one multiplication operation on each iteration
116  * in randomize().
117  */
118 
119 void *
fill_lookup(struct amino_acid ** lookup,struct amino_acid * amino_acid,int amino_acid_size)120 fill_lookup(struct amino_acid **lookup, struct amino_acid *amino_acid, int amino_acid_size) {
121     float p = 0;
122     int i, j;
123 
124     for (i = 0; i < amino_acid_size; i++) {
125         p += amino_acid[i].prob;
126         amino_acid[i].cprob_lookup = p*LOOKUP_SCALE;
127     }
128 
129     // Prevent rounding error.
130     amino_acid[amino_acid_size - 1].cprob_lookup = LOOKUP_SIZE - 1;
131 
132     for (i = 0, j = 0; i < LOOKUP_SIZE; i++) {
133         while (amino_acid[j].cprob_lookup < i) {
134             j++;
135         }
136         lookup[i] = &amino_acid[j];
137     }
138 
139     return 0;
140 }
141 
142 void
randomize(struct amino_acid * amino_acid,int amino_acid_size,const char * title,int n,random_t * rand)143 randomize(struct amino_acid *amino_acid, int amino_acid_size,
144         const char *title, int n, random_t *rand) {
145     struct amino_acid *lookup[LOOKUP_SIZE];
146     char line_buffer[LINE_LEN + 1];
147     int i, j;
148 
149     line_buffer[LINE_LEN] = '\n';
150 
151     fill_lookup(lookup, amino_acid, amino_acid_size);
152 
153     fputs_unlocked(title, stdout);
154 
155     for (i = 0, j = 0; i < n; i++, j++) {
156         if (j == LINE_LEN) {
157             fwrite_unlocked(line_buffer, LINE_LEN + 1, 1, stdout);
158             j = 0;
159         }
160 
161         float r = random_next_lookup(rand);
162         struct amino_acid *u = lookup[(short)r];
163         while (unlikely(u->cprob_lookup < r)) {
164             ++u;
165         }
166         line_buffer[j] = u->sym;
167     }
168     line_buffer[j] = '\n';
169     fwrite_unlocked(line_buffer, j + 1, 1, stdout);
170 }
171 
172 struct amino_acid amino_acid[] = {
173    { 'a', 0.27 },
174    { 'c', 0.12 },
175    { 'g', 0.12 },
176    { 't', 0.27 },
177 
178    { 'B', 0.02 },
179    { 'D', 0.02 },
180    { 'H', 0.02 },
181    { 'K', 0.02 },
182    { 'M', 0.02 },
183    { 'N', 0.02 },
184    { 'R', 0.02 },
185    { 'S', 0.02 },
186    { 'V', 0.02 },
187    { 'W', 0.02 },
188    { 'Y', 0.02 },
189 };
190 
191 struct amino_acid homo_sapiens[] = {
192    { 'a', 0.3029549426680 },
193    { 'c', 0.1979883004921 },
194    { 'g', 0.1975473066391 },
195    { 't', 0.3015094502008 },
196 };
197 
198 static const char alu[] =
199    "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG"
200    "GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA"
201    "GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA"
202    "AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT"
203    "CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC"
204    "CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG"
205    "CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
206 
207 int
main(int argc,const char ** argv)208 main(int argc, const char **argv) {
209     int n = argc > 1 ? atoi( argv[1] ) : 512;
210     random_t rand;
211 
212     random_init(&rand);
213 
214     repeat(alu, ">ONE Homo sapiens alu\n", n*2);
215     randomize(amino_acid, ARRAY_SIZE(amino_acid),
216             ">TWO IUB ambiguity codes\n", n*3, &rand);
217     randomize(homo_sapiens, ARRAY_SIZE(homo_sapiens),
218             ">THREE Homo sapiens frequency\n", n*5, &rand);
219 
220     return 0;
221 }
222