1 /***************************************************************
2
3 The Subread software package is free software package:
4 you can redistribute it and/or modify it under the terms
5 of the GNU General Public License as published by the
6 Free Software Foundation, either version 3 of the License,
7 or (at your option) any later version.
8
9 Subread is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty
11 of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
13 See the GNU General Public License for more details.
14
15 Authors: Drs Yang Liao and Wei Shi
16
17 ***************************************************************/
18
19
20 #include <ctype.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <stdarg.h>
24 #include <math.h>
25 #include <unistd.h>
26 #include <pthread.h>
27
28
29 #ifdef MACOS
30
31 #include <sys/types.h>
32 #include <sys/socket.h>
33 #include <sys/ioctl.h>
34 #include <sys/sysctl.h>
35 #include <net/if.h>
36 #include <net/if_dl.h>
37 #include <netinet/in.h>
38 #include <arpa/inet.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <mach/mach.h>
42 #include <mach/vm_statistics.h>
43
44 #else
45
46 #ifndef __MINGW32__
47 #include <sys/ioctl.h>
48 #include <netinet/in.h>
49 #include <net/if.h>
50 #include <sys/sysinfo.h>
51 #endif
52 #include <sys/types.h>
53 #endif
54
55
56 #include "subread.h"
57 #include "input-files.h"
58 #include "seek-zlib.h"
59 #include "gene-algorithms.h"
60 #include "HelperFunctions.h"
61
get_sys_mem_info(char * keyword)62 size_t get_sys_mem_info(char * keyword){
63 FILE * mfp = fopen("/proc/meminfo","r");
64 if(mfp==NULL) return -1;
65 char linebuf[1000];
66 size_t ret = -1;
67 while(1){
68 char * rret = fgets(linebuf, 999, mfp);
69 if(memcmp( keyword, linebuf, strlen(keyword) ) == 0 && strstr(linebuf," kB")) {
70 ret=0;
71 int ii ,state=0;
72 for(ii=strlen(keyword);; ii++){
73 //SUBREADprintf("CH[%d] = %d '%c' at state %d\n", ii, linebuf[ii], linebuf[ii], state);
74 if(state == 0 && linebuf[ii]==' ') state = 1;
75 if(state == 1 && linebuf[ii]!=' ') state = 2;
76 if(state == 2 && linebuf[ii]==' ') state = 9999;
77
78 if(state == 2 && !isdigit(linebuf[ii])){
79 SUBREADprintf("WRONG MEMORY INFO '%s'\n", linebuf);
80 ret = -1;
81 break;
82 }
83 if(state == 2) ret = ret*10 + ( linebuf[ii] - '0' );
84 if(state >= 9999) {
85 ret *=1024;
86 break;
87 }
88 }
89 }
90 if(!rret) break;
91 }
92 fclose(mfp);
93 return ret;
94 }
95
get_short_fname(char * lname)96 char * get_short_fname(char * lname){
97 char * ret = lname;
98
99 int x1;
100 for(x1 = strlen(lname)-2; x1>=0; x1--){
101 if(lname [x1] == '/'||lname [x1] == '\\'){
102 ret = lname + x1 + 1;
103 break;
104 }
105 }
106 return ret;
107 }
108
109
110
111 // This assumes the first part of Cigar has differet strandness to the main part of the cigar.
112 // Pos is the LAST WANTED BASE location before the first strand jump (split by 'b' or 'n').
113 // The first base in the read actually has a larger coordinate than Pos.
114 // new_cigar has to be at least 100 bytes.
reverse_cigar(unsigned int pos,char * cigar,char * new_cigar)115 unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar) {
116 int cigar_cursor = 0;
117 new_cigar[0]=0;
118 unsigned int tmpi=0;
119 int last_piece_end = 0;
120 int last_sec_start = 0;
121 unsigned int chro_pos = pos, this_section_start = pos, ret = pos;
122 int is_positive_dir = 0;
123 int read_cursor = 0;
124 int section_no = 0;
125
126 for(cigar_cursor = 0 ; ; cigar_cursor++)
127 {
128 if( cigar [cigar_cursor] == 'n' || cigar [cigar_cursor] == 'b' || cigar [cigar_cursor] == 0)
129 {
130 int xk1, jmlen=0, nclen=strlen(new_cigar);
131 char jump_mode [13];
132
133 if(cigar [cigar_cursor] !=0)
134 {
135 sprintf(jump_mode, "%u%c", tmpi, cigar [cigar_cursor] == 'b'?'n':'b');
136 jmlen = strlen(jump_mode);
137 }
138
139 for(xk1=nclen-1;xk1>=0; xk1--)
140 new_cigar[ xk1 + last_piece_end + jmlen - last_sec_start ] = new_cigar[ xk1 ];
141 new_cigar [nclen + jmlen + last_piece_end - last_sec_start ] = 0;
142
143 memcpy(new_cigar , jump_mode, jmlen);
144 memcpy(new_cigar + jmlen , cigar + last_sec_start, last_piece_end - last_sec_start);
145
146 last_sec_start = cigar_cursor+1;
147
148 if(is_positive_dir && cigar [cigar_cursor] !=0)
149 {
150 if(cigar [cigar_cursor] == 'b') chro_pos -= tmpi - read_cursor - 1;
151 else chro_pos += tmpi - read_cursor - 1;
152 }
153 if((!is_positive_dir) && cigar [cigar_cursor] !=0)
154 {
155 if(cigar [cigar_cursor] == 'b') chro_pos = this_section_start - tmpi - read_cursor - 1;
156 else chro_pos = this_section_start + tmpi - read_cursor - 1;
157 }
158
159 this_section_start = chro_pos;
160
161 if(section_no == 0)
162 ret = chro_pos;
163
164 is_positive_dir = ! is_positive_dir;
165 section_no++;
166 tmpi=0;
167 }
168 else if(isalpha(cigar [cigar_cursor]))
169 {
170 if(cigar [cigar_cursor]=='M' || cigar [cigar_cursor] == 'S')
171 read_cursor += tmpi;
172 tmpi=0;
173 last_piece_end = cigar_cursor+1;
174 }
175 else tmpi = tmpi*10 + (cigar [cigar_cursor] - '0');
176
177 if(cigar [cigar_cursor] == 0)break;
178 }
179
180 SUBREADprintf("REV CIGAR: %s => %s\n", cigar, new_cigar);
181 return ret;
182 }
183
find_left_end_cigar(unsigned int right_pos,char * cigar)184 unsigned int find_left_end_cigar(unsigned int right_pos, char * cigar){
185 int delta_from_right = 0;
186 int cigar_cursor = 0;
187 unsigned int tmpi = 0;
188 while(1){
189 int nch = cigar[cigar_cursor++];
190 if(nch == 0) break;
191 if(isdigit(nch)){
192 tmpi = tmpi * 10 + nch - '0';
193 }else{
194 if(nch == 'M'||nch == 'D' || nch == 'N'){
195 delta_from_right +=tmpi;
196 }
197 tmpi = 0;
198 }
199 }
200 return right_pos - delta_from_right;
201 }
202
203
contig_fasta_int2base(int v)204 char contig_fasta_int2base(int v){
205 if(v == 1) return 'A';
206 if(v == 2) return 'T';
207 if(v == 3) return 'G';
208 if(v == 4) return 'C';
209 return 'N';
210 }
211
contig_fasta_base2int(char base)212 int contig_fasta_base2int(char base){
213 base = tolower(base);
214 if((base) == 'a'){ return 1;}
215 else if((base) == 't' || (base) == 'u'){ return 2;}
216 else if((base) == 'g'){ return 3;}
217 else if((base) == 'c'){ return 4;}
218 else return 15 ;
219 }
220
get_contig_fasta(fasta_contigs_t * tab,char * chro,unsigned int pos,int len,char * out_bases)221 int get_contig_fasta(fasta_contigs_t * tab, char * chro, unsigned int pos, int len, char * out_bases){
222 unsigned int this_size = HashTableGet( tab -> size_table, chro ) - NULL;
223 if(this_size > 0){
224 if(this_size >= len && pos <= this_size - len){
225 char * bin_block = HashTableGet(tab -> contig_table, chro );
226 unsigned int bin_byte = pos / 2;
227 int bin_bit = 4*(pos % 2), x1;
228
229 for(x1 = 0 ;x1 < len; x1++)
230 {
231 int bin_int = (bin_block[bin_byte] >> bin_bit) & 0xf;
232 if(bin_bit == 4) bin_byte++;
233 bin_bit = (bin_bit == 4)?0:4;
234 out_bases[x1] = contig_fasta_int2base(bin_int);
235 }
236
237 return 0;
238 }
239 }
240 return 1;
241 }
242
destroy_contig_fasta(fasta_contigs_t * tab)243 void destroy_contig_fasta(fasta_contigs_t * tab){
244 HashTableDestroy( tab -> size_table );
245 HashTableDestroy( tab -> contig_table );
246 }
read_contig_fasta(fasta_contigs_t * tab,char * fname)247 int read_contig_fasta(fasta_contigs_t * tab, char * fname){
248 autozip_fp fp;
249 int rv = autozip_open(fname, &fp);
250 if(rv>=0){
251 tab -> contig_table = HashTableCreate(3943);
252 tab -> size_table = HashTableCreate(3943);
253
254 HashTableSetDeallocationFunctions(tab -> contig_table, free, free);
255 HashTableSetDeallocationFunctions(tab -> size_table, NULL, NULL);
256
257 HashTableSetKeyComparisonFunction(tab -> contig_table, fc_strcmp_chro);
258 HashTableSetKeyComparisonFunction(tab -> size_table, fc_strcmp_chro);
259
260 HashTableSetHashFunction(tab -> contig_table, fc_chro_hash);
261 HashTableSetHashFunction(tab -> size_table, fc_chro_hash);
262
263 char chro_name[MAX_CHROMOSOME_NAME_LEN];
264 unsigned int inner_cursor = 0, current_bin_space = 0;
265 int status = 0;
266 char * bin_block = NULL;
267 chro_name[0]=0;
268
269 while(1){
270 int nch = autozip_getch(&fp);
271 if(nch<0)break;
272 if(status == 0){
273 assert(nch == '>');
274 status = 1;
275 }else if(status == 1){
276 if(inner_cursor == 0){
277 bin_block = calloc(sizeof(char),10000);
278 current_bin_space = 10000;
279 }
280 if(nch == '|' || nch == ' ') status = 2;
281 else if(nch == '\n'){
282 status = 3;
283 inner_cursor = 0;
284 }else{
285 chro_name[inner_cursor++] = nch;
286 chro_name[inner_cursor] = 0;
287 }
288 }else if(status == 2){
289 if(nch == '\n'){
290 status = 3;
291 inner_cursor = 0;
292 }
293 }else if(status == 3){
294 if(nch == '>' || nch <= 0){
295 char * mem_chro = malloc(strlen(chro_name)+1);
296 strcpy(mem_chro, chro_name);
297 HashTablePut(tab -> size_table , mem_chro, NULL + inner_cursor);
298 HashTablePut(tab -> contig_table , mem_chro, bin_block);
299 // SUBREADprintf("Read '%s' : %u bases\n", chro_name, inner_cursor);
300 inner_cursor = 0;
301 status = 1;
302 if(nch <= 0) break;
303 }else if(nch != '\n'){
304 int bin_bytes = inner_cursor / 2;
305 int bin_bits = 4*(inner_cursor % 2);
306 int base_int = contig_fasta_base2int(nch);
307 if(bin_bytes >= current_bin_space){
308 unsigned int new_bin_space = current_bin_space / 4 * 5;
309 if(current_bin_space > 0xffff0000 /5 * 4){
310 assert(0);
311 return 1;
312 }
313 bin_block = realloc(bin_block, new_bin_space);
314 memset(bin_block + current_bin_space, 0, new_bin_space - current_bin_space);
315 current_bin_space = new_bin_space;
316 }
317 bin_block[bin_bytes] |= (base_int << bin_bits);
318 inner_cursor++;
319 }
320 }
321 }
322
323 autozip_close(&fp);
324 return 0;
325 }
326 return 1;
327 }
328
RSubread_parse_CIGAR_Extra_string(int FLAG,char * MainChro,unsigned int MainPos,const char * CIGAR_Str,const char * Extra_Tags,int max_M,char ** Chros,unsigned int * Staring_Chro_Points,unsigned short * Section_Start_Read_Pos,unsigned short * Section_Length,int * is_junction_read)329 int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int MainPos, const char * CIGAR_Str, const char * Extra_Tags, int max_M, char ** Chros, unsigned int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read){
330 int ret = RSubread_parse_CIGAR_string(MainChro, MainPos, CIGAR_Str, max_M, Chros, Staring_Chro_Points, Section_Start_Read_Pos, Section_Length, is_junction_read);
331
332 char read_main_strand = (((FLAG & 0x40)==0x40) == ((FLAG & 0x10) == 0x10 ))?'-':'+';
333 int tag_cursor=0;
334 //SUBREADprintf("EXTRA=%s\n", Extra_Tags);
335 int status = PARSE_STATUS_TAGNAME;
336 char tag_name[2], typechar=0;
337 int tag_inner_cursor=0;
338
339 char current_fusion_char[MAX_CHROMOSOME_NAME_LEN];
340 unsigned int current_fusion_pos = 0;
341 char current_fusion_strand = 0;
342 char current_fusion_cigar[max_M * 15];
343 current_fusion_cigar [0] =0;
344 current_fusion_char [0]=0;
345
346 while(1){
347 int nch = Extra_Tags[tag_cursor];
348 if(status == PARSE_STATUS_TAGNAME){
349 tag_name[tag_inner_cursor++] = nch;
350 if(tag_inner_cursor == 2){
351 status = PARSE_STATUS_TAGTYPE;
352 tag_cursor += 1;
353 assert(Extra_Tags[tag_cursor] == ':');
354 }
355 }else if(status == PARSE_STATUS_TAGTYPE){
356 typechar = nch;
357 tag_cursor +=1;
358 assert(Extra_Tags[tag_cursor] == ':');
359 tag_inner_cursor = 0;
360 status = PARSE_STATUS_TAGVALUE;
361 }else if(status == PARSE_STATUS_TAGVALUE){
362 if(nch == '\t' || nch == 0 || nch == '\n'){
363 if(current_fusion_cigar[0] && current_fusion_char[0] && current_fusion_pos && current_fusion_strand){
364 //SUBREADprintf("ENTER CALC:%s\n", current_fusion_char );
365 unsigned int left_pos = current_fusion_pos;
366 if(current_fusion_strand!=read_main_strand)
367 left_pos = find_left_end_cigar(current_fusion_pos, current_fusion_cigar);
368 ret += RSubread_parse_CIGAR_string(current_fusion_char, left_pos, current_fusion_cigar, max_M - ret, Chros + ret, Staring_Chro_Points+ ret, Section_Start_Read_Pos+ ret, Section_Length + ret, is_junction_read);
369
370 current_fusion_pos = 0;
371 current_fusion_strand = 0;
372 current_fusion_cigar [0] =0;
373 current_fusion_char [0]=0;
374 //SUBREADprintf("EXIT CALC:%s\n", current_fusion_char );
375 }
376
377 tag_inner_cursor = 0;
378 status = PARSE_STATUS_TAGNAME;
379 }else{
380 if(tag_name[0]=='C' && tag_name[1]=='C' && typechar == 'Z'){
381 current_fusion_char[tag_inner_cursor++]=nch;
382 current_fusion_char[tag_inner_cursor]=0;
383 }else if(tag_name[0]=='C' && tag_name[1]=='G' && typechar == 'Z'){
384 current_fusion_cigar[tag_inner_cursor++]=nch;
385 current_fusion_cigar[tag_inner_cursor]=0;
386 }else if(tag_name[0]=='C' && tag_name[1]=='P' && typechar == 'i'){
387 current_fusion_pos = current_fusion_pos * 10 + (nch - '0');
388 }else if(tag_name[0]=='C' && tag_name[1]=='T' && typechar == 'Z'){
389 //SUBREADprintf("pos=%d %c -> %c\n", tag_cursor, current_fusion_strand, nch);
390 current_fusion_strand = nch;
391 //SUBREADprintf("spo=%d %c -> %c\n", tag_cursor, current_fusion_strand, nch);
392 }
393 }
394 }
395
396 if(nch == 0 || nch == '\n'){
397 assert(status == PARSE_STATUS_TAGNAME);
398 break;
399 }
400
401 tag_cursor++;
402 //SUBREADprintf("CUR=%d [%s], c=%d\n", tag_cursor, Extra_Tags, Extra_Tags[tag_cursor]);
403 }
404 return ret;
405 }
406
RSubread_parse_CIGAR_string(char * chro,unsigned int first_pos,const char * CIGAR_Str,int max_M,char ** Section_Chromosomes,unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos,unsigned short * Section_Chro_Length,int * is_junction_read)407 int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char * CIGAR_Str, int max_M, char ** Section_Chromosomes, unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read)
408 {
409 unsigned int tmp_int=0;
410 int cigar_cursor=0;
411 unsigned short current_section_chro_len=0, current_section_start_read_pos = 0, read_cursor = 0;
412 unsigned int chromosome_cursor=first_pos;
413 int ret=0, is_first_S = 1;
414
415 for(cigar_cursor=0; ; cigar_cursor++)
416 {
417 char ch = CIGAR_Str[cigar_cursor];
418
419 if(ch >='0' && ch <= '9')
420 {
421 tmp_int=tmp_int*10+(ch - '0');
422 }
423 else
424 {
425 if(ch == 'S'){
426 if(is_first_S) current_section_start_read_pos = tmp_int;
427 read_cursor += tmp_int;
428 }
429 else if(ch == 'M' || ch == 'X' || ch == '=') {
430 read_cursor += tmp_int;
431 current_section_chro_len += tmp_int;
432 chromosome_cursor += tmp_int;
433 } else if(ch == 'N' || ch == 'D' || ch=='I' || ch == 0) {
434 if('N' == ch)(*is_junction_read)=1;
435 if(ret < max_M)
436 {
437 if(current_section_chro_len>0)
438 {
439 Section_Chromosomes[ret] = chro;
440 Section_Start_Chro_Pos[ret] = chromosome_cursor - current_section_chro_len;
441 Section_Start_Read_Pos[ret] = current_section_start_read_pos;
442 Section_Chro_Length[ret] = current_section_chro_len;
443 ret ++;
444 }
445 }else assert(0);
446 current_section_chro_len = 0;
447 if(ch == 'I') read_cursor += tmp_int;
448 else if(ch == 'N' || ch == 'D') chromosome_cursor += tmp_int;
449 current_section_start_read_pos = read_cursor;
450
451 if(ch == 0) break;
452 }
453 //printf("C=%c, TV=%d, CC=%d, RC=%d\n", ch, tmp_int, chromosome_cursor, current_section_chro_len);
454 tmp_int = 0;
455 is_first_S = 0;
456 }
457 if(cigar_cursor>100) return -1;
458 }
459
460 return ret;
461 }
462
display_sections(char * CIGAR_Str)463 void display_sections(char * CIGAR_Str)
464 {
465 //int is_junc=0;
466 int Section_Start_Chro_Pos[FC_CIGAR_PARSER_ITEMS];
467 unsigned short Section_Start_Read_Pos[FC_CIGAR_PARSER_ITEMS];
468 unsigned short Section_Chro_Length[FC_CIGAR_PARSER_ITEMS];
469
470 int retv = 0;//RSubread_parse_CIGAR_string(CIGAR_Str, Section_Start_Chro_Pos, Section_Start_Read_Pos, Section_Chro_Length, &is_junc);
471
472 int x1;
473 SUBREADprintf("Cigar=%s ; Sections=%d\n", CIGAR_Str, retv);
474 for(x1=0; x1<retv; x1++)
475 {
476 SUBREADprintf(" Section #%d: chro_offset=%d, read_offset=%u length=%u\n",x1, Section_Start_Chro_Pos[x1], Section_Start_Read_Pos[x1], Section_Chro_Length[x1]);
477 }
478 SUBREADprintf("\n");
479
480 }
481
482
483 #define GECV_STATE_BEFORE 10
484 #define GECV_STATE_NAME 20
485 #define GECV_STATE_GAP 30
486 #define GECV_STATE_VALUE 40
487 #define GECV_STATE_QVALUE 50
488 #define GECV_STATE_QV_END 60
489 #define GECV_STATE_ERROR 9999
490
GTF_extra_column_istoken_chr(char c)491 int GTF_extra_column_istoken_chr(char c)
492 {
493 return (isalpha(c)||isdigit(c)||c=='_');
494 }
495
GTF_extra_column_value(const char * Extra_Col,const char * Target_Name,char * Target_Value,int TargVal_Size)496 int GTF_extra_column_value(const char * Extra_Col, const char * Target_Name, char * Target_Value, int TargVal_Size)
497 {
498 int state = GECV_STATE_BEFORE;
499 int col_cursor = 0, is_correct_name=0;
500 char name_buffer[200];
501 int name_cursor = 0, value_cursor=-1;
502
503 while(1)
504 {
505 if(name_cursor>190) return -1;
506 char nch = Extra_Col[col_cursor];
507 if(nch == '\n' || nch == '\r') nch = 0;
508 if(state == GECV_STATE_BEFORE)
509 {
510 if(GTF_extra_column_istoken_chr(nch))
511 {
512 name_buffer[0] = nch;
513 name_cursor = 1;
514 state = GECV_STATE_NAME;
515 }
516 else if(nch != ' ' && nch != 0)
517 {
518 state = GECV_STATE_ERROR;
519 }
520 }
521 else if(state == GECV_STATE_NAME)
522 {
523 if(nch == ' ' || nch == '=')
524 {
525 state = GECV_STATE_GAP;
526 name_buffer[name_cursor] = 0;
527 is_correct_name = (strcmp(name_buffer , Target_Name) == 0);
528 //printf("CORR=%d : '%s'\n", is_correct_name, name_buffer);
529 }
530 else if(nch == '"')
531 {
532 name_buffer[name_cursor] = 0;
533 is_correct_name = (strcmp(name_buffer , Target_Name) == 0);
534 state = GECV_STATE_QVALUE;
535 if(is_correct_name)
536 value_cursor = 0;
537 }
538 else if(GTF_extra_column_istoken_chr(nch))
539 name_buffer[name_cursor++] = nch;
540 else
541 {
542 state = GECV_STATE_ERROR;
543 //printf("ERR2 : '%c'\n", nch);
544 }
545
546 }
547 else if(state == GECV_STATE_GAP)
548 {
549 if(nch == '"')
550 {
551 state = GECV_STATE_QVALUE;
552 if(is_correct_name)
553 value_cursor = 0;
554 }
555 else if(nch != '=' && isgraph(nch))
556 {
557 state = GECV_STATE_VALUE;
558 if(is_correct_name)
559 {
560 Target_Value[0]=nch;
561 value_cursor = 1;
562 }
563 }
564 else if(nch != ' ' && nch != '=')
565 state = GECV_STATE_ERROR;
566 }
567 else if(state == GECV_STATE_VALUE)
568 {
569 if(nch == ';' || nch == 0)
570 {
571 state = GECV_STATE_BEFORE;
572 if(is_correct_name)
573 {
574 Target_Value[value_cursor] = 0;
575 }
576 is_correct_name = 0;
577 }
578 else{
579 if(value_cursor < TargVal_Size-1 && is_correct_name)
580 Target_Value[value_cursor++] = nch;
581 }
582 }
583 else if(state == GECV_STATE_QVALUE)
584 {
585 if(nch == '"')
586 {
587 state = GECV_STATE_QV_END;
588 if(is_correct_name)
589 Target_Value[value_cursor] = 0;
590 is_correct_name = 0;
591 }
592 else
593 {
594 if(value_cursor < TargVal_Size-1 && is_correct_name)
595 {
596 if(nch !=' ' || value_cursor>0)
597 Target_Value[value_cursor++] = nch;
598 }
599 }
600 }
601 else if(state == GECV_STATE_QV_END)
602 {
603 if(nch == ';' || nch == 0)
604 state = GECV_STATE_BEFORE;
605 else if(nch != ' ')
606 state = GECV_STATE_ERROR;
607
608 }
609
610 if (GECV_STATE_ERROR == state){
611 Target_Value[0]=0;
612 return -1;
613 }
614 if (nch == 0)
615 {
616 if(state == GECV_STATE_BEFORE && value_cursor>0)
617 {
618 int x1;
619 for(x1 = value_cursor-1; x1>=0; x1--)
620 {
621 if(Target_Value[x1] == ' '){
622 value_cursor --;
623 Target_Value[x1]=0;
624 }
625 else break;
626 }
627
628 if(value_cursor>0)
629 return value_cursor;
630 }
631 Target_Value[0]=0;
632 return -1;
633 }
634 col_cursor++;
635 }
636 }
637
638
hpl_test2_func()639 void hpl_test2_func()
640 {
641 char * extra_column = " gene_id \"PC4-013 \"; 013=ABCD ; PC4 = CCXX ";
642 char * col_name = "gene_id";
643 char col_val[100];
644
645 int col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
646 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
647
648 col_name = "013";
649 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
650 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
651
652 col_name = "PC4";
653 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
654 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
655
656
657 col_name = "XXX";
658 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
659 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
660
661 extra_column = "gene_id = \"PC4-013 ;=\" ;013 = AXXD ; PC4=x";
662 col_name = "013";
663 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
664 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
665
666 col_name = "gene_id";
667 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
668 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
669
670
671 col_name = "PC4";
672 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
673 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
674
675
676
677
678 extra_column = " gene_id\" PC4-013 ;= \"; XXX='123' ;013 :ABCD ; PC4 = CCXX= ;";
679 col_name = "013";
680 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
681 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
682
683
684 col_name = "XXX";
685 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
686 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
687
688
689 col_name = "PC4";
690 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
691 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
692
693 col_name = "gene_id";
694 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
695 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
696
697
698
699 extra_column = "gene_id \"653635\"; transcript_id \"TR:653635\";";
700 col_name = "gene_id";
701 col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
702 SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
703
704
705
706
707
708 }
709
hpl_test1_func()710 void hpl_test1_func()
711 {
712 display_sections("");
713 display_sections("*");
714 display_sections("5S10M2D10M800N12M3I12M450N12M12D99M6S");
715 display_sections("110M2I10M800N32M3I12M6S");
716 display_sections("200S110M2I10M800N32M3I12M200N40M");
717 display_sections("3M1663N61M1045N36M3D20M66N10M2D10M77N3M1663N61M1045N36M3D20M66N103M1663N61M1045N36M3D20M66N9M");
718 }
719
test_bam_reader()720 void test_bam_reader(){
721 SamBam_FILE * bamfp = SamBam_fopen("/usr/local/work/liao/arena/Rsubread_Paper_OCT2016/RNAseq-SimHG38/Runs-100/STAR/STAR-Simulation-15M-DXC.bamAligned.out.bam", SAMBAM_FILE_BAM);
722 assert(bamfp);
723
724 while(1){
725 char linebuf[2000];
726 char * ret = SamBam_fgets(bamfp, linebuf, 1999, 1);
727 if(!ret) break;
728 SUBREADprintf("%s", linebuf); // linebuf has "\n" in the end.
729 }
730 SamBam_fclose(bamfp);
731 }
732
733
734
735 #ifdef HELPER_TEST_SAMBAM_READER
main()736 main() {
737 //hpl_test1_func();
738 test_bam_reader();
739 // hpl_test1_func();
740 }
741 #endif
742
str_replace(char * orig,char * rep,char * with)743 char *str_replace(char *orig, char *rep, char *with) {
744 char *result; // the return string
745 char *ins; // the next insert point
746 char *tmp; // varies
747 int len_rep; // length of rep
748 int len_with; // length of with
749 int len_front; // distance between rep and end of last rep
750 int count; // number of replacements
751
752 if (!orig)
753 return NULL;
754 if (!rep)
755 rep = "";
756 len_rep = strlen(rep);
757 if (!with)
758 with = "";
759 len_with = strlen(with);
760
761 ins = orig;
762 for (count = 0; NULL != (tmp = strstr(ins, rep)); ++count) {
763 ins = tmp + len_rep;
764 }
765 tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1);
766
767 if (!result)
768 return NULL;
769
770 while (count--) {
771 ins = strstr(orig, rep);
772 len_front = ins - orig;
773 tmp = strncpy(tmp, orig, len_front) + len_front;
774 tmp = strcpy(tmp, with) + len_with;
775 orig += len_front + len_rep; // move to next "end of rep"
776 }
777 strcpy(tmp, orig);
778 return result;
779 }
780
781
782
783 // rule: the string is ABC123XXXXXX...
784 // This is the priroity:
785 // First, compare the letters part.
786 // Second, compare the pure numeric part.
787 // Third, compare the remainder.
strcmp_number(char * s1,char * s2)788 int strcmp_number(char * s1, char * s2)
789 {
790 int x1 = 0;
791 int ret = 0;
792
793 while(1)
794 {
795 char nch1 = s1[x1];
796 char nch2 = s2[x1];
797
798 if((!nch1) || !nch2){return nch2?1:(nch1?(-1):0);}
799 if(isdigit(nch1) && isdigit(nch2))break;
800
801 ret = nch1 - nch2;
802 if(ret) return ret;
803 x1++;
804 }
805
806 int v1 = 0, v2 = 0;
807 while(1)
808 {
809 char nch1 = s1[x1];
810 char nch2 = s2[x1];
811 if((!nch1) || !nch2){
812 if(nch1 || nch2)
813 return nch2?(-1):1;
814 break;
815 }
816 int is_chr1_digit = isdigit(nch1);
817 int is_chr2_digit = isdigit(nch2);
818
819 if(is_chr1_digit || is_chr2_digit)
820 {
821 if(is_chr1_digit && is_chr2_digit)
822 {
823 v1 = v1*10+(nch1-'0');
824 v2 = v2*10+(nch2-'0');
825 }
826 else
827 {
828 ret = nch1 - nch2;
829 return ret;
830 }
831 }
832 else break;
833 x1++;
834 }
835
836 if(v1==v2)
837 return strcmp(s1+x1, s2+x1);
838 else
839 {
840 ret = v1 - v2;
841 return ret;
842 }
843 }
844
845
846
mac_str(char * str_buff)847 int mac_str(char * str_buff)
848 {
849 #if defined(__FreeBSD__) || defined(__MINGW32__) || defined(__DragonFly__)
850 return 1;
851 #else
852 #ifdef MACOS
853 int mib[6], x1, ret = 1;
854 size_t len;
855 char *buf;
856 unsigned char *ptr;
857 struct if_msghdr *ifm;
858 struct sockaddr_dl *sdl;
859
860
861 for(x1 = 0 ; x1 < 40; x1++)
862 {
863 mib[0] = CTL_NET;
864 mib[1] = AF_ROUTE;
865 mib[2] = 0;
866 mib[3] = AF_LINK;
867 mib[4] = NET_RT_IFLIST;
868 mib[5] = x1;
869
870 if (sysctl(mib, 6, NULL, &len, NULL, 0) >=0) {
871 buf = malloc(len);
872 if (sysctl(mib, 6, buf, &len, NULL, 0) >=0) {
873
874 ifm = (struct if_msghdr *)buf;
875 sdl = (struct sockaddr_dl *)(ifm + 1);
876 ptr = (unsigned char *)LLADDR(sdl);
877
878 if(sdl -> sdl_nlen < 1) continue;
879
880 char * ifname = malloc(sdl -> sdl_nlen + 1);
881
882 memcpy(ifname, sdl -> sdl_data, sdl -> sdl_nlen);
883 ifname[sdl -> sdl_nlen] = 0;
884 if(ifname[0]!='e'){
885 free(ifname);
886 continue;
887 }
888 free(ifname);
889
890 sprintf(str_buff,"%02X%02X%02X%02X%02X%02X", *ptr, *(ptr+1), *(ptr+2),
891 *(ptr+3), *(ptr+4), *(ptr+5));
892 ret = 0;
893 break;
894 }
895 free(buf);
896 }
897 }
898 return ret;
899 #else
900 #if defined(IFHWADDRLEN)
901 struct ifreq ifr;
902 struct ifconf ifc;
903 char buf[1024];
904 int success = 0;
905
906 int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
907 if (sock == -1) { /* handle error*/ };
908
909 ifc.ifc_len = sizeof(buf);
910 ifc.ifc_buf = buf;
911 if (ioctl(sock, SIOCGIFCONF, &ifc) == -1) { /* handle error */ }
912
913 struct ifreq* it = ifc.ifc_req;
914 const struct ifreq* const end = it + (ifc.ifc_len / sizeof(struct ifreq));
915
916 for (; it != end; ++it) {
917 strcpy(ifr.ifr_name, it->ifr_name);
918 if (ioctl(sock, SIOCGIFFLAGS, &ifr) == 0) {
919 if (! (ifr.ifr_flags & IFF_LOOPBACK)) { // don't count loopback
920 if (ioctl(sock, SIOCGIFHWADDR, &ifr) == 0) {
921 success = 1;
922 break;
923 }
924 }
925 }
926 }
927
928 close(sock);
929
930 unsigned char mac_address[6];
931
932 if (success){
933 memcpy(mac_address, ifr.ifr_hwaddr.sa_data, 6);
934 int x1;
935 for(x1 = 0; x1 < 6; x1++){
936 sprintf(str_buff+2*x1, "%02X",mac_address[x1]);
937 }
938 return 0;
939 }
940 #endif
941 return 1;
942 #endif
943 #endif
944 }
945
rand_str(char * str_buff)946 int rand_str(char * str_buff){
947 int ret = 1;
948 FILE * fp = fopen("/dev/urandom","r");
949 if(fp){
950 int x1;
951 for(x1=0; x1<6; x1++){
952 sprintf(str_buff + 2*x1 , "%02X", fgetc(fp));
953 }
954 fclose(fp);
955 ret = 0;
956 }
957 return ret;
958 }
959
mathrand_str(char * str_buff)960 int mathrand_str(char * str_buff){
961 myrand_srand((int)(miltime()*100));
962 int x1;
963 for(x1 = 0; x1 < 6; x1++){
964 sprintf(str_buff+2*x1, "%02X", myrand_rand() & 0xff );
965 }
966 return 0;
967 }
968
mac_or_rand_str(char * str_buff)969 int mac_or_rand_str(char * str_buff){
970 return mac_str(str_buff) && rand_str(str_buff) && mathrand_str(str_buff);
971 }
972
973 #define PI_LONG 3.1415926535897932384626434L
974
fast_fractorial(unsigned int x,long double * buff,int buflen)975 long double fast_fractorial(unsigned int x, long double * buff, int buflen){
976 if(x<2) return 0;
977
978 if(buff != NULL && x < buflen && buff[x]!=0){
979 return buff[x];
980 }
981 long double ret;
982
983 if(x<50){
984 int x1;
985 ret =0.L;
986 for(x1 = 2 ; x1 <= x; x1++) ret += logl((long double)(x1));
987 }else{
988 ret = logl(x)*1.0L*x - 1.0L*x + 0.5L * logl(2.0L*PI_LONG* x) + 1.L/(12.L*x) - 1.L/(360.L* x*x*x) + 1.L/(1260.L* x*x*x*x*x) - 1.L/(1680.L*x*x*x*x*x*x*x);
989 }
990 if(buff != NULL && x < buflen) buff[x]=ret;
991 return ret;
992 }
993
994
995 #define BUFF_4D 36
996
fast_freq(unsigned int tab[2][2],long double * buff,int buflen)997 long double fast_freq( unsigned int tab[2][2] , long double * buff, int buflen){
998 int x0 = -1;
999 if(buff && buflen > BUFF_4D * BUFF_4D * BUFF_4D * BUFF_4D && tab[0][0] < BUFF_4D && tab[0][1] < BUFF_4D && tab[1][0] < BUFF_4D && tab[1][1] < BUFF_4D ){
1000 x0 = buflen + tab[0][0]* BUFF_4D * BUFF_4D * BUFF_4D + tab[0][1] * BUFF_4D * BUFF_4D + tab[1][0] * BUFF_4D + tab[1][1];
1001 if(buff[x0]!=0) return buff[x0];
1002
1003 }
1004 long double ret = fast_fractorial(tab[0][0]+tab[0][1],buff,buflen)+fast_fractorial(tab[1][0]+tab[1][1],buff,buflen) +
1005 fast_fractorial(tab[0][0]+tab[1][0],buff,buflen)+fast_fractorial(tab[0][1]+tab[1][1],buff,buflen) -
1006 fast_fractorial(tab[0][0],buff,buflen) - fast_fractorial(tab[0][1],buff,buflen) -
1007 fast_fractorial(tab[1][0],buff,buflen) - fast_fractorial(tab[1][1],buff,buflen) -
1008 fast_fractorial(tab[0][0] + tab[0][1] + tab[1][0] + tab[1][1],buff,buflen);
1009 if(x0>=0) buff[x0] = ret;
1010 return ret;
1011 }
1012
1013
1014
fast_fisher_test_one_side(unsigned int a,unsigned int b,unsigned int c,unsigned int d,long double * buff,int buflen)1015 double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c, unsigned int d, long double * buff, int buflen){
1016 unsigned int tab[2][2];
1017 long double P0, P_delta, ret;
1018
1019 tab[0][0]=a;
1020 tab[0][1]=b;
1021 tab[1][0]=c;
1022 tab[1][1]=d;
1023
1024 int x_a, y_a, x_min=-1, y_min=-1;
1025 unsigned int min_a = 0xffffffff;
1026 for(x_a = 0; x_a < 2; x_a++)
1027 for(y_a = 0; y_a < 2; y_a++){
1028 if(tab[x_a][y_a]<min_a){
1029 min_a = tab[x_a][y_a];
1030 x_min = x_a;
1031 y_min = y_a;
1032 }
1033 }
1034 P_delta = fast_freq(tab, buff, buflen);
1035 P0 = ret = exp(P_delta);
1036 //printf("P0=%LG\n", P0);
1037 if(min_a>0){
1038 unsigned int Qa = min_a;
1039 unsigned int Qb = tab[x_min][!y_min];
1040 unsigned int Qc = tab[!x_min][y_min];
1041 unsigned int Qd = tab[!x_min][!y_min];
1042 for(; ; ){
1043 min_a --;
1044 Qb++;Qc++;
1045 P_delta -= logl(Qb*Qc);
1046 P_delta += logl(Qa*Qd);
1047 Qa--;Qd--;
1048 ret += expl(P_delta);
1049 // printf("%LG %LG %LG\n", ret, 1 - (ret - P0), expl(P_delta));
1050 if(min_a < 1) break;
1051 }
1052 }
1053
1054 double ret1 = ret, ret2 = 1 - (ret - P0);
1055
1056 if(min(ret1, ret2) < 0){
1057 return 0.0;
1058 }
1059 return min(ret1, ret2) ;
1060
1061 }
1062
load_features_annotation(char * file_name,int file_type,char * gene_id_column,char * transcript_id_column,char * used_feature_type,void * context,int do_add_feature (char * gene_name,char * transcript_name,char * chro_name,unsigned int start,unsigned int end,int is_negative_strand,void * context))1063 int load_features_annotation(char * file_name, int file_type, char * gene_id_column, char * transcript_id_column, char * used_feature_type,
1064 void * context, int do_add_feature(char * gene_name, char * transcript_name, char * chro_name, unsigned int start, unsigned int end, int is_negative_strand, void * context) ){
1065 char * file_line = malloc(MAX_LINE_LENGTH+1);
1066 int lineno = 0, is_GFF_txid_warned = 0, is_GFF_geneid_warned = 0, loaded_features = 0;
1067 autozip_fp afp;
1068 int aret = autozip_open(file_name, &afp);
1069
1070 if(aret < 0){
1071 SUBREADprintf("Error: unable to open the annotation file : %s\n", file_name);
1072 return -1;
1073 }
1074
1075 while(1){
1076 int is_tx_id_found = 0, is_gene_id_found = 0, is_negative_strand = -1;
1077 char * token_temp = NULL, * feature_name, *transcript_id = NULL, * chro_name = NULL;
1078 char feature_name_tmp[FEATURE_NAME_LENGTH], txid_tmp[FEATURE_NAME_LENGTH];
1079 feature_name = feature_name_tmp;
1080
1081 unsigned int start = 0, end = 0;
1082 aret = autozip_gets(&afp, file_line, MAX_LINE_LENGTH);
1083 if(aret < 1) break;
1084
1085 lineno++;
1086 if(is_comment_line(file_line, file_type, lineno-1))continue;
1087
1088 if(file_type == FILE_TYPE_RSUBREAD)
1089 {
1090 feature_name = strtok_r(file_line,"\t",&token_temp);
1091 int feature_name_len = strlen(feature_name);
1092 if(feature_name_len > FEATURE_NAME_LENGTH) feature_name[FEATURE_NAME_LENGTH -1 ] = 0;
1093
1094 chro_name = strtok_r(NULL,"\t", &token_temp);
1095 int chro_name_len = strlen(chro_name);
1096 if(chro_name_len > MAX_CHROMOSOME_NAME_LEN) chro_name[MAX_CHROMOSOME_NAME_LEN -1 ] = 0;
1097
1098 char * start_ptr = strtok_r(NULL,"\t", &token_temp);
1099 char * end_ptr = strtok_r(NULL,"\t", &token_temp);
1100
1101 if(start_ptr == NULL || end_ptr == NULL){
1102 SUBREADprintf("\nWarning: the format on the %d-th line is wrong.\n", lineno);
1103 }
1104 long long int tv1 = atoll(start_ptr);
1105 long long int tv2 = atoll(end_ptr);
1106
1107 if( isdigit(start_ptr[0]) && isdigit(end_ptr[0]) ){
1108 if(strlen(start_ptr) > 10 || strlen(end_ptr) > 10 || tv1 > 0x7fffffff || tv2> 0x7fffffff){
1109 SUBREADprintf("\nError: Line %d contains a coordinate greater than 2^31.\n", lineno);
1110 return -2;
1111 }
1112 }else{
1113 SUBREADprintf("\nError: Line %d contains a format error. The expected annotation format is SAF.\n", lineno);
1114 return -2;
1115 }
1116
1117 start = atoi(start_ptr);// start
1118 end = atoi(end_ptr);//end
1119
1120 char * strand_str = strtok_r(NULL,"\t", &token_temp);
1121 if(strand_str == NULL)
1122 is_negative_strand = 0;
1123 else
1124 is_negative_strand = ('-' ==strand_str[0]);
1125
1126 is_gene_id_found = 1;
1127
1128 } else if(file_type == FILE_TYPE_GTF) {
1129 chro_name = strtok_r(file_line,"\t",&token_temp);
1130 strtok_r(NULL,"\t", &token_temp);// source
1131 char * feature_type = strtok_r(NULL,"\t", &token_temp);// feature_type
1132
1133 if(strcmp(feature_type, used_feature_type)==0){
1134 char * start_ptr = strtok_r(NULL,"\t", &token_temp);
1135 char * end_ptr = strtok_r(NULL,"\t", &token_temp);
1136
1137
1138 if(start_ptr == NULL || end_ptr == NULL){
1139 SUBREADprintf("\nWarning: the format on the %d-th line is wrong.\n", lineno);
1140 }
1141 long long int tv1 = atoll(start_ptr);
1142 long long int tv2 = atoll(end_ptr);
1143
1144
1145 if( isdigit(start_ptr[0]) && isdigit(end_ptr[0]) ){
1146 if(strlen(start_ptr) > 10 || strlen(end_ptr) > 10 || tv1 > 0x7fffffff || tv2> 0x7fffffff){
1147 SUBREADprintf("\nError: Line %d contains a coordinate greater than 2^31.\n", lineno);
1148 return -2;
1149 }
1150 }else{
1151 SUBREADprintf("\nError: Line %d contains a format error. The expected annotation format is GTF/GFF.\n", lineno);
1152 return -2;
1153 }
1154 start = atoi(start_ptr);// start
1155 end = atoi(end_ptr);//end
1156
1157 if(start < 1 || end<1 || start > 0x7fffffff || end > 0x7fffffff || start > end)
1158 SUBREADprintf("\nWarning: the feature on the %d-th line has zero coordinate or zero lengths\n\n", lineno);
1159
1160
1161 strtok_r(NULL,"\t", &token_temp);// score
1162 is_negative_strand = ('-' == (strtok_r(NULL,"\t", &token_temp)[0]));//strand
1163 strtok_r(NULL,"\t",&token_temp); // "frame"
1164 char * extra_attrs = strtok_r(NULL,"\t",&token_temp); // name_1 "val1"; name_2 "val2"; ...
1165 if(extra_attrs && (strlen(extra_attrs)>2)){
1166 int attr_val_len = GTF_extra_column_value(extra_attrs , gene_id_column , feature_name_tmp, FEATURE_NAME_LENGTH);
1167 if(attr_val_len>0) is_gene_id_found=1;
1168
1169 if(transcript_id_column){
1170 transcript_id = txid_tmp;
1171 attr_val_len = GTF_extra_column_value(extra_attrs , transcript_id_column , txid_tmp, FEATURE_NAME_LENGTH);
1172 if(attr_val_len>0) is_tx_id_found=1;
1173 else transcript_id = NULL;
1174 }
1175 }
1176
1177 if(!is_gene_id_found){
1178 if(!is_GFF_geneid_warned){
1179 int ext_att_len = strlen(extra_attrs);
1180 if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
1181 SUBREADprintf("\nERROR: failed to find the gene identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s'.\nAn example of attributes included in your GTF annotation is '%s'.\n\n", gene_id_column, extra_attrs);
1182 }
1183 is_GFF_geneid_warned++;
1184 }
1185
1186 if(transcript_id_column && !is_tx_id_found){
1187 if(!is_GFF_txid_warned){
1188 int ext_att_len = strlen(extra_attrs);
1189 if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
1190 SUBREADprintf("\nERROR: failed to find the transcript identifier attribute in the 9th column of the provided GTF file.\nThe specified transcript identifier attribute is '%s'.\nAn example of attributes included in your GTF annotation is '%s'.\n\n", transcript_id_column, extra_attrs);
1191 }
1192 is_GFF_txid_warned++;
1193 }
1194 }
1195 }
1196
1197 if(is_gene_id_found){
1198 do_add_feature(feature_name, transcript_id, chro_name, start, end, is_negative_strand, context);
1199 loaded_features++;
1200 }
1201
1202 }
1203 autozip_close(&afp);
1204 free(file_line);
1205
1206 if(is_GFF_txid_warned || is_GFF_geneid_warned)return -2;
1207 if(loaded_features<1){
1208 SUBREADprintf("\nERROR: No feature was loaded from the annotation file. Please check if the annotation format was correctly specified, and also if the feature type was correctly specified if the annotation is in the GTF format.\n\n");
1209 return -2;
1210 }
1211 return loaded_features;
1212 }
1213
load_alias_table(char * fname)1214 HashTable * load_alias_table(char * fname) {
1215 FILE * fp = f_subr_open(fname, "r");
1216 if(!fp)
1217 {
1218 print_in_box(80,0,0,"WARNING unable to open alias file '%s'", fname);
1219 return NULL;
1220 }
1221
1222 char * fl = malloc(2000);
1223
1224 HashTable * ret = HashTableCreate(1013);
1225 HashTableSetDeallocationFunctions(ret, free, free);
1226 HashTableSetKeyComparisonFunction(ret, fc_strcmp_chro);
1227 HashTableSetHashFunction(ret, fc_chro_hash);
1228
1229 while (1)
1230 {
1231 char *ret_fl = fgets(fl, 1999, fp);
1232 if(!ret_fl) break;
1233 if(fl[0]=='#') continue;
1234 char * sam_chr = NULL;
1235 char * anno_chr = strtok_r(fl, ",", &sam_chr);
1236 if((!sam_chr)||(!anno_chr)) continue;
1237
1238 sam_chr[strlen(sam_chr)-1]=0;
1239 if(sam_chr[strlen(sam_chr)-1]=='\r') sam_chr[strlen(sam_chr)-1]=0;
1240 char * anno_chr_buf = malloc(strlen(anno_chr)+1);
1241 strcpy(anno_chr_buf, anno_chr);
1242 char * sam_chr_buf = malloc(strlen(sam_chr)+1);
1243 strcpy(sam_chr_buf, sam_chr);
1244 // SUBREADprintf("ALIAS_LOAD : '%s' => '%s'\n", sam_chr_buf, anno_chr_buf);
1245 HashTablePut(ret, sam_chr_buf, anno_chr_buf);
1246 }
1247
1248 fclose(fp);
1249
1250 free(fl);
1251 return ret;
1252 }
1253
rebuild_command_line(char ** lineptr,int argc,char ** argv)1254 int rebuild_command_line(char ** lineptr, int argc, char ** argv){
1255 int linecap = 1000,c;
1256 *lineptr = malloc(linecap);
1257 **lineptr = 0;
1258
1259 for(c = 0; c<argc;c++)
1260 {
1261 int cline = strlen(argv[c]);
1262 if(strlen(*lineptr) + 100 + cline > linecap)
1263 {
1264 linecap += cline+500;
1265 *lineptr = realloc(*lineptr, linecap);
1266 }
1267 sprintf((*lineptr) + strlen(*lineptr), "\"%s\" ", argv[c]);
1268 }
1269
1270 return strlen(*lineptr);
1271 }
1272
1273 // The following code was downloaded from
1274 // https://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
1275 // on the 23rd of Nov, 2018. I (Yang LIAO) made minor changes to the code.
1276 //
1277 // It says "Public Domain" in the licence but the original author should be acknwoledged (only for expressing my gratitude):
1278 // Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
1279
1280 /* Any 32-bit or wider unsigned integer data type will do */
1281
1282 /*
1283 * The basic HelpFuncMD5 functions.
1284 *
1285 * F and G are optimized compared to their RFC 1321 definitions for
1286 * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
1287 * implementation.
1288 */
1289 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
1290 #define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
1291 #define H(x, y, z) (((x) ^ (y)) ^ (z))
1292 #define H2(x, y, z) ((x) ^ ((y) ^ (z)))
1293 #define I(x, y, z) ((y) ^ ((x) | ~(z)))
1294
1295 /*
1296 * The HelpFuncMD5 transformation for all four rounds.
1297 */
1298 #define STEP(f, a, b, c, d, x, t, s) \
1299 (a) += f((b), (c), (d)) + (x) + (t); \
1300 (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
1301 (a) += (b);
1302
1303 /*
1304 * SET reads 4 input bytes in little-endian byte order and stores them in a
1305 * properly aligned word in host byte order.
1306 *
1307 * The check for little-endian architectures that tolerate unaligned memory
1308 * accesses is just an optimization. Nothing will break if it fails to detect
1309 * a suitable architecture.
1310 *
1311 * Unfortunately, this optimization may be a C strict aliasing rules violation
1312 * if the caller's data buffer has effective type that cannot be aliased by
1313 * HelpFuncMD5_u32plus. In practice, this problem may occur if these HelpFuncMD5 routines are
1314 * inlined into a calling function, or with future and dangerously advanced
1315 * link-time optimizations. For the time being, keeping these HelpFuncMD5 routines in
1316 * their own translation unit avoids the problem.
1317 */
1318 #define SET(n) (*(HelpFuncMD5_u32plus *)&ptr[(n) * 4])
1319 #define GET(n) SET(n)
1320 /*
1321 * This processes one or more 64-byte data blocks, but does NOT update the bit
1322 * counters. There are no alignment requirements.
1323 */
body(HelpFuncMD5_CTX * ctx,const void * data,unsigned long size)1324 static const void *body(HelpFuncMD5_CTX *ctx, const void *data, unsigned long size)
1325 {
1326 const unsigned char *ptr;
1327 HelpFuncMD5_u32plus a, b, c, d;
1328 HelpFuncMD5_u32plus saved_a, saved_b, saved_c, saved_d;
1329
1330 ptr = (const unsigned char *)data;
1331
1332 a = ctx->a;
1333 b = ctx->b;
1334 c = ctx->c;
1335 d = ctx->d;
1336
1337 do {
1338 saved_a = a;
1339 saved_b = b;
1340 saved_c = c;
1341 saved_d = d;
1342
1343 /* Round 1 */
1344 STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
1345 STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
1346 STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
1347 STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
1348 STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
1349 STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
1350 STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
1351 STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
1352 STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
1353 STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
1354 STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
1355 STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
1356 STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
1357 STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
1358 STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
1359 STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
1360
1361 /* Round 2 */
1362 STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
1363 STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
1364 STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
1365 STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
1366 STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
1367 STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
1368 STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
1369 STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
1370 STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
1371 STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
1372 STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
1373 STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
1374 STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
1375 STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
1376 STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
1377 STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
1378
1379 /* Round 3 */
1380 STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
1381 STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
1382 STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
1383 STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
1384 STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
1385 STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
1386 STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
1387 STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
1388 STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
1389 STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
1390 STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
1391 STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
1392 STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
1393 STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
1394 STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
1395 STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
1396
1397 /* Round 4 */
1398 STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
1399 STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
1400 STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
1401 STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
1402 STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
1403 STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
1404 STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
1405 STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
1406 STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
1407 STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
1408 STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
1409 STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
1410 STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
1411 STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
1412 STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
1413 STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
1414
1415 a += saved_a;
1416 b += saved_b;
1417 c += saved_c;
1418 d += saved_d;
1419
1420 ptr += 64;
1421 } while (size -= 64);
1422
1423 ctx->a = a;
1424 ctx->b = b;
1425 ctx->c = c;
1426 ctx->d = d;
1427
1428 return ptr;
1429 }
1430
HelpFuncMD5_Init(HelpFuncMD5_CTX * ctx)1431 void HelpFuncMD5_Init(HelpFuncMD5_CTX *ctx)
1432 {
1433 ctx->a = 0x67452301;
1434 ctx->b = 0xefcdab89;
1435 ctx->c = 0x98badcfe;
1436 ctx->d = 0x10325476;
1437
1438 ctx->lo = 0;
1439 ctx->hi = 0;
1440 }
1441
HelpFuncMD5_Update(HelpFuncMD5_CTX * ctx,const void * data,unsigned long size)1442 void HelpFuncMD5_Update(HelpFuncMD5_CTX *ctx, const void *data, unsigned long size)
1443 {
1444 HelpFuncMD5_u32plus saved_lo;
1445 unsigned long used, available;
1446
1447 saved_lo = ctx->lo;
1448 if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
1449 ctx->hi++;
1450 ctx->hi += size >> 29;
1451
1452 used = saved_lo & 0x3f;
1453
1454 if (used) {
1455 available = 64 - used;
1456
1457 if (size < available) {
1458 memcpy(&ctx->buffer[used], data, size);
1459 return;
1460 }
1461
1462 memcpy(&ctx->buffer[used], data, available);
1463 data = (const unsigned char *)data + available;
1464 size -= available;
1465 body(ctx, ctx->buffer, 64);
1466 }
1467
1468 if (size >= 64) {
1469 data = body(ctx, data, size & ~(unsigned long)0x3f);
1470 size &= 0x3f;
1471 }
1472
1473 memcpy(ctx->buffer, data, size);
1474 }
1475
1476 #define OUT(dst, src) \
1477 (dst)[0] = (unsigned char)(src); \
1478 (dst)[1] = (unsigned char)((src) >> 8); \
1479 (dst)[2] = (unsigned char)((src) >> 16); \
1480 (dst)[3] = (unsigned char)((src) >> 24);
1481
HelpFuncMD5_Final(unsigned char * result,HelpFuncMD5_CTX * ctx)1482 void HelpFuncMD5_Final(unsigned char *result, HelpFuncMD5_CTX *ctx)
1483 {
1484 unsigned long used, available;
1485
1486 used = ctx->lo & 0x3f;
1487
1488 ctx->buffer[used++] = 0x80;
1489
1490 available = 64 - used;
1491
1492 if (available < 8) {
1493 memset(&ctx->buffer[used], 0, available);
1494 body(ctx, ctx->buffer, 64);
1495 used = 0;
1496 available = 64;
1497 }
1498
1499 memset(&ctx->buffer[used], 0, available - 8);
1500
1501 ctx->lo <<= 3;
1502 OUT(&ctx->buffer[56], ctx->lo)
1503 OUT(&ctx->buffer[60], ctx->hi)
1504
1505 body(ctx, ctx->buffer, 64);
1506
1507 OUT(&result[0], ctx->a)
1508 OUT(&result[4], ctx->b)
1509 OUT(&result[8], ctx->c)
1510 OUT(&result[12], ctx->d)
1511
1512 memset(ctx, 0, sizeof(*ctx));
1513 }
1514
1515
Helper_md5sum(char * plain_txt,int plain_len,unsigned char * bin_md5_buff)1516 void Helper_md5sum(char * plain_txt, int plain_len, unsigned char * bin_md5_buff){
1517 HelpFuncMD5_CTX ctx;
1518 HelpFuncMD5_Init(&ctx);
1519 HelpFuncMD5_Update(&ctx, plain_txt, plain_len);
1520 HelpFuncMD5_Final(bin_md5_buff, &ctx);
1521 }
1522
plain_txt_to_long_rand(char * plain_txt,int plain_len)1523 unsigned long long plain_txt_to_long_rand(char * plain_txt, int plain_len){
1524 unsigned char md5v[16];
1525 Helper_md5sum(plain_txt, plain_len, md5v);
1526 unsigned long long ret = 0;
1527 memcpy(&ret, md5v, sizeof(ret));
1528 return ret;
1529 }
1530
md5txt(char * s)1531 void md5txt(char *s){
1532 unsigned char md5v[16];
1533 unsigned long long randv;
1534 Helper_md5sum(s, strlen(s), md5v);
1535 int x;
1536 randv = plain_txt_to_long_rand(s, strlen(s));
1537
1538 for(x=0;x<16;x++){
1539 SUBREADprintf("%02X", md5v[x]);
1540 }
1541 #ifdef __MINGW32__
1542 SUBREADprintf("\t'%s'\t%016I64X\t%I64u\t%.9f\n", s, randv, randv, randv*1./0xffffffffffffffffllu);
1543 #else
1544 SUBREADprintf("\t'%s'\t%016llX\t%llu\t%.9f\n", s, randv, randv, randv*1./0xffffffffffffffffllu);
1545 #endif
1546 }
1547
1548 //#define TESTHelpermain main
1549
TESTHelpermain()1550 int TESTHelpermain(){
1551 int xx;
1552 for(xx=0; xx<100; xx++){
1553 char xt[10];
1554 sprintf(xt, "%08d", xx);
1555 md5txt(xt);
1556 }
1557
1558 return 0;
1559 }
1560
1561
1562 // The following code was retrieved from
1563 // https://github.com/google/omaha/blob/master/third_party/lzma/files/C/Helper_Sha256.c
1564 // Thank the authors -- Igor Pavlov and Wei Dai.
1565
1566 /* Crypto/Helper_Sha256.c -- SHA-256 Hash
1567 2010-06-11 : Igor Pavlov : Public domain
1568 This code is based on public domain code from Wei Dai's Crypto++ library. */
1569 #define SHA256_DIGEST_SIZE 32
1570
1571 typedef struct
1572 {
1573 unsigned int state[8];
1574 unsigned long long count;
1575 unsigned char buffer[64];
1576 } CHelper_Sha256;
1577
1578 /* define it for speed optimization */
1579 /* #define _SHA256_UNROLL */
1580 /* #define _SHA256_UNROLL2 */
1581
Helper_Sha256_Init(CHelper_Sha256 * p)1582 void Helper_Sha256_Init(CHelper_Sha256 *p)
1583 {
1584 p->state[0] = 0x6a09e667;
1585 p->state[1] = 0xbb67ae85;
1586 p->state[2] = 0x3c6ef372;
1587 p->state[3] = 0xa54ff53a;
1588 p->state[4] = 0x510e527f;
1589 p->state[5] = 0x9b05688c;
1590 p->state[6] = 0x1f83d9ab;
1591 p->state[7] = 0x5be0cd19;
1592 p->count = 0;
1593 }
1594
1595 #define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
1596 #define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
1597
1598 #define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
1599 #define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
1600 #define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
1601 #define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
1602
1603 #define blk0(i) (W[i] = data[i])
1604 #define blk2(i) (W[i&15] += s1(W[(i-2)&15]) + W[(i-7)&15] + s0(W[(i-15)&15]))
1605
1606 #define Ch(x,y,z) (z^(x&(y^z)))
1607 #define Maj(x,y,z) ((x&y)|(z&(x|y)))
1608
1609 #define a(i) T[(0-(i))&7]
1610 #define b(i) T[(1-(i))&7]
1611 #define c(i) T[(2-(i))&7]
1612 #define d(i) T[(3-(i))&7]
1613 #define e(i) T[(4-(i))&7]
1614 #define f(i) T[(5-(i))&7]
1615 #define g(i) T[(6-(i))&7]
1616 #define h(i) T[(7-(i))&7]
1617
1618
1619 #ifdef _SHA256_UNROLL2
1620
1621 #define R(a,b,c,d,e,f,g,h, i) h += S1(e) + Ch(e,f,g) + K[i+j] + (j?blk2(i):blk0(i));\
1622 d += h; h += S0(a) + Maj(a, b, c)
1623
1624 #define RX_8(i) \
1625 R(a,b,c,d,e,f,g,h, i); \
1626 R(h,a,b,c,d,e,f,g, i+1); \
1627 R(g,h,a,b,c,d,e,f, i+2); \
1628 R(f,g,h,a,b,c,d,e, i+3); \
1629 R(e,f,g,h,a,b,c,d, i+4); \
1630 R(d,e,f,g,h,a,b,c, i+5); \
1631 R(c,d,e,f,g,h,a,b, i+6); \
1632 R(b,c,d,e,f,g,h,a, i+7)
1633
1634 #else
1635
1636 #define R(i) h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j?blk2(i):blk0(i));\
1637 d(i) += h(i); h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
1638
1639 #ifdef _SHA256_UNROLL
1640
1641 #define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
1642
1643 #endif
1644
1645 #endif
1646
1647 static const unsigned int K[64] = {
1648 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
1649 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
1650 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
1651 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
1652 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
1653 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
1654 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
1655 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
1656 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
1657 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
1658 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
1659 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
1660 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
1661 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
1662 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
1663 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
1664 };
1665
Helper_Sha256_Transform(unsigned int * state,const unsigned int * data)1666 static void Helper_Sha256_Transform(unsigned int *state, const unsigned int *data)
1667 {
1668 unsigned int W[16];
1669 unsigned j;
1670 #ifdef _SHA256_UNROLL2
1671 unsigned int a,b,c,d,e,f,g,h;
1672 a = state[0];
1673 b = state[1];
1674 c = state[2];
1675 d = state[3];
1676 e = state[4];
1677 f = state[5];
1678 g = state[6];
1679 h = state[7];
1680 #else
1681 unsigned int T[8];
1682 for (j = 0; j < 8; j++)
1683 T[j] = state[j];
1684 #endif
1685
1686 for (j = 0; j < 64; j += 16)
1687 {
1688 #if defined(_SHA256_UNROLL) || defined(_SHA256_UNROLL2)
1689 RX_8(0); RX_8(8);
1690 #else
1691 unsigned i;
1692 for (i = 0; i < 16; i++) { R(i); }
1693 #endif
1694 }
1695
1696 #ifdef _SHA256_UNROLL2
1697 state[0] += a;
1698 state[1] += b;
1699 state[2] += c;
1700 state[3] += d;
1701 state[4] += e;
1702 state[5] += f;
1703 state[6] += g;
1704 state[7] += h;
1705 #else
1706 for (j = 0; j < 8; j++)
1707 state[j] += T[j];
1708 #endif
1709
1710 /* Wipe variables */
1711 /* memset(W, 0, sizeof(W)); */
1712 /* memset(T, 0, sizeof(T)); */
1713 }
1714
1715 #undef S0
1716 #undef S1
1717 #undef s0
1718 #undef s1
1719
Helper_Sha256_WritecharBlock(CHelper_Sha256 * p)1720 static void Helper_Sha256_WritecharBlock(CHelper_Sha256 *p)
1721 {
1722 unsigned int data32[16];
1723 unsigned i;
1724 for (i = 0; i < 16; i++)
1725 data32[i] =
1726 ((unsigned int)(p->buffer[i * 4 ]) << 24) +
1727 ((unsigned int)(p->buffer[i * 4 + 1]) << 16) +
1728 ((unsigned int)(p->buffer[i * 4 + 2]) << 8) +
1729 ((unsigned int)(p->buffer[i * 4 + 3]));
1730 Helper_Sha256_Transform(p->state, data32);
1731 }
1732
Helper_Sha256_Update(CHelper_Sha256 * p,const char * data,size_t size)1733 void Helper_Sha256_Update(CHelper_Sha256 *p, const char *data, size_t size)
1734 {
1735 unsigned int curBufferPos = (unsigned int)p->count & 0x3F;
1736 while (size > 0)
1737 {
1738 p->buffer[curBufferPos++] = (char)*data;
1739 p->count++;
1740
1741 data++;
1742 size--;
1743 if (curBufferPos == 64)
1744 {
1745 curBufferPos = 0;
1746 Helper_Sha256_WritecharBlock(p);
1747 }
1748 }
1749 }
1750
Helper_Sha256_Final(unsigned char * digest,CHelper_Sha256 * p)1751 void Helper_Sha256_Final( unsigned char *digest , CHelper_Sha256 *p)
1752 {
1753 unsigned long long lenInBits = (p->count << 3);
1754 unsigned int curBufferPos = (unsigned int)p->count & 0x3F;
1755 unsigned i;
1756 p->buffer[curBufferPos++] = 0x80;
1757 while (curBufferPos != (64 - 8))
1758 {
1759 curBufferPos &= 0x3F;
1760 if (curBufferPos == 0)
1761 Helper_Sha256_WritecharBlock(p);
1762 p->buffer[curBufferPos++] = 0;
1763 }
1764 for (i = 0; i < 8; i++)
1765 {
1766 p->buffer[curBufferPos++] = (char)(lenInBits >> 56);
1767 lenInBits <<= 8;
1768 }
1769 Helper_Sha256_WritecharBlock(p);
1770
1771 for (i = 0; i < 8; i++)
1772 {
1773 *digest++ = (unsigned char)(p->state[i] >> 24);
1774 *digest++ = (unsigned char)(p->state[i] >> 16);
1775 *digest++ = (unsigned char)(p->state[i] >> 8);
1776 *digest++ = (unsigned char)(p->state[i]);
1777 }
1778 }
1779
Helper_sha256sum(char * plain_txt,int plain_len,unsigned char * bin_md5_buff)1780 void Helper_sha256sum(char * plain_txt, int plain_len, unsigned char * bin_md5_buff){
1781 CHelper_Sha256 ctx;
1782 Helper_Sha256_Init(&ctx);
1783 Helper_Sha256_Update(&ctx, plain_txt, plain_len);
1784 Helper_Sha256_Final(bin_md5_buff, &ctx);
1785 }
1786
sha256txt(char * s)1787 void sha256txt(char *s){
1788 unsigned char sha256v[32];
1789 Helper_sha256sum(s, strlen(s), sha256v);
1790 int x;
1791
1792 for(x=0;x<32;x++){
1793 SUBREADprintf("%02X", sha256v[x]);
1794 }
1795 SUBREADprintf("\t'%s'\n", s);
1796 }
1797
1798 //#define TEST256Helpermain main
TEST256Helpermain()1799 int TEST256Helpermain(){
1800 int xx;
1801 for(xx=0; xx<100; xx++){
1802 char xt[10];
1803 sprintf(xt, "%08d", xx);
1804 sha256txt(xt);
1805 }
1806 return 0;
1807 }
1808
1809
1810
1811 // The code of Helper_erfinv() was retreived from
1812 // https://github.com/antelopeusersgroup/antelope_contrib/blob/master/lib/location/libgenloc/erfinv.c
1813 // I (Yang LIAO) adopted and modified it because it is under the New BSD license.
1814 //
1815 // Copyright (c) 2014 Indiana University
1816 // All rights reserved.
1817 //
1818 // Written by Prof. Gary L. Pavlis, Dept. of Geol. Sci.,
1819 // Indiana University, Bloomington, IN
1820 //
1821 // This software is licensed under the New BSD license:
1822 //
1823 // Redistribution and use in source and binary forms,
1824 // with or without modification, are permitted provided
1825 // that the following conditions are met:
1826 //
1827 // Redistributions of source code must retain the above
1828 // copyright notice, this list of conditions and the
1829 // following disclaimer.
1830 //
1831 // Redistributions in binary form must reproduce the
1832 // above copyright notice, this list of conditions and
1833 // the following disclaimer in the documentation and/or
1834 // other materials provided with the distribution.
1835 //
1836 // Neither the name of Indiana University nor
1837 // the names of its contributors may be used to endorse
1838 // or promote products derived from this software without
1839 // specific prior written permission.
1840 //
1841 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
1842 // CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
1843 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1844 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
1845 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
1846 // THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
1847 // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1848 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
1849 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
1850 // USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1851 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
1852 // IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
1853 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
1854 // USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1855 // POSSIBILITY OF SUCH DAMAGE.
1856
1857
1858 #include <math.h>
1859 #include <float.h>
1860 #define MAXDOUBLE DBL_MAX
1861
1862 #define CENTRAL_RANGE 0.7
Helper_erfinv(double y)1863 double Helper_erfinv( double y) {
1864 double x=0,z,num,dem; /*working variables */
1865 /* coefficients in rational expansion */
1866 double a[4]={ 0.886226899, -1.645349621, 0.914624893, -0.140543331};
1867 double b[4]={-2.118377725, 1.442710462, -0.329097515, 0.012229801};
1868 double c[4]={-1.970840454, -1.624906493, 3.429567803, 1.641345311};
1869 double d[2]={ 3.543889200, 1.637067800};
1870 if(fabs(y) > 1.0) return (atof("NaN")); /* This needs IEEE constant*/
1871 if(fabs(y) == 1.0) return((copysign(1.0,y))*MAXDOUBLE);
1872 if( fabs(y) <= CENTRAL_RANGE )
1873 {
1874 z = y*y;
1875 num = (((a[3]*z + a[2])*z + a[1])*z + a[0]);
1876 dem = ((((b[3]*z + b[2])*z + b[1])*z +b[0])*z + 1.0);
1877 x = y*num/dem;
1878 }
1879 else if( (fabs(y) > CENTRAL_RANGE) && (fabs(y) < 1.0) )
1880 {
1881 z = sqrt(-log((1.0-fabs(y))/2.0));
1882 num = ((c[3]*z + c[2])*z + c[1])*z + c[0];
1883 dem = (d[1]*z + d[0])*z + 1.0;
1884 x = (copysign(1.0,y))*num/dem;
1885 }
1886 /* Two steps of Newton-Raphson correction */
1887 x = x - (erf(x) - y)/( (2.0/sqrt(M_PI))*exp(-x*x));
1888 x = x - (erf(x) - y)/( (2.0/sqrt(M_PI))*exp(-x*x));
1889
1890 return(x);
1891 }
1892
1893
inverse_sample_normal(double p)1894 double inverse_sample_normal(double p){
1895 return Helper_erfinv(2*p-1)*1.4142135623730950488;
1896 }
1897
1898 //#define TestNormalMain main
TestNormalMain()1899 void TestNormalMain(){
1900 int i;
1901 for(i=0; i<=10; i++){
1902 double ii = i*1./10;
1903 double p_0 = inverse_sample_normal(ii);
1904 SUBREADprintf("p of %.1f = %.40f\n\n" , ii, p_0);
1905 }
1906 }
1907
1908
1909 #ifndef MAKE_STANDALONE
1910
1911 message_queue_t mt_message_queue;
1912
1913
safeRprintf(char * fmt,...)1914 int safeRprintf(char * fmt,...){
1915 va_list args;
1916 va_start(args , fmt);
1917 FILE * ofp = fopen("/tmp/del4-Rlog.txt", "a");
1918 //fprintf(ofp, "NEW LOG : %p \"%s\"\n", fmt, fmt);
1919 fprintf(ofp, fmt, args);
1920 fclose(ofp);
1921 va_end(args);
1922 return 0;
1923 }
1924
1925 #endif
1926
msgqu_destroy()1927 void msgqu_destroy(){
1928 #ifndef MAKE_STANDALONE
1929 ArrayListDestroy(mt_message_queue.message_queue);
1930 subread_destroy_lock(&mt_message_queue.queue_lock);
1931 #endif
1932 }
1933
msgqu_notifyFinish()1934 void msgqu_notifyFinish(){
1935 #ifndef MAKE_STANDALONE
1936 subread_lock_occupy(&mt_message_queue.queue_lock);
1937 mt_message_queue.is_finished = 1;
1938 subread_lock_release(&mt_message_queue.queue_lock);
1939 #endif
1940 }
1941
msgqu_init(int thread_mode)1942 void msgqu_init(int thread_mode){
1943 #ifndef MAKE_STANDALONE
1944 mt_message_queue.is_thread_mode = thread_mode;
1945 if(!thread_mode) return;
1946 mt_message_queue.is_finished = 0;
1947 mt_message_queue.message_queue = ArrayListCreate(100);
1948 ArrayListSetDeallocationFunction(mt_message_queue.message_queue,free);
1949 subread_init_lock(&mt_message_queue.queue_lock);
1950 // subread_init_lock(&mt_message_queue.queue_notifier);
1951 #endif
1952 }
1953
msgqu_main_loop()1954 void msgqu_main_loop(){
1955 #ifndef MAKE_STANDALONE
1956 while(1){
1957 subread_lock_occupy(&mt_message_queue.queue_lock);
1958 while(0< mt_message_queue.message_queue->numOfElements){
1959 char * amsg = ArrayListShift(mt_message_queue.message_queue);
1960 Rprintf("%s", amsg);
1961 free(amsg);
1962 }
1963 if(mt_message_queue.is_finished) break;
1964 subread_lock_release(&mt_message_queue.queue_lock);
1965 usleep(40000);
1966 }
1967 #endif
1968 }
1969
1970 #define MSGQU_LINE_SIZE 3000
msgqu_printf(const char * fmt,...)1971 void msgqu_printf(const char * fmt, ...){
1972 va_list args;
1973 va_start(args , fmt);
1974 char * obuf = malloc(MSGQU_LINE_SIZE+1);
1975 vsnprintf( obuf, MSGQU_LINE_SIZE, fmt, args );
1976 va_end(args);
1977 obuf[MSGQU_LINE_SIZE]=0;
1978 #ifdef MAKE_STANDALONE
1979 fputs(obuf, stderr);
1980 free(obuf);
1981 #else
1982 if(!mt_message_queue.is_thread_mode){
1983 Rprintf("%s", obuf);
1984 free(obuf);
1985 return;
1986 }
1987
1988 subread_lock_occupy(&mt_message_queue.queue_lock);
1989 ArrayListPush(mt_message_queue.message_queue, obuf);
1990 subread_lock_release(&mt_message_queue.queue_lock);
1991 // it will be freed in the Main thread after printing.
1992 #endif
1993 }
1994
1995 #ifdef MAKE_TEST_MYRAND_TEST
1996
main()1997 int main(){
1998 myrand_srand(time(NULL));
1999 int x;
2000 for(x = 0; x<100000; x++){
2001 int mv = myrand_rand();
2002 SUBREADprintf("%d\n", mv);
2003 }
2004 return 0;
2005 }
2006
2007 #endif
2008
2009
2010
2011
2012 // retrieved from https://github.com/kokke/tiny-TNbignum-c/
2013 // The original code was in public domain
2014
2015 /* Functions for shifting number in-place. */
2016 static void _lshift_one_bit(struct bn* a);
2017 static void _rshift_one_bit(struct bn* a);
2018 static void _lshift_word(struct bn* a, int nwords);
2019 static void _rshift_word(struct bn* a, int nwords);
2020
2021
2022
2023 /* Public / Exported functions. */
TNbignum_init(struct bn * n)2024 void TNbignum_init(struct bn* n)
2025 {
2026 require(n, "n is null");
2027
2028 int i;
2029 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2030 {
2031 n->array[i] = 0;
2032 }
2033 }
2034
2035
TNbignum_from_int(struct bn * n,DTYPE_TMP i)2036 void TNbignum_from_int(struct bn* n, DTYPE_TMP i)
2037 {
2038 require(n, "n is null");
2039
2040 TNbignum_init(n);
2041
2042 /* Endianness issue if machine is not little-endian? */
2043 #ifdef WORD_SIZE
2044 #if (WORD_SIZE == 1)
2045 n->array[0] = (i & 0x000000ff);
2046 n->array[1] = (i & 0x0000ff00) >> 8;
2047 n->array[2] = (i & 0x00ff0000) >> 16;
2048 n->array[3] = (i & 0xff000000) >> 24;
2049 #elif (WORD_SIZE == 2)
2050 n->array[0] = (i & 0x0000ffff);
2051 n->array[1] = (i & 0xffff0000) >> 16;
2052 #elif (WORD_SIZE == 4)
2053 n->array[0] = i;
2054 DTYPE_TMP num_32 = 32;
2055 DTYPE_TMP tmp = i >> num_32; /* bit-shift with U64 operands to force 64-bit results */
2056 n->array[1] = tmp;
2057 #endif
2058 #endif
2059 }
2060
2061
TNbignum_to_int(struct bn * n)2062 int TNbignum_to_int(struct bn* n)
2063 {
2064 require(n, "n is null");
2065
2066 int ret = 0;
2067
2068 /* Endianness issue if machine is not little-endian? */
2069 #if (WORD_SIZE == 1)
2070 ret += n->array[0];
2071 ret += n->array[1] << 8;
2072 ret += n->array[2] << 16;
2073 ret += n->array[3] << 24;
2074 #elif (WORD_SIZE == 2)
2075 ret += n->array[0];
2076 ret += n->array[1] << 16;
2077 #elif (WORD_SIZE == 4)
2078 ret += n->array[0];
2079 #endif
2080
2081 return ret;
2082 }
2083
2084
TNbignum_from_string(struct bn * n,char * str,int nbytes)2085 void TNbignum_from_string(struct bn* n, char* str, int nbytes)
2086 {
2087 require(n, "n is null");
2088 require(str, "str is null");
2089 require(nbytes > 0, "nbytes must be positive");
2090 require((nbytes & 1) == 0, "string format must be in hex -> equal number of bytes");
2091
2092 TNbignum_init(n);
2093
2094 DTYPE tmp; /* DTYPE is defined in bn.h - uint{8,16,32,64}_t */
2095 int i = nbytes - (2 * WORD_SIZE); /* index into string */
2096 int j = 0; /* index into array */
2097
2098 /* reading last hex-byte "MSB" from string first -> big endian */
2099 /* MSB ~= most significant byte / block ? :) */
2100 while (i >= 0)
2101 {
2102 tmp = 0;
2103 sscanf(&str[i], SSCANF_FORMAT_STR, &tmp);
2104 //printf("SCAN_IN %d : v=%u\n", i, tmp);
2105 n->array[j] = tmp;
2106 i -= (2 * WORD_SIZE); /* step WORD_SIZE hex-byte(s) back in the string. */
2107 j += 1; /* step one element forward in the array. */
2108 }
2109 }
2110
2111
TNbignum_to_string(struct bn * n,char * str,int nbytes)2112 void TNbignum_to_string(struct bn* n, char* str, int nbytes)
2113 {
2114 require(n, "n is null");
2115 require(str, "str is null");
2116 require(nbytes > 0, "nbytes must be positive");
2117 require((nbytes & 1) == 0, "string format must be in hex -> equal number of bytes");
2118
2119 int j = BN_ARRAY_SIZE - 1; /* index into array - reading "MSB" first -> big-endian */
2120 int i = 0; /* index into string representation. */
2121
2122 /* reading last array-element "MSB" first -> big endian */
2123 while ((j >= 0) && (nbytes > (i + 1)))
2124 {
2125 sprintf(&str[i], SPRINTF_FORMAT_STR, n->array[j]);
2126 //printf("WRITE:%d %s\n" , i, str+i);
2127 i += (2 * WORD_SIZE); /* step WORD_SIZE hex-byte(s) forward in the string. */
2128 j -= 1; /* step one element back in the array. */
2129 }
2130
2131 /* Count leading zeros: */
2132 j = 0;
2133 while (str[j] == '0')
2134 {
2135 j += 1;
2136 }
2137
2138 /* Move string j places ahead, effectively skipping leading zeros */
2139 for (i = 0; i < (nbytes - j); ++i)
2140 {
2141 str[i] = str[i + j];
2142 }
2143
2144 /* Zero-terminate string */
2145 str[i] = 0;
2146 }
2147
2148
TNbignum_dec(struct bn * n)2149 void TNbignum_dec(struct bn* n)
2150 {
2151 require(n, "n is null");
2152
2153 DTYPE tmp; /* copy of n */
2154 DTYPE res;
2155
2156 int i;
2157 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2158 {
2159 tmp = n->array[i];
2160 res = tmp - 1;
2161 n->array[i] = res;
2162
2163 if (!(res > tmp))
2164 {
2165 break;
2166 }
2167 }
2168 }
2169
2170
TNbignum_inc(struct bn * n)2171 void TNbignum_inc(struct bn* n)
2172 {
2173 require(n, "n is null");
2174
2175 DTYPE res;
2176 DTYPE_TMP tmp; /* copy of n */
2177
2178 int i;
2179 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2180 {
2181 tmp = n->array[i];
2182 res = tmp + 1;
2183 n->array[i] = res;
2184
2185 if (res > tmp)
2186 {
2187 break;
2188 }
2189 }
2190 }
2191
2192
TNbignum_add(struct bn * a,struct bn * b,struct bn * c)2193 void TNbignum_add(struct bn* a, struct bn* b, struct bn* c)
2194 {
2195 require(a, "a is null");
2196 require(b, "b is null");
2197 require(c, "c is null");
2198
2199 DTYPE_TMP tmp;
2200 int carry = 0;
2201 int i;
2202 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2203 {
2204 tmp = (DTYPE_TMP)a->array[i] + b->array[i] + carry;
2205 carry = (tmp > MAX_VAL);
2206 c->array[i] = (tmp & MAX_VAL);
2207 }
2208 }
2209
2210
TNbignum_sub(struct bn * a,struct bn * b,struct bn * c)2211 void TNbignum_sub(struct bn* a, struct bn* b, struct bn* c)
2212 {
2213 require(a, "a is null");
2214 require(b, "b is null");
2215 require(c, "c is null");
2216
2217 DTYPE_TMP res;
2218 DTYPE_TMP tmp1;
2219 DTYPE_TMP tmp2;
2220 int borrow = 0;
2221 int i;
2222 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2223 {
2224 tmp1 = (DTYPE_TMP)a->array[i] + (MAX_VAL + 1); /* + number_base */
2225 tmp2 = (DTYPE_TMP)b->array[i] + borrow;;
2226 res = (tmp1 - tmp2);
2227 c->array[i] = (DTYPE)(res & MAX_VAL); /* "modulo number_base" == "% (number_base - 1)" if number_base is 2^N */
2228 borrow = (res <= MAX_VAL);
2229 }
2230 }
2231
2232
TNbignum_mul(struct bn * a,struct bn * b,struct bn * c)2233 void TNbignum_mul(struct bn* a, struct bn* b, struct bn* c)
2234 {
2235 require(a, "a is null");
2236 require(b, "b is null");
2237 require(c, "c is null");
2238
2239 struct bn row;
2240 struct bn tmp;
2241 int i, j;
2242
2243 TNbignum_init(c);
2244
2245 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2246 {
2247 TNbignum_init(&row);
2248
2249 for (j = 0; j < BN_ARRAY_SIZE; ++j)
2250 {
2251 if (i + j < BN_ARRAY_SIZE)
2252 {
2253 TNbignum_init(&tmp);
2254 DTYPE_TMP intermediate = ((DTYPE_TMP)a->array[i] * (DTYPE_TMP)b->array[j]);
2255 TNbignum_from_int(&tmp, intermediate);
2256 _lshift_word(&tmp, i + j);
2257 TNbignum_add(&tmp, &row, &row);
2258 }
2259 }
2260 TNbignum_add(c, &row, c);
2261 }
2262 }
2263
2264
TNbignum_div(struct bn * a,struct bn * b,struct bn * c)2265 void TNbignum_div(struct bn* a, struct bn* b, struct bn* c)
2266 {
2267 require(a, "a is null");
2268 require(b, "b is null");
2269 require(c, "c is null");
2270
2271 struct bn current;
2272 struct bn denom;
2273 struct bn tmp;
2274
2275 TNbignum_from_int(¤t, 1); // int current = 1;
2276 TNbignum_assign(&denom, b); // denom = b
2277 TNbignum_assign(&tmp, a); // tmp = a
2278
2279 const DTYPE_TMP half_max = 1 + (DTYPE_TMP)(MAX_VAL / 2);
2280 int overflow = 0;
2281 while (TNbignum_cmp(&denom, a) != LARGER) // while (denom <= a) {
2282 {
2283 if (denom.array[BN_ARRAY_SIZE - 1] >= half_max)
2284 {
2285 overflow = 1;
2286 break;
2287 }
2288 _lshift_one_bit(¤t); // current <<= 1;
2289 _lshift_one_bit(&denom); // denom <<= 1;
2290 }
2291 if (!overflow)
2292 {
2293 _rshift_one_bit(&denom); // denom >>= 1;
2294 _rshift_one_bit(¤t); // current >>= 1;
2295 }
2296 TNbignum_init(c); // int answer = 0;
2297
2298 while (!TNbignum_is_zero(¤t)) // while (current != 0)
2299 {
2300 if (TNbignum_cmp(&tmp, &denom) != SMALLER) // if (dividend >= denom)
2301 {
2302 TNbignum_sub(&tmp, &denom, &tmp); // dividend -= denom;
2303 TNbignum_or(c, ¤t, c); // answer |= current;
2304 }
2305 _rshift_one_bit(¤t); // current >>= 1;
2306 _rshift_one_bit(&denom); // denom >>= 1;
2307 } // return answer;
2308 }
2309
2310
TNbignum_lshift(struct bn * a,struct bn * b,int nbits)2311 void TNbignum_lshift(struct bn* a, struct bn* b, int nbits)
2312 {
2313 require(a, "a is null");
2314 require(b, "b is null");
2315 require(nbits >= 0, "no negative shifts");
2316
2317 TNbignum_assign(b, a);
2318 /* Handle shift in multiples of word-size */
2319 const int nbits_pr_word = (WORD_SIZE * 8);
2320 int nwords = nbits / nbits_pr_word;
2321 if (nwords != 0)
2322 {
2323 _lshift_word(b, nwords);
2324 nbits -= (nwords * nbits_pr_word);
2325 }
2326
2327 if (nbits != 0)
2328 {
2329 int i;
2330 for (i = (BN_ARRAY_SIZE - 1); i > 0; --i)
2331 {
2332 b->array[i] = (b->array[i] << nbits) | (b->array[i - 1] >> ((8 * WORD_SIZE) - nbits));
2333 }
2334 b->array[i] <<= nbits;
2335 }
2336 }
2337
2338
TNbignum_rshift(struct bn * a,struct bn * b,int nbits)2339 void TNbignum_rshift(struct bn* a, struct bn* b, int nbits)
2340 {
2341 require(a, "a is null");
2342 require(b, "b is null");
2343 require(nbits >= 0, "no negative shifts");
2344
2345 TNbignum_assign(b, a);
2346 /* Handle shift in multiples of word-size */
2347 const int nbits_pr_word = (WORD_SIZE * 8);
2348 int nwords = nbits / nbits_pr_word;
2349 if (nwords != 0)
2350 {
2351 _rshift_word(b, nwords);
2352 nbits -= (nwords * nbits_pr_word);
2353 }
2354
2355 if (nbits != 0)
2356 {
2357 int i;
2358 for (i = 0; i < (BN_ARRAY_SIZE - 1); ++i)
2359 {
2360 b->array[i] = (b->array[i] >> nbits) | (b->array[i + 1] << ((8 * WORD_SIZE) - nbits));
2361 }
2362 b->array[i] >>= nbits;
2363 }
2364
2365 }
2366
2367
TNbignum_mod(struct bn * a,struct bn * b,struct bn * c)2368 void TNbignum_mod(struct bn* a, struct bn* b, struct bn* c)
2369 {
2370 /*
2371 * Take divmod and throw away div part
2372 * */
2373 require(a, "a is null");
2374 require(b, "b is null");
2375 require(c, "c is null");
2376
2377 struct bn tmp;
2378
2379 TNbignum_divmod(a,b,&tmp,c);
2380 }
2381
TNbignum_divmod(struct bn * a,struct bn * b,struct bn * c,struct bn * d)2382 void TNbignum_divmod(struct bn* a, struct bn* b, struct bn* c, struct bn* d)
2383 {
2384 /*
2385 * Puts a%b in d
2386 * and a/b in c
2387 *
2388 * mod(a,b) = a - ((a / b) * b)
2389 *
2390 * example:
2391 * mod(8, 3) = 8 - ((8 / 3) * 3) = 2
2392 * */
2393 require(a, "a is null");
2394 require(b, "b is null");
2395 require(c, "c is null");
2396
2397 struct bn tmp;
2398
2399 /* c = (a / b) */
2400 TNbignum_div(a, b, c);
2401
2402 /* tmp = (c * b) */
2403 TNbignum_mul(c, b, &tmp);
2404
2405 /* c = a - tmp */
2406 TNbignum_sub(a, &tmp, d);
2407 }
2408
2409
TNbignum_and(struct bn * a,struct bn * b,struct bn * c)2410 void TNbignum_and(struct bn* a, struct bn* b, struct bn* c)
2411 {
2412 require(a, "a is null");
2413 require(b, "b is null");
2414 require(c, "c is null");
2415
2416 int i;
2417 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2418 {
2419 c->array[i] = (a->array[i] & b->array[i]);
2420 }
2421 }
2422
2423
TNbignum_or(struct bn * a,struct bn * b,struct bn * c)2424 void TNbignum_or(struct bn* a, struct bn* b, struct bn* c)
2425 {
2426 require(a, "a is null");
2427 require(b, "b is null");
2428 require(c, "c is null");
2429
2430 int i;
2431 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2432 {
2433 c->array[i] = (a->array[i] | b->array[i]);
2434 }
2435 }
2436
2437
TNbignum_xor(struct bn * a,struct bn * b,struct bn * c)2438 void TNbignum_xor(struct bn* a, struct bn* b, struct bn* c)
2439 {
2440 require(a, "a is null");
2441 require(b, "b is null");
2442 require(c, "c is null");
2443
2444 int i;
2445 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2446 {
2447 c->array[i] = (a->array[i] ^ b->array[i]);
2448 }
2449 }
2450
2451
TNbignum_cmp(struct bn * a,struct bn * b)2452 int TNbignum_cmp(struct bn* a, struct bn* b)
2453 {
2454 require(a, "a is null");
2455 require(b, "b is null");
2456
2457 int i = BN_ARRAY_SIZE;
2458 do
2459 {
2460 i -= 1; /* Decrement first, to start with last array element */
2461 if (a->array[i] > b->array[i])
2462 {
2463 return LARGER;
2464 }
2465 else if (a->array[i] < b->array[i])
2466 {
2467 return SMALLER;
2468 }
2469 }
2470 while (i != 0);
2471
2472 return EQUAL;
2473 }
2474
2475
TNbignum_is_zero(struct bn * n)2476 int TNbignum_is_zero(struct bn* n)
2477 {
2478 require(n, "n is null");
2479
2480 int i;
2481 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2482 {
2483 if (n->array[i])
2484 {
2485 return 0;
2486 }
2487 }
2488
2489 return 1;
2490 }
2491
2492
TNbignum_pow(struct bn * a,struct bn * b,struct bn * c)2493 void TNbignum_pow(struct bn* a, struct bn* b, struct bn* c)
2494 {
2495 require(a, "a is null");
2496 require(b, "b is null");
2497 require(c, "c is null");
2498
2499 struct bn tmp;
2500
2501 TNbignum_init(c);
2502
2503 if (TNbignum_cmp(b, c) == EQUAL)
2504 {
2505 /* Return 1 when exponent is 0 -- n^0 = 1 */
2506 TNbignum_inc(c);
2507 }
2508 else
2509 {
2510 /* Copy a -> tmp */
2511 TNbignum_assign(&tmp, a);
2512
2513 TNbignum_dec(b);
2514
2515 /* Begin summing products: */
2516 while (!TNbignum_is_zero(b))
2517 {
2518
2519 /* c = tmp * tmp */
2520 TNbignum_mul(&tmp, a, c);
2521 /* Decrement b by one */
2522 TNbignum_dec(b);
2523
2524 TNbignum_assign(&tmp, c);
2525 }
2526
2527 /* c = tmp */
2528 TNbignum_assign(c, &tmp);
2529 }
2530 }
2531
TNbignum_isqrt(struct bn * a,struct bn * b)2532 void TNbignum_isqrt(struct bn *a, struct bn* b)
2533 {
2534 require(a, "a is null");
2535 require(b, "b is null");
2536
2537 struct bn low, high, mid, tmp;
2538
2539 TNbignum_init(&low);
2540 TNbignum_assign(&high, a);
2541 TNbignum_rshift(&high, &mid, 1);
2542 TNbignum_inc(&mid);
2543
2544 while (TNbignum_cmp(&high, &low) > 0)
2545 {
2546 TNbignum_mul(&mid, &mid, &tmp);
2547 if (TNbignum_cmp(&tmp, a) > 0)
2548 {
2549 TNbignum_assign(&high, &mid);
2550 TNbignum_dec(&high);
2551 }
2552 else
2553 {
2554 TNbignum_assign(&low, &mid);
2555 }
2556 TNbignum_sub(&high,&low,&mid);
2557 _rshift_one_bit(&mid);
2558 TNbignum_add(&low,&mid,&mid);
2559 TNbignum_inc(&mid);
2560 }
2561 TNbignum_assign(b,&low);
2562 }
2563
2564
TNbignum_assign(struct bn * dst,struct bn * src)2565 void TNbignum_assign(struct bn* dst, struct bn* src)
2566 {
2567 require(dst, "dst is null");
2568 require(src, "src is null");
2569
2570 int i;
2571 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2572 {
2573 dst->array[i] = src->array[i];
2574 }
2575 }
2576
2577
2578 /* Private / Static functions. */
_rshift_word(struct bn * a,int nwords)2579 static void _rshift_word(struct bn* a, int nwords)
2580 {
2581 /* Naive method: */
2582 require(a, "a is null");
2583 require(nwords >= 0, "no negative shifts");
2584
2585 int i;
2586 if (nwords >= BN_ARRAY_SIZE)
2587 {
2588 for (i = 0; i < BN_ARRAY_SIZE; ++i)
2589 {
2590 a->array[i] = 0;
2591 }
2592 return;
2593 }
2594
2595 for (i = 0; i < BN_ARRAY_SIZE - nwords; ++i)
2596 {
2597 a->array[i] = a->array[i + nwords];
2598 }
2599 for (; i < BN_ARRAY_SIZE; ++i)
2600 {
2601 a->array[i] = 0;
2602 }
2603 }
2604
2605
_lshift_word(struct bn * a,int nwords)2606 static void _lshift_word(struct bn* a, int nwords)
2607 {
2608 require(a, "a is null");
2609 require(nwords >= 0, "no negative shifts");
2610
2611 int i;
2612 /* Shift whole words */
2613 for (i = (BN_ARRAY_SIZE - 1); i >= nwords; --i)
2614 {
2615 a->array[i] = a->array[i - nwords];
2616 }
2617 /* Zero pad shifted words. */
2618 for (; i >= 0; --i)
2619 {
2620 a->array[i] = 0;
2621 }
2622 }
2623
2624
_lshift_one_bit(struct bn * a)2625 static void _lshift_one_bit(struct bn* a)
2626 {
2627 require(a, "a is null");
2628
2629 int i;
2630 for (i = (BN_ARRAY_SIZE - 1); i > 0; --i)
2631 {
2632 a->array[i] = (a->array[i] << 1) | (a->array[i - 1] >> ((8 * WORD_SIZE) - 1));
2633 }
2634 a->array[0] <<= 1;
2635 }
2636
2637
_rshift_one_bit(struct bn * a)2638 static void _rshift_one_bit(struct bn* a)
2639 {
2640 require(a, "a is null");
2641
2642 int i;
2643 for (i = 0; i < (BN_ARRAY_SIZE - 1); ++i)
2644 {
2645 a->array[i] = (a->array[i] >> 1) | (a->array[i + 1] << ((8 * WORD_SIZE) - 1));
2646 }
2647 a->array[BN_ARRAY_SIZE - 1] >>= 1;
2648 }
2649
2650 /*
2651 void TNbignum_pow_then_mod( struct bn * m, int e, struct bn * modulus, struct bn * res ){
2652 int xx;
2653 struct bn res, remove_times, remove_value, tmpv;
2654 TNbignum_from_string(&res, "01", 2);
2655 for(xx = 0; xx < 31; xx++){
2656 TNbignum_mul( m, m, tmpv );
2657 TNbignum_div( tmpv, modulus, remove_times);
2658 TNbignum_mul( remove_times, modulus, remove_value);
2659 TNbignum_sub( tmpv , remove_value, m);
2660 e = e >> 1;
2661 }
2662
2663 }
2664 */
2665
2666 #ifdef HELPER_TEST_BIGNUM
main()2667 void main(){
2668 char restxt[1030];
2669 struct bn n,e,res;
2670 int exponent = 35;
2671 restxt[0]=0;
2672
2673 char * modulus = "AC97941B27989F9DFAC7FB03A951BF8D39CE60248D8FB5A2C4CDAFAE2431667DD5EBB46B6FF9BBEF5DE2B587CF6B06B5D63BB6B71B35C43FA5141F156A1AC77231FD5D916053CA3E5FEAD3AFB10877D1A5440119C6420A6C205758D01B5B75F5B420F9E99815820B389F0BFBA00B60C0A18612C268C0ED9B263B02DF526EED851581E0BDE4D46227DAA2EAA8D13EDF3476C20B30C3188FA3D0FBC1636C5477A744D0A042BA27C77E52CEFAE0B4005BF2E22001BA0C40A5898F3C8FE664969E72F208FDF8D7DC3D039690911E32DF6B4948280EAF67952D231907CAA1A3D1433D5A8E5325E3B849721871458E8EA5860E4EFFCCC61D82BE5EED6EAF9C6A292A2B";
2674
2675 TNbignum_from_string(&n, modulus,strlen(modulus) );
2676 TNbignum_pow_then_mod(&m, exponent, &n, &res); // (m ^ e) mod N. E doesn't need to be a BIG NUMBER.
2677 TNbignum_to_string(&res, restxt, 1030);
2678 printf("RES=%s\n", restxt);
2679
2680
2681 }
2682
2683 #endif
2684
2685 #ifdef HELPER_TEST_CRC32
main()2686 void main(){
2687 long c32=0;
2688 c32 = crc32(c32, "Hello.",6);
2689 printf("CRC = %08lX\n", c32);
2690
2691 c32=0;
2692 c32 = crc32(c32, "Hel",3);
2693 c32 = crc32(c32, "lo",2);
2694 c32 = crc32(c32, ".",1);
2695 c32 = crc32(c32, "",0);
2696 printf("CRC = %08lX\n", c32);
2697 }
2698
2699 #endif
2700
2701 #ifdef __MINGW32__
2702 #include <windows.h>
2703 #endif
2704
2705
get_free_total_mem(size_t * total,size_t * free_mem)2706 int get_free_total_mem(size_t * total, size_t * free_mem){
2707
2708 #if defined(__FreeBSD__) || defined(__DragonFly__)
2709 return -1;
2710 #endif
2711
2712 #ifdef __MINGW32__
2713 MEMORYSTATUSEX statex;
2714 statex.dwLength = sizeof (statex);
2715 GlobalMemoryStatusEx (&statex);
2716 (*total) = statex.ullTotalPhys;
2717 (*free_mem) = statex.ullAvailPhys;
2718 return 0;
2719 #else
2720 #ifdef MACOS
2721 mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
2722 vm_statistics_data_t vmstat;
2723 int page_size = getpagesize();
2724 if(KERN_SUCCESS != host_statistics(mach_host_self(), HOST_VM_INFO, (host_info_t)&vmstat, &count))
2725 return -1;
2726 //printf("PSIZE=%d\nACT=%u; INACT=%u; FREE=%u\n", page_size, vmstat.active_count, vmstat.inactive_count, vmstat.free_count);
2727 size_t btlen = sizeof(*total);
2728 if(sysctl( (int[]) { CTL_HW, HW_MEMSIZE }, 2, total, &btlen, NULL, 0)) return -1;
2729 *free_mem = (vmstat.free_count + vmstat.inactive_count) * 1llu * page_size;
2730 return 0;
2731 #else
2732 struct sysinfo sinf;
2733 sysinfo(&sinf);
2734 size_t cached_mem = get_sys_mem_info("Cached:");
2735 if(cached_mem<0)cached_mem=0;
2736 *free_mem = cached_mem + sinf.bufferram+sinf.freeram;
2737 *total = sinf.totalram;
2738 return 0;
2739 #endif
2740 #endif
2741 }
2742
worker_master_mutex_init(worker_master_mutex_t * wmt,int all_workers)2743 void worker_master_mutex_init(worker_master_mutex_t * wmt, int all_workers){
2744 memset(wmt,0,sizeof(worker_master_mutex_t));
2745 wmt -> conds_worker_wait = malloc(sizeof(pthread_cond_t) * all_workers);
2746 wmt -> mutexs_worker_wait = malloc(sizeof(pthread_mutex_t) * all_workers);
2747 wmt -> mutex_with_master = calloc(sizeof(int), all_workers);
2748 wmt -> worker_is_working = calloc(sizeof(int), all_workers);
2749
2750 wmt -> workers = all_workers;
2751 int x1;
2752 for(x1=0;x1<all_workers;x1++){
2753 pthread_cond_init(&wmt -> conds_worker_wait [x1], NULL);
2754 pthread_mutex_init(&wmt -> mutexs_worker_wait [x1], NULL);
2755 }
2756 }
2757
worker_thread_start(worker_master_mutex_t * wmt,int worker_id)2758 void worker_thread_start(worker_master_mutex_t * wmt, int worker_id){
2759 pthread_mutex_lock(&wmt->mutexs_worker_wait[worker_id]);
2760 wmt -> mutex_with_master[worker_id] = 0;
2761 }
2762
worker_master_mutex_destroy(worker_master_mutex_t * wmt)2763 void worker_master_mutex_destroy(worker_master_mutex_t * wmt){
2764 int x1;
2765 for(x1=0;x1<wmt -> workers;x1++){
2766 pthread_mutex_destroy(&wmt -> mutexs_worker_wait [x1]);
2767 pthread_cond_destroy(&wmt -> conds_worker_wait [x1]);
2768 }
2769 free( wmt -> worker_is_working);
2770 free( wmt -> mutex_with_master);
2771 free( wmt -> conds_worker_wait);
2772 free( wmt -> mutexs_worker_wait);
2773 }
2774
worker_wait_for_job(worker_master_mutex_t * wmt,int worker_id)2775 int worker_wait_for_job(worker_master_mutex_t * wmt, int worker_id){
2776 pthread_mutex_trylock(wmt->mutexs_worker_wait + worker_id);
2777 wmt->worker_is_working[worker_id] = 0;
2778 while(1){
2779 pthread_cond_wait(&wmt->conds_worker_wait[worker_id], &wmt->mutexs_worker_wait[worker_id]);
2780 if(wmt -> all_terminate)
2781 pthread_mutex_unlock(&wmt->mutexs_worker_wait[worker_id]);
2782 if(wmt -> mutex_with_master[worker_id] == 0)break;
2783 // otherwise, it is a spurious wakeup
2784 }
2785 return wmt -> all_terminate;
2786 }
2787
master_wait_for_job_done(worker_master_mutex_t * wmt,int worker_id)2788 void master_wait_for_job_done(worker_master_mutex_t * wmt, int worker_id){
2789 if(!wmt -> mutex_with_master[worker_id] ){
2790 while(1){
2791 pthread_mutex_lock(&wmt->mutexs_worker_wait[worker_id]);
2792 if(!wmt->worker_is_working[worker_id])break;
2793 // In a rare situation, the worker hasn't been scheduled after the last notification.
2794 // Namely, the master will notify twice the worker when the worker thread hasn't started at all. The second notification will cause no condition signaled.
2795 // This test is to prevent such a situation.
2796 pthread_mutex_unlock(&wmt->mutexs_worker_wait[worker_id]);
2797 usleep(50);
2798 }
2799 if(wmt->worker_is_working[worker_id])
2800 SUBREADprintf("ERROR 3: HOW CAN I HAVE THIS LOCK : %d?? TH_%d\n", wmt->worker_is_working[worker_id], worker_id);
2801 }
2802 wmt -> mutex_with_master[worker_id] = 1;
2803 }
2804
2805 // master collects results between master_wait_for_job_done and master_notify_worker
master_notify_worker(worker_master_mutex_t * wmt,int worker_id)2806 void master_notify_worker(worker_master_mutex_t * wmt, int worker_id){
2807 if(!wmt -> mutex_with_master[worker_id])SUBREADprintf("ERROR 2: HOW CAN I NOT HAVE THE LOCK : %d ; TERM=%d\n", worker_id, wmt -> all_terminate);
2808
2809 wmt -> worker_is_working[worker_id] = 1;
2810 wmt -> mutex_with_master[worker_id] = 0;
2811 pthread_cond_signal(&wmt->conds_worker_wait[worker_id]);
2812 pthread_mutex_unlock(&wmt->mutexs_worker_wait[worker_id]);
2813 }
2814
2815 // this function must be called when the master thread has all the worker locks in control.
2816 // ie all the workers should be in the "wait_for_job" status.
terminate_workers(worker_master_mutex_t * wmt)2817 void terminate_workers(worker_master_mutex_t * wmt){
2818 int x1;
2819 wmt -> all_terminate = 1;
2820 for(x1=0;x1<wmt -> workers;x1++)
2821 master_notify_worker(wmt,x1);
2822 }
2823
windows_memmem(const void * haystack_start,size_t haystack_len,const void * needle_start,size_t needle_len)2824 void *windows_memmem(const void *haystack_start, size_t haystack_len, const void *needle_start, size_t needle_len)
2825 {
2826
2827 const unsigned char *haystack = (const unsigned char *) haystack_start;
2828 const unsigned char *needle = (const unsigned char *) needle_start;
2829 const unsigned char *h = NULL;
2830 const unsigned char *n = NULL;
2831 size_t x = needle_len;
2832
2833 /* The first occurrence of the empty string is deemed to occur at
2834 * the beginning of the string. */
2835 if (needle_len == 0)
2836 return (void *) haystack_start;
2837
2838 /* Sanity check, otherwise the loop might search through the whole
2839 * memory. */
2840 if (haystack_len < needle_len)
2841 return NULL;
2842
2843 for (; *haystack && haystack_len--; haystack++) {
2844
2845 x = needle_len;
2846 n = needle;
2847 h = haystack;
2848
2849 if (haystack_len < needle_len)
2850 break;
2851
2852 if ((*haystack != *needle) || ( *haystack + needle_len != *needle + needle_len))
2853 continue;
2854
2855 for (; x ; h++ , n++) {
2856 x--;
2857
2858 if (*h != *n)
2859 break;
2860
2861 if (x == 0)
2862 return (void *)haystack;
2863 }
2864 }
2865
2866 return NULL;
2867 }
2868