1 /***************************************************************
2 
3    The Subread software package is free software package:
4    you can redistribute it and/or modify it under the terms
5    of the GNU General Public License as published by the
6    Free Software Foundation, either version 3 of the License,
7    or (at your option) any later version.
8 
9    Subread is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty
11    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 
13    See the GNU General Public License for more details.
14 
15    Authors: Drs Yang Liao and Wei Shi
16 
17   ***************************************************************/
18 
19 
20 #include <ctype.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <stdarg.h>
24 #include <math.h>
25 #include <unistd.h>
26 #include <pthread.h>
27 
28 
29 #ifdef MACOS
30 
31 #include <sys/types.h>
32 #include <sys/socket.h>
33 #include <sys/ioctl.h>
34 #include <sys/sysctl.h>
35 #include <net/if.h>
36 #include <net/if_dl.h>
37 #include <netinet/in.h>
38 #include <arpa/inet.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <mach/mach.h>
42 #include <mach/vm_statistics.h>
43 
44 #else
45 
46 #ifndef __MINGW32__
47 #include <sys/ioctl.h>
48 #include <netinet/in.h>
49 #include <net/if.h>
50 #include <sys/sysinfo.h>
51 #endif
52 #include <sys/types.h>
53 #endif
54 
55 
56 #include "subread.h"
57 #include "input-files.h"
58 #include "seek-zlib.h"
59 #include "gene-algorithms.h"
60 #include "HelperFunctions.h"
61 
get_sys_mem_info(char * keyword)62 size_t get_sys_mem_info(char * keyword){
63 	FILE * mfp = fopen("/proc/meminfo","r");
64 	if(mfp==NULL) return -1;
65 	char linebuf[1000];
66 	size_t ret = -1;
67 	while(1){
68 		char * rret = fgets(linebuf, 999, mfp);
69 		if(memcmp( keyword, linebuf, strlen(keyword) ) == 0 && strstr(linebuf," kB")) {
70 			ret=0;
71 			int ii ,state=0;
72 			for(ii=strlen(keyword);; ii++){
73 				//SUBREADprintf("CH[%d] = %d '%c' at state %d\n", ii, linebuf[ii], linebuf[ii], state);
74 				if(state == 0 && linebuf[ii]==' ') state = 1;
75 				if(state == 1 && linebuf[ii]!=' ') state = 2;
76 				if(state == 2 && linebuf[ii]==' ') state = 9999;
77 
78 				if(state == 2 && !isdigit(linebuf[ii])){
79 					SUBREADprintf("WRONG MEMORY INFO '%s'\n", linebuf);
80 					ret = -1;
81 					break;
82 				}
83 				if(state == 2) ret = ret*10 + ( linebuf[ii] - '0' );
84 				if(state >= 9999) {
85 					ret *=1024;
86 					break;
87 				}
88 			}
89 		}
90 		if(!rret) break;
91 	}
92 	fclose(mfp);
93 	return ret;
94 }
95 
get_short_fname(char * lname)96 char * get_short_fname(char * lname){
97 	char * ret = lname;
98 
99 	int x1;
100 	for(x1 = strlen(lname)-2; x1>=0; x1--){
101 		if(lname [x1] == '/'||lname [x1] == '\\'){
102 			ret = lname + x1 + 1;
103 			break;
104 		}
105 	}
106 	return ret;
107 }
108 
109 
110 
111 // This assumes the first part of Cigar has differet strandness to the main part of the cigar.
112 // Pos is the LAST WANTED BASE location before the first strand jump (split by 'b' or 'n').
113 // The first base in the read actually has a larger coordinate than Pos.
114 // new_cigar has to be at least 100 bytes.
reverse_cigar(unsigned int pos,char * cigar,char * new_cigar)115 unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar) {
116 	int cigar_cursor = 0;
117 	new_cigar[0]=0;
118 	unsigned int tmpi=0;
119 	int last_piece_end = 0;
120 	int last_sec_start = 0;
121 	unsigned int chro_pos = pos, this_section_start = pos, ret = pos;
122 	int is_positive_dir = 0;
123 	int read_cursor = 0;
124 	int section_no = 0;
125 
126 	for(cigar_cursor = 0 ;  ; cigar_cursor++)
127 	{
128 		if( cigar [cigar_cursor] == 'n' ||  cigar [cigar_cursor] == 'b' ||  cigar [cigar_cursor] == 0)
129 		{
130 			int xk1, jmlen=0, nclen=strlen(new_cigar);
131 			char jump_mode [13];
132 
133 			if(cigar [cigar_cursor] !=0)
134 			{
135 				sprintf(jump_mode, "%u%c", tmpi,  cigar [cigar_cursor] == 'b'?'n':'b');
136 				jmlen = strlen(jump_mode);
137 			}
138 
139 			for(xk1=nclen-1;xk1>=0; xk1--)
140 				new_cigar[ xk1 +  last_piece_end + jmlen - last_sec_start ] = new_cigar[ xk1 ];
141 			new_cigar [nclen + jmlen + last_piece_end - last_sec_start ] = 0;
142 
143 			memcpy(new_cigar , jump_mode, jmlen);
144 			memcpy(new_cigar + jmlen , cigar + last_sec_start, last_piece_end - last_sec_start);
145 
146 			last_sec_start = cigar_cursor+1;
147 
148 			if(is_positive_dir && cigar [cigar_cursor] !=0)
149 			{
150 				if(cigar [cigar_cursor] == 'b') chro_pos -= tmpi - read_cursor - 1;
151 				else	chro_pos += tmpi - read_cursor - 1;
152 			}
153 			if((!is_positive_dir) && cigar [cigar_cursor] !=0)
154 			{
155 				if(cigar [cigar_cursor] == 'b') chro_pos = this_section_start - tmpi - read_cursor - 1;
156 				else	chro_pos = this_section_start + tmpi - read_cursor - 1;
157 			}
158 
159 			this_section_start = chro_pos;
160 
161 			if(section_no == 0)
162 				ret = chro_pos;
163 
164 			is_positive_dir = ! is_positive_dir;
165 			section_no++;
166 			tmpi=0;
167 		}
168 		else if(isalpha(cigar [cigar_cursor]))
169 		{
170 			if(cigar [cigar_cursor]=='M' || cigar [cigar_cursor] == 'S')
171 				read_cursor += tmpi;
172 			tmpi=0;
173 			last_piece_end = cigar_cursor+1;
174 		}
175 		else tmpi = tmpi*10 + (cigar [cigar_cursor] - '0');
176 
177 		if(cigar [cigar_cursor] == 0)break;
178 	}
179 
180 	SUBREADprintf("REV CIGAR: %s  =>  %s\n", cigar, new_cigar);
181 	return ret;
182 }
183 
find_left_end_cigar(unsigned int right_pos,char * cigar)184 unsigned int find_left_end_cigar(unsigned int right_pos, char * cigar){
185 	int delta_from_right = 0;
186 	int cigar_cursor = 0;
187 	unsigned int tmpi = 0;
188 	while(1){
189 		int nch = cigar[cigar_cursor++];
190 		if(nch == 0) break;
191 		if(isdigit(nch)){
192 			tmpi = tmpi * 10 + nch - '0';
193 		}else{
194 			if(nch == 'M'||nch == 'D' || nch == 'N'){
195 				delta_from_right +=tmpi;
196 			}
197 			tmpi = 0;
198 		}
199 	}
200 	return right_pos - delta_from_right;
201 }
202 
203 
contig_fasta_int2base(int v)204 char contig_fasta_int2base(int v){
205 	if(v == 1) return 'A';
206 	if(v == 2) return 'T';
207 	if(v == 3) return 'G';
208 	if(v == 4) return 'C';
209 	return 'N';
210 }
211 
contig_fasta_base2int(char base)212 int contig_fasta_base2int(char base){
213 	base = tolower(base);
214 	if((base) == 'a'){ return 1;}
215 	else if((base) == 't' || (base) == 'u'){ return 2;}
216 	else if((base) == 'g'){ return 3;}
217 	else if((base) == 'c'){ return 4;}
218 	else return 15 ;
219 }
220 
get_contig_fasta(fasta_contigs_t * tab,char * chro,unsigned int pos,int len,char * out_bases)221 int get_contig_fasta(fasta_contigs_t * tab, char * chro, unsigned int pos, int len, char * out_bases){
222 	unsigned int this_size = HashTableGet( tab -> size_table, chro ) - NULL;
223 	if(this_size > 0){
224 		if(this_size >= len && pos <= this_size - len){
225 			char * bin_block = HashTableGet(tab -> contig_table, chro );
226 			unsigned int bin_byte = pos / 2;
227 			int bin_bit = 4*(pos % 2), x1;
228 
229 			for(x1 = 0 ;x1 < len; x1++)
230 			{
231 				int bin_int = (bin_block[bin_byte] >> bin_bit) & 0xf;
232 				if(bin_bit == 4) bin_byte++;
233 				bin_bit = (bin_bit == 4)?0:4;
234 				out_bases[x1] = contig_fasta_int2base(bin_int);
235 			}
236 
237 			return 0;
238 		}
239 	}
240 	return 1;
241 }
242 
destroy_contig_fasta(fasta_contigs_t * tab)243 void destroy_contig_fasta(fasta_contigs_t * tab){
244 	HashTableDestroy( tab -> size_table );
245 	HashTableDestroy( tab -> contig_table );
246 }
read_contig_fasta(fasta_contigs_t * tab,char * fname)247 int read_contig_fasta(fasta_contigs_t * tab, char * fname){
248 	autozip_fp fp;
249 	int rv = autozip_open(fname, &fp);
250 	if(rv>=0){
251 		tab -> contig_table = HashTableCreate(3943);
252 		tab -> size_table = HashTableCreate(3943);
253 
254 		HashTableSetDeallocationFunctions(tab -> contig_table, free, free);
255 		HashTableSetDeallocationFunctions(tab -> size_table, NULL, NULL);
256 
257 		HashTableSetKeyComparisonFunction(tab -> contig_table, fc_strcmp_chro);
258 		HashTableSetKeyComparisonFunction(tab -> size_table, fc_strcmp_chro);
259 
260 		HashTableSetHashFunction(tab -> contig_table, fc_chro_hash);
261 		HashTableSetHashFunction(tab -> size_table, fc_chro_hash);
262 
263 		char chro_name[MAX_CHROMOSOME_NAME_LEN];
264 		unsigned int inner_cursor = 0, current_bin_space = 0;
265 		int status = 0;
266 		char * bin_block = NULL;
267 		chro_name[0]=0;
268 
269 		while(1){
270 			int nch = autozip_getch(&fp);
271 			if(nch<0)break;
272 			if(status == 0){
273 				assert(nch == '>');
274 				status = 1;
275 			}else if(status == 1){
276 				if(inner_cursor == 0){
277 					bin_block = calloc(sizeof(char),10000);
278 					current_bin_space = 10000;
279 				}
280 				if(nch == '|' || nch == ' ') status = 2;
281 				else if(nch == '\n'){
282 					status = 3;
283 					inner_cursor = 0;
284 				}else{
285 					chro_name[inner_cursor++] = nch;
286 					chro_name[inner_cursor] = 0;
287 				}
288 			}else if(status == 2){
289 				if(nch == '\n'){
290 					status = 3;
291 					inner_cursor = 0;
292 				}
293 			}else if(status == 3){
294 				if(nch == '>' || nch <= 0){
295 					char * mem_chro = malloc(strlen(chro_name)+1);
296 					strcpy(mem_chro, chro_name);
297 					HashTablePut(tab -> size_table , mem_chro, NULL + inner_cursor);
298 					HashTablePut(tab -> contig_table , mem_chro, bin_block);
299 		//			SUBREADprintf("Read '%s' : %u bases\n", chro_name, inner_cursor);
300 					inner_cursor = 0;
301 					status = 1;
302 					if(nch <= 0) break;
303 				}else if(nch != '\n'){
304 					int bin_bytes = inner_cursor / 2;
305 					int bin_bits = 4*(inner_cursor % 2);
306 					int base_int = contig_fasta_base2int(nch);
307 					if(bin_bytes >= current_bin_space){
308 						unsigned int new_bin_space = current_bin_space / 4 * 5;
309 						if(current_bin_space > 0xffff0000 /5 * 4){
310 							assert(0);
311 							return 1;
312 						}
313 						bin_block = realloc(bin_block, new_bin_space);
314 						memset(bin_block + current_bin_space, 0, new_bin_space - current_bin_space);
315 						current_bin_space = new_bin_space;
316 					}
317 					bin_block[bin_bytes] |= (base_int << bin_bits);
318 					inner_cursor++;
319 				}
320 			}
321 		}
322 
323 		autozip_close(&fp);
324 		return 0;
325 	}
326 	return 1;
327 }
328 
RSubread_parse_CIGAR_Extra_string(int FLAG,char * MainChro,unsigned int MainPos,const char * CIGAR_Str,const char * Extra_Tags,int max_M,char ** Chros,unsigned int * Staring_Chro_Points,unsigned short * Section_Start_Read_Pos,unsigned short * Section_Length,int * is_junction_read)329 int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int MainPos, const char * CIGAR_Str, const char * Extra_Tags, int max_M, char ** Chros, unsigned int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read){
330 	int ret = RSubread_parse_CIGAR_string(MainChro, MainPos, CIGAR_Str, max_M, Chros, Staring_Chro_Points, Section_Start_Read_Pos, Section_Length, is_junction_read);
331 
332 	char read_main_strand = (((FLAG & 0x40)==0x40) == ((FLAG & 0x10) == 0x10 ))?'-':'+';
333 	int tag_cursor=0;
334 	//SUBREADprintf("EXTRA=%s\n", Extra_Tags);
335 	int status = PARSE_STATUS_TAGNAME;
336 	char tag_name[2], typechar=0;
337 	int tag_inner_cursor=0;
338 
339 	char current_fusion_char[MAX_CHROMOSOME_NAME_LEN];
340 	unsigned int current_fusion_pos = 0;
341 	char current_fusion_strand = 0;
342 	char current_fusion_cigar[max_M * 15];
343 	current_fusion_cigar [0] =0;
344 	current_fusion_char [0]=0;
345 
346 	while(1){
347 		int nch = Extra_Tags[tag_cursor];
348 		if(status == PARSE_STATUS_TAGNAME){
349 			tag_name[tag_inner_cursor++] = nch;
350 			if(tag_inner_cursor == 2){
351 				status = PARSE_STATUS_TAGTYPE;
352 				tag_cursor += 1;
353 				assert(Extra_Tags[tag_cursor] == ':');
354 			}
355 		}else if(status == PARSE_STATUS_TAGTYPE){
356 			typechar = nch;
357 			tag_cursor +=1;
358 			assert(Extra_Tags[tag_cursor] == ':');
359 			tag_inner_cursor = 0;
360 			status = PARSE_STATUS_TAGVALUE;
361 		}else if(status == PARSE_STATUS_TAGVALUE){
362 			if(nch == '\t' || nch == 0 || nch == '\n'){
363 				if(current_fusion_cigar[0] && current_fusion_char[0] && current_fusion_pos && current_fusion_strand){
364 					//SUBREADprintf("ENTER CALC:%s\n", current_fusion_char );
365 					unsigned int left_pos = current_fusion_pos;
366 					if(current_fusion_strand!=read_main_strand)
367 						left_pos = find_left_end_cigar(current_fusion_pos, current_fusion_cigar);
368 					ret += RSubread_parse_CIGAR_string(current_fusion_char, left_pos, current_fusion_cigar, max_M - ret, Chros + ret, Staring_Chro_Points+ ret, Section_Start_Read_Pos+ ret, Section_Length + ret, is_junction_read);
369 
370 					current_fusion_pos = 0;
371 					current_fusion_strand = 0;
372 					current_fusion_cigar [0] =0;
373 					current_fusion_char [0]=0;
374 					//SUBREADprintf("EXIT CALC:%s\n", current_fusion_char );
375 				}
376 
377 				tag_inner_cursor = 0;
378 				status = PARSE_STATUS_TAGNAME;
379 			}else{
380 				if(tag_name[0]=='C' && tag_name[1]=='C' && typechar == 'Z'){
381 					current_fusion_char[tag_inner_cursor++]=nch;
382 					current_fusion_char[tag_inner_cursor]=0;
383 				}else if(tag_name[0]=='C' && tag_name[1]=='G' && typechar == 'Z'){
384 					current_fusion_cigar[tag_inner_cursor++]=nch;
385 					current_fusion_cigar[tag_inner_cursor]=0;
386 				}else if(tag_name[0]=='C' && tag_name[1]=='P' && typechar == 'i'){
387 					current_fusion_pos = current_fusion_pos * 10 + (nch - '0');
388 				}else if(tag_name[0]=='C' && tag_name[1]=='T' && typechar == 'Z'){
389 					//SUBREADprintf("pos=%d %c -> %c\n", tag_cursor, current_fusion_strand, nch);
390 					current_fusion_strand = nch;
391 					//SUBREADprintf("spo=%d %c -> %c\n", tag_cursor, current_fusion_strand, nch);
392 				}
393 			}
394 		}
395 
396 		if(nch == 0 || nch == '\n'){
397 			assert(status == PARSE_STATUS_TAGNAME);
398 			break;
399 		}
400 
401 		tag_cursor++;
402 		//SUBREADprintf("CUR=%d [%s], c=%d\n", tag_cursor, Extra_Tags, Extra_Tags[tag_cursor]);
403 	}
404 	return ret;
405 }
406 
RSubread_parse_CIGAR_string(char * chro,unsigned int first_pos,const char * CIGAR_Str,int max_M,char ** Section_Chromosomes,unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos,unsigned short * Section_Chro_Length,int * is_junction_read)407 int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char * CIGAR_Str, int max_M, char ** Section_Chromosomes, unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read)
408 {
409 	unsigned int tmp_int=0;
410 	int cigar_cursor=0;
411 	unsigned short current_section_chro_len=0, current_section_start_read_pos = 0, read_cursor = 0;
412 	unsigned int chromosome_cursor=first_pos;
413 	int ret=0, is_first_S = 1;
414 
415 	for(cigar_cursor=0; ; cigar_cursor++)
416 	{
417 		char ch = CIGAR_Str[cigar_cursor];
418 
419 		if(ch >='0' && ch <= '9')
420 		{
421 			tmp_int=tmp_int*10+(ch - '0');
422 		}
423 		else
424 		{
425 			if(ch == 'S'){
426 				if(is_first_S) current_section_start_read_pos = tmp_int;
427 				read_cursor += tmp_int;
428 			}
429 			else if(ch == 'M' || ch == 'X' || ch == '=') {
430 				read_cursor += tmp_int;
431 				current_section_chro_len += tmp_int;
432 				chromosome_cursor += tmp_int;
433 			} else if(ch == 'N' || ch == 'D' || ch=='I' || ch == 0) {
434 				if('N' == ch)(*is_junction_read)=1;
435 				if(ret < max_M)
436 				{
437 					if(current_section_chro_len>0)
438 					{
439 						Section_Chromosomes[ret] = chro;
440 						Section_Start_Chro_Pos[ret] = chromosome_cursor - current_section_chro_len;
441 						Section_Start_Read_Pos[ret] = current_section_start_read_pos;
442 						Section_Chro_Length[ret] = current_section_chro_len;
443 						ret ++;
444 					}
445 				}else assert(0);
446 				current_section_chro_len = 0;
447 				if(ch == 'I') read_cursor += tmp_int;
448 				else if(ch == 'N' || ch == 'D') chromosome_cursor += tmp_int;
449 				current_section_start_read_pos = read_cursor;
450 
451 				if(ch == 0) break;
452 			}
453 			//printf("C=%c, TV=%d, CC=%d, RC=%d\n", ch, tmp_int, chromosome_cursor, current_section_chro_len);
454 			tmp_int = 0;
455 			is_first_S = 0;
456 		}
457 		if(cigar_cursor>100) return -1;
458 	}
459 
460 	return ret;
461 }
462 
display_sections(char * CIGAR_Str)463 void display_sections(char * CIGAR_Str)
464 {
465 	//int is_junc=0;
466 	int Section_Start_Chro_Pos[FC_CIGAR_PARSER_ITEMS];
467 	unsigned short Section_Start_Read_Pos[FC_CIGAR_PARSER_ITEMS];
468 	unsigned short Section_Chro_Length[FC_CIGAR_PARSER_ITEMS];
469 
470 	int retv = 0;//RSubread_parse_CIGAR_string(CIGAR_Str, Section_Start_Chro_Pos, Section_Start_Read_Pos, Section_Chro_Length, &is_junc);
471 
472 	int x1;
473 	SUBREADprintf("Cigar=%s ; Sections=%d\n", CIGAR_Str, retv);
474 	for(x1=0; x1<retv; x1++)
475 	{
476 		SUBREADprintf("   Section #%d: chro_offset=%d, read_offset=%u  length=%u\n",x1, Section_Start_Chro_Pos[x1], Section_Start_Read_Pos[x1], Section_Chro_Length[x1]);
477 	}
478 	SUBREADprintf("\n");
479 
480 }
481 
482 
483 #define GECV_STATE_BEFORE 10
484 #define GECV_STATE_NAME 20
485 #define GECV_STATE_GAP 30
486 #define GECV_STATE_VALUE 40
487 #define GECV_STATE_QVALUE 50
488 #define GECV_STATE_QV_END 60
489 #define GECV_STATE_ERROR 9999
490 
GTF_extra_column_istoken_chr(char c)491 int GTF_extra_column_istoken_chr(char c)
492 {
493 	return (isalpha(c)||isdigit(c)||c=='_');
494 }
495 
GTF_extra_column_value(const char * Extra_Col,const char * Target_Name,char * Target_Value,int TargVal_Size)496 int GTF_extra_column_value(const char * Extra_Col, const char * Target_Name, char * Target_Value, int TargVal_Size)
497 {
498 	int state = GECV_STATE_BEFORE;
499 	int col_cursor = 0, is_correct_name=0;
500 	char name_buffer[200];
501 	int name_cursor = 0, value_cursor=-1;
502 
503 	while(1)
504 	{
505 		if(name_cursor>190) return -1;
506 		char nch = Extra_Col[col_cursor];
507 		if(nch == '\n' || nch == '\r') nch = 0;
508 		if(state == GECV_STATE_BEFORE)
509 		{
510 			if(GTF_extra_column_istoken_chr(nch))
511 			{
512 				name_buffer[0] = nch;
513 				name_cursor = 1;
514 				state = GECV_STATE_NAME;
515 			}
516 			else if(nch != ' ' && nch != 0)
517 			{
518 				state = GECV_STATE_ERROR;
519 			}
520 		}
521 		else if(state == GECV_STATE_NAME)
522 		{
523 			if(nch == ' ' || nch == '=')
524 			{
525 				state = GECV_STATE_GAP;
526 				name_buffer[name_cursor] = 0;
527 				is_correct_name = (strcmp(name_buffer , Target_Name) == 0);
528 				//printf("CORR=%d : '%s'\n", is_correct_name, name_buffer);
529 			}
530 			else if(nch == '"')
531 			{
532 				name_buffer[name_cursor] = 0;
533 				is_correct_name = (strcmp(name_buffer , Target_Name) == 0);
534 				state = GECV_STATE_QVALUE;
535 				if(is_correct_name)
536 					value_cursor = 0;
537 			}
538 			else if(GTF_extra_column_istoken_chr(nch))
539 				name_buffer[name_cursor++] = nch;
540 			else
541 			{
542 				state = GECV_STATE_ERROR;
543 				//printf("ERR2  : '%c'\n", nch);
544 			}
545 
546 		}
547 		else if(state == GECV_STATE_GAP)
548 		{
549 			if(nch == '"')
550 			{
551 				state = GECV_STATE_QVALUE;
552 				if(is_correct_name)
553 					value_cursor = 0;
554 			}
555 			else if(nch != '=' && isgraph(nch))
556 			{
557 				state = GECV_STATE_VALUE;
558 				if(is_correct_name)
559 				{
560 					Target_Value[0]=nch;
561 					value_cursor = 1;
562 				}
563 			}
564 			else if(nch != ' ' && nch != '=')
565 				state = GECV_STATE_ERROR;
566 		}
567 		else if(state == GECV_STATE_VALUE)
568 		{
569 			if(nch == ';' || nch == 0)
570 			{
571 				state = GECV_STATE_BEFORE;
572 				if(is_correct_name)
573 				{
574 					Target_Value[value_cursor] = 0;
575 				}
576 				is_correct_name = 0;
577 			}
578 			else{
579 				if(value_cursor < TargVal_Size-1 && is_correct_name)
580 					Target_Value[value_cursor++] = nch;
581 			}
582 		}
583 		else if(state == GECV_STATE_QVALUE)
584 		{
585 			if(nch == '"')
586 			{
587 				state = GECV_STATE_QV_END;
588 				if(is_correct_name)
589 					Target_Value[value_cursor] = 0;
590 				is_correct_name = 0;
591 			}
592 			else
593 			{
594 				if(value_cursor < TargVal_Size-1 && is_correct_name)
595 				{
596 					if(nch !=' ' || value_cursor>0)
597 						Target_Value[value_cursor++] = nch;
598 				}
599 			}
600 		}
601 		else if(state == GECV_STATE_QV_END)
602 		{
603 			if(nch == ';' || nch == 0)
604 				state = GECV_STATE_BEFORE;
605 			else if(nch != ' ')
606 				state = GECV_STATE_ERROR;
607 
608 		}
609 
610 		if (GECV_STATE_ERROR == state){
611 			Target_Value[0]=0;
612 			return -1;
613 		}
614 		if (nch == 0)
615 		{
616 			if(state == GECV_STATE_BEFORE && value_cursor>0)
617 			{
618 				int x1;
619 				for(x1 = value_cursor-1; x1>=0; x1--)
620 				{
621 					if(Target_Value[x1] == ' '){
622 						value_cursor --;
623 						Target_Value[x1]=0;
624 					}
625 					else break;
626 				}
627 
628 				if(value_cursor>0)
629 					return value_cursor;
630 			}
631 			Target_Value[0]=0;
632 			return -1;
633 		}
634 		col_cursor++;
635 	}
636 }
637 
638 
hpl_test2_func()639 void hpl_test2_func()
640 {
641 	char * extra_column = " gene_id \"PC4-013  \"; 013=ABCD  ; PC4 =  CCXX  ";
642 	char * col_name = "gene_id";
643 	char col_val[100];
644 
645 	int col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
646 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
647 
648 	col_name = "013";
649 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
650 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
651 
652 	col_name = "PC4";
653 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
654 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
655 
656 
657 	col_name = "XXX";
658 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
659 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
660 
661 	extra_column = "gene_id =   \"PC4-013  ;=\"  ;013 = AXXD ; PC4=x";
662 	col_name = "013";
663 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
664 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
665 
666 	col_name = "gene_id";
667 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
668 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
669 
670 
671 	col_name = "PC4";
672 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
673 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
674 
675 
676 
677 
678 	extra_column = " gene_id\"  PC4-013  ;=  \"; XXX='123' ;013 :ABCD  ; PC4 =  CCXX=  ;";
679 	col_name = "013";
680 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
681 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
682 
683 
684 	col_name = "XXX";
685 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
686 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
687 
688 
689 	col_name = "PC4";
690 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
691 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
692 
693 	col_name = "gene_id";
694 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
695 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
696 
697 
698 
699 	extra_column = "gene_id \"653635\"; transcript_id \"TR:653635\";";
700 	col_name = "gene_id";
701 	col_len = GTF_extra_column_value(extra_column, col_name, col_val, 100);
702 	SUBREADprintf("LEN=%d; KEY='%s'; VAL=\"%s\"\n", col_len, col_name, col_val);
703 
704 
705 
706 
707 
708 }
709 
hpl_test1_func()710 void hpl_test1_func()
711 {
712 	display_sections("");
713 	display_sections("*");
714 	display_sections("5S10M2D10M800N12M3I12M450N12M12D99M6S");
715 	display_sections("110M2I10M800N32M3I12M6S");
716 	display_sections("200S110M2I10M800N32M3I12M200N40M");
717 	display_sections("3M1663N61M1045N36M3D20M66N10M2D10M77N3M1663N61M1045N36M3D20M66N103M1663N61M1045N36M3D20M66N9M");
718 }
719 
test_bam_reader()720 void test_bam_reader(){
721 	SamBam_FILE * bamfp = SamBam_fopen("/usr/local/work/liao/arena/Rsubread_Paper_OCT2016/RNAseq-SimHG38/Runs-100/STAR/STAR-Simulation-15M-DXC.bamAligned.out.bam", SAMBAM_FILE_BAM);
722 	assert(bamfp);
723 
724 	while(1){
725 		char linebuf[2000];
726 		char * ret = SamBam_fgets(bamfp, linebuf, 1999, 1);
727 		if(!ret) break;
728 		SUBREADprintf("%s", linebuf); // linebuf has "\n" in the end.
729 	}
730 	SamBam_fclose(bamfp);
731 }
732 
733 
734 
735 #ifdef HELPER_TEST_SAMBAM_READER
main()736 main() {
737 	//hpl_test1_func();
738 	test_bam_reader();
739 //	hpl_test1_func();
740 }
741 #endif
742 
str_replace(char * orig,char * rep,char * with)743 char *str_replace(char *orig, char *rep, char *with) {
744     char *result; // the return string
745     char *ins;    // the next insert point
746     char *tmp;    // varies
747     int len_rep;  // length of rep
748     int len_with; // length of with
749     int len_front; // distance between rep and end of last rep
750     int count;    // number of replacements
751 
752     if (!orig)
753         return NULL;
754     if (!rep)
755         rep = "";
756     len_rep = strlen(rep);
757     if (!with)
758         with = "";
759     len_with = strlen(with);
760 
761     ins = orig;
762     for (count = 0; NULL != (tmp = strstr(ins, rep)); ++count) {
763         ins = tmp + len_rep;
764     }
765     tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1);
766 
767     if (!result)
768         return NULL;
769 
770     while (count--) {
771         ins = strstr(orig, rep);
772         len_front = ins - orig;
773         tmp = strncpy(tmp, orig, len_front) + len_front;
774         tmp = strcpy(tmp, with) + len_with;
775         orig += len_front + len_rep; // move to next "end of rep"
776     }
777     strcpy(tmp, orig);
778     return result;
779 }
780 
781 
782 
783 // rule: the string is ABC123XXXXXX...
784 // This is the priroity:
785 // First, compare the letters part.
786 // Second, compare the pure numeric part.
787 // Third, compare the remainder.
strcmp_number(char * s1,char * s2)788 int strcmp_number(char * s1, char * s2)
789 {
790 	int x1 = 0;
791 	int ret = 0;
792 
793 	while(1)
794 	{
795 		char nch1 = s1[x1];
796 		char nch2 = s2[x1];
797 
798 		if((!nch1) || !nch2){return nch2?1:(nch1?(-1):0);}
799 		if(isdigit(nch1) && isdigit(nch2))break;
800 
801 		ret = nch1 - nch2;
802 		if(ret) return ret;
803 		x1++;
804 	}
805 
806 	int v1 = 0, v2 = 0;
807 	while(1)
808 	{
809 		char nch1 = s1[x1];
810 		char nch2 = s2[x1];
811 		if((!nch1) || !nch2){
812 			if(nch1 || nch2)
813 				return nch2?(-1):1;
814 			break;
815 		}
816 		int is_chr1_digit = isdigit(nch1);
817 		int is_chr2_digit = isdigit(nch2);
818 
819 		if(is_chr1_digit || is_chr2_digit)
820 		{
821 			if(is_chr1_digit && is_chr2_digit)
822 			{
823 				v1 = v1*10+(nch1-'0');
824 				v2 = v2*10+(nch2-'0');
825 			}
826 			else
827 			{
828 				ret = nch1 - nch2;
829 				return ret;
830 			}
831 		}
832 		else break;
833 		x1++;
834 	}
835 
836 	if(v1==v2)
837 		return strcmp(s1+x1, s2+x1);
838 	else
839 	{
840 		ret = v1 - v2;
841 		return ret;
842 	}
843 }
844 
845 
846 
mac_str(char * str_buff)847 int mac_str(char * str_buff)
848 {
849 #if defined(__FreeBSD__) || defined(__MINGW32__) || defined(__DragonFly__)
850 	return 1;
851 #else
852 #ifdef MACOS
853     int         mib[6], x1, ret = 1;
854 	size_t		len;
855     char            *buf;
856     unsigned char       *ptr;
857     struct if_msghdr    *ifm;
858     struct sockaddr_dl  *sdl;
859 
860 
861 	for(x1 = 0 ; x1 < 40; x1++)
862     {
863 		mib[0] = CTL_NET;
864 		mib[1] = AF_ROUTE;
865 		mib[2] = 0;
866 		mib[3] = AF_LINK;
867 		mib[4] = NET_RT_IFLIST;
868 		mib[5] = x1;
869 
870 		if (sysctl(mib, 6, NULL, &len, NULL, 0) >=0) {
871 			buf = malloc(len);
872 			if (sysctl(mib, 6, buf, &len, NULL, 0) >=0) {
873 
874 				ifm = (struct if_msghdr *)buf;
875 				sdl = (struct sockaddr_dl *)(ifm + 1);
876 				ptr = (unsigned char *)LLADDR(sdl);
877 
878 				if(sdl -> sdl_nlen < 1) continue;
879 
880 				char * ifname = malloc(sdl -> sdl_nlen + 1);
881 
882 				memcpy(ifname, sdl -> sdl_data, sdl -> sdl_nlen);
883 				ifname[sdl -> sdl_nlen] = 0;
884 				if(ifname[0]!='e'){
885 					free(ifname);
886 					continue;
887 				}
888 				free(ifname);
889 
890 				sprintf(str_buff,"%02X%02X%02X%02X%02X%02X",  *ptr, *(ptr+1), *(ptr+2),
891 					*(ptr+3), *(ptr+4), *(ptr+5));
892 				ret = 0;
893 				break;
894 			}
895 			free(buf);
896 		}
897 	}
898     return ret;
899 #else
900 #if defined(IFHWADDRLEN)
901     struct ifreq ifr;
902     struct ifconf ifc;
903     char buf[1024];
904     int success = 0;
905 
906     int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
907     if (sock == -1) { /* handle error*/ };
908 
909     ifc.ifc_len = sizeof(buf);
910     ifc.ifc_buf = buf;
911     if (ioctl(sock, SIOCGIFCONF, &ifc) == -1) { /* handle error */ }
912 
913     struct ifreq* it = ifc.ifc_req;
914     const struct ifreq* const end = it + (ifc.ifc_len / sizeof(struct ifreq));
915 
916     for (; it != end; ++it) {
917         strcpy(ifr.ifr_name, it->ifr_name);
918         if (ioctl(sock, SIOCGIFFLAGS, &ifr) == 0) {
919             if (! (ifr.ifr_flags & IFF_LOOPBACK)) { // don't count loopback
920                 if (ioctl(sock, SIOCGIFHWADDR, &ifr) == 0) {
921                       success = 1;
922                       break;
923                 }
924             }
925         }
926     }
927 
928     close(sock);
929 
930     unsigned char mac_address[6];
931 
932     if (success){
933 	memcpy(mac_address, ifr.ifr_hwaddr.sa_data, 6);
934 	    int x1;
935 	    for(x1 = 0; x1 < 6; x1++){
936 		 sprintf(str_buff+2*x1, "%02X",mac_address[x1]);
937 	    }
938 		return 0;
939 	}
940 #endif
941 	return 1;
942 #endif
943 #endif
944 }
945 
rand_str(char * str_buff)946 int rand_str(char * str_buff){
947 	int ret = 1;
948 	FILE * fp = fopen("/dev/urandom","r");
949 	if(fp){
950 		int x1;
951 		for(x1=0; x1<6; x1++){
952 			sprintf(str_buff + 2*x1 , "%02X", fgetc(fp));
953 		}
954 		fclose(fp);
955 		ret = 0;
956 	}
957 	return ret;
958 }
959 
mathrand_str(char * str_buff)960 int mathrand_str(char * str_buff){
961 	myrand_srand((int)(miltime()*100));
962 	int x1;
963 	for(x1 = 0; x1 < 6; x1++){
964 		sprintf(str_buff+2*x1, "%02X", myrand_rand() & 0xff );
965 	}
966 	return 0;
967 }
968 
mac_or_rand_str(char * str_buff)969 int mac_or_rand_str(char * str_buff){
970 	return mac_str(str_buff) && rand_str(str_buff) && mathrand_str(str_buff);
971 }
972 
973 #define PI_LONG 3.1415926535897932384626434L
974 
fast_fractorial(unsigned int x,long double * buff,int buflen)975 long double fast_fractorial(unsigned int x, long double * buff, int buflen){
976 	if(x<2) return 0;
977 
978 	if(buff != NULL && x < buflen && buff[x]!=0){
979 		return buff[x];
980 	}
981 	long double ret;
982 
983 	if(x<50){
984 		int x1;
985 		ret =0.L;
986 		for(x1 = 2 ; x1 <= x; x1++) ret += logl((long double)(x1));
987 	}else{
988 		ret = logl(x)*1.0L*x - 1.0L*x + 0.5L * logl(2.0L*PI_LONG* x) + 1.L/(12.L*x) - 1.L/(360.L* x*x*x) +  1.L/(1260.L* x*x*x*x*x) -  1.L/(1680.L*x*x*x*x*x*x*x);
989 	}
990 	if(buff != NULL && x < buflen) buff[x]=ret;
991 	return ret;
992 }
993 
994 
995 #define BUFF_4D 36
996 
fast_freq(unsigned int tab[2][2],long double * buff,int buflen)997 long double fast_freq( unsigned int tab[2][2] , long double * buff, int buflen){
998 	int x0 = -1;
999 	if(buff && buflen > BUFF_4D * BUFF_4D * BUFF_4D * BUFF_4D && tab[0][0] < BUFF_4D && tab[0][1] < BUFF_4D && tab[1][0] < BUFF_4D && tab[1][1] < BUFF_4D ){
1000 		x0 = buflen + tab[0][0]* BUFF_4D * BUFF_4D * BUFF_4D + tab[0][1] * BUFF_4D * BUFF_4D + tab[1][0] * BUFF_4D + tab[1][1];
1001 		if(buff[x0]!=0) return buff[x0];
1002 
1003 	}
1004 	long double ret = fast_fractorial(tab[0][0]+tab[0][1],buff,buflen)+fast_fractorial(tab[1][0]+tab[1][1],buff,buflen) +
1005 		fast_fractorial(tab[0][0]+tab[1][0],buff,buflen)+fast_fractorial(tab[0][1]+tab[1][1],buff,buflen) -
1006 		fast_fractorial(tab[0][0],buff,buflen) - fast_fractorial(tab[0][1],buff,buflen) -
1007 		fast_fractorial(tab[1][0],buff,buflen) - fast_fractorial(tab[1][1],buff,buflen) -
1008 		fast_fractorial(tab[0][0] + tab[0][1] + tab[1][0] + tab[1][1],buff,buflen);
1009 	if(x0>=0) buff[x0] = ret;
1010 	return ret;
1011 }
1012 
1013 
1014 
fast_fisher_test_one_side(unsigned int a,unsigned int b,unsigned int c,unsigned int d,long double * buff,int buflen)1015 double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c, unsigned int d, long double * buff, int buflen){
1016 	unsigned int tab[2][2];
1017 	long double P0, P_delta, ret;
1018 
1019 	tab[0][0]=a;
1020 	tab[0][1]=b;
1021 	tab[1][0]=c;
1022 	tab[1][1]=d;
1023 
1024 	int x_a, y_a, x_min=-1, y_min=-1;
1025 	unsigned int min_a = 0xffffffff;
1026 	for(x_a = 0; x_a < 2; x_a++)
1027 		for(y_a = 0; y_a < 2; y_a++){
1028 			if(tab[x_a][y_a]<min_a){
1029 				min_a = tab[x_a][y_a];
1030 				x_min = x_a;
1031 				y_min = y_a;
1032 			}
1033 		}
1034 	P_delta = fast_freq(tab, buff, buflen);
1035 	P0 = ret = exp(P_delta);
1036 	//printf("P0=%LG\n", P0);
1037 	if(min_a>0){
1038 		unsigned int Qa = min_a;
1039 		unsigned int Qb = tab[x_min][!y_min];
1040 		unsigned int Qc = tab[!x_min][y_min];
1041 		unsigned int Qd = tab[!x_min][!y_min];
1042 		for(; ; ){
1043 			min_a --;
1044 			Qb++;Qc++;
1045 			P_delta -= logl(Qb*Qc);
1046 			P_delta += logl(Qa*Qd);
1047 			Qa--;Qd--;
1048 			ret += expl(P_delta);
1049 		//	printf("%LG %LG %LG\n", ret, 1 - (ret - P0), expl(P_delta));
1050 			if(min_a < 1) break;
1051 		}
1052 	}
1053 
1054 	double ret1 = ret, ret2 = 1 - (ret - P0);
1055 
1056 	if(min(ret1, ret2) < 0){
1057 		return 0.0;
1058 	}
1059 	return  min(ret1, ret2) ;
1060 
1061 }
1062 
load_features_annotation(char * file_name,int file_type,char * gene_id_column,char * transcript_id_column,char * used_feature_type,void * context,int do_add_feature (char * gene_name,char * transcript_name,char * chro_name,unsigned int start,unsigned int end,int is_negative_strand,void * context))1063 int load_features_annotation(char * file_name, int file_type, char * gene_id_column, char * transcript_id_column, char * used_feature_type,
1064  void * context, int do_add_feature(char * gene_name, char * transcript_name, char * chro_name, unsigned int start, unsigned int end, int is_negative_strand, void * context)  ){
1065 	char * file_line = malloc(MAX_LINE_LENGTH+1);
1066 	int lineno = 0, is_GFF_txid_warned = 0, is_GFF_geneid_warned = 0, loaded_features = 0;
1067 	autozip_fp afp;
1068 	int aret = autozip_open(file_name, &afp);
1069 
1070 	if(aret < 0){
1071 		SUBREADprintf("Error: unable to open the annotation file : %s\n", file_name);
1072 		return -1;
1073 	}
1074 
1075 	while(1){
1076 		int is_tx_id_found = 0, is_gene_id_found = 0, is_negative_strand = -1;
1077 		char * token_temp = NULL, * feature_name, *transcript_id = NULL, * chro_name = NULL;
1078 		char feature_name_tmp[FEATURE_NAME_LENGTH], txid_tmp[FEATURE_NAME_LENGTH];
1079 		feature_name = feature_name_tmp;
1080 
1081 		unsigned int start = 0, end = 0;
1082 		aret = autozip_gets(&afp, file_line, MAX_LINE_LENGTH);
1083 		if(aret < 1) break;
1084 
1085 		lineno++;
1086 		if(is_comment_line(file_line, file_type, lineno-1))continue;
1087 
1088 		if(file_type == FILE_TYPE_RSUBREAD)
1089 		{
1090 			feature_name = strtok_r(file_line,"\t",&token_temp);
1091 			int feature_name_len = strlen(feature_name);
1092 			if(feature_name_len > FEATURE_NAME_LENGTH) feature_name[FEATURE_NAME_LENGTH -1 ] = 0;
1093 
1094 			chro_name = strtok_r(NULL,"\t", &token_temp);
1095 			int chro_name_len = strlen(chro_name);
1096 			if(chro_name_len > MAX_CHROMOSOME_NAME_LEN) chro_name[MAX_CHROMOSOME_NAME_LEN -1 ] = 0;
1097 
1098 			char * start_ptr = strtok_r(NULL,"\t", &token_temp);
1099 			char * end_ptr = strtok_r(NULL,"\t", &token_temp);
1100 
1101 			if(start_ptr == NULL || end_ptr == NULL){
1102 				SUBREADprintf("\nWarning: the format on the %d-th line is wrong.\n", lineno);
1103 			}
1104 			long long int tv1 = atoll(start_ptr);
1105 			long long int tv2 = atoll(end_ptr);
1106 
1107 			if( isdigit(start_ptr[0]) && isdigit(end_ptr[0]) ){
1108 				if(strlen(start_ptr) > 10 || strlen(end_ptr) > 10 || tv1 > 0x7fffffff || tv2> 0x7fffffff){
1109 					SUBREADprintf("\nError: Line %d contains a coordinate greater than 2^31.\n", lineno);
1110 					return -2;
1111 				}
1112 			}else{
1113 				SUBREADprintf("\nError: Line %d contains a format error. The expected annotation format is SAF.\n", lineno);
1114 				return -2;
1115 			}
1116 
1117 			start = atoi(start_ptr);// start
1118 			end = atoi(end_ptr);//end
1119 
1120 			char * strand_str = strtok_r(NULL,"\t", &token_temp);
1121 			if(strand_str == NULL)
1122 				is_negative_strand = 0;
1123 			else
1124 				is_negative_strand = ('-' ==strand_str[0]);
1125 
1126 			is_gene_id_found = 1;
1127 
1128 		} else if(file_type == FILE_TYPE_GTF) {
1129 			chro_name = strtok_r(file_line,"\t",&token_temp);
1130 			strtok_r(NULL,"\t", &token_temp);// source
1131 			char * feature_type = strtok_r(NULL,"\t", &token_temp);// feature_type
1132 
1133 			if(strcmp(feature_type, used_feature_type)==0){
1134 				char * start_ptr = strtok_r(NULL,"\t", &token_temp);
1135 				char * end_ptr = strtok_r(NULL,"\t", &token_temp);
1136 
1137 
1138 				if(start_ptr == NULL || end_ptr == NULL){
1139 					SUBREADprintf("\nWarning: the format on the %d-th line is wrong.\n", lineno);
1140 				}
1141 				long long int tv1 = atoll(start_ptr);
1142 				long long int tv2 = atoll(end_ptr);
1143 
1144 
1145 				if( isdigit(start_ptr[0]) && isdigit(end_ptr[0]) ){
1146 					if(strlen(start_ptr) > 10 || strlen(end_ptr) > 10 || tv1 > 0x7fffffff || tv2> 0x7fffffff){
1147 						SUBREADprintf("\nError: Line %d contains a coordinate greater than 2^31.\n", lineno);
1148 						return -2;
1149 					}
1150 				}else{
1151 					SUBREADprintf("\nError: Line %d contains a format error. The expected annotation format is GTF/GFF.\n", lineno);
1152 					return -2;
1153 				}
1154 				start = atoi(start_ptr);// start
1155 				end = atoi(end_ptr);//end
1156 
1157 				if(start < 1 || end<1 ||  start > 0x7fffffff || end > 0x7fffffff || start > end)
1158 					SUBREADprintf("\nWarning: the feature on the %d-th line has zero coordinate or zero lengths\n\n", lineno);
1159 
1160 
1161 				strtok_r(NULL,"\t", &token_temp);// score
1162 				is_negative_strand = ('-' == (strtok_r(NULL,"\t", &token_temp)[0]));//strand
1163 				strtok_r(NULL,"\t",&token_temp);	// "frame"
1164 				char * extra_attrs = strtok_r(NULL,"\t",&token_temp);   // name_1 "val1"; name_2 "val2"; ...
1165 				if(extra_attrs && (strlen(extra_attrs)>2)){
1166 					int attr_val_len = GTF_extra_column_value(extra_attrs , gene_id_column , feature_name_tmp, FEATURE_NAME_LENGTH);
1167 					if(attr_val_len>0) is_gene_id_found=1;
1168 
1169 					if(transcript_id_column){
1170 							transcript_id = txid_tmp;
1171 							attr_val_len = GTF_extra_column_value(extra_attrs , transcript_id_column , txid_tmp, FEATURE_NAME_LENGTH);
1172 							if(attr_val_len>0) is_tx_id_found=1;
1173 							else transcript_id = NULL;
1174 					}
1175 				}
1176 
1177 				if(!is_gene_id_found){
1178 					if(!is_GFF_geneid_warned){
1179 						int ext_att_len = strlen(extra_attrs);
1180 						if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
1181 						SUBREADprintf("\nERROR: failed to find the gene identifier attribute in the 9th column of the provided GTF file.\nThe specified gene identifier attribute is '%s'.\nAn example of attributes included in your GTF annotation is '%s'.\n\n",  gene_id_column, extra_attrs);
1182 					}
1183 					is_GFF_geneid_warned++;
1184 				}
1185 
1186 				if(transcript_id_column && !is_tx_id_found){
1187 					if(!is_GFF_txid_warned){
1188 						int ext_att_len = strlen(extra_attrs);
1189 						if(extra_attrs[ext_att_len-1] == '\n') extra_attrs[ext_att_len-1] =0;
1190 						SUBREADprintf("\nERROR: failed to find the transcript identifier attribute in the 9th column of the provided GTF file.\nThe specified transcript identifier attribute is '%s'.\nAn example of attributes included in your GTF annotation is '%s'.\n\n", transcript_id_column, extra_attrs);
1191 					}
1192 					is_GFF_txid_warned++;
1193 				}
1194 			}
1195 		}
1196 
1197 		if(is_gene_id_found){
1198 			do_add_feature(feature_name, transcript_id, chro_name, start, end, is_negative_strand, context);
1199 			loaded_features++;
1200 		}
1201 
1202 	}
1203 	autozip_close(&afp);
1204 	free(file_line);
1205 
1206 	if(is_GFF_txid_warned || is_GFF_geneid_warned)return -2;
1207 	if(loaded_features<1){
1208 		SUBREADprintf("\nERROR: No feature was loaded from the annotation file. Please check if the annotation format was correctly specified, and also if the feature type was correctly specified if the annotation is in the GTF format.\n\n");
1209 		return -2;
1210 	}
1211 	return loaded_features;
1212 }
1213 
load_alias_table(char * fname)1214 HashTable * load_alias_table(char * fname) {
1215 	FILE * fp = f_subr_open(fname, "r");
1216 	if(!fp)
1217 	{
1218 		print_in_box(80,0,0,"WARNING unable to open alias file '%s'", fname);
1219 		return NULL;
1220 	}
1221 
1222 	char * fl = malloc(2000);
1223 
1224 	HashTable * ret = HashTableCreate(1013);
1225 	HashTableSetDeallocationFunctions(ret, free, free);
1226 	HashTableSetKeyComparisonFunction(ret, fc_strcmp_chro);
1227 	HashTableSetHashFunction(ret, fc_chro_hash);
1228 
1229 	while (1)
1230 	{
1231 		char *ret_fl = fgets(fl, 1999, fp);
1232 		if(!ret_fl) break;
1233 		if(fl[0]=='#') continue;
1234 		char * sam_chr = NULL;
1235 		char * anno_chr = strtok_r(fl, ",", &sam_chr);
1236 		if((!sam_chr)||(!anno_chr)) continue;
1237 
1238 		sam_chr[strlen(sam_chr)-1]=0;
1239 		if(sam_chr[strlen(sam_chr)-1]=='\r') sam_chr[strlen(sam_chr)-1]=0;
1240 		char * anno_chr_buf = malloc(strlen(anno_chr)+1);
1241 		strcpy(anno_chr_buf, anno_chr);
1242 		char * sam_chr_buf = malloc(strlen(sam_chr)+1);
1243 		strcpy(sam_chr_buf, sam_chr);
1244 //		SUBREADprintf("ALIAS_LOAD : '%s' => '%s'\n", sam_chr_buf, anno_chr_buf);
1245 		HashTablePut(ret, sam_chr_buf, anno_chr_buf);
1246 	}
1247 
1248 	fclose(fp);
1249 
1250 	free(fl);
1251 	return ret;
1252 }
1253 
rebuild_command_line(char ** lineptr,int argc,char ** argv)1254 int rebuild_command_line(char ** lineptr, int argc, char ** argv){
1255 	int linecap = 1000,c;
1256 	*lineptr = malloc(linecap);
1257 	**lineptr = 0;
1258 
1259 	for(c = 0; c<argc;c++)
1260 	{
1261 		int cline = strlen(argv[c]);
1262 		if(strlen(*lineptr) + 100 + cline > linecap)
1263 		{
1264 			linecap += cline+500;
1265 			*lineptr = realloc(*lineptr, linecap);
1266 		}
1267 		sprintf((*lineptr) + strlen(*lineptr), "\"%s\" ", argv[c]);
1268 	}
1269 
1270 	return strlen(*lineptr);
1271 }
1272 
1273 // The following code was downloaded from
1274 //   https://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
1275 //   on the 23rd of Nov, 2018. I (Yang LIAO) made minor changes to the code.
1276 //
1277 // It says "Public Domain" in the licence but the original author should be acknwoledged (only for expressing my gratitude):
1278 //   Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
1279 
1280 /* Any 32-bit or wider unsigned integer data type will do */
1281 
1282 /*
1283  * The basic HelpFuncMD5 functions.
1284  *
1285  * F and G are optimized compared to their RFC 1321 definitions for
1286  * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
1287  * implementation.
1288  */
1289 #define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
1290 #define G(x, y, z)			((y) ^ ((z) & ((x) ^ (y))))
1291 #define H(x, y, z)			(((x) ^ (y)) ^ (z))
1292 #define H2(x, y, z)			((x) ^ ((y) ^ (z)))
1293 #define I(x, y, z)			((y) ^ ((x) | ~(z)))
1294 
1295 /*
1296  * The HelpFuncMD5 transformation for all four rounds.
1297  */
1298 #define STEP(f, a, b, c, d, x, t, s) \
1299 	(a) += f((b), (c), (d)) + (x) + (t); \
1300 	(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
1301 	(a) += (b);
1302 
1303 /*
1304  * SET reads 4 input bytes in little-endian byte order and stores them in a
1305  * properly aligned word in host byte order.
1306  *
1307  * The check for little-endian architectures that tolerate unaligned memory
1308  * accesses is just an optimization.  Nothing will break if it fails to detect
1309  * a suitable architecture.
1310  *
1311  * Unfortunately, this optimization may be a C strict aliasing rules violation
1312  * if the caller's data buffer has effective type that cannot be aliased by
1313  * HelpFuncMD5_u32plus.  In practice, this problem may occur if these HelpFuncMD5 routines are
1314  * inlined into a calling function, or with future and dangerously advanced
1315  * link-time optimizations.  For the time being, keeping these HelpFuncMD5 routines in
1316  * their own translation unit avoids the problem.
1317  */
1318 #define SET(n) (*(HelpFuncMD5_u32plus *)&ptr[(n) * 4])
1319 #define GET(n) SET(n)
1320 /*
1321  * This processes one or more 64-byte data blocks, but does NOT update the bit
1322  * counters.  There are no alignment requirements.
1323  */
body(HelpFuncMD5_CTX * ctx,const void * data,unsigned long size)1324 static const void *body(HelpFuncMD5_CTX *ctx, const void *data, unsigned long size)
1325 {
1326 	const unsigned char *ptr;
1327 	HelpFuncMD5_u32plus a, b, c, d;
1328 	HelpFuncMD5_u32plus saved_a, saved_b, saved_c, saved_d;
1329 
1330 	ptr = (const unsigned char *)data;
1331 
1332 	a = ctx->a;
1333 	b = ctx->b;
1334 	c = ctx->c;
1335 	d = ctx->d;
1336 
1337 	do {
1338 		saved_a = a;
1339 		saved_b = b;
1340 		saved_c = c;
1341 		saved_d = d;
1342 
1343 /* Round 1 */
1344 		STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
1345 		STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
1346 		STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
1347 		STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
1348 		STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
1349 		STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
1350 		STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
1351 		STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
1352 		STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
1353 		STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
1354 		STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
1355 		STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
1356 		STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
1357 		STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
1358 		STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
1359 		STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
1360 
1361 /* Round 2 */
1362 		STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
1363 		STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
1364 		STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
1365 		STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
1366 		STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
1367 		STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
1368 		STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
1369 		STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
1370 		STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
1371 		STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
1372 		STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
1373 		STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
1374 		STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
1375 		STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
1376 		STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
1377 		STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
1378 
1379 /* Round 3 */
1380 		STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
1381 		STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
1382 		STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
1383 		STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
1384 		STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
1385 		STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
1386 		STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
1387 		STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
1388 		STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
1389 		STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
1390 		STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
1391 		STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
1392 		STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
1393 		STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
1394 		STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
1395 		STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
1396 
1397 /* Round 4 */
1398 		STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
1399 		STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
1400 		STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
1401 		STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
1402 		STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
1403 		STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
1404 		STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
1405 		STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
1406 		STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
1407 		STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
1408 		STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
1409 		STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
1410 		STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
1411 		STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
1412 		STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
1413 		STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
1414 
1415 		a += saved_a;
1416 		b += saved_b;
1417 		c += saved_c;
1418 		d += saved_d;
1419 
1420 		ptr += 64;
1421 	} while (size -= 64);
1422 
1423 	ctx->a = a;
1424 	ctx->b = b;
1425 	ctx->c = c;
1426 	ctx->d = d;
1427 
1428 	return ptr;
1429 }
1430 
HelpFuncMD5_Init(HelpFuncMD5_CTX * ctx)1431 void HelpFuncMD5_Init(HelpFuncMD5_CTX *ctx)
1432 {
1433 	ctx->a = 0x67452301;
1434 	ctx->b = 0xefcdab89;
1435 	ctx->c = 0x98badcfe;
1436 	ctx->d = 0x10325476;
1437 
1438 	ctx->lo = 0;
1439 	ctx->hi = 0;
1440 }
1441 
HelpFuncMD5_Update(HelpFuncMD5_CTX * ctx,const void * data,unsigned long size)1442 void HelpFuncMD5_Update(HelpFuncMD5_CTX *ctx, const void *data, unsigned long size)
1443 {
1444 	HelpFuncMD5_u32plus saved_lo;
1445 	unsigned long used, available;
1446 
1447 	saved_lo = ctx->lo;
1448 	if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
1449 		ctx->hi++;
1450 	ctx->hi += size >> 29;
1451 
1452 	used = saved_lo & 0x3f;
1453 
1454 	if (used) {
1455 		available = 64 - used;
1456 
1457 		if (size < available) {
1458 			memcpy(&ctx->buffer[used], data, size);
1459 			return;
1460 		}
1461 
1462 		memcpy(&ctx->buffer[used], data, available);
1463 		data = (const unsigned char *)data + available;
1464 		size -= available;
1465 		body(ctx, ctx->buffer, 64);
1466 	}
1467 
1468 	if (size >= 64) {
1469 		data = body(ctx, data, size & ~(unsigned long)0x3f);
1470 		size &= 0x3f;
1471 	}
1472 
1473 	memcpy(ctx->buffer, data, size);
1474 }
1475 
1476 #define OUT(dst, src) \
1477 	(dst)[0] = (unsigned char)(src); \
1478 	(dst)[1] = (unsigned char)((src) >> 8); \
1479 	(dst)[2] = (unsigned char)((src) >> 16); \
1480 	(dst)[3] = (unsigned char)((src) >> 24);
1481 
HelpFuncMD5_Final(unsigned char * result,HelpFuncMD5_CTX * ctx)1482 void HelpFuncMD5_Final(unsigned char *result, HelpFuncMD5_CTX *ctx)
1483 {
1484 	unsigned long used, available;
1485 
1486 	used = ctx->lo & 0x3f;
1487 
1488 	ctx->buffer[used++] = 0x80;
1489 
1490 	available = 64 - used;
1491 
1492 	if (available < 8) {
1493 		memset(&ctx->buffer[used], 0, available);
1494 		body(ctx, ctx->buffer, 64);
1495 		used = 0;
1496 		available = 64;
1497 	}
1498 
1499 	memset(&ctx->buffer[used], 0, available - 8);
1500 
1501 	ctx->lo <<= 3;
1502 	OUT(&ctx->buffer[56], ctx->lo)
1503 	OUT(&ctx->buffer[60], ctx->hi)
1504 
1505 	body(ctx, ctx->buffer, 64);
1506 
1507 	OUT(&result[0], ctx->a)
1508 	OUT(&result[4], ctx->b)
1509 	OUT(&result[8], ctx->c)
1510 	OUT(&result[12], ctx->d)
1511 
1512 	memset(ctx, 0, sizeof(*ctx));
1513 }
1514 
1515 
Helper_md5sum(char * plain_txt,int plain_len,unsigned char * bin_md5_buff)1516 void Helper_md5sum(char * plain_txt, int plain_len, unsigned char * bin_md5_buff){
1517 	HelpFuncMD5_CTX ctx;
1518 	HelpFuncMD5_Init(&ctx);
1519 	HelpFuncMD5_Update(&ctx, plain_txt, plain_len);
1520 	HelpFuncMD5_Final(bin_md5_buff, &ctx);
1521 }
1522 
plain_txt_to_long_rand(char * plain_txt,int plain_len)1523 unsigned long long plain_txt_to_long_rand(char * plain_txt, int plain_len){
1524 	unsigned char md5v[16];
1525 	Helper_md5sum(plain_txt, plain_len, md5v);
1526 	unsigned long long ret = 0;
1527 	memcpy(&ret, md5v, sizeof(ret));
1528 	return ret;
1529 }
1530 
md5txt(char * s)1531 void md5txt(char *s){
1532 	unsigned char md5v[16];
1533 	unsigned long long randv;
1534 	Helper_md5sum(s, strlen(s), md5v);
1535 	int x;
1536 	randv = plain_txt_to_long_rand(s, strlen(s));
1537 
1538 	for(x=0;x<16;x++){
1539 		SUBREADprintf("%02X", md5v[x]);
1540 	}
1541 	#ifdef __MINGW32__
1542 	SUBREADprintf("\t'%s'\t%016I64X\t%I64u\t%.9f\n", s, randv, randv, randv*1./0xffffffffffffffffllu);
1543 	#else
1544 	SUBREADprintf("\t'%s'\t%016llX\t%llu\t%.9f\n", s, randv, randv, randv*1./0xffffffffffffffffllu);
1545 	#endif
1546 }
1547 
1548 //#define TESTHelpermain main
1549 
TESTHelpermain()1550 int TESTHelpermain(){
1551 	int xx;
1552 	for(xx=0; xx<100; xx++){
1553 		char xt[10];
1554 		sprintf(xt, "%08d", xx);
1555 		md5txt(xt);
1556 	}
1557 
1558 	return 0;
1559 }
1560 
1561 
1562 // The following code was retrieved from
1563 //   https://github.com/google/omaha/blob/master/third_party/lzma/files/C/Helper_Sha256.c
1564 // Thank the authors -- Igor Pavlov and Wei Dai.
1565 
1566 /* Crypto/Helper_Sha256.c -- SHA-256 Hash
1567 2010-06-11 : Igor Pavlov : Public domain
1568 This code is based on public domain code from Wei Dai's Crypto++ library. */
1569 #define SHA256_DIGEST_SIZE 32
1570 
1571 typedef struct
1572 {
1573   unsigned int state[8];
1574   unsigned long long count;
1575   unsigned char buffer[64];
1576 } CHelper_Sha256;
1577 
1578 /* define it for speed optimization */
1579 /* #define _SHA256_UNROLL */
1580 /* #define _SHA256_UNROLL2 */
1581 
Helper_Sha256_Init(CHelper_Sha256 * p)1582 void Helper_Sha256_Init(CHelper_Sha256 *p)
1583 {
1584   p->state[0] = 0x6a09e667;
1585   p->state[1] = 0xbb67ae85;
1586   p->state[2] = 0x3c6ef372;
1587   p->state[3] = 0xa54ff53a;
1588   p->state[4] = 0x510e527f;
1589   p->state[5] = 0x9b05688c;
1590   p->state[6] = 0x1f83d9ab;
1591   p->state[7] = 0x5be0cd19;
1592   p->count = 0;
1593 }
1594 
1595 #define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
1596 #define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
1597 
1598 #define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
1599 #define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
1600 #define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
1601 #define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
1602 
1603 #define blk0(i) (W[i] = data[i])
1604 #define blk2(i) (W[i&15] += s1(W[(i-2)&15]) + W[(i-7)&15] + s0(W[(i-15)&15]))
1605 
1606 #define Ch(x,y,z) (z^(x&(y^z)))
1607 #define Maj(x,y,z) ((x&y)|(z&(x|y)))
1608 
1609 #define a(i) T[(0-(i))&7]
1610 #define b(i) T[(1-(i))&7]
1611 #define c(i) T[(2-(i))&7]
1612 #define d(i) T[(3-(i))&7]
1613 #define e(i) T[(4-(i))&7]
1614 #define f(i) T[(5-(i))&7]
1615 #define g(i) T[(6-(i))&7]
1616 #define h(i) T[(7-(i))&7]
1617 
1618 
1619 #ifdef _SHA256_UNROLL2
1620 
1621 #define R(a,b,c,d,e,f,g,h, i) h += S1(e) + Ch(e,f,g) + K[i+j] + (j?blk2(i):blk0(i));\
1622   d += h; h += S0(a) + Maj(a, b, c)
1623 
1624 #define RX_8(i) \
1625   R(a,b,c,d,e,f,g,h, i); \
1626   R(h,a,b,c,d,e,f,g, i+1); \
1627   R(g,h,a,b,c,d,e,f, i+2); \
1628   R(f,g,h,a,b,c,d,e, i+3); \
1629   R(e,f,g,h,a,b,c,d, i+4); \
1630   R(d,e,f,g,h,a,b,c, i+5); \
1631   R(c,d,e,f,g,h,a,b, i+6); \
1632   R(b,c,d,e,f,g,h,a, i+7)
1633 
1634 #else
1635 
1636 #define R(i) h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j?blk2(i):blk0(i));\
1637   d(i) += h(i); h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
1638 
1639 #ifdef _SHA256_UNROLL
1640 
1641 #define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
1642 
1643 #endif
1644 
1645 #endif
1646 
1647 static const unsigned int K[64] = {
1648   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
1649   0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
1650   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
1651   0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
1652   0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
1653   0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
1654   0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
1655   0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
1656   0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
1657   0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
1658   0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
1659   0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
1660   0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
1661   0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
1662   0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
1663   0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
1664 };
1665 
Helper_Sha256_Transform(unsigned int * state,const unsigned int * data)1666 static void Helper_Sha256_Transform(unsigned int *state, const unsigned int *data)
1667 {
1668   unsigned int W[16];
1669   unsigned j;
1670   #ifdef _SHA256_UNROLL2
1671   unsigned int a,b,c,d,e,f,g,h;
1672   a = state[0];
1673   b = state[1];
1674   c = state[2];
1675   d = state[3];
1676   e = state[4];
1677   f = state[5];
1678   g = state[6];
1679   h = state[7];
1680   #else
1681   unsigned int T[8];
1682   for (j = 0; j < 8; j++)
1683     T[j] = state[j];
1684   #endif
1685 
1686   for (j = 0; j < 64; j += 16)
1687   {
1688     #if defined(_SHA256_UNROLL) || defined(_SHA256_UNROLL2)
1689     RX_8(0); RX_8(8);
1690     #else
1691     unsigned i;
1692     for (i = 0; i < 16; i++) { R(i); }
1693     #endif
1694   }
1695 
1696   #ifdef _SHA256_UNROLL2
1697   state[0] += a;
1698   state[1] += b;
1699   state[2] += c;
1700   state[3] += d;
1701   state[4] += e;
1702   state[5] += f;
1703   state[6] += g;
1704   state[7] += h;
1705   #else
1706   for (j = 0; j < 8; j++)
1707     state[j] += T[j];
1708   #endif
1709 
1710   /* Wipe variables */
1711   /* memset(W, 0, sizeof(W)); */
1712   /* memset(T, 0, sizeof(T)); */
1713 }
1714 
1715 #undef S0
1716 #undef S1
1717 #undef s0
1718 #undef s1
1719 
Helper_Sha256_WritecharBlock(CHelper_Sha256 * p)1720 static void Helper_Sha256_WritecharBlock(CHelper_Sha256 *p)
1721 {
1722   unsigned int data32[16];
1723   unsigned i;
1724   for (i = 0; i < 16; i++)
1725     data32[i] =
1726       ((unsigned int)(p->buffer[i * 4    ]) << 24) +
1727       ((unsigned int)(p->buffer[i * 4 + 1]) << 16) +
1728       ((unsigned int)(p->buffer[i * 4 + 2]) <<  8) +
1729       ((unsigned int)(p->buffer[i * 4 + 3]));
1730   Helper_Sha256_Transform(p->state, data32);
1731 }
1732 
Helper_Sha256_Update(CHelper_Sha256 * p,const char * data,size_t size)1733 void Helper_Sha256_Update(CHelper_Sha256 *p, const char *data, size_t size)
1734 {
1735   unsigned int curBufferPos = (unsigned int)p->count & 0x3F;
1736   while (size > 0)
1737   {
1738     p->buffer[curBufferPos++] = (char)*data;
1739     p->count++;
1740 
1741 	data++;
1742     size--;
1743     if (curBufferPos == 64)
1744     {
1745       curBufferPos = 0;
1746       Helper_Sha256_WritecharBlock(p);
1747     }
1748   }
1749 }
1750 
Helper_Sha256_Final(unsigned char * digest,CHelper_Sha256 * p)1751 void Helper_Sha256_Final( unsigned char *digest , CHelper_Sha256 *p)
1752 {
1753   unsigned long long lenInBits = (p->count << 3);
1754   unsigned int curBufferPos = (unsigned int)p->count & 0x3F;
1755   unsigned i;
1756   p->buffer[curBufferPos++] = 0x80;
1757   while (curBufferPos != (64 - 8))
1758   {
1759     curBufferPos &= 0x3F;
1760     if (curBufferPos == 0)
1761       Helper_Sha256_WritecharBlock(p);
1762     p->buffer[curBufferPos++] = 0;
1763   }
1764   for (i = 0; i < 8; i++)
1765   {
1766     p->buffer[curBufferPos++] = (char)(lenInBits >> 56);
1767     lenInBits <<= 8;
1768   }
1769   Helper_Sha256_WritecharBlock(p);
1770 
1771   for (i = 0; i < 8; i++)
1772   {
1773     *digest++ = (unsigned char)(p->state[i] >> 24);
1774     *digest++ = (unsigned char)(p->state[i] >> 16);
1775     *digest++ = (unsigned char)(p->state[i] >> 8);
1776     *digest++ = (unsigned char)(p->state[i]);
1777   }
1778 }
1779 
Helper_sha256sum(char * plain_txt,int plain_len,unsigned char * bin_md5_buff)1780 void Helper_sha256sum(char * plain_txt, int plain_len, unsigned char * bin_md5_buff){
1781 	CHelper_Sha256 ctx;
1782 	Helper_Sha256_Init(&ctx);
1783 	Helper_Sha256_Update(&ctx, plain_txt, plain_len);
1784 	Helper_Sha256_Final(bin_md5_buff, &ctx);
1785 }
1786 
sha256txt(char * s)1787 void sha256txt(char *s){
1788 	unsigned char sha256v[32];
1789 	Helper_sha256sum(s, strlen(s), sha256v);
1790 	int x;
1791 
1792 	for(x=0;x<32;x++){
1793 		SUBREADprintf("%02X", sha256v[x]);
1794 	}
1795 	SUBREADprintf("\t'%s'\n", s);
1796 }
1797 
1798 //#define  TEST256Helpermain main
TEST256Helpermain()1799 int TEST256Helpermain(){
1800 	int xx;
1801 	for(xx=0; xx<100; xx++){
1802 		char xt[10];
1803 		sprintf(xt, "%08d", xx);
1804 		sha256txt(xt);
1805 	}
1806 	return 0;
1807 }
1808 
1809 
1810 
1811 // The code of Helper_erfinv() was retreived from
1812 //    https://github.com/antelopeusersgroup/antelope_contrib/blob/master/lib/location/libgenloc/erfinv.c
1813 // I (Yang LIAO) adopted and modified it because it is under the New BSD license.
1814 //
1815 // Copyright (c) 2014 Indiana University
1816 // All rights reserved.
1817 //
1818 // Written by Prof. Gary L. Pavlis, Dept. of Geol. Sci.,
1819 //   Indiana University, Bloomington, IN
1820 //
1821 // This software is licensed under the New BSD license:
1822 //
1823 // Redistribution and use in source and binary forms,
1824 // with or without modification, are permitted provided
1825 // that the following conditions are met:
1826 //
1827 // Redistributions of source code must retain the above
1828 // copyright notice, this list of conditions and the
1829 // following disclaimer.
1830 //
1831 // Redistributions in binary form must reproduce the
1832 // above copyright notice, this list of conditions and
1833 // the following disclaimer in the documentation and/or
1834 // other materials provided with the distribution.
1835 //
1836 // Neither the name of Indiana University nor
1837 // the names of its contributors may be used to endorse
1838 // or promote products derived from this software without
1839 // specific prior written permission.
1840 //
1841 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
1842 // CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
1843 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1844 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
1845 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
1846 // THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
1847 // DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1848 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
1849 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
1850 // USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1851 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
1852 // IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
1853 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
1854 // USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1855 // POSSIBILITY OF SUCH DAMAGE.
1856 
1857 
1858 #include <math.h>
1859 #include <float.h>
1860 #define MAXDOUBLE DBL_MAX
1861 
1862 #define CENTRAL_RANGE 0.7
Helper_erfinv(double y)1863 double Helper_erfinv( double y) {
1864         double x=0,z,num,dem; /*working variables */
1865         /* coefficients in rational expansion */
1866         double a[4]={ 0.886226899, -1.645349621,  0.914624893, -0.140543331};
1867         double b[4]={-2.118377725,  1.442710462, -0.329097515,  0.012229801};
1868         double c[4]={-1.970840454, -1.624906493,  3.429567803,  1.641345311};
1869         double d[2]={ 3.543889200,  1.637067800};
1870         if(fabs(y) > 1.0) return (atof("NaN"));  /* This needs IEEE constant*/
1871         if(fabs(y) == 1.0) return((copysign(1.0,y))*MAXDOUBLE);
1872         if( fabs(y) <= CENTRAL_RANGE )
1873         {
1874                 z = y*y;
1875                 num = (((a[3]*z + a[2])*z + a[1])*z + a[0]);
1876                 dem = ((((b[3]*z + b[2])*z + b[1])*z +b[0])*z + 1.0);
1877                 x = y*num/dem;
1878         }
1879         else if( (fabs(y) > CENTRAL_RANGE) && (fabs(y) < 1.0) )
1880         {
1881                 z = sqrt(-log((1.0-fabs(y))/2.0));
1882                 num = ((c[3]*z + c[2])*z + c[1])*z + c[0];
1883                 dem = (d[1]*z + d[0])*z + 1.0;
1884                 x = (copysign(1.0,y))*num/dem;
1885         }
1886         /* Two steps of Newton-Raphson correction */
1887         x = x - (erf(x) - y)/( (2.0/sqrt(M_PI))*exp(-x*x));
1888         x = x - (erf(x) - y)/( (2.0/sqrt(M_PI))*exp(-x*x));
1889 
1890         return(x);
1891 }
1892 
1893 
inverse_sample_normal(double p)1894 double inverse_sample_normal(double p){
1895 	return Helper_erfinv(2*p-1)*1.4142135623730950488;
1896 }
1897 
1898 //#define TestNormalMain main
TestNormalMain()1899 void TestNormalMain(){
1900 	int i;
1901 	for(i=0; i<=10; i++){
1902 		double ii = i*1./10;
1903 		double p_0 = inverse_sample_normal(ii);
1904 		SUBREADprintf("p of %.1f = %.40f\n\n" , ii, p_0);
1905 	}
1906 }
1907 
1908 
1909 #ifndef MAKE_STANDALONE
1910 
1911 message_queue_t mt_message_queue;
1912 
1913 
safeRprintf(char * fmt,...)1914 int safeRprintf(char * fmt,...){
1915 	va_list args;
1916 	va_start(args , fmt);
1917 	FILE * ofp = fopen("/tmp/del4-Rlog.txt", "a");
1918 	//fprintf(ofp, "NEW LOG : %p \"%s\"\n", fmt, fmt);
1919 	fprintf(ofp, fmt, args);
1920 	fclose(ofp);
1921 	va_end(args);
1922 	return 0;
1923 }
1924 
1925 #endif
1926 
msgqu_destroy()1927 void msgqu_destroy(){
1928 	#ifndef MAKE_STANDALONE
1929 	ArrayListDestroy(mt_message_queue.message_queue);
1930 	subread_destroy_lock(&mt_message_queue.queue_lock);
1931 	#endif
1932 }
1933 
msgqu_notifyFinish()1934 void msgqu_notifyFinish(){
1935 	#ifndef MAKE_STANDALONE
1936 	subread_lock_occupy(&mt_message_queue.queue_lock);
1937 	mt_message_queue.is_finished = 1;
1938 	subread_lock_release(&mt_message_queue.queue_lock);
1939 	#endif
1940 }
1941 
msgqu_init(int thread_mode)1942 void msgqu_init(int thread_mode){
1943 	#ifndef MAKE_STANDALONE
1944 	mt_message_queue.is_thread_mode = thread_mode;
1945 	if(!thread_mode) return;
1946 	mt_message_queue.is_finished = 0;
1947 	mt_message_queue.message_queue = ArrayListCreate(100);
1948 	ArrayListSetDeallocationFunction(mt_message_queue.message_queue,free);
1949 	subread_init_lock(&mt_message_queue.queue_lock);
1950 //	subread_init_lock(&mt_message_queue.queue_notifier);
1951 	#endif
1952 }
1953 
msgqu_main_loop()1954 void msgqu_main_loop(){
1955 	#ifndef MAKE_STANDALONE
1956 	while(1){
1957 		subread_lock_occupy(&mt_message_queue.queue_lock);
1958 		while(0< mt_message_queue.message_queue->numOfElements){
1959 			char * amsg = ArrayListShift(mt_message_queue.message_queue);
1960 			Rprintf("%s", amsg);
1961 			free(amsg);
1962 		}
1963 		if(mt_message_queue.is_finished) break;
1964 		subread_lock_release(&mt_message_queue.queue_lock);
1965 		usleep(40000);
1966 	}
1967 	#endif
1968 }
1969 
1970 #define MSGQU_LINE_SIZE 3000
msgqu_printf(const char * fmt,...)1971 void msgqu_printf(const char * fmt, ...){
1972 	va_list args;
1973 	va_start(args , fmt);
1974 	char * obuf = malloc(MSGQU_LINE_SIZE+1);
1975 	vsnprintf( obuf, MSGQU_LINE_SIZE, fmt, args );
1976 	va_end(args);
1977 	obuf[MSGQU_LINE_SIZE]=0;
1978 	#ifdef MAKE_STANDALONE
1979 		fputs(obuf, stderr);
1980 		free(obuf);
1981 	#else
1982 		if(!mt_message_queue.is_thread_mode){
1983 			Rprintf("%s", obuf);
1984 			free(obuf);
1985 			return;
1986 		}
1987 
1988 		subread_lock_occupy(&mt_message_queue.queue_lock);
1989 		ArrayListPush(mt_message_queue.message_queue, obuf);
1990 		subread_lock_release(&mt_message_queue.queue_lock);
1991 		// it will be freed in the Main thread after printing.
1992 	#endif
1993 }
1994 
1995 #ifdef MAKE_TEST_MYRAND_TEST
1996 
main()1997 int main(){
1998 	myrand_srand(time(NULL));
1999 	int x;
2000 	for(x = 0; x<100000; x++){
2001 		int mv = myrand_rand();
2002 		SUBREADprintf("%d\n", mv);
2003 	}
2004 	return 0;
2005 }
2006 
2007 #endif
2008 
2009 
2010 
2011 
2012 // retrieved from https://github.com/kokke/tiny-TNbignum-c/
2013 // The original code was in public domain
2014 
2015 /* Functions for shifting number in-place. */
2016 static void _lshift_one_bit(struct bn* a);
2017 static void _rshift_one_bit(struct bn* a);
2018 static void _lshift_word(struct bn* a, int nwords);
2019 static void _rshift_word(struct bn* a, int nwords);
2020 
2021 
2022 
2023 /* Public / Exported functions. */
TNbignum_init(struct bn * n)2024 void TNbignum_init(struct bn* n)
2025 {
2026   require(n, "n is null");
2027 
2028   int i;
2029   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2030   {
2031     n->array[i] = 0;
2032   }
2033 }
2034 
2035 
TNbignum_from_int(struct bn * n,DTYPE_TMP i)2036 void TNbignum_from_int(struct bn* n, DTYPE_TMP i)
2037 {
2038   require(n, "n is null");
2039 
2040   TNbignum_init(n);
2041 
2042   /* Endianness issue if machine is not little-endian? */
2043 #ifdef WORD_SIZE
2044  #if (WORD_SIZE == 1)
2045   n->array[0] = (i & 0x000000ff);
2046   n->array[1] = (i & 0x0000ff00) >> 8;
2047   n->array[2] = (i & 0x00ff0000) >> 16;
2048   n->array[3] = (i & 0xff000000) >> 24;
2049  #elif (WORD_SIZE == 2)
2050   n->array[0] = (i & 0x0000ffff);
2051   n->array[1] = (i & 0xffff0000) >> 16;
2052  #elif (WORD_SIZE == 4)
2053   n->array[0] = i;
2054   DTYPE_TMP num_32 = 32;
2055   DTYPE_TMP tmp = i >> num_32; /* bit-shift with U64 operands to force 64-bit results */
2056   n->array[1] = tmp;
2057  #endif
2058 #endif
2059 }
2060 
2061 
TNbignum_to_int(struct bn * n)2062 int TNbignum_to_int(struct bn* n)
2063 {
2064   require(n, "n is null");
2065 
2066   int ret = 0;
2067 
2068   /* Endianness issue if machine is not little-endian? */
2069 #if (WORD_SIZE == 1)
2070   ret += n->array[0];
2071   ret += n->array[1] << 8;
2072   ret += n->array[2] << 16;
2073   ret += n->array[3] << 24;
2074 #elif (WORD_SIZE == 2)
2075   ret += n->array[0];
2076   ret += n->array[1] << 16;
2077 #elif (WORD_SIZE == 4)
2078   ret += n->array[0];
2079 #endif
2080 
2081   return ret;
2082 }
2083 
2084 
TNbignum_from_string(struct bn * n,char * str,int nbytes)2085 void TNbignum_from_string(struct bn* n, char* str, int nbytes)
2086 {
2087   require(n, "n is null");
2088   require(str, "str is null");
2089   require(nbytes > 0, "nbytes must be positive");
2090   require((nbytes & 1) == 0, "string format must be in hex -> equal number of bytes");
2091 
2092   TNbignum_init(n);
2093 
2094   DTYPE tmp;                        /* DTYPE is defined in bn.h - uint{8,16,32,64}_t */
2095   int i = nbytes - (2 * WORD_SIZE); /* index into string */
2096   int j = 0;                        /* index into array */
2097 
2098   /* reading last hex-byte "MSB" from string first -> big endian */
2099   /* MSB ~= most significant byte / block ? :) */
2100   while (i >= 0)
2101   {
2102     tmp = 0;
2103     sscanf(&str[i], SSCANF_FORMAT_STR, &tmp);
2104     //printf("SCAN_IN %d : v=%u\n", i, tmp);
2105     n->array[j] = tmp;
2106     i -= (2 * WORD_SIZE); /* step WORD_SIZE hex-byte(s) back in the string. */
2107     j += 1;               /* step one element forward in the array. */
2108   }
2109 }
2110 
2111 
TNbignum_to_string(struct bn * n,char * str,int nbytes)2112 void TNbignum_to_string(struct bn* n, char* str, int nbytes)
2113 {
2114   require(n, "n is null");
2115   require(str, "str is null");
2116   require(nbytes > 0, "nbytes must be positive");
2117   require((nbytes & 1) == 0, "string format must be in hex -> equal number of bytes");
2118 
2119   int j = BN_ARRAY_SIZE - 1; /* index into array - reading "MSB" first -> big-endian */
2120   int i = 0;                 /* index into string representation. */
2121 
2122   /* reading last array-element "MSB" first -> big endian */
2123   while ((j >= 0) && (nbytes > (i + 1)))
2124   {
2125     sprintf(&str[i], SPRINTF_FORMAT_STR, n->array[j]);
2126     //printf("WRITE:%d %s\n" , i, str+i);
2127     i += (2 * WORD_SIZE); /* step WORD_SIZE hex-byte(s) forward in the string. */
2128     j -= 1;               /* step one element back in the array. */
2129   }
2130 
2131   /* Count leading zeros: */
2132   j = 0;
2133   while (str[j] == '0')
2134   {
2135     j += 1;
2136   }
2137 
2138   /* Move string j places ahead, effectively skipping leading zeros */
2139   for (i = 0; i < (nbytes - j); ++i)
2140   {
2141     str[i] = str[i + j];
2142   }
2143 
2144   /* Zero-terminate string */
2145   str[i] = 0;
2146 }
2147 
2148 
TNbignum_dec(struct bn * n)2149 void TNbignum_dec(struct bn* n)
2150 {
2151   require(n, "n is null");
2152 
2153   DTYPE tmp; /* copy of n */
2154   DTYPE res;
2155 
2156   int i;
2157   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2158   {
2159     tmp = n->array[i];
2160     res = tmp - 1;
2161     n->array[i] = res;
2162 
2163     if (!(res > tmp))
2164     {
2165       break;
2166     }
2167   }
2168 }
2169 
2170 
TNbignum_inc(struct bn * n)2171 void TNbignum_inc(struct bn* n)
2172 {
2173   require(n, "n is null");
2174 
2175   DTYPE res;
2176   DTYPE_TMP tmp; /* copy of n */
2177 
2178   int i;
2179   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2180   {
2181     tmp = n->array[i];
2182     res = tmp + 1;
2183     n->array[i] = res;
2184 
2185     if (res > tmp)
2186     {
2187       break;
2188     }
2189   }
2190 }
2191 
2192 
TNbignum_add(struct bn * a,struct bn * b,struct bn * c)2193 void TNbignum_add(struct bn* a, struct bn* b, struct bn* c)
2194 {
2195   require(a, "a is null");
2196   require(b, "b is null");
2197   require(c, "c is null");
2198 
2199   DTYPE_TMP tmp;
2200   int carry = 0;
2201   int i;
2202   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2203   {
2204     tmp = (DTYPE_TMP)a->array[i] + b->array[i] + carry;
2205     carry = (tmp > MAX_VAL);
2206     c->array[i] = (tmp & MAX_VAL);
2207   }
2208 }
2209 
2210 
TNbignum_sub(struct bn * a,struct bn * b,struct bn * c)2211 void TNbignum_sub(struct bn* a, struct bn* b, struct bn* c)
2212 {
2213   require(a, "a is null");
2214   require(b, "b is null");
2215   require(c, "c is null");
2216 
2217   DTYPE_TMP res;
2218   DTYPE_TMP tmp1;
2219   DTYPE_TMP tmp2;
2220   int borrow = 0;
2221   int i;
2222   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2223   {
2224     tmp1 = (DTYPE_TMP)a->array[i] + (MAX_VAL + 1); /* + number_base */
2225     tmp2 = (DTYPE_TMP)b->array[i] + borrow;;
2226     res = (tmp1 - tmp2);
2227     c->array[i] = (DTYPE)(res & MAX_VAL); /* "modulo number_base" == "% (number_base - 1)" if number_base is 2^N */
2228     borrow = (res <= MAX_VAL);
2229   }
2230 }
2231 
2232 
TNbignum_mul(struct bn * a,struct bn * b,struct bn * c)2233 void TNbignum_mul(struct bn* a, struct bn* b, struct bn* c)
2234 {
2235   require(a, "a is null");
2236   require(b, "b is null");
2237   require(c, "c is null");
2238 
2239   struct bn row;
2240   struct bn tmp;
2241   int i, j;
2242 
2243   TNbignum_init(c);
2244 
2245   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2246   {
2247     TNbignum_init(&row);
2248 
2249     for (j = 0; j < BN_ARRAY_SIZE; ++j)
2250     {
2251       if (i + j < BN_ARRAY_SIZE)
2252       {
2253         TNbignum_init(&tmp);
2254         DTYPE_TMP intermediate = ((DTYPE_TMP)a->array[i] * (DTYPE_TMP)b->array[j]);
2255         TNbignum_from_int(&tmp, intermediate);
2256         _lshift_word(&tmp, i + j);
2257         TNbignum_add(&tmp, &row, &row);
2258       }
2259     }
2260     TNbignum_add(c, &row, c);
2261   }
2262 }
2263 
2264 
TNbignum_div(struct bn * a,struct bn * b,struct bn * c)2265 void TNbignum_div(struct bn* a, struct bn* b, struct bn* c)
2266 {
2267   require(a, "a is null");
2268   require(b, "b is null");
2269   require(c, "c is null");
2270 
2271   struct bn current;
2272   struct bn denom;
2273   struct bn tmp;
2274 
2275   TNbignum_from_int(&current, 1);               // int current = 1;
2276   TNbignum_assign(&denom, b);                   // denom = b
2277   TNbignum_assign(&tmp, a);                     // tmp   = a
2278 
2279   const DTYPE_TMP half_max = 1 + (DTYPE_TMP)(MAX_VAL / 2);
2280   int overflow = 0;
2281   while (TNbignum_cmp(&denom, a) != LARGER)     // while (denom <= a) {
2282   {
2283     if (denom.array[BN_ARRAY_SIZE - 1] >= half_max)
2284     {
2285       overflow = 1;
2286       break;
2287     }
2288     _lshift_one_bit(&current);                //   current <<= 1;
2289     _lshift_one_bit(&denom);                  //   denom <<= 1;
2290   }
2291   if (!overflow)
2292   {
2293     _rshift_one_bit(&denom);                  // denom >>= 1;
2294     _rshift_one_bit(&current);                // current >>= 1;
2295   }
2296   TNbignum_init(c);                             // int answer = 0;
2297 
2298   while (!TNbignum_is_zero(&current))           // while (current != 0)
2299   {
2300     if (TNbignum_cmp(&tmp, &denom) != SMALLER)  //   if (dividend >= denom)
2301     {
2302       TNbignum_sub(&tmp, &denom, &tmp);         //     dividend -= denom;
2303       TNbignum_or(c, &current, c);              //     answer |= current;
2304     }
2305     _rshift_one_bit(&current);                //   current >>= 1;
2306     _rshift_one_bit(&denom);                  //   denom >>= 1;
2307   }                                           // return answer;
2308 }
2309 
2310 
TNbignum_lshift(struct bn * a,struct bn * b,int nbits)2311 void TNbignum_lshift(struct bn* a, struct bn* b, int nbits)
2312 {
2313   require(a, "a is null");
2314   require(b, "b is null");
2315   require(nbits >= 0, "no negative shifts");
2316 
2317   TNbignum_assign(b, a);
2318   /* Handle shift in multiples of word-size */
2319   const int nbits_pr_word = (WORD_SIZE * 8);
2320   int nwords = nbits / nbits_pr_word;
2321   if (nwords != 0)
2322   {
2323     _lshift_word(b, nwords);
2324     nbits -= (nwords * nbits_pr_word);
2325   }
2326 
2327   if (nbits != 0)
2328   {
2329     int i;
2330     for (i = (BN_ARRAY_SIZE - 1); i > 0; --i)
2331     {
2332       b->array[i] = (b->array[i] << nbits) | (b->array[i - 1] >> ((8 * WORD_SIZE) - nbits));
2333     }
2334     b->array[i] <<= nbits;
2335   }
2336 }
2337 
2338 
TNbignum_rshift(struct bn * a,struct bn * b,int nbits)2339 void TNbignum_rshift(struct bn* a, struct bn* b, int nbits)
2340 {
2341   require(a, "a is null");
2342   require(b, "b is null");
2343   require(nbits >= 0, "no negative shifts");
2344 
2345   TNbignum_assign(b, a);
2346   /* Handle shift in multiples of word-size */
2347   const int nbits_pr_word = (WORD_SIZE * 8);
2348   int nwords = nbits / nbits_pr_word;
2349   if (nwords != 0)
2350   {
2351     _rshift_word(b, nwords);
2352     nbits -= (nwords * nbits_pr_word);
2353   }
2354 
2355   if (nbits != 0)
2356   {
2357     int i;
2358     for (i = 0; i < (BN_ARRAY_SIZE - 1); ++i)
2359     {
2360       b->array[i] = (b->array[i] >> nbits) | (b->array[i + 1] << ((8 * WORD_SIZE) - nbits));
2361     }
2362     b->array[i] >>= nbits;
2363   }
2364 
2365 }
2366 
2367 
TNbignum_mod(struct bn * a,struct bn * b,struct bn * c)2368 void TNbignum_mod(struct bn* a, struct bn* b, struct bn* c)
2369 {
2370   /*
2371  *     Take divmod and throw away div part
2372  *       */
2373   require(a, "a is null");
2374   require(b, "b is null");
2375   require(c, "c is null");
2376 
2377   struct bn tmp;
2378 
2379   TNbignum_divmod(a,b,&tmp,c);
2380 }
2381 
TNbignum_divmod(struct bn * a,struct bn * b,struct bn * c,struct bn * d)2382 void TNbignum_divmod(struct bn* a, struct bn* b, struct bn* c, struct bn* d)
2383 {
2384   /*
2385  *     Puts a%b in d
2386  *         and a/b in c
2387  *
2388  *             mod(a,b) = a - ((a / b) * b)
2389  *
2390  *                 example:
2391  *                       mod(8, 3) = 8 - ((8 / 3) * 3) = 2
2392  *                         */
2393   require(a, "a is null");
2394   require(b, "b is null");
2395   require(c, "c is null");
2396 
2397   struct bn tmp;
2398 
2399   /* c = (a / b) */
2400   TNbignum_div(a, b, c);
2401 
2402   /* tmp = (c * b) */
2403   TNbignum_mul(c, b, &tmp);
2404 
2405   /* c = a - tmp */
2406   TNbignum_sub(a, &tmp, d);
2407 }
2408 
2409 
TNbignum_and(struct bn * a,struct bn * b,struct bn * c)2410 void TNbignum_and(struct bn* a, struct bn* b, struct bn* c)
2411 {
2412   require(a, "a is null");
2413   require(b, "b is null");
2414   require(c, "c is null");
2415 
2416   int i;
2417   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2418   {
2419     c->array[i] = (a->array[i] & b->array[i]);
2420   }
2421 }
2422 
2423 
TNbignum_or(struct bn * a,struct bn * b,struct bn * c)2424 void TNbignum_or(struct bn* a, struct bn* b, struct bn* c)
2425 {
2426   require(a, "a is null");
2427   require(b, "b is null");
2428   require(c, "c is null");
2429 
2430   int i;
2431   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2432   {
2433     c->array[i] = (a->array[i] | b->array[i]);
2434   }
2435 }
2436 
2437 
TNbignum_xor(struct bn * a,struct bn * b,struct bn * c)2438 void TNbignum_xor(struct bn* a, struct bn* b, struct bn* c)
2439 {
2440   require(a, "a is null");
2441   require(b, "b is null");
2442   require(c, "c is null");
2443 
2444   int i;
2445   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2446   {
2447     c->array[i] = (a->array[i] ^ b->array[i]);
2448   }
2449 }
2450 
2451 
TNbignum_cmp(struct bn * a,struct bn * b)2452 int TNbignum_cmp(struct bn* a, struct bn* b)
2453 {
2454   require(a, "a is null");
2455   require(b, "b is null");
2456 
2457   int i = BN_ARRAY_SIZE;
2458   do
2459   {
2460     i -= 1; /* Decrement first, to start with last array element */
2461     if (a->array[i] > b->array[i])
2462     {
2463       return LARGER;
2464     }
2465     else if (a->array[i] < b->array[i])
2466     {
2467       return SMALLER;
2468     }
2469   }
2470   while (i != 0);
2471 
2472   return EQUAL;
2473 }
2474 
2475 
TNbignum_is_zero(struct bn * n)2476 int TNbignum_is_zero(struct bn* n)
2477 {
2478   require(n, "n is null");
2479 
2480   int i;
2481   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2482   {
2483     if (n->array[i])
2484     {
2485       return 0;
2486     }
2487   }
2488 
2489   return 1;
2490 }
2491 
2492 
TNbignum_pow(struct bn * a,struct bn * b,struct bn * c)2493 void TNbignum_pow(struct bn* a, struct bn* b, struct bn* c)
2494 {
2495   require(a, "a is null");
2496   require(b, "b is null");
2497   require(c, "c is null");
2498 
2499   struct bn tmp;
2500 
2501   TNbignum_init(c);
2502 
2503   if (TNbignum_cmp(b, c) == EQUAL)
2504   {
2505     /* Return 1 when exponent is 0 -- n^0 = 1 */
2506     TNbignum_inc(c);
2507   }
2508   else
2509   {
2510     /* Copy a -> tmp */
2511     TNbignum_assign(&tmp, a);
2512 
2513     TNbignum_dec(b);
2514 
2515     /* Begin summing products: */
2516     while (!TNbignum_is_zero(b))
2517     {
2518 
2519       /* c = tmp * tmp */
2520       TNbignum_mul(&tmp, a, c);
2521       /* Decrement b by one */
2522       TNbignum_dec(b);
2523 
2524       TNbignum_assign(&tmp, c);
2525     }
2526 
2527     /* c = tmp */
2528     TNbignum_assign(c, &tmp);
2529   }
2530 }
2531 
TNbignum_isqrt(struct bn * a,struct bn * b)2532 void TNbignum_isqrt(struct bn *a, struct bn* b)
2533 {
2534   require(a, "a is null");
2535   require(b, "b is null");
2536 
2537   struct bn low, high, mid, tmp;
2538 
2539   TNbignum_init(&low);
2540   TNbignum_assign(&high, a);
2541   TNbignum_rshift(&high, &mid, 1);
2542   TNbignum_inc(&mid);
2543 
2544   while (TNbignum_cmp(&high, &low) > 0)
2545   {
2546     TNbignum_mul(&mid, &mid, &tmp);
2547     if (TNbignum_cmp(&tmp, a) > 0)
2548     {
2549       TNbignum_assign(&high, &mid);
2550       TNbignum_dec(&high);
2551     }
2552     else
2553     {
2554       TNbignum_assign(&low, &mid);
2555     }
2556     TNbignum_sub(&high,&low,&mid);
2557     _rshift_one_bit(&mid);
2558     TNbignum_add(&low,&mid,&mid);
2559     TNbignum_inc(&mid);
2560   }
2561   TNbignum_assign(b,&low);
2562 }
2563 
2564 
TNbignum_assign(struct bn * dst,struct bn * src)2565 void TNbignum_assign(struct bn* dst, struct bn* src)
2566 {
2567   require(dst, "dst is null");
2568   require(src, "src is null");
2569 
2570   int i;
2571   for (i = 0; i < BN_ARRAY_SIZE; ++i)
2572   {
2573     dst->array[i] = src->array[i];
2574   }
2575 }
2576 
2577 
2578 /* Private / Static functions. */
_rshift_word(struct bn * a,int nwords)2579 static void _rshift_word(struct bn* a, int nwords)
2580 {
2581   /* Naive method: */
2582   require(a, "a is null");
2583   require(nwords >= 0, "no negative shifts");
2584 
2585   int i;
2586   if (nwords >= BN_ARRAY_SIZE)
2587   {
2588     for (i = 0; i < BN_ARRAY_SIZE; ++i)
2589     {
2590       a->array[i] = 0;
2591     }
2592     return;
2593   }
2594 
2595   for (i = 0; i < BN_ARRAY_SIZE - nwords; ++i)
2596   {
2597     a->array[i] = a->array[i + nwords];
2598   }
2599   for (; i < BN_ARRAY_SIZE; ++i)
2600   {
2601     a->array[i] = 0;
2602   }
2603 }
2604 
2605 
_lshift_word(struct bn * a,int nwords)2606 static void _lshift_word(struct bn* a, int nwords)
2607 {
2608   require(a, "a is null");
2609   require(nwords >= 0, "no negative shifts");
2610 
2611   int i;
2612   /* Shift whole words */
2613   for (i = (BN_ARRAY_SIZE - 1); i >= nwords; --i)
2614   {
2615     a->array[i] = a->array[i - nwords];
2616   }
2617   /* Zero pad shifted words. */
2618   for (; i >= 0; --i)
2619   {
2620     a->array[i] = 0;
2621   }
2622 }
2623 
2624 
_lshift_one_bit(struct bn * a)2625 static void _lshift_one_bit(struct bn* a)
2626 {
2627   require(a, "a is null");
2628 
2629   int i;
2630   for (i = (BN_ARRAY_SIZE - 1); i > 0; --i)
2631   {
2632     a->array[i] = (a->array[i] << 1) | (a->array[i - 1] >> ((8 * WORD_SIZE) - 1));
2633   }
2634   a->array[0] <<= 1;
2635 }
2636 
2637 
_rshift_one_bit(struct bn * a)2638 static void _rshift_one_bit(struct bn* a)
2639 {
2640   require(a, "a is null");
2641 
2642   int i;
2643   for (i = 0; i < (BN_ARRAY_SIZE - 1); ++i)
2644   {
2645     a->array[i] = (a->array[i] >> 1) | (a->array[i + 1] << ((8 * WORD_SIZE) - 1));
2646   }
2647   a->array[BN_ARRAY_SIZE - 1] >>= 1;
2648 }
2649 
2650 /*
2651 void TNbignum_pow_then_mod( struct bn * m, int e, struct bn * modulus, struct bn * res ){
2652   int xx;
2653   struct bn res, remove_times, remove_value, tmpv;
2654   TNbignum_from_string(&res, "01", 2);
2655   for(xx = 0; xx < 31; xx++){
2656     TNbignum_mul( m, m, tmpv );
2657     TNbignum_div( tmpv, modulus, remove_times);
2658     TNbignum_mul( remove_times, modulus, remove_value);
2659     TNbignum_sub( tmpv , remove_value, m);
2660     e = e >> 1;
2661   }
2662 
2663 }
2664 */
2665 
2666 #ifdef HELPER_TEST_BIGNUM
main()2667 void main(){
2668   char restxt[1030];
2669   struct bn n,e,res;
2670   int exponent = 35;
2671   restxt[0]=0;
2672 
2673   char * modulus = "AC97941B27989F9DFAC7FB03A951BF8D39CE60248D8FB5A2C4CDAFAE2431667DD5EBB46B6FF9BBEF5DE2B587CF6B06B5D63BB6B71B35C43FA5141F156A1AC77231FD5D916053CA3E5FEAD3AFB10877D1A5440119C6420A6C205758D01B5B75F5B420F9E99815820B389F0BFBA00B60C0A18612C268C0ED9B263B02DF526EED851581E0BDE4D46227DAA2EAA8D13EDF3476C20B30C3188FA3D0FBC1636C5477A744D0A042BA27C77E52CEFAE0B4005BF2E22001BA0C40A5898F3C8FE664969E72F208FDF8D7DC3D039690911E32DF6B4948280EAF67952D231907CAA1A3D1433D5A8E5325E3B849721871458E8EA5860E4EFFCCC61D82BE5EED6EAF9C6A292A2B";
2674 
2675   TNbignum_from_string(&n, modulus,strlen(modulus) );
2676   TNbignum_pow_then_mod(&m, exponent, &n, &res); // (m ^ e) mod N. E doesn't need to be a BIG NUMBER.
2677   TNbignum_to_string(&res, restxt, 1030);
2678   printf("RES=%s\n", restxt);
2679 
2680 
2681 }
2682 
2683 #endif
2684 
2685 #ifdef HELPER_TEST_CRC32
main()2686 void main(){
2687   long c32=0;
2688   c32 = crc32(c32, "Hello.",6);
2689   printf("CRC = %08lX\n", c32);
2690 
2691   c32=0;
2692   c32 = crc32(c32, "Hel",3);
2693   c32 = crc32(c32, "lo",2);
2694   c32 = crc32(c32, ".",1);
2695   c32 = crc32(c32, "",0);
2696   printf("CRC = %08lX\n", c32);
2697 }
2698 
2699 #endif
2700 
2701 #ifdef __MINGW32__
2702 #include <windows.h>
2703 #endif
2704 
2705 
get_free_total_mem(size_t * total,size_t * free_mem)2706 int get_free_total_mem(size_t * total, size_t * free_mem){
2707 
2708 #if defined(__FreeBSD__) || defined(__DragonFly__)
2709     return -1;
2710 #endif
2711 
2712 #ifdef __MINGW32__
2713 	MEMORYSTATUSEX statex;
2714 	statex.dwLength = sizeof (statex);
2715 	GlobalMemoryStatusEx (&statex);
2716 	(*total) = statex.ullTotalPhys;
2717 	(*free_mem) = statex.ullAvailPhys;
2718 	return 0;
2719 #else
2720 #ifdef MACOS
2721     mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
2722     vm_statistics_data_t vmstat;
2723     int page_size = getpagesize();
2724     if(KERN_SUCCESS != host_statistics(mach_host_self(), HOST_VM_INFO, (host_info_t)&vmstat, &count))
2725         return -1;
2726     //printf("PSIZE=%d\nACT=%u; INACT=%u; FREE=%u\n", page_size, vmstat.active_count, vmstat.inactive_count, vmstat.free_count);
2727 	size_t btlen = sizeof(*total);
2728     if(sysctl( (int[]) { CTL_HW, HW_MEMSIZE }, 2, total, &btlen, NULL, 0)) return -1;
2729     *free_mem = (vmstat.free_count + vmstat.inactive_count) * 1llu * page_size;
2730     return 0;
2731 #else
2732     struct sysinfo sinf;
2733     sysinfo(&sinf);
2734     size_t cached_mem = get_sys_mem_info("Cached:");
2735     if(cached_mem<0)cached_mem=0;
2736     *free_mem = cached_mem + sinf.bufferram+sinf.freeram;
2737     *total = sinf.totalram;
2738     return 0;
2739 #endif
2740 #endif
2741 }
2742 
worker_master_mutex_init(worker_master_mutex_t * wmt,int all_workers)2743 void worker_master_mutex_init(worker_master_mutex_t * wmt, int all_workers){
2744 	memset(wmt,0,sizeof(worker_master_mutex_t));
2745 	wmt -> conds_worker_wait = malloc(sizeof(pthread_cond_t) * all_workers);
2746 	wmt -> mutexs_worker_wait = malloc(sizeof(pthread_mutex_t) * all_workers);
2747 	wmt -> mutex_with_master = calloc(sizeof(int), all_workers);
2748 	wmt -> worker_is_working = calloc(sizeof(int), all_workers);
2749 
2750 	wmt -> workers = all_workers;
2751 	int x1;
2752 	for(x1=0;x1<all_workers;x1++){
2753 		pthread_cond_init(&wmt -> conds_worker_wait [x1], NULL);
2754 		pthread_mutex_init(&wmt -> mutexs_worker_wait [x1], NULL);
2755 	}
2756 }
2757 
worker_thread_start(worker_master_mutex_t * wmt,int worker_id)2758 void worker_thread_start(worker_master_mutex_t * wmt, int worker_id){
2759 	pthread_mutex_lock(&wmt->mutexs_worker_wait[worker_id]);
2760 	wmt -> mutex_with_master[worker_id] = 0;
2761 }
2762 
worker_master_mutex_destroy(worker_master_mutex_t * wmt)2763 void worker_master_mutex_destroy(worker_master_mutex_t * wmt){
2764 	int x1;
2765 	for(x1=0;x1<wmt -> workers;x1++){
2766 		pthread_mutex_destroy(&wmt -> mutexs_worker_wait [x1]);
2767 		pthread_cond_destroy(&wmt -> conds_worker_wait [x1]);
2768 	}
2769 	free( wmt -> worker_is_working);
2770 	free( wmt -> mutex_with_master);
2771 	free( wmt -> conds_worker_wait);
2772 	free( wmt -> mutexs_worker_wait);
2773 }
2774 
worker_wait_for_job(worker_master_mutex_t * wmt,int worker_id)2775 int worker_wait_for_job(worker_master_mutex_t * wmt, int worker_id){
2776 	pthread_mutex_trylock(wmt->mutexs_worker_wait + worker_id);
2777 	wmt->worker_is_working[worker_id] = 0;
2778 	while(1){
2779 		pthread_cond_wait(&wmt->conds_worker_wait[worker_id], &wmt->mutexs_worker_wait[worker_id]);
2780 		if(wmt -> all_terminate)
2781 			pthread_mutex_unlock(&wmt->mutexs_worker_wait[worker_id]);
2782 		if(wmt -> mutex_with_master[worker_id] == 0)break;
2783 		// otherwise, it is a spurious wakeup
2784 	}
2785 	return wmt -> all_terminate;
2786 }
2787 
master_wait_for_job_done(worker_master_mutex_t * wmt,int worker_id)2788 void master_wait_for_job_done(worker_master_mutex_t * wmt, int worker_id){
2789 	if(!wmt -> mutex_with_master[worker_id] ){
2790 		while(1){
2791 			pthread_mutex_lock(&wmt->mutexs_worker_wait[worker_id]);
2792 			if(!wmt->worker_is_working[worker_id])break;
2793 			// In a rare situation, the worker hasn't been scheduled after the last notification.
2794 			// Namely, the master will notify twice the worker when the worker thread hasn't started at all. The second notification will cause no condition signaled.
2795 			// This test is to prevent such a situation.
2796 			pthread_mutex_unlock(&wmt->mutexs_worker_wait[worker_id]);
2797 			usleep(50);
2798 		}
2799 		if(wmt->worker_is_working[worker_id])
2800 			SUBREADprintf("ERROR 3: HOW CAN I HAVE THIS LOCK : %d?? TH_%d\n", wmt->worker_is_working[worker_id], worker_id);
2801 	}
2802 	wmt -> mutex_with_master[worker_id] = 1;
2803 }
2804 
2805 // master collects results between master_wait_for_job_done and master_notify_worker
master_notify_worker(worker_master_mutex_t * wmt,int worker_id)2806 void master_notify_worker(worker_master_mutex_t * wmt, int worker_id){
2807 	if(!wmt -> mutex_with_master[worker_id])SUBREADprintf("ERROR 2: HOW CAN I NOT HAVE THE LOCK : %d ; TERM=%d\n", worker_id, wmt -> all_terminate);
2808 
2809 	wmt -> worker_is_working[worker_id] = 1;
2810 	wmt -> mutex_with_master[worker_id] = 0;
2811 	pthread_cond_signal(&wmt->conds_worker_wait[worker_id]);
2812 	pthread_mutex_unlock(&wmt->mutexs_worker_wait[worker_id]);
2813 }
2814 
2815 // this function must be called when the master thread has all the worker locks in control.
2816 // ie all the workers should be in the "wait_for_job" status.
terminate_workers(worker_master_mutex_t * wmt)2817 void terminate_workers(worker_master_mutex_t * wmt){
2818 	int x1;
2819 	wmt -> all_terminate = 1;
2820 	for(x1=0;x1<wmt -> workers;x1++)
2821 		master_notify_worker(wmt,x1);
2822 }
2823 
windows_memmem(const void * haystack_start,size_t haystack_len,const void * needle_start,size_t needle_len)2824 void *windows_memmem(const void *haystack_start, size_t haystack_len, const void *needle_start, size_t needle_len)
2825 {
2826 
2827     const unsigned char *haystack = (const unsigned char *) haystack_start;
2828     const unsigned char *needle = (const unsigned char *) needle_start;
2829     const unsigned char *h = NULL;
2830     const unsigned char *n = NULL;
2831     size_t x = needle_len;
2832 
2833     /* The first occurrence of the empty string is deemed to occur at
2834  *     the beginning of the string.  */
2835     if (needle_len == 0)
2836         return (void *) haystack_start;
2837 
2838     /* Sanity check, otherwise the loop might search through the whole
2839  *         memory.  */
2840      if (haystack_len < needle_len)
2841        return NULL;
2842 
2843     for (; *haystack && haystack_len--; haystack++) {
2844 
2845         x = needle_len;
2846         n = needle;
2847         h = haystack;
2848 
2849         if (haystack_len < needle_len)
2850             break;
2851 
2852         if ((*haystack != *needle) || ( *haystack + needle_len != *needle + needle_len))
2853             continue;
2854 
2855         for (; x ; h++ , n++) {
2856             x--;
2857 
2858             if (*h != *n)
2859                 break;
2860 
2861            if (x == 0)
2862             return (void *)haystack;
2863         }
2864     }
2865 
2866     return NULL;
2867 }
2868