1 static char rcsid[] = "$Id: filestring.c 218257 2019-01-22 17:22:04Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "filestring.h"
7 #include <stdlib.h>
8 #include <stdarg.h>
9 #include <ctype.h>		/* For isdigit() */
10 #include <string.h>		/* For strlen and strncpy */
11 #include "assert.h"
12 #include "mem.h"
13 #include "complement.h"
14 #include "list.h"
15 
16 
17 #define BLOCKSIZE 1024
18 
19 #ifdef DEBUG
20 #define debug(x) x
21 #else
22 #define debug(x)
23 #endif
24 
25 /* Simultaneous print to stdout */
26 #ifdef DEBUG1
27 #define debug1(x) x
28 #else
29 #define debug1(x)
30 #endif
31 
32 
33 #define T Filestring_T
34 
35 struct T {
36   int id;
37   SAM_split_output_type split_output;
38 
39   List_T blocks;
40   int nleft;
41   char *ptr;
42 
43   char *string;
44   int strlength;
45 };
46 
47 
48 int
Filestring_id(T this)49 Filestring_id (T this) {
50   return this->id;
51 }
52 
53 void
Filestring_set_split_output(T this,int split_output)54 Filestring_set_split_output (T this, int split_output) {
55   this->split_output = split_output;
56   return;
57 }
58 
59 SAM_split_output_type
Filestring_split_output(T this)60 Filestring_split_output (T this) {
61   return this->split_output;
62 }
63 
64 
65 T
Filestring_new(int id)66 Filestring_new (int id) {
67   T new = (T) MALLOC_OUT(sizeof(*new));
68 
69   new->id = id;
70   new->split_output = OUTPUT_NONE;
71   new->blocks = (List_T) NULL;
72   new->nleft = 0;
73   new->ptr = (char *) NULL;
74 
75   new->string = (char *) NULL;
76 
77   return new;
78 }
79 
80 void
Filestring_free(T * old)81 Filestring_free (T *old) {
82   List_T p;
83   char *block;
84 
85   if (*old) {
86     if ((*old)->string != NULL) {
87       FREE_OUT((*old)->string);
88     }
89 
90     for (p = (*old)->blocks; p != NULL; p = List_next(p)) {
91       block = (char *) List_head(p);
92       FREE_OUT(block);
93     }
94     List_free_out(&(*old)->blocks);
95 
96     FREE_OUT(*old);
97   }
98 
99   return;
100 }
101 
102 
103 void
Filestring_stringify(T this)104 Filestring_stringify (T this) {
105   List_T p, next;
106   char *ptr, *dest;
107   int nblocks, i;
108 
109   if ((nblocks = List_length(this->blocks)) == 0) {
110     this->string = (char *) NULL;
111     this->strlength = -1;
112 
113   } else if (this->string != NULL) {
114     /* Already stringified */
115 
116   } else {
117     this->strlength = (nblocks - 1) * BLOCKSIZE + (BLOCKSIZE - this->nleft);
118     dest = this->string = (char *) MALLOC_OUT((this->strlength + 1) * sizeof(char));
119 
120     p = this->blocks = List_reverse(this->blocks);
121 
122     next = List_next(p);
123     while (next != NULL) {
124       ptr = (char *) List_head(p);
125       for (i = 0; i < BLOCKSIZE; i++) {
126 	*dest++ = *ptr++;
127       }
128       p = next;
129       next = List_next(p);
130     }
131 
132     ptr = (char *) List_head(p);
133     for (i = 0; i < BLOCKSIZE - this->nleft; i++) {
134       *dest++ = *ptr++;
135     }
136 
137     *dest = '\0';
138   }
139 
140   return;
141 }
142 
143 
144 /* Could assume that Filestring_stringify has been called */
145 void
Filestring_print(MPI_File fp,T this)146 Filestring_print (
147 #ifdef USE_MPI
148 		  MPI_File fp,
149 #else
150 		  FILE *fp,
151 #endif
152 		  T this) {
153   List_T p, next;
154   char *ptr;
155 
156   if (this == NULL) {
157     return;
158 
159   } else if (fp == NULL) {
160     /* Can happen with the --omit-concordant-uniq or --omit-concordant-mult flags */
161 #ifdef USE_MPI
162     /* This may not work if worker is from rank 0 */
163     Filestring_send(this,/*dest*/0,/*tag*/MPI_TAG_WRITE_STDOUT,MPI_COMM_WORLD);
164 #else
165     return;
166 #endif
167 
168   } else if (this->string != NULL) {
169     /* Already stringified */
170 #ifdef USE_MPI
171     debug1(fwrite(this->string,sizeof(char),this->strlength,stdout));
172     MPI_File_write_shared(fp,this->string,this->strlength,MPI_CHAR,MPI_STATUS_IGNORE);
173 #else
174     fwrite(this->string,sizeof(char),this->strlength,fp);
175 #endif
176 
177   } else if (this->blocks == NULL) {
178     return;
179 
180   } else {
181     p = this->blocks = List_reverse(this->blocks);
182 
183     next = List_next(p);
184     while (next != NULL) {
185       ptr = (char *) List_head(p);
186 #ifdef USE_MPI
187       debug1(fwrite(ptr,sizeof(char),BLOCKSIZE,stdout));
188       MPI_File_write_shared(fp,ptr,BLOCKSIZE,MPI_CHAR,MPI_STATUS_IGNORE);
189 #else
190       fwrite(ptr,sizeof(char),BLOCKSIZE,fp);
191 #endif
192       p = next;
193       next = List_next(p);
194     }
195 
196     ptr = (char *) List_head(p);
197 #ifdef USE_MPI
198     debug1(fwrite(ptr,sizeof(char),BLOCKSIZE - this->nleft,stdout));
199     MPI_File_write_shared(fp,ptr,BLOCKSIZE - this->nleft,MPI_CHAR,MPI_STATUS_IGNORE);
200 #else
201     fwrite(ptr,sizeof(char),BLOCKSIZE - this->nleft,fp);
202 #endif
203   }
204 
205   return;
206 }
207 
208 
209 static void
transfer_char(T this,char c)210 transfer_char (T this, char c) {
211   char *block;
212 
213   if (this->nleft == 0) {
214     block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
215     this->blocks = List_push_out(this->blocks,(void *) block);
216     this->nleft = BLOCKSIZE;
217     this->ptr = &(block[0]);
218   }
219   *this->ptr++ = c;
220   this->nleft -= 1;
221 
222   return;
223 }
224 
225 
226 static void
transfer_buffer(T this,char * string,int bufferlen)227 transfer_buffer (T this, char *string, int bufferlen) {
228   char *block, *q;
229 
230   for (q = string; --bufferlen >= 0 && *q != '\0'; q++) {
231     if (this->nleft == 0) {
232       block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
233       this->blocks = List_push_out(this->blocks,(void *) block);
234       this->nleft = BLOCKSIZE;
235       this->ptr = &(block[0]);
236     }
237     *this->ptr++ = *q;
238     this->nleft -= 1;
239   }
240 
241   if (bufferlen < 0) {
242     fprintf(stderr,"Overflowed buffer without seeing a terminating character\n");
243     fprintf(stderr,"String was %s\n",q);
244     abort();
245   }
246 
247   return;
248 }
249 
250 
251 static void
transfer_string(T this,char * string,int stringlen)252 transfer_string (T this, char *string, int stringlen) {
253   char *block, *q;
254 
255   if (stringlen > 0) {
256     q = string;
257     while (this->nleft <= stringlen) {
258       strncpy(this->ptr,q,this->nleft);
259       q += this->nleft;
260       stringlen -= this->nleft;
261 
262       block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
263       this->blocks = List_push_out(this->blocks,(void *) block);
264       this->nleft = BLOCKSIZE;
265       this->ptr = &(block[0]);
266     }
267 
268     strncpy(this->ptr,q,stringlen);
269     this->ptr += stringlen;
270     this->nleft -= stringlen;
271   }
272 
273   return;
274 }
275 
276 
/* Reverses the first length chars of string in place.  A length of 0 or 1
   leaves the buffer untouched; in particular string may be NULL when
   length is 0. */
static void
reverse_inplace (char *string, unsigned int length) {
  char temp, *p, *q;

  /* Nothing to swap.  Returning early also avoids forming
     &string[length-1] when length is 0, which would wrap around. */
  if (length < 2) {
    return;
  }

  p = string;
  q = string + (length - 1);

  /* Swap the ends and walk inward until the cursors meet */
  while (p < q) {
    temp = *p;
    *p++ = *q;
    *q-- = temp;
  }

  return;
}
293 
294 static void
transfer_string_reverse(T this,char * string,int stringlen)295 transfer_string_reverse (T this, char *string, int stringlen) {
296   char *block, *q;
297 
298   if (stringlen > 0) {
299     q = &(string[stringlen]);
300     while (this->nleft <= stringlen) {
301       q -= this->nleft;
302       strncpy(this->ptr,q,this->nleft);
303       reverse_inplace(this->ptr,this->nleft);
304       stringlen -= this->nleft;
305 
306       block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
307       this->blocks = List_push_out(this->blocks,(void *) block);
308       this->nleft = BLOCKSIZE;
309       this->ptr = &(block[0]);
310     }
311 
312     strncpy(this->ptr,string,stringlen);
313     reverse_inplace(this->ptr,stringlen);
314     this->ptr += stringlen;
315     this->nleft -= stringlen;
316   }
317 
318   return;
319 }
320 
321 
322 static char complCode[128] = COMPLEMENT_LC;
323 
324 static void
revcomp_inplace(char * string,unsigned int length)325 revcomp_inplace (char *string, unsigned int length) {
326   char temp, *p, *q;
327   unsigned int i;
328 
329   p = string;
330   q = &(string[length-1]);
331 
332   for (i = 0; i < length/2; i++) {
333     temp = complCode[(int) *p];
334     *p++ = complCode[(int) *q];
335     *q-- = temp;
336   }
337   if (p == q) {
338     *p = complCode[(int) *p];
339   }
340 
341   return;
342 }
343 
344 static void
transfer_string_revcomp(T this,char * string,int stringlen)345 transfer_string_revcomp (T this, char *string, int stringlen) {
346   char *block, *q;
347 
348   if (stringlen > 0) {
349     q = &(string[stringlen]);
350     while (this->nleft <= stringlen) {
351       q -= this->nleft;
352       strncpy(this->ptr,q,this->nleft);
353       revcomp_inplace(this->ptr,this->nleft);
354       stringlen -= this->nleft;
355 
356       block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
357       this->blocks = List_push_out(this->blocks,(void *) block);
358       this->nleft = BLOCKSIZE;
359       this->ptr = &(block[0]);
360     }
361 
362     strncpy(this->ptr,string,stringlen);
363     revcomp_inplace(this->ptr,stringlen);
364     this->ptr += stringlen;
365     this->nleft -= stringlen;
366   }
367 
368   return;
369 }
370 
371 
372 
373 #define BUFFERLEN 1024
374 
375 void
Filestring_put(T this,const char * format,...)376 Filestring_put (T this, const char *format, ...) {
377   va_list values;
378 
379   char BUFFER[BUFFERLEN];
380   char *block;
381   const char *p;
382   char *q, c;
383   char *string;
384   int precision, stringlen, i;
385 
386   va_start(values,format);
387 
388   p = format;
389   debug(printf("format is %s\n",format));
390   while (*p != '\0') {
391     if ((c = *p) == '\\') {  /* escape */
392       debug(printf("Saw an escape character\n"));
393       switch (*++p) {
394       case 't': transfer_char(this,'\t'); break; /* Actually \t shows up as an ASCII character */
395       case '\\': transfer_char(this,'\\'); break;
396       default: fprintf(stderr,"Cannot parse \\%c\n",*p);
397       }
398 
399     } else if (c == '%') {  /* formatting */
400       debug(printf("After formatting character saw %c\n",p[1]));
401       switch (*++p) {
402       case '%':			/* percent sign */
403 	transfer_char(this,'%');
404 	break;
405 
406       case 'c':			/* character */
407 	transfer_char(this,(char) va_arg(values, int));
408 	break;
409 
410       case 'p':		/* pointer */
411 	sprintf(BUFFER,"%p",va_arg(values, void *));
412 	transfer_buffer(this,BUFFER,BUFFERLEN);
413 	break;
414 
415       case 's': 		/* string */
416 	q = va_arg(values, char *);
417 	transfer_string(this,q,strlen(q));
418 	break;
419 
420       case 'r': 		/* string reversed */
421 	q = va_arg(values, char *);
422 	transfer_string_reverse(this,q,strlen(q));
423 	break;
424 
425       case 'R': 		/* string reverse complemented */
426 	q = va_arg(values, char *);
427 	transfer_string_revcomp(this,q,strlen(q));
428 	break;
429 
430       case '.':			/* float or double */
431 	if (*++p == '*') {
432 	  precision = va_arg(values, int);
433 	  ++p;
434 	} else {
435 	  sscanf(p,"%d",&precision);
436 	  while (isdigit(*++p)) ;
437 	}
438 	switch (*p) {
439 	case 'f':
440 	  sprintf(BUFFER,"%.*f",precision,va_arg(values, double));
441 	  transfer_buffer(this,BUFFER,BUFFERLEN);
442 	  break;
443 
444 	case 'e':
445 	  sprintf(BUFFER,"%.*e",precision,va_arg(values, double));
446 	  transfer_buffer(this,BUFFER,BUFFERLEN);
447 	  break;
448 
449 	case 'g':
450 	  sprintf(BUFFER,"%.*g",precision,va_arg(values, double));
451 	  transfer_buffer(this,BUFFER,BUFFERLEN);
452 	  break;
453 
454 	case 's':
455 	  transfer_string(this,/*string*/va_arg(values, char *),/*stringlen*/precision);
456 	  break;
457 
458 	case 'r':
459 	  transfer_string_reverse(this,/*string*/va_arg(values, char *),/*stringlen*/precision);
460 	  break;
461 
462 	case 'R':
463 	  transfer_string_revcomp(this,/*string*/va_arg(values, char *),/*stringlen*/precision);
464 	  break;
465 
466 	default: fprintf(stderr,"Cannot parse %%.%d%c\n",precision,*p); abort();
467 	}
468 	break;
469 
470       case '*':			/* indirect int or string */
471 	precision = va_arg(values, int);
472 	debug(printf("format is %c\n",p[1]));
473 	switch (*++p) {
474 	case 'd':
475 	  sprintf(BUFFER,"%*d",precision,va_arg(values, int));
476 	  transfer_buffer(this,BUFFER,BUFFERLEN);
477 	  break;
478 	case 'u':
479 	  sprintf(BUFFER,"%*u",precision,va_arg(values, unsigned int));
480 	  transfer_buffer(this,BUFFER,BUFFERLEN);
481 	  break;
482 	case 's':
483 	  /* Right justify */
484 	  string = va_arg(values, char *);
485 	  if ((stringlen = (int) strlen(string)) < precision) {
486 	    for (i = 0; i < precision - stringlen; i++) {
487 	      transfer_char(this,' ');
488 	    }
489   	    transfer_string(this,string,stringlen);
490 	  } else {
491 	    transfer_string(this,string,/*stringlen*/precision);
492 	  }
493 	  break;
494 	case 'r':
495 	  string = va_arg(values, char *);
496 	  if ((stringlen = (int) strlen(string)) < precision) {
497 	    for (i = 0; i < precision - stringlen; i++) {
498 	      transfer_char(this,' ');
499 	    }
500 	    transfer_string_reverse(this,string,stringlen);
501 	  } else {
502 	    transfer_string_reverse(this,string,/*stringlen*/precision);
503 	  }
504 	  break;
505 	case 'R':
506 	  string = va_arg(values, char *);
507 	  if ((stringlen = (int) strlen(string)) < precision) {
508 	    for (i = 0; i < precision - stringlen; i++) {
509 	      transfer_char(this,' ');
510 	    }
511 	    transfer_string_revcomp(this,string,stringlen);
512 	  } else {
513 	    transfer_string_revcomp(this,string,/*stringlen*/precision);
514 	  }
515 	  break;
516 	default: fprintf(stderr,"Cannot parse %%*%c\n",*p); abort();
517 	}
518 	break;
519 
520       case 'd':			/* int */
521 	sprintf(BUFFER,"%d",va_arg(values, int));
522 	transfer_buffer(this,BUFFER,BUFFERLEN);
523 	break;
524 
525       case 'f':			/* float */
526 	sprintf(BUFFER,"%f",va_arg(values, double));
527 	transfer_buffer(this,BUFFER,BUFFERLEN);
528 	break;
529 
530       case 'u':			/* unsigned int */
531 	sprintf(BUFFER,"%u",va_arg(values, unsigned int));
532 	transfer_buffer(this,BUFFER,BUFFERLEN);
533 	break;
534 
535       case 'l':
536 	switch (*++p) {
537 	case 'd':			/* long int */
538 	  sprintf(BUFFER,"%ld",va_arg(values, long int));
539 	  transfer_buffer(this,BUFFER,BUFFERLEN);
540 	  break;
541 
542 	case 'u':			/* unsigned long */
543 	  sprintf(BUFFER,"%lu",va_arg(values, unsigned long));
544 	  transfer_buffer(this,BUFFER,BUFFERLEN);
545 	  break;
546 
547 	case 'l':
548 	  switch (*++p) {
549 	  case 'd':			/* long long int */
550 	    sprintf(BUFFER,"%lld",va_arg(values, long long int));
551 	    transfer_buffer(this,BUFFER,BUFFERLEN);
552 	    break;
553 
554 	  case 'u':			/* unsigned long long */
555 	    sprintf(BUFFER,"%llu",va_arg(values, unsigned long long));
556 	    transfer_buffer(this,BUFFER,BUFFERLEN);
557 	    break;
558 
559 	  default: fprintf(stderr,"Cannot parse %%ll%c\n",*p); abort();
560 	  }
561 	  break;
562 
563 	default: fprintf(stderr,"Cannot parse %%l%c\n",*p); abort();
564 	}
565 	break;
566 
567       default: fprintf(stderr,"Cannot parse %%%c\n",*p); abort();
568       }
569 
570     } else {
571       /* transfer_char(this,c); -- effectively inlined here */
572       if (this->nleft == 0) {
573 	block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
574 	this->blocks = List_push_out(this->blocks,(void *) block);
575 	this->nleft = BLOCKSIZE;
576 	this->ptr = &(block[0]);
577       }
578       *this->ptr++ = c;
579       this->nleft -= 1;
580     }
581 
582     p++;
583   }
584 
585   va_end(values);
586 
587   return;
588 }
589 
590 void
Filestring_putc(char c,T this)591 Filestring_putc (char c, T this) {
592   char *block;
593 
594   if (this->nleft == 0) {
595     block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
596     this->blocks = List_push_out(this->blocks,(void *) block);
597     this->nleft = BLOCKSIZE;
598     this->ptr = &(block[0]);
599   }
600   *this->ptr++ = c;
601   this->nleft -= 1;
602 }
603 
604 
605 /* Modified from transfer_string */
606 void
Filestring_puts(T this,char * string,int strlength)607 Filestring_puts (T this, char *string, int strlength) {
608   char *block, *q;
609 
610   for (q = string; --strlength >= 0; q++) {
611     if (this->nleft == 0) {
612       block = (char *) MALLOC_OUT(BLOCKSIZE * sizeof(char));
613       this->blocks = List_push_out(this->blocks,(void *) block);
614       this->nleft = BLOCKSIZE;
615       this->ptr = &(block[0]);
616     }
617     *this->ptr++ = *q;
618     this->nleft -= 1;
619   }
620 
621   return;
622 }
623 
624 
625 
626 /* Assumes that Filestring_stringify has been called on source */
627 void
Filestring_merge(T dest,T source)628 Filestring_merge (T dest, T source) {
629   if (source->string != NULL) {
630     transfer_string(dest,source->string,strlen(source->string));
631   }
632 
633   return;
634 }
635 
636 
637 #ifdef USE_MPI
638 char *
Filestring_extract(int * strlength,T this)639 Filestring_extract (int *strlength, T this) {
640   Filestring_stringify(this);
641   if ((*strlength = this->strlength) == 0) {
642     return (char *) NULL;
643   } else {
644     return this->string;
645   }
646 }
647 
648 
649 void
Filestring_send(T this,int dest,int tag,MPI_Comm comm)650 Filestring_send (T this, int dest, int tag, MPI_Comm comm) {
651   Filestring_stringify(this);
652   MPI_SEND(&this->strlength,1,MPI_INT,dest,tag,comm);
653   if (this->strlength > 0) {
654     MPI_SEND(this->string,this->strlength+1,MPI_CHAR,dest,tag,comm);
655   }
656   return;
657 }
658 
659 
660 char *
Filestring_recv(int * strlength,int source,int tag,MPI_Comm comm)661 Filestring_recv (int *strlength, int source, int tag, MPI_Comm comm) {
662   char *string;
663   MPI_Status status;
664 
665   MPI_RECV(&(*strlength),1,MPI_INT,source,tag,comm,&status);
666   if (*strlength <= 0) {
667     string = (char *) MALLOC(1 * sizeof(char));
668     string[0] = '\0';
669     *strlength = 0;
670   } else {
671     string = (char *) MALLOC(((*strlength) + 1) * sizeof(char));
672     MPI_RECV(string,(*strlength) + 1,MPI_CHAR,source,tag,comm,&status);
673   }
674 
675   return string;
676 }
677 #endif
678 
679 
680 
681