1 /*****************************************************************
2  * SQUID - a library of functions for biological sequence analysis
3  * Copyright (C) 1992-2002 Washington University School of Medicine
4  *
5  *     This source code is freely distributed under the terms of the
6  *     GNU General Public License. See the files COPYRIGHT and LICENSE
7  *     for details.
8  *****************************************************************/
9 
10 /* a2m.c
11  *
12  * reading/writing A2M (aligned FASTA) files.
13  *
14  * RCS $Id: a2m.c 297 2014-10-31 13:02:37Z fabian $ (Original squid RCS Id: a2m.c,v 1.1 1999/07/15 22:26:40 eddy Exp)
15  */
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include "squid.h"
21 #include "msa.h"
22 
23 /* Function: ReadA2M()
24  * Date:     SRE, Sun Jun  6 17:11:29 1999 [bus from Madison 1999 worm mtg]
25  *
26  * Purpose:  Parse an alignment read from an open A2M format
27  *           alignment file. A2M is a single alignment format.
28  *           Return the alignment, or NULL if we've already
29  *           read the alignment.
30  *
31  * Args:     afp - open alignment file
32  *
33  * Returns:  MSA *  - an alignment object.
34  *                    Caller responsible for an MSAFree()
35  */
36 MSA *
ReadA2M(MSAFILE * afp)37 ReadA2M(MSAFILE *afp)
38 {
39   MSA  *msa;
40   char *buf;
41   char *name;
42   char *desc;
43   char *seq;
44   int   idx;
45   int   len1, len2;
46 
47   if (feof(afp->f)) return NULL;
48 
49   name = NULL;
50   msa  = MSAAlloc(10, 0);
51   idx  = 0;
52   while ((buf = MSAFileGetLine(afp)) != NULL)
53     {
54       if (*buf == '>')
55 	{
56 	  buf++;		/* skip the '>' */
57 	  if ((name = sre_strtok(&buf, WHITESPACE, &len1)) == NULL)
58 	    Die("Blank name in A2M file %s (line %d)\n", afp->fname, afp->linenumber);
59 	  desc = sre_strtok(&buf, "\n", &len2);
60 
61 	  idx = GKIStoreKey(msa->index, name);
62 	  if (idx >= msa->nseqalloc) MSAExpand(msa);
63 
64 	  msa->sqname[idx] = sre_strdup(name, len1);
65 	  if (desc != NULL) MSASetSeqDescription(msa, idx, desc);
66 	  msa->nseq++;
67 	}
68       else if (name != NULL)
69 	{
70 	  if ((seq = sre_strtok(&buf, WHITESPACE, &len1)) == NULL) continue;
71 	  msa->sqlen[idx] = sre_strcat(&(msa->aseq[idx]), msa->sqlen[idx], seq, len1);
72 	}
73     }
74   if (name == NULL) { MSAFree(msa); return NULL; }
75 
76   MSAVerifyParse(msa);
77   return msa;
78 }
79 
80 
81 /* Function: WriteA2M()
82  * Date:     SRE, Sun Jun  6 17:40:35 1999 [bus from Madison, 1999 worm mtg]
83  *
84  * Purpose:  Write an "aligned FASTA" (aka a2m, to UCSC) formatted
85  *           alignment.
86  *
87  * Args:     fp    - open FILE to write to.
88  *           msa   - alignment to write
89  *
90  * Returns:  void
91  */
92 void
93 #ifdef CLUSTALO
94 /*WriteA2M(FILE *fp, MSA *msa, int vienna)*/
WriteA2M(FILE * fp,MSA * msa,int iWrap)95 WriteA2M(FILE *fp, MSA *msa, int iWrap)
96 #else
97 WriteA2M(FILE *fp, MSA *msa)
98 #endif
99 {
100   int  idx;			/* sequence index */
101   int  pos;			/* position in sequence */
102 #ifdef CLUSTALO
103   char  *buf;                   /* buffer for writing seq        */
104   int    cpl = msa->alen<iWrap ? msa->alen+10 : (iWrap > 0 ? iWrap : 60);          /* char per line (< 64)          */
105 #else
106   char buf[64];			/* buffer for individual lines */
107   int  cpl = 60;		/* char per line; must be < 64 unless buf is bigger */
108 #endif
109 
110 #ifdef CLUSTALO
111   if (NULL == (buf = (char *)malloc(cpl+20))){
112     Die("%s:%s:%d: could not malloc %d char for buffer",
113         __FUNCTION__, __FILE__, __LINE__, cpl+20);
114   }
115   else {
116     memset(buf, 0, cpl+20);
117   }
118 #endif
119 
120 
121   buf[cpl] = '\0';
122   for (idx = 0; idx < msa->nseq; idx++)
123     {
124 #ifdef CLUSTALO
125       /* most fasta sequences don't have a description, which
126        * leads to a trailing white space in the original code
127        */
128       fprintf(fp, ">%s",  msa->sqname[idx]);
129 
130       if (msa->sqdesc != NULL && msa->sqdesc[idx] != NULL /*&& !vienna*/) {
131 	fprintf(fp, " %s", msa->sqdesc[idx]);
132       }
133       fprintf(fp, "\n");
134 #else
135       fprintf(fp, ">%s %s\n",
136 	      msa->sqname[idx],
137 	      (msa->sqdesc != NULL && msa->sqdesc[idx] != NULL) ? msa->sqdesc[idx] : "");
138 #endif
139       for (pos = 0; pos < msa->alen; pos+=cpl)
140         {
141 	  strncpy(buf, &(msa->aseq[idx][pos]), cpl);
142 	  /*if (vienna)
143 	    fprintf(fp, "%s", buf);
144 	    else*/
145 	  fprintf(fp, "%s\n", buf);
146         }
147       /*if (vienna)
148 	fprintf(fp, "\n");*/
149 
150     }
151 
152 #ifdef CLUSTALO
153   free(buf); buf = NULL;
154 #endif
155 
156 } /* this is the end of WriteA2M() */
157