1 #include "mltaln.h"
2
3 #define DEBUG 0
4
5 char *alignmentfile;
6 int keeplength;
7
/*
 * For each of the nseq aligned rows a[row], overwrite every non-gap
 * character (anything other than '-') with the next character taken from
 * the source string s[oripos[row]].  Gap characters are left untouched.
 *
 * The number of non-gap columns in a[row] must equal the length of the
 * source string; otherwise an error is printed to stderr and the process
 * exits with status 1.
 */
static void fillorichar( int nseq, int *oripos, char **a, char **s )
{
	int row;

	for( row = 0; row < nseq; row++ )
	{
		char *dst = a[row];
		const char *src = s[oripos[row]];

		for( ; *dst != '\0'; dst++ )
		{
			if( *dst == '-' )
				continue;

			*dst = *src++;
			if( *dst == '\0' )
			{
				/* The source string ran out before the aligned row did. */
				fprintf( stderr, "ERROR!!\n" );
				fprintf( stderr, "alignment is inconsistent with the original sequences (1)\n" );
				exit( 1 );
			}
		}

		if( *src != '\0' )
		{
			/* The aligned row ended while source characters remain. */
			fprintf( stderr, "ERROR!!\n" );
			fprintf( stderr, "alignment is inconsistent with the original sequences (2)\n" );
			exit( 1 );
		}
	}
}
35
arguments(int argc,char * argv[])36 void arguments( int argc, char *argv[] )
37 {
38 int c;
39
40 keeplength = 0;
41
42 while( --argc > 0 && (*++argv)[0] == '-' )
43 {
44 while ( (c = *++argv[0]) )
45 {
46 switch( c )
47 {
48 case 'i':
49 inputfile = *++argv;
50 --argc;
51 goto nextoption;
52 case 'a':
53 alignmentfile = *++argv;
54 --argc;
55 goto nextoption;
56 case 'Z': // add2ndharfarg wo tsukau tame.
57 break;
58 case 'p': // add2ndharfarg wo tsukau tame.
59 break;
60 case 'Y':
61 keeplength = 1;
62 break;
63 default:
64 fprintf( stderr, "illegal option %c\n", c );
65 argc = 0;
66 break;
67 }
68 }
69 nextoption:
70 ;
71 }
72 if( argc != 0 )
73 {
74 fprintf( stderr, "options: Check source file !\n" );
75 exit( 1 );
76 }
77 }
78
79
80
main(int argc,char * argv[])81 int main( int argc, char *argv[] )
82 {
83 FILE *infp;
84 FILE *alfp;
85 FILE *dlfp;
86 char **name;
87 char **aname;
88 char **oname;
89 char **seq;
90 char **aseq;
91 int *nlen;
92 int *oripos;
93 char *npt, *npt0, *npt2, *pt, *pt2;
94 int i, o, prelen;
95 int nlenmin;
96 int njobs, njoba;
97 // int **dlist;
98 // int *ndel;
99 char *gett;
100
101 arguments( argc, argv );
102
103 if( inputfile )
104 {
105 infp = fopen( inputfile, "r" );
106 if( !infp )
107 {
108 fprintf( stderr, "Cannot open %s\n", inputfile );
109 exit( 1 );
110 }
111 }
112 else
113 infp = stdin;
114
115 if( alignmentfile )
116 {
117 alfp = fopen( alignmentfile, "r" );
118 if( !alfp )
119 {
120 fprintf( stderr, "Cannot open %s\n", alignmentfile );
121 exit( 1 );
122 }
123 }
124 else
125 {
126 fprintf( stderr, "No alignment is given.\n" );
127 exit( 1 );
128 }
129
130 if( keeplength )
131 {
132 dlfp = fopen( "_deletelist", "r" );
133 if( !dlfp )
134 {
135 fprintf( stderr, "Cannot open _deletefile\n" );
136 exit( 1 );
137 }
138 }
139
140 dorp = NOTSPECIFIED;
141 getnumlen_casepreserve( infp, &nlenmin );
142 njobs = njob;
143 // fprintf( stderr, "in infp, %d x %d - %d %c\n", njob, nlenmin, nlenmax, dorp );
144
145 seq = AllocateCharMtx( njob, nlenmax+1 );
146 name = AllocateCharMtx( njob, B+1 );
147 nlen = AllocateIntVec( njob );
148 oripos = AllocateIntVec( njob );
149 readData_pointer_casepreserve( infp, name, nlen, seq );
150 fclose( infp );
151
152 dorp = NOTSPECIFIED;
153 getnumlen( alfp );
154 njoba = njob;
155 // fprintf( stderr, "in alfp, %d x %d %c\n", njob, nlenmax, dorp );
156 aseq = AllocateCharMtx( njob, nlenmax+1 );
157 aname = AllocateCharMtx( njob, B+1 );
158 oname = AllocateCharMtx( njob, B+1 );
159 readData_pointer( alfp, aname, nlen, aseq );
160 fclose( alfp );
161
162 for( i=0; i<njob; i++ ) gappick_samestring( seq[i] );
163
164 // reporterr( "seq[njob-1] = %s\n", seq[njob-1] );
165
166 if( keeplength )
167 {
168 gett = calloc( 1000, sizeof( char ) );
169 // ndel = (int *)calloc( njob, sizeof( int ) );
170 // for( i=0; i<njob; i++ ) ndel[i] = 0;
171 // dlist = (int **)calloc( njob+1, sizeof( int * ) );
172 // for( i=0; i<njob; i++ ) dlist[i] = NULL;
173 // dlist[njob] = NULL;
174
175 while( 1 )
176 {
177 fgets( gett, 999, dlfp );
178 if( feof( dlfp ) ) break;
179 sscanf( gett, "%d %d", &i, &o );
180 // reporterr( "%d, %d\n", i, o );
181 // dlist[i] = realloc( dlist[i], sizeof( int ) * (ndel[i]+2) );
182 // dlist[i][ndel[i]] = o;
183 // ndel[i]++;
184
185 seq[i][o] = '-';
186
187 }
188 fclose( dlfp );
189 }
190
191 for( i=0; i<njob; i++ )
192 {
193 gappick_samestring( seq[i] );
194 }
195
196 if( njoba != njobs )
197 {
198 fprintf( stderr, "ERROR!!\n" );
199 fprintf( stderr, "In input file,\n" );
200 fprintf( stderr, "njob = %d\n", njobs );
201 fprintf( stderr, "but in alignment file,\n" );
202 fprintf( stderr, "njob = %d\n", njoba );
203 exit( 1 );
204 }
205
206 for( i=0; i<njob; i++ )
207 {
208 #if 0
209 if( strstr( aname[i], "_seed_" ) )
210 {
211 npt2 = aname[i] + 7;
212 strcpy( oname[i], "=_seed_" );
213 }
214 else
215 {
216 npt2 = aname[i] + 1;
217 strcpy( oname[i], "=" );
218 }
219
220 fprintf( stderr, "npt2 = %s\n", npt2 );
221
222 o = oripos[i] = atoi( npt2 );
223 npt = strstr( npt2, "_oe_" );
224 if( npt == NULL )
225 {
226 fprintf( stderr, "Format error!\n" );
227 exit( 1 );
228 }
229 npt += 4;
230 strcat( oname[i], npt+1 );
231 #endif
232 npt0 = strstr( aname[i], "_os_" );
233 if( npt0 == NULL )
234 {
235 fprintf( stderr, "Format error!\n" );
236 exit( 1 );
237 }
238 npt2 = npt0 + 4;
239 o = oripos[i] = atoi( npt2 );
240
241 npt = strstr( aname[i], "_oe_" );
242 if( npt == NULL )
243 {
244 fprintf( stderr, "Format error!\n" );
245 exit( 1 );
246 }
247 npt += 4;
248
249 pt2 = npt;
250 pt = npt2 - 4;
251 while( *pt ) *pt++ = *pt2++; // okashii
252
253 prelen = npt0-aname[i];
254 strncpy( oname[i], aname[i], prelen ); oname[i][prelen] = 0;
255 strcat( oname[i], npt0 );
256
257 #if 0
258 pt = strstr( aname[i], "_numo_e" );
259 if( pt ) pt += 8;
260 else pt = aname[i] + 1;
261
262 if( strstr( pt, "_seed_" ) ) pt += 6;
263
264 fprintf( stderr, "pt = :%s:\n", pt );
265
266 #endif
267 // fprintf( stderr, "npt0 = :%s:\n", npt0 );
268 //
269 // reporterr( "oname[i] = %s\n", oname[i] );
270 // reporterr( "aname[i] = %s\n", aname[i] );
271 // reporterr( " name[i] = %s\n", name[i] );
272
273 // fprintf( stderr, "aname[i] = :%s:\n", aname[i] );
274 // fprintf( stderr, "pt = :%s:\n", pt );
275 // fprintf( stderr, "oname[i] = :%s:\n", oname[i] );
276 // fprintf( stderr, "name[o] = :%s:\n", name[o] );
277
278 if( strncmp( npt0, name[o]+1, 10 ) )
279 {
280 fprintf( stderr, "ERROR!!\n" );
281 fprintf( stderr, "In input file,\n" );
282 fprintf( stderr, "name[%d] = %s\n", o, name[o] );
283 fprintf( stderr, "but in alignment file,\n" );
284 fprintf( stderr, "oname[%d] = %s\n", i, oname[i] );
285 fprintf( stderr, "npt0 = %s\n", npt0 );
286 fprintf( stderr, "prelen = %d\n", prelen );
287 fprintf( stderr, "name[%d] = %s\n", i, aname[i] );
288 exit( 1 );
289 }
290 #if 0
291 else
292 {
293 fprintf( stderr, "OK!!\n" );
294 fprintf( stderr, "In input file,\n" );
295 fprintf( stderr, "name[%d] = %s\n", o, name[o] );
296 fprintf( stderr, "and in alignment file,\n" );
297 fprintf( stderr, "name[%d] = %s\n", i, aname[i] );
298 fprintf( stderr, "\n" );
299 }
300 #endif
301 }
302 // fprintf( stderr, "seq[0] = %s\n", seq[0] );
303 // fprintf( stderr, "aseq[0] = %s\n", aseq[0] );
304
305 fillorichar( njob, oripos, aseq, seq );
306
307
308 writeData_pointer( stdout, njob, oname, nlen, aseq );
309
310 FreeCharMtx( seq );
311 FreeCharMtx( aseq );
312 FreeCharMtx( name );
313 FreeCharMtx( aname );
314 FreeCharMtx( oname );
315 free( nlen );
316 free( oripos );
317
318 return( 0 );
319 }
320