1 #include "mltaln.h"
2 
3 #define DEBUG 0
4 
// Path of the alignment file: assigned from the argument following -a in arguments(), opened for reading in main().
5 char *alignmentfile;
// Flag: reset to 0 at the start of arguments() and set to 1 by the -Y option; when nonzero, main() also reads "_deletelist".
6 int keeplength;
7 
/*
 * Overwrite the residues of each aligned row with the characters of its
 * source sequence.
 *
 *   nseq   : number of rows in a (and entries in oripos)
 *   oripos : oripos[i] is the index into s of the source sequence for a[i]
 *   a      : aligned rows, modified in place; '-' characters are left as is
 *   s      : source sequences, consumed one character per non-'-' column
 *
 * Every non-'-' character of a[i] is replaced by the next unread character
 * of s[oripos[i]].  The process is terminated with exit( 1 ) when a source
 * sequence runs out before its row does (1) or still has characters left
 * after the row has been processed (2).
 */
static void fillorichar( int nseq, int *oripos, char **a, char **s )
{
	int row;

	for( row=0; row<nseq; row++ )
	{
		char *dst = a[row];
		const char *src = s[oripos[row]];

		for( ; *dst; dst++ )
		{
			if( *dst == '-' ) continue; // gap column: nothing to restore

			*dst = *src++;
			if( *dst == 0 )
			{
				// The terminator of the source was just copied: the source is too short.
				fprintf( stderr, "ERROR!!\n" );
				fprintf( stderr, "alignment is inconsistent with the original sequences (1)\n" );
				exit( 1 );
			}
		}

		if( *src != 0 )
		{
			// Unconsumed source characters remain: the source is too long.
			fprintf( stderr, "ERROR!!\n" );
			fprintf( stderr, "alignment is inconsistent with the original sequences (2)\n" );
			exit( 1 );
		}
	}
}
35 
arguments(int argc,char * argv[])36 void arguments( int argc, char *argv[] )
37 {
38     int c;
39 
40 	keeplength = 0;
41 
42     while( --argc > 0 && (*++argv)[0] == '-' )
43 	{
44         while ( (c = *++argv[0]) )
45 		{
46             switch( c )
47             {
48 				case 'i':
49 					inputfile = *++argv;
50 					--argc;
51 					goto nextoption;
52 				case 'a':
53 					alignmentfile = *++argv;
54 					--argc;
55 					goto nextoption;
56 				case 'Z': // add2ndharfarg wo tsukau tame.
57 					break;
58 				case 'p': // add2ndharfarg wo tsukau tame.
59 					break;
60 				case 'Y':
61 					keeplength = 1;
62 					break;
63                 default:
64                     fprintf( stderr, "illegal option %c\n", c );
65                     argc = 0;
66                     break;
67             }
68 		}
69 		nextoption:
70 			;
71 	}
72     if( argc != 0 )
73     {
74         fprintf( stderr, "options: Check source file !\n" );
75         exit( 1 );
76     }
77 }
78 
79 
80 
main(int argc,char * argv[])81 int main( int argc, char *argv[] )
82 {
83 	FILE *infp;
84 	FILE *alfp;
85 	FILE *dlfp;
86 	char **name;
87 	char **aname;
88 	char **oname;
89 	char **seq;
90 	char **aseq;
91 	int *nlen;
92 	int *oripos;
93 	char *npt, *npt0, *npt2, *pt, *pt2;
94 	int i, o, prelen;
95 	int nlenmin;
96 	int njobs, njoba;
97 //	int **dlist;
98 //	int *ndel;
99 	char *gett;
100 
101 	arguments( argc, argv );
102 
103 	if( inputfile )
104 	{
105 		infp = fopen( inputfile, "r" );
106 		if( !infp )
107 		{
108 			fprintf( stderr, "Cannot open %s\n", inputfile );
109 			exit( 1 );
110 		}
111 	}
112 	else
113 		infp = stdin;
114 
115 	if( alignmentfile )
116 	{
117 		alfp = fopen( alignmentfile, "r" );
118 		if( !alfp )
119 		{
120 			fprintf( stderr, "Cannot open %s\n", alignmentfile );
121 			exit( 1 );
122 		}
123 	}
124 	else
125 	{
126 		fprintf( stderr, "No alignment is given.\n" );
127 		exit( 1 );
128 	}
129 
130 	if( keeplength )
131 	{
132 		dlfp = fopen( "_deletelist", "r" );
133 		if( !dlfp )
134 		{
135 			fprintf( stderr, "Cannot open _deletefile\n" );
136 			exit( 1 );
137 		}
138 	}
139 
140 	dorp = NOTSPECIFIED;
141 	getnumlen_casepreserve( infp, &nlenmin );
142 	njobs = njob;
143 //	fprintf( stderr, "in infp, %d x %d - %d %c\n", njob, nlenmin, nlenmax, dorp );
144 
145 	seq = AllocateCharMtx( njob, nlenmax+1 );
146 	name = AllocateCharMtx( njob, B+1 );
147 	nlen = AllocateIntVec( njob );
148 	oripos = AllocateIntVec( njob );
149 	readData_pointer_casepreserve( infp, name, nlen, seq );
150 	fclose( infp );
151 
152 	dorp = NOTSPECIFIED;
153 	getnumlen( alfp );
154 	njoba = njob;
155 //	fprintf( stderr, "in alfp, %d x %d %c\n", njob, nlenmax, dorp );
156 	aseq = AllocateCharMtx( njob, nlenmax+1 );
157 	aname = AllocateCharMtx( njob, B+1 );
158 	oname = AllocateCharMtx( njob, B+1 );
159 	readData_pointer( alfp, aname, nlen, aseq );
160 	fclose( alfp );
161 
162 	for( i=0; i<njob; i++ ) gappick_samestring( seq[i] );
163 
164 //	reporterr( "seq[njob-1] = %s\n", seq[njob-1] );
165 
166 	if( keeplength )
167 	{
168 		gett = calloc( 1000, sizeof( char ) );
169 //		ndel = (int *)calloc( njob, sizeof( int ) );
170 //		for( i=0; i<njob; i++ ) ndel[i] = 0;
171 //		dlist = (int **)calloc( njob+1, sizeof( int * ) );
172 //		for( i=0; i<njob; i++ ) dlist[i] = NULL;
173 //		dlist[njob] = NULL;
174 
175 		while( 1 )
176 		{
177 			fgets( gett, 999, dlfp );
178 			if( feof( dlfp ) ) break;
179 			sscanf( gett, "%d %d", &i, &o );
180 //			reporterr( "%d, %d\n", i, o );
181 //			dlist[i] = realloc( dlist[i], sizeof( int ) * (ndel[i]+2) );
182 //			dlist[i][ndel[i]] = o;
183 //			ndel[i]++;
184 
185 			seq[i][o] = '-';
186 
187 		}
188 		fclose( dlfp );
189 	}
190 
191 	for( i=0; i<njob; i++ )
192 	{
193 		gappick_samestring( seq[i] );
194 	}
195 
196 	if( njoba != njobs )
197 	{
198 		fprintf( stderr, "ERROR!!\n" );
199 		fprintf( stderr, "In input file,\n" );
200 		fprintf( stderr, "njob = %d\n", njobs );
201 		fprintf( stderr, "but in alignment file,\n" );
202 		fprintf( stderr, "njob = %d\n", njoba );
203 		exit( 1 );
204 	}
205 
206 	for( i=0; i<njob; i++ )
207 	{
208 #if 0
209 		if( strstr( aname[i], "_seed_" ) )
210 		{
211 			npt2 = aname[i] + 7;
212 			strcpy( oname[i], "=_seed_" );
213 		}
214 		else
215 		{
216 			npt2 = aname[i] + 1;
217 			strcpy( oname[i], "=" );
218 		}
219 
220 		fprintf( stderr, "npt2 = %s\n", npt2 );
221 
222 		o = oripos[i] = atoi( npt2 );
223 		npt = strstr( npt2, "_oe_" );
224 		if( npt == NULL )
225 		{
226 			fprintf( stderr, "Format error!\n" );
227 			exit( 1 );
228 		}
229 		npt += 4;
230 		strcat( oname[i], npt+1 );
231 #endif
232 		npt0 = strstr( aname[i], "_os_" );
233 		if( npt0 == NULL )
234 		{
235 			fprintf( stderr, "Format error!\n" );
236 			exit( 1 );
237 		}
238 		npt2 = npt0 + 4;
239 		o = oripos[i] = atoi( npt2 );
240 
241 		npt = strstr( aname[i], "_oe_" );
242 		if( npt == NULL )
243 		{
244 			fprintf( stderr, "Format error!\n" );
245 			exit( 1 );
246 		}
247 		npt += 4;
248 
249 		pt2 = npt;
250 		pt = npt2 - 4;
251 		while( *pt ) *pt++ = *pt2++; // okashii
252 
253 		prelen = npt0-aname[i];
254 		strncpy( oname[i], aname[i], prelen ); oname[i][prelen] = 0;
255 		strcat( oname[i], npt0 );
256 
257 #if 0
258 		pt = strstr( aname[i], "_numo_e" );
259 		if( pt ) pt += 8;
260 		else pt = aname[i] + 1;
261 
262 		if( strstr( pt, "_seed_" ) ) pt += 6;
263 
264 		fprintf( stderr, "pt = :%s:\n", pt );
265 
266 #endif
267 //		fprintf( stderr, "npt0 = :%s:\n", npt0 );
268 //
269 //		reporterr( "oname[i] = %s\n", oname[i] );
270 //		reporterr( "aname[i] = %s\n", aname[i] );
271 //		reporterr( " name[i] = %s\n",  name[i] );
272 
273 //		fprintf( stderr, "aname[i] = :%s:\n", aname[i] );
274 //		fprintf( stderr, "pt = :%s:\n", pt );
275 //		fprintf( stderr, "oname[i] = :%s:\n", oname[i] );
276 //		fprintf( stderr, "name[o] = :%s:\n", name[o] );
277 
278 		if( strncmp( npt0, name[o]+1, 10 ) )
279 		{
280 			fprintf( stderr, "ERROR!!\n" );
281 			fprintf( stderr, "In input file,\n" );
282 			fprintf( stderr, "name[%d] = %s\n", o, name[o] );
283 			fprintf( stderr, "but in alignment file,\n" );
284 			fprintf( stderr, "oname[%d] = %s\n", i, oname[i] );
285 			fprintf( stderr, "npt0 = %s\n", npt0 );
286 			fprintf( stderr, "prelen = %d\n", prelen );
287 			fprintf( stderr, "name[%d] = %s\n", i, aname[i] );
288 			exit( 1 );
289 		}
290 #if 0
291 		else
292 		{
293 			fprintf( stderr, "OK!!\n" );
294 			fprintf( stderr, "In input file,\n" );
295 			fprintf( stderr, "name[%d] = %s\n", o, name[o] );
296 			fprintf( stderr, "and in alignment file,\n" );
297 			fprintf( stderr, "name[%d] = %s\n", i, aname[i] );
298 			fprintf( stderr, "\n" );
299 		}
300 #endif
301 	}
302 //	fprintf( stderr, "seq[0] = %s\n", seq[0] );
303 //	fprintf( stderr, "aseq[0] = %s\n", aseq[0] );
304 
305 	fillorichar( njob, oripos, aseq, seq );
306 
307 
308 	writeData_pointer( stdout, njob, oname, nlen, aseq );
309 
310 	FreeCharMtx( seq );
311 	FreeCharMtx( aseq );
312 	FreeCharMtx( name );
313 	FreeCharMtx( aname );
314 	FreeCharMtx( oname );
315 	free( nlen );
316 	free( oripos );
317 
318 	return( 0 );
319 }
320