1 static char const rcsid[] = "$Id: makemat.c,v 6.17 2005/07/28 14:52:22 coulouri Exp $";
2
3 /*
4 * ===========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================
27 */
28
29 /*****************************************************************************
30
31 File name: makemat.c
32
33 Author: Alejandro Schaffer
34
35 Contents: main routines for makematrices program to convert
36 PSI-BLAST checkpoints into score matrices.
37
38 *****************************************************************************/
39
40 #include <ncbi.h>
41 #include <objseq.h>
42 #include <objsset.h>
43 #include <sequtil.h>
44 #include <seqport.h>
45 #include <tofasta.h>
46 #include <blast.h>
47 #include <blastpri.h>
48 #include <txalign.h>
49 #include <simutil.h>
50 #include <posit.h>
51 #include <profiles.h>
52 #include <sqnutils.h>
53
54
55 /*counts the number of items in sequencesFile and matricesFile, assumed to
56 be one per line, and checks that the numbers are equal.
57 returns the number if equal, 0 if unequal, rewinds the file descriptors
58 before returning*/
countProfiles(FILE * sequencesFile,FILE * profilesFile)59 static Int4 countProfiles(FILE *sequencesFile, FILE *profilesFile)
60 {
61 Int4 sequencesCount = 0; /*count for sequencesFile*/
62 Int4 matricesCount = 0; /*count for profilesFile*/
63 Char oneFileName[MAXLINELEN]; /*for reading one line per file*/
64
65 while (fgets(oneFileName,MAXLINELEN,sequencesFile))
66 sequencesCount++;
67 while (fgets(oneFileName,MAXLINELEN,profilesFile))
68 matricesCount++;
69 rewind(profilesFile);
70 rewind(sequencesFile);
71 if (sequencesCount == matricesCount) {
72 return(sequencesCount);
73 } else {
74 ErrPostEx(SEV_FATAL, 1, 0, "profiles: Sequences file has %ld entries; Matrices file has %d entries; these should be equal\n", (long) sequencesCount,matricesCount);
75 return(0);
76 }
77 }
78
79
80 /*converts name of profile file to matrix file by changing
81 suffix to mtx or appending suffix mtx*/
makeMatrixName(Char * profileName)82 static Char *makeMatrixName(Char *profileName)
83 {
84 int length; /*length of a name*/
85 Char *returnName; /*string to treturn*/
86 int c, lastc; /*loop indices*/
87
88 length = strlen(profileName);
89 returnName = (Char *) MemNew((length + 5) * sizeof(Char));
90
91 for(c = 0; c < length; c++) {
92 returnName[c] = profileName[c];
93 if(('.' == profileName[c]) && ('c' == profileName[c+1])
94 && ('h' == profileName[c+2]))
95 lastc = c;
96 }
97 returnName[lastc] = '.';
98 returnName[lastc+1] = 'm';
99 returnName[lastc+2] = 't';
100 returnName[lastc+3] = 'x';
101 returnName[lastc+4] = '\0';
102 return(returnName);
103 }
104
105 /*print out some parameters associated with a Karlin-Alschul
106 scoring system
107 checkFile is the file descriptor to write to
108 kbp is the pointer to a structure with the parameters
109 scaling determines whether scores are being scaled or not
110 scalingDown is 1/scalingFactor because if scores are
111 scaled up, then Lambda is to be scaled down*/
putMatrixKbp(FILE * checkFile,BLAST_KarlinBlkPtr kbp,Boolean scaling,Nlm_FloatHi scalingDown)112 static void putMatrixKbp(FILE * checkFile, BLAST_KarlinBlkPtr kbp, Boolean scaling, Nlm_FloatHi scalingDown)
113 {
114 if (scaling)
115 fprintf(checkFile,"%le\n",kbp->Lambda * scalingDown);
116 else
117 fprintf(checkFile,"%le\n",kbp->Lambda);
118
119 fprintf(checkFile,"%le\n",kbp->K);
120 fprintf(checkFile,"%le\n",kbp->logK);
121 fprintf(checkFile,"%le\n",kbp->H);
122 }
123
124 /*print out a score matrix into the file descriptor checkfile
125 compactSerarch and psoSearch stroe information about the
126 matrix and the associated sequence
127 scaleScores determines whether scores are scaled or not*/
putMatrixMatrix(FILE * checkFile,compactSearchItems * compactSearch,posSearchItems * posSearch,Boolean scaleScores)128 static void putMatrixMatrix(FILE *checkFile, compactSearchItems * compactSearch, posSearchItems *posSearch, Boolean scaleScores)
129 {
130 Int4 i, j; /*loop indices*/
131
132 if (scaleScores) {
133 for(i = 0; i < compactSearch->qlength; i++) {
134 for(j = 0; j < compactSearch->alphabetSize; j++)
135 fprintf(checkFile,"%ld ", (long) posSearch->posPrivateMatrix[i][j]);
136 fprintf(checkFile,"\n");
137 }
138 }
139 else {
140 for(i = 0; i < compactSearch->qlength; i++) {
141 for(j = 0; j < compactSearch->alphabetSize; j++)
142 fprintf(checkFile,"%ld ", (long) posSearch->posMatrix[i][j]);
143 fprintf(checkFile,"\n");
144 }
145 }
146 }
147
148 /*Write out the matrix
149 compactSearch and PosSearch include fields that store the matrix
150 and the sequence
151 sbp includes information about the underlying
152 matrix
153 fileName is where the matrix is to be written
154 error_return holds error messages
155 scaleScores indicates whether scores in the matrix are to be scaled
156 scalingFactor is the multiplicative factor to use if scaleScores
157 is true */
takeMatrixCheckpoint(compactSearchItems * compactSearch,posSearchItems * posSearch,BLAST_ScoreBlkPtr sbp,Char * fileName,ValNodePtr * error_return,Boolean scaleScores,Nlm_FloatHi scalingFactor)158 static Boolean takeMatrixCheckpoint(compactSearchItems * compactSearch,
159 posSearchItems *posSearch, BLAST_ScoreBlkPtr sbp,
160 Char *fileName,ValNodePtr *error_return, Boolean scaleScores, Nlm_FloatHi
161 scalingFactor)
162 {
163
164 FILE * checkFile; /*file in which to take the checkpoint*/
165 Int4 length; /*length of query sequence, and an index for it*/
166 Int4 i; /*indices to position and alphabet */
167 Char localChar; /*temporary character*/
168
169 checkFile = FileOpen(fileName, "w");
170
171 if (NULL == checkFile) {
172 ErrPostEx(SEV_ERROR, 0,0, "Could not open checkpoint file");
173 return(FALSE);
174 }
175
176 length = compactSearch->qlength;
177 fprintf(checkFile,"%ld\n",(long) length);
178
179 for(i = 0; i < length; i++) {
180 localChar = getRes(compactSearch->query[i]);
181
182 fprintf(checkFile,"%c",localChar);
183
184 /* The following 2 lines are needed to preserve compatibility with the
185 * checkpoint file libraries distributed with IMPALA (from personal
186 * communication with IMPALA's author) */
187 posSearch->posMatrix[i][Xchar] = Xscore;
188 posSearch->posPrivateMatrix[i][Xchar] = Xscore * scalingFactor;
189 }
190
191 fprintf(checkFile,"\n");
192 putMatrixKbp(checkFile, compactSearch->kbp_gap_std[0], scaleScores, 1/scalingFactor);
193 putMatrixKbp(checkFile, compactSearch->kbp_gap_psi[0], scaleScores, 1/scalingFactor);
194 putMatrixKbp(checkFile, sbp->kbp_ideal, scaleScores, 1/scalingFactor);
195 putMatrixMatrix(checkFile, compactSearch, posSearch, scaleScores);
196
197 FileClose(checkFile);
198 return(TRUE);
199 }
200
201 /*convert to matrices is the high-level procedure to convert a set of
202 PSI-BLAST checkpoints into a corresponding set of score matrices
203 profilesFile is a descriptor to a file listing the file names of files
204 containing checkpoints, one file name per line
205 sequencesFile is a descriptor to a file listing the file names of sequences
206 containing the master sequences for checkpoints, one file name per line
207 matricesFile is an output file to print out the names of the newly
208 produced files containing score matrices, one file name per line
209 auxiliaryFile is an outputput file to contain some general information
210 about the library of matrices and some parameters for each matrix
211 count is the number of checkpoints/sequences
212 gap_open is the cost of opening a gap
213 gap_extend is the cost of extending a gap
214 effectiveSize is the the size of the original sequence database used to
215 make the PSI-BLAST matrices
216 underlyignMatrixName is the original score matrix used to make the
217 PSI-BLAST matrices
218 scaleScores indicates whether scores should be scaled
219 scalingFactor indicates by how much scores are scaled, if at all*/
220
convertToMatrices(FILE * profilesFile,FILE * sequencesFile,FILE * matricesFile,FILE * auxiliaryFile,Int4 count,Int4 gap_open,Int4 gap_extend,Int4 effectiveSize,Char * underlyingMatrixName,Boolean scaleScores,Nlm_FloatHi scalingFactor,Char * directoryPrefix)221 static Int4 convertToMatrices(FILE *profilesFile, FILE *sequencesFile, FILE *matricesFile, FILE *auxiliaryFile, Int4 count, Int4 gap_open, Int4 gap_extend,
222 Int4 effectiveSize, Char *underlyingMatrixName, Boolean scaleScores,
223 Nlm_FloatHi scalingFactor, Char *directoryPrefix)
224 {
225 int i; /*loop index over profiles*/
226 FILE *thisProfileFile, *thisSequenceFile; /*file
227 descriptors for a single profile*/
228 Char profileFileName[MAX_NAME_LENGTH], sequenceFileName[MAX_NAME_LENGTH];
229 /*file names for profiles*/
230 Char * matrixFileName, *relativeMatrixFileName; /*file name for corresponding matrix file*/
231 Char relativeProfileFileName[MAX_NAME_LENGTH], relativeSequenceFileName[MAX_NAME_LENGTH];
232 Int4 prefixLength; /*length of directoryPrefix*/
233 Int4 c1,c2; /*indices over characters in names*/
234 posSearchItems *posSearch; /*used to store matrix*/
235 Uint1Ptr query =NULL; /*query sequence read in*/
236 Int4 queryLength; /*length of query sequence*/
237 Int4 c; /*index over query*/
238 compactSearchItems *compactSearch; /*stores query related items*/
239 ValNodePtr error_return; /*stores error messages*/
240 Boolean success; /*did one checkpoint recovery succeed*/
241 BLAST_ResFreqPtr stdrfp; /* gets standard frequencies in prob field */
242 Int4 a; /*index over characters*/
243 SeqCodeTablePtr sctp;
244 BLAST_ScoreBlkPtr sbp;
245 BioseqPtr query_bsp; /*structure to hold query information*/
246 SeqEntryPtr sep; /*structure to hold query retrieval result*/
247 Int4 *lengthArray; /*array of sequence lengths*/
248 Nlm_FloatHi *KArray; /*array of K values, one per sequence*/
249 Int4 maxLength; /*maximum length of a sequence*/
250 Int4 KarlinReturn; /*return value from calls to set up matrix parameters*/
251
252 error_return = NULL;
253 posSearch = (posSearchItems *) MemNew (1 * sizeof(posSearchItems));
254 compactSearch = (compactSearchItems *) MemNew (1 * sizeof(compactSearchItems));
255 sctp = SeqCodeTableFindObj(Seq_code_ncbistdaa);
256 compactSearch->alphabetSize = sctp->num;
257
258 fprintf(auxiliaryFile,"%s\n",underlyingMatrixName);
259 fprintf(auxiliaryFile,"%ld\n",(long) gap_open);
260 fprintf(auxiliaryFile,"%ld\n",(long) gap_extend);
261
262 lengthArray = (Int4 *) MemNew(count * sizeof(Int4));
263 KArray = (Nlm_FloatHi *) MemNew(count * sizeof(Nlm_FloatHi));
264 maxLength = 0;
265
266 if ('\0' != directoryPrefix[0]) {
267 strcpy(profileFileName, directoryPrefix);
268 strcpy(sequenceFileName, directoryPrefix);
269 prefixLength = strlen(directoryPrefix);
270 }
271
272 posSearch->stdFreqRatios =
273 PSIMatrixFrequencyRatiosNew(underlyingMatrixName);
274
275 for(i = 0; i < count; i++) {
276 if ('\0' == directoryPrefix[0])
277 fscanf(profilesFile,"%s", profileFileName);
278 else {
279 fscanf(profilesFile,"%s", relativeProfileFileName);
280 for(c1 = prefixLength, c2 = 0; relativeProfileFileName[c2] != '\0';
281 c1++, c2++)
282 profileFileName[c1] = relativeProfileFileName[c2];
283 profileFileName[c1] = '\0';
284 }
285 if ((thisProfileFile = FileOpen(profileFileName, "rb")) == NULL) {
286 ErrPostEx(SEV_FATAL, 1, 0, "Unable to open file %s\n", profileFileName);
287 return (1);
288 }
289 if ('\0' == directoryPrefix[0])
290 fscanf(sequencesFile,"%s", sequenceFileName);
291 else {
292 fscanf(sequencesFile,"%s", relativeSequenceFileName);
293 for(c1 = prefixLength, c2 = 0; relativeSequenceFileName[c2] != '\0';
294 c1++, c2++)
295 sequenceFileName[c1] = relativeSequenceFileName[c2];
296 sequenceFileName[c1] = '\0';
297 }
298 if ((thisSequenceFile = FileOpen(sequenceFileName, "r")) == NULL) {
299 ErrPostEx(SEV_FATAL, 1, 0, "Unable to open file %s\n", sequenceFileName);
300 return (1);
301 }
302
303 sep = FastaToSeqEntryEx(thisSequenceFile, FALSE, NULL, FALSE);
304 if (sep != NULL) {
305 query_bsp = NULL;
306 SeqEntryExplore(sep, &query_bsp, FindProt);
307 if (query_bsp == NULL) {
308 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
309 return 2;
310 }
311 query = BlastGetSequenceFromBioseq(query_bsp, &queryLength);
312 }
313 compactSearch->query = query;
314 for (c= 0; c < queryLength; c++)
315 query[c] = ResToInt(query[c]);
316 compactSearch->qlength = queryLength;
317
318
319 sbp = BLAST_ScoreBlkNew(Seq_code_ncbistdaa, 1);
320 sbp->read_in_matrix = TRUE;
321 sbp->protein_alphabet = TRUE;
322 sbp->posMatrix = NULL;
323 sbp->number_of_contexts = 1;
324 BlastScoreBlkMatFill(sbp, underlyingMatrixName);
325 compactSearch->matrix = sbp->matrix;
326 compactSearch->gapped_calculation = TRUE;
327 /* Note that these two assignments are not really needed for
328 * makemat's operation and thus their values are irrelevant */
329 compactSearch->pseudoCountConst = 10;
330 compactSearch->ethresh = 0.001;
331 BlastScoreBlkFill(sbp, (CharPtr) query, queryLength, 0);
332
333 if (0 == i) {
334 fprintf(auxiliaryFile, "%le\n", sbp->kbp_std[0]->K);
335 fprintf(auxiliaryFile, "%le\n", sbp->kbp_std[0]->H);
336 }
337
338 sbp->kbp_gap_std[0] = BlastKarlinBlkCreate();
339 KarlinReturn = BlastKarlinBlkGappedCalc(sbp->kbp_gap_std[0], gap_open, gap_extend,
340 sbp->name, &error_return);
341 if (1 == KarlinReturn) {
342 BlastErrorPrint(error_return);
343 return(-1);
344 }
345 sbp->kbp_gap_psi[0] = BlastKarlinBlkCreate();
346 KarlinReturn = BlastKarlinBlkGappedCalc(sbp->kbp_gap_psi[0], gap_open, gap_extend,
347 sbp->name, &error_return);
348 if (1 == KarlinReturn) {
349 BlastErrorPrint(error_return);
350 return(-1);
351 }
352
353 if (sbp->kbp_ideal == NULL)
354 sbp->kbp_ideal = BlastKarlinBlkStandardCalcEx(sbp);
355 compactSearch->lambda = sbp->kbp_gap_std[0]->Lambda;
356 compactSearch->kbp_std = sbp->kbp_std;
357 compactSearch->kbp_psi = sbp->kbp_psi;
358 compactSearch->kbp_gap_psi = sbp->kbp_gap_psi;
359 compactSearch->kbp_gap_std = sbp->kbp_gap_std;
360 compactSearch->lambda_ideal = sbp->kbp_ideal->Lambda;
361 compactSearch->K_ideal = sbp->kbp_ideal->K;
362
363 stdrfp = BlastResFreqNew(sbp);
364 BlastResFreqStdComp(sbp,stdrfp);
365 compactSearch->standardProb = MemNew(compactSearch->alphabetSize * sizeof(Nlm_FloatHi));
366
367 if (NULL == compactSearch->standardProb)
368 exit(EXIT_FAILURE);
369 for(a = 0; a < compactSearch->alphabetSize; a++)
370 compactSearch->standardProb[a] = stdrfp->prob[a];
371 stdrfp = BlastResFreqDestruct(stdrfp);
372
373 posSearch->posInformation = NULL;
374 success = impalaReadCheckpoint(posSearch, compactSearch, profileFileName, &error_return, scalingFactor);
375
376 if (!success) {
377 ErrPostEx(SEV_FATAL, 1,0, "Unable to recover checkpoint from %s\n",profileFileName);
378 return(1);
379 }
380 /*conversion to matrix and scaling is done in impalaReadCheckpopint*/
381 if ('\0' == directoryPrefix[0]) {
382 matrixFileName = makeMatrixName(profileFileName);
383 fprintf(matricesFile,"%s\n",matrixFileName);
384 } else {
385 matrixFileName = makeMatrixName(profileFileName);
386 relativeMatrixFileName = makeMatrixName(relativeProfileFileName);
387 fprintf(matricesFile,"%s\n",relativeMatrixFileName);
388 }
389
390 success = takeMatrixCheckpoint(compactSearch, posSearch, sbp, matrixFileName, &error_return, scaleScores, scalingFactor);
391
392 if (!success) {
393 ErrPostEx(SEV_FATAL, 1,0, "Unable to take matrix checkpoint from %s\n",profileFileName);
394 return(1);
395 }
396
397 lengthArray[i] = queryLength;
398 KArray[i] = sbp->kbp_gap_psi[0]->K;
399
400 if (lengthArray[i] > maxLength)
401 maxLength = lengthArray[i];
402
403 posCheckpointFreeMemory(posSearch, queryLength);
404 FileClose(thisProfileFile);
405 thisProfileFile = NULL;
406 FileClose(thisSequenceFile);
407 thisSequenceFile = NULL;
408 MemFree(query);
409 SeqEntryFree(sep);
410 sbp = BLAST_ScoreBlkDestruct(sbp);
411 compactSearch->standardProb = MemFree(compactSearch->standardProb);
412
413 if (success) {
414 MemFree(matrixFileName);
415 if ('\0' != directoryPrefix[0])
416 MemFree(relativeMatrixFileName);
417 }
418 }
419
420 fprintf(auxiliaryFile, "%ld\n", (long) maxLength);
421 fprintf(auxiliaryFile, "%ld\n", (long) effectiveSize);
422 fprintf(auxiliaryFile, "%lf\n", scalingFactor);
423
424 for(i = 0; i < count; i++) {
425 fprintf(auxiliaryFile, "%ld\n", (long) lengthArray[i]);
426 fprintf(auxiliaryFile, "%le\n", KArray[i]);
427 }
428 MemFree(KArray);
429 MemFree(lengthArray);
430 FileClose(profilesFile);
431 FileClose(sequencesFile);
432 FileClose(matricesFile);
433 FileClose(auxiliaryFile);
434 compactSearchDestruct(compactSearch);
435 PSIMatrixFrequencyRatiosFree(posSearch->stdFreqRatios);
436 MemFree(posSearch);
437 BLAST_ScoreBlkDestruct(sbp);
438 return(0);
439 }
440
441 #define NUMARG 8
442
443 static Args myargs [NUMARG] = {
444 { "Database name for profile database",
445 "stdin", NULL, NULL, FALSE, 'P', ARG_FILE_IN, 0.0, 0, NULL},
446 { "Cost to open a gap",
447 "11", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL},
448 { "Cost to extend a gap",
449 "1", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL},
450 { "Underlying Matrix",
451 "BLOSUM62", NULL, NULL, FALSE, 'U', ARG_STRING, 0.0, 0, NULL},
452 { "Underlying sequence database used to make profiles",
453 "nr", NULL, NULL, FALSE, 'd', ARG_STRING, 0.0, 0, NULL},
454 { "Effective length of the profile database (0 for length of -d option)",
455 "0", NULL, NULL, FALSE, 'z', ARG_INT, 0.0, 0, NULL},
456 { "Scaling factor for matrix outputs to avoid round-off problems",
457 "100.0", NULL, NULL, FALSE, 'S', ARG_FLOAT, 0.0, 0, NULL},
458 { "Print help; overrides all other arguments",
459 "F", NULL, NULL, FALSE, 'H', ARG_BOOLEAN, 0.0, 0, NULL}
460 };
461
Main(void)462 Int2 Main(void)
463
464 {
465 Char *profilesDatabase;
466 Char profilesFileName[MAX_NAME_LENGTH];
467 Char sequencesFileName[MAX_NAME_LENGTH];
468 Char matricesFileName[MAX_NAME_LENGTH];
469 Char auxiliaryFileName[MAX_NAME_LENGTH];
470 Char mmapFileName[MAX_NAME_LENGTH];
471 FILE *profilesFile, *sequencesFile, *matricesFile, *auxiliaryFile;
472 Int4 count; /*how many profiles*/
473 Int4 effSize; /*effective database size to use*/
474 Int4 retcode;
475 ReadDBFILEPtr rdpt=NULL; /*holds result of attempt to read database*/
476 Boolean scaling; /*are score matrix values going to be scaled*/
477 Char *directoryPrefix; /*directory where profile library is kept, used
478 to reach other directories indirectly*/
479
480 if (! GetArgs ("makematrices", NUMARG, myargs)) {
481 return (1);
482 }
483
484 if (! SeqEntryLoad())
485 return (1);
486
487 UseLocalAsnloadDataAndErrMsg();
488 ErrSetLogLevel(SEV_WARNING);
489
490 if ((Boolean) myargs[7].intvalue) {
491 IMPALAPrintHelp(FALSE, 80, "makemat", stdout);
492 return(1);
493 }
494 profilesDatabase = myargs[0].strvalue;
495 directoryPrefix = (Char *) MemNew(MAX_NAME_LENGTH *sizeof(char));
496 strcpy(directoryPrefix,profilesDatabase);
497 impalaMakeFileNames(profilesDatabase,auxiliaryFileName,
498 mmapFileName,sequencesFileName,matricesFileName,
499 profilesFileName, directoryPrefix);
500
501 if ((profilesFile = FileOpen(profilesFileName, "r")) == NULL) {
502 ErrPostEx(SEV_FATAL, 1, 0, "Unable to open profiles file %s\n", profilesFileName);
503 return (1);
504 }
505
506 if ((sequencesFile = FileOpen(sequencesFileName, "r")) == NULL) {
507 ErrPostEx(SEV_FATAL, 1, 0, "Unable to open sequences file %s\n", sequencesFileName);
508 return (1);
509 }
510
511 if ((matricesFile = FileOpen(matricesFileName, "w")) == NULL) {
512 ErrPostEx(SEV_FATAL, 1, 0, "Unable to open matrices file %s\n", matricesFileName);
513 return (1);
514 }
515
516 if ((auxiliaryFile = FileOpen(auxiliaryFileName, "w")) == NULL) {
517 ErrPostEx(SEV_FATAL, 1, 0, "Unable to open auxiliary file %s\n", auxiliaryFileName);
518 return (1);
519 }
520
521 effSize = myargs[5].intvalue;
522 if (0 == effSize) {
523 rdpt = readdb_new(myargs[4].strvalue, TRUE);
524 effSize = readdb_get_dblen(rdpt);
525 rdpt = readdb_destruct(rdpt);
526 }
527
528 count = countProfiles(sequencesFile, profilesFile);
529 scaling = ((myargs[6].floatvalue < 0.99) ||
530 (myargs[6].floatvalue > 1.01));
531
532 retcode = convertToMatrices(profilesFile, sequencesFile, matricesFile,
533 auxiliaryFile, count, myargs[1].intvalue,
534 myargs[2].intvalue, effSize, myargs[3].strvalue,
535 scaling, myargs[6].floatvalue, directoryPrefix);
536
537 MemFree(directoryPrefix);
538 return retcode;
539 }
540
541
542