1 static char const rcsid[] = "$Id: makemat.c,v 6.17 2005/07/28 14:52:22 coulouri Exp $";
2 
3 /*
4 * ===========================================================================
5 *
6 *                            PUBLIC DOMAIN NOTICE
7 *               National Center for Biotechnology Information
8 *
9 *  This software/database is a "United States Government Work" under the
10 *  terms of the United States Copyright Act.  It was written as part of
11 *  the author's official duties as a United States Government employee and
12 *  thus cannot be copyrighted.  This software/database is freely available
13 *  to the public for use. The National Library of Medicine and the U.S.
14 *  Government have not placed any restriction on its use or reproduction.
15 *
16 *  Although all reasonable efforts have been taken to ensure the accuracy
17 *  and reliability of the software and data, the NLM and the U.S.
18 *  Government do not and cannot warrant the performance or results that
19 *  may be obtained by using this software or data. The NLM and the U.S.
20 *  Government disclaim all warranties, express or implied, including
21 *  warranties of performance, merchantability or fitness for any particular
22 *  purpose.
23 *
24 *  Please cite the author in any work or product based on this material.
25 *
26 * ===========================================================================
27 */
28 
29 /*****************************************************************************
30 
31 File name: makemat.c
32 
33 Author: Alejandro Schaffer
34 
35 Contents: main routines for makematrices program to convert
36     PSI-BLAST checkpoints into score matrices.
37 
38 *****************************************************************************/
39 
40 #include <ncbi.h>
41 #include <objseq.h>
42 #include <objsset.h>
43 #include <sequtil.h>
44 #include <seqport.h>
45 #include <tofasta.h>
46 #include <blast.h>
47 #include <blastpri.h>
48 #include <txalign.h>
49 #include <simutil.h>
50 #include <posit.h>
51 #include <profiles.h>
52 #include <sqnutils.h>
53 
54 
55 /*counts the number of items in sequencesFile and matricesFile, assumed to
56   be one per line, and checks that the numbers are equal.
57   returns the number if equal, 0 if unequal, rewinds the file descriptors
58   before returning*/
countProfiles(FILE * sequencesFile,FILE * profilesFile)59 static Int4 countProfiles(FILE *sequencesFile, FILE *profilesFile)
60 {
61     Int4 sequencesCount = 0; /*count for sequencesFile*/
62     Int4 matricesCount = 0; /*count for profilesFile*/
63     Char oneFileName[MAXLINELEN]; /*for reading one line per file*/
64 
65     while (fgets(oneFileName,MAXLINELEN,sequencesFile))
66         sequencesCount++;
67     while (fgets(oneFileName,MAXLINELEN,profilesFile))
68         matricesCount++;
69     rewind(profilesFile);
70     rewind(sequencesFile);
71     if (sequencesCount == matricesCount) {
72         return(sequencesCount);
73     } else {
74         ErrPostEx(SEV_FATAL, 1, 0, "profiles: Sequences file has %ld entries; Matrices file has %d entries; these should be equal\n", (long) sequencesCount,matricesCount);
75         return(0);
76     }
77 }
78 
79 
80 /*converts name of profile file to matrix file by changing
81   suffix to mtx or appending suffix mtx*/
makeMatrixName(Char * profileName)82 static Char *makeMatrixName(Char *profileName)
83 {
84     int length; /*length of a name*/
85     Char *returnName; /*string to treturn*/
86     int c, lastc; /*loop indices*/
87 
88     length = strlen(profileName);
89     returnName = (Char *) MemNew((length + 5) * sizeof(Char));
90 
91     for(c = 0; c < length; c++) {
92         returnName[c] = profileName[c];
93         if(('.' == profileName[c]) && ('c' == profileName[c+1])
94            && ('h' == profileName[c+2]))
95             lastc = c;
96     }
97     returnName[lastc] = '.';
98     returnName[lastc+1] = 'm';
99     returnName[lastc+2] = 't';
100     returnName[lastc+3] = 'x';
101     returnName[lastc+4] = '\0';
102     return(returnName);
103 }
104 
105 /*print out some parameters associated with a Karlin-Alschul
106   scoring system
107   checkFile is the file descriptor to write to
108   kbp is the pointer to a structure with the parameters
109   scaling determines whether scores are being scaled or not
110   scalingDown is 1/scalingFactor because if scores are
111   scaled up, then Lambda is to be scaled down*/
putMatrixKbp(FILE * checkFile,BLAST_KarlinBlkPtr kbp,Boolean scaling,Nlm_FloatHi scalingDown)112 static void putMatrixKbp(FILE * checkFile, BLAST_KarlinBlkPtr kbp, Boolean scaling, Nlm_FloatHi scalingDown)
113 {
114     if (scaling)
115         fprintf(checkFile,"%le\n",kbp->Lambda * scalingDown);
116     else
117         fprintf(checkFile,"%le\n",kbp->Lambda);
118 
119    fprintf(checkFile,"%le\n",kbp->K);
120    fprintf(checkFile,"%le\n",kbp->logK);
121    fprintf(checkFile,"%le\n",kbp->H);
122 }
123 
124 /*print out a score matrix into the file descriptor checkfile
125   compactSerarch and psoSearch stroe information about the
126   matrix and the associated sequence
127   scaleScores determines whether scores are scaled or not*/
putMatrixMatrix(FILE * checkFile,compactSearchItems * compactSearch,posSearchItems * posSearch,Boolean scaleScores)128 static void putMatrixMatrix(FILE *checkFile, compactSearchItems * compactSearch, posSearchItems *posSearch, Boolean scaleScores)
129 {
130     Int4 i, j; /*loop indices*/
131 
132     if (scaleScores) {
133         for(i = 0; i < compactSearch->qlength; i++) {
134             for(j = 0; j < compactSearch->alphabetSize; j++)
135                 fprintf(checkFile,"%ld  ", (long) posSearch->posPrivateMatrix[i][j]);
136             fprintf(checkFile,"\n");
137         }
138     }
139     else {
140         for(i = 0; i < compactSearch->qlength; i++) {
141             for(j = 0; j < compactSearch->alphabetSize; j++)
142                 fprintf(checkFile,"%ld  ", (long) posSearch->posMatrix[i][j]);
143             fprintf(checkFile,"\n");
144         }
145     }
146 }
147 
148 /*Write out the matrix
149   compactSearch and PosSearch include fields that store the matrix
150   and the sequence
151   sbp includes information about the underlying
152   matrix
153   fileName is where the matrix is to be written
154   error_return holds error messages
155   scaleScores indicates whether scores in the matrix are to be scaled
156   scalingFactor is the multiplicative factor to use if scaleScores
157    is true */
takeMatrixCheckpoint(compactSearchItems * compactSearch,posSearchItems * posSearch,BLAST_ScoreBlkPtr sbp,Char * fileName,ValNodePtr * error_return,Boolean scaleScores,Nlm_FloatHi scalingFactor)158 static Boolean takeMatrixCheckpoint(compactSearchItems * compactSearch,
159     posSearchItems *posSearch,  BLAST_ScoreBlkPtr sbp,
160     Char *fileName,ValNodePtr *error_return, Boolean scaleScores, Nlm_FloatHi
161     scalingFactor)
162 {
163 
164     FILE * checkFile; /*file in which to take the checkpoint*/
165     Int4 length; /*length of query sequence, and an index for it*/
166     Int4 i; /*indices to position and alphabet */
167     Char localChar; /*temporary character*/
168 
169     checkFile = FileOpen(fileName, "w");
170 
171     if (NULL == checkFile) {
172         ErrPostEx(SEV_ERROR, 0,0, "Could not open checkpoint file");
173         return(FALSE);
174     }
175 
176     length = compactSearch->qlength;
177     fprintf(checkFile,"%ld\n",(long) length);
178 
179     for(i = 0; i < length; i++) {
180         localChar = getRes(compactSearch->query[i]);
181 
182         fprintf(checkFile,"%c",localChar);
183 
184         /* The following 2 lines are needed to preserve compatibility with the
185          * checkpoint file libraries distributed with IMPALA (from personal
186          * communication with IMPALA's author) */
187         posSearch->posMatrix[i][Xchar] = Xscore;
188         posSearch->posPrivateMatrix[i][Xchar] = Xscore * scalingFactor;
189     }
190 
191     fprintf(checkFile,"\n");
192     putMatrixKbp(checkFile, compactSearch->kbp_gap_std[0], scaleScores, 1/scalingFactor);
193     putMatrixKbp(checkFile, compactSearch->kbp_gap_psi[0], scaleScores, 1/scalingFactor);
194     putMatrixKbp(checkFile, sbp->kbp_ideal, scaleScores, 1/scalingFactor);
195     putMatrixMatrix(checkFile, compactSearch, posSearch, scaleScores);
196 
197     FileClose(checkFile);
198     return(TRUE);
199 }
200 
201 /*convert to matrices is the high-level procedure to convert a set of
202   PSI-BLAST checkpoints into a corresponding set of score matrices
203   profilesFile is a descriptor to a file listing the file names of files
204   containing checkpoints, one file name per line
205   sequencesFile is a descriptor to a file listing the file names of sequences
206   containing the master sequences for checkpoints, one file name per line
207   matricesFile is an output file to print out the names of the newly
208   produced files containing score matrices, one file name per line
209   auxiliaryFile is an outputput file to contain some general information
210   about the library of matrices and some parameters for each matrix
211   count is the number of checkpoints/sequences
212   gap_open is the cost of opening a gap
213   gap_extend is the cost of extending a gap
214   effectiveSize is the the size of the original sequence database used to
215   make the PSI-BLAST matrices
216   underlyignMatrixName is the original score matrix used to make the
217   PSI-BLAST matrices
218   scaleScores indicates whether scores should be scaled
219   scalingFactor indicates by how much scores are scaled, if at all*/
220 
convertToMatrices(FILE * profilesFile,FILE * sequencesFile,FILE * matricesFile,FILE * auxiliaryFile,Int4 count,Int4 gap_open,Int4 gap_extend,Int4 effectiveSize,Char * underlyingMatrixName,Boolean scaleScores,Nlm_FloatHi scalingFactor,Char * directoryPrefix)221 static Int4 convertToMatrices(FILE *profilesFile, FILE *sequencesFile, FILE *matricesFile, FILE *auxiliaryFile, Int4 count, Int4 gap_open, Int4 gap_extend,
222 Int4 effectiveSize, Char *underlyingMatrixName, Boolean scaleScores,
223 Nlm_FloatHi scalingFactor, Char *directoryPrefix)
224 {
225     int i; /*loop index over profiles*/
226     FILE *thisProfileFile, *thisSequenceFile; /*file
227                 descriptors for a single profile*/
228     Char profileFileName[MAX_NAME_LENGTH], sequenceFileName[MAX_NAME_LENGTH];
229     /*file names for profiles*/
230     Char * matrixFileName, *relativeMatrixFileName;  /*file name for corresponding matrix file*/
231     Char relativeProfileFileName[MAX_NAME_LENGTH], relativeSequenceFileName[MAX_NAME_LENGTH];
232     Int4 prefixLength; /*length of directoryPrefix*/
233     Int4 c1,c2; /*indices over characters in names*/
234     posSearchItems *posSearch; /*used to store matrix*/
235     Uint1Ptr query =NULL;  /*query sequence read in*/
236     Int4  queryLength;  /*length of query sequence*/
237     Int4 c; /*index over query*/
238     compactSearchItems *compactSearch; /*stores query related items*/
239     ValNodePtr  error_return; /*stores error messages*/
240     Boolean success;      /*did one checkpoint recovery succeed*/
241     BLAST_ResFreqPtr stdrfp; /* gets standard frequencies in prob field */
242     Int4 a; /*index over characters*/
243     SeqCodeTablePtr sctp;
244     BLAST_ScoreBlkPtr sbp;
245     BioseqPtr query_bsp;  /*structure to hold query information*/
246     SeqEntryPtr sep;      /*structure to hold query retrieval result*/
247     Int4 *lengthArray; /*array of sequence lengths*/
248     Nlm_FloatHi *KArray;  /*array of K values, one per sequence*/
249     Int4 maxLength; /*maximum length of a sequence*/
250     Int4 KarlinReturn; /*return value from calls to set up matrix parameters*/
251 
252     error_return = NULL;
253     posSearch = (posSearchItems *) MemNew (1 * sizeof(posSearchItems));
254     compactSearch = (compactSearchItems *) MemNew (1 * sizeof(compactSearchItems));
255     sctp = SeqCodeTableFindObj(Seq_code_ncbistdaa);
256     compactSearch->alphabetSize = sctp->num;
257 
258     fprintf(auxiliaryFile,"%s\n",underlyingMatrixName);
259     fprintf(auxiliaryFile,"%ld\n",(long) gap_open);
260     fprintf(auxiliaryFile,"%ld\n",(long) gap_extend);
261 
262     lengthArray = (Int4 *) MemNew(count * sizeof(Int4));
263     KArray = (Nlm_FloatHi *) MemNew(count * sizeof(Nlm_FloatHi));
264     maxLength = 0;
265 
266     if ('\0' != directoryPrefix[0]) {
267         strcpy(profileFileName, directoryPrefix);
268         strcpy(sequenceFileName, directoryPrefix);
269         prefixLength = strlen(directoryPrefix);
270     }
271 
272     posSearch->stdFreqRatios =
273                       PSIMatrixFrequencyRatiosNew(underlyingMatrixName);
274 
275     for(i = 0; i < count; i++) {
276         if ('\0' == directoryPrefix[0])
277             fscanf(profilesFile,"%s", profileFileName);
278         else {
279             fscanf(profilesFile,"%s", relativeProfileFileName);
280             for(c1 = prefixLength, c2 = 0; relativeProfileFileName[c2] != '\0';
281                 c1++, c2++)
282                 profileFileName[c1] = relativeProfileFileName[c2];
283             profileFileName[c1] = '\0';
284         }
285         if ((thisProfileFile = FileOpen(profileFileName, "rb")) == NULL) {
286             ErrPostEx(SEV_FATAL, 1, 0, "Unable to open file %s\n", profileFileName);
287             return (1);
288         }
289         if ('\0' == directoryPrefix[0])
290             fscanf(sequencesFile,"%s", sequenceFileName);
291         else {
292             fscanf(sequencesFile,"%s", relativeSequenceFileName);
293             for(c1 = prefixLength, c2 = 0; relativeSequenceFileName[c2] != '\0';
294                 c1++, c2++)
295                 sequenceFileName[c1] = relativeSequenceFileName[c2];
296             sequenceFileName[c1] = '\0';
297         }
298         if ((thisSequenceFile = FileOpen(sequenceFileName, "r")) == NULL) {
299             ErrPostEx(SEV_FATAL, 1, 0, "Unable to open file %s\n", sequenceFileName);
300             return (1);
301         }
302 
303         sep = FastaToSeqEntryEx(thisSequenceFile, FALSE, NULL, FALSE);
304         if (sep != NULL) {
305             query_bsp = NULL;
306             SeqEntryExplore(sep, &query_bsp, FindProt);
307             if (query_bsp == NULL) {
308                 ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
309                 return 2;
310             }
311             query = BlastGetSequenceFromBioseq(query_bsp, &queryLength);
312         }
313         compactSearch->query = query;
314         for (c= 0; c < queryLength; c++)
315             query[c] = ResToInt(query[c]);
316         compactSearch->qlength = queryLength;
317 
318 
319         sbp = BLAST_ScoreBlkNew(Seq_code_ncbistdaa, 1);
320         sbp->read_in_matrix = TRUE;
321         sbp->protein_alphabet = TRUE;
322         sbp->posMatrix = NULL;
323         sbp->number_of_contexts = 1;
324         BlastScoreBlkMatFill(sbp, underlyingMatrixName);
325         compactSearch->matrix = sbp->matrix;
326         compactSearch->gapped_calculation = TRUE;
327         /* Note that these two assignments are not really needed for
328          * makemat's operation and thus their values are irrelevant */
329         compactSearch->pseudoCountConst = 10;
330         compactSearch->ethresh = 0.001;
331         BlastScoreBlkFill(sbp,  (CharPtr) query, queryLength, 0);
332 
333         if (0 == i) {
334             fprintf(auxiliaryFile, "%le\n", sbp->kbp_std[0]->K);
335             fprintf(auxiliaryFile, "%le\n", sbp->kbp_std[0]->H);
336         }
337 
338         sbp->kbp_gap_std[0] = BlastKarlinBlkCreate();
339         KarlinReturn = BlastKarlinBlkGappedCalc(sbp->kbp_gap_std[0], gap_open, gap_extend,
340                                                 sbp->name, &error_return);
341         if (1 == KarlinReturn) {
342             BlastErrorPrint(error_return);
343             return(-1);
344         }
345         sbp->kbp_gap_psi[0] = BlastKarlinBlkCreate();
346         KarlinReturn = BlastKarlinBlkGappedCalc(sbp->kbp_gap_psi[0], gap_open, gap_extend,
347                                                 sbp->name, &error_return);
348         if (1 == KarlinReturn) {
349             BlastErrorPrint(error_return);
350             return(-1);
351         }
352 
353         if (sbp->kbp_ideal == NULL)
354             sbp->kbp_ideal = BlastKarlinBlkStandardCalcEx(sbp);
355         compactSearch->lambda =  sbp->kbp_gap_std[0]->Lambda;
356         compactSearch->kbp_std = sbp->kbp_std;
357         compactSearch->kbp_psi = sbp->kbp_psi;
358         compactSearch->kbp_gap_psi = sbp->kbp_gap_psi;
359         compactSearch->kbp_gap_std = sbp->kbp_gap_std;
360         compactSearch->lambda_ideal = sbp->kbp_ideal->Lambda;
361         compactSearch->K_ideal = sbp->kbp_ideal->K;
362 
363         stdrfp = BlastResFreqNew(sbp);
364         BlastResFreqStdComp(sbp,stdrfp);
365         compactSearch->standardProb = MemNew(compactSearch->alphabetSize * sizeof(Nlm_FloatHi));
366 
367         if (NULL == compactSearch->standardProb)
368             exit(EXIT_FAILURE);
369         for(a = 0; a < compactSearch->alphabetSize; a++)
370             compactSearch->standardProb[a] = stdrfp->prob[a];
371         stdrfp = BlastResFreqDestruct(stdrfp);
372 
373         posSearch->posInformation = NULL;
374         success = impalaReadCheckpoint(posSearch, compactSearch, profileFileName, &error_return, scalingFactor);
375 
376         if (!success) {
377             ErrPostEx(SEV_FATAL, 1,0, "Unable to recover checkpoint from %s\n",profileFileName);
378             return(1);
379         }
380         /*conversion to matrix and scaling is done in impalaReadCheckpopint*/
381         if ('\0' == directoryPrefix[0]) {
382             matrixFileName = makeMatrixName(profileFileName);
383             fprintf(matricesFile,"%s\n",matrixFileName);
384         } else {
385             matrixFileName = makeMatrixName(profileFileName);
386             relativeMatrixFileName = makeMatrixName(relativeProfileFileName);
387             fprintf(matricesFile,"%s\n",relativeMatrixFileName);
388         }
389 
390         success = takeMatrixCheckpoint(compactSearch, posSearch, sbp, matrixFileName, &error_return, scaleScores, scalingFactor);
391 
392         if (!success) {
393             ErrPostEx(SEV_FATAL, 1,0, "Unable to take matrix checkpoint from %s\n",profileFileName);
394             return(1);
395         }
396 
397         lengthArray[i] = queryLength;
398         KArray[i] = sbp->kbp_gap_psi[0]->K;
399 
400         if (lengthArray[i] > maxLength)
401             maxLength = lengthArray[i];
402 
403         posCheckpointFreeMemory(posSearch, queryLength);
404         FileClose(thisProfileFile);
405         thisProfileFile = NULL;
406         FileClose(thisSequenceFile);
407         thisSequenceFile = NULL;
408         MemFree(query);
409         SeqEntryFree(sep);
410         sbp = BLAST_ScoreBlkDestruct(sbp);
411         compactSearch->standardProb = MemFree(compactSearch->standardProb);
412 
413         if (success) {
414             MemFree(matrixFileName);
415             if ('\0' != directoryPrefix[0])
416                 MemFree(relativeMatrixFileName);
417         }
418     }
419 
420     fprintf(auxiliaryFile, "%ld\n", (long) maxLength);
421     fprintf(auxiliaryFile, "%ld\n", (long) effectiveSize);
422     fprintf(auxiliaryFile, "%lf\n", scalingFactor);
423 
424     for(i = 0; i < count; i++) {
425         fprintf(auxiliaryFile, "%ld\n", (long) lengthArray[i]);
426         fprintf(auxiliaryFile, "%le\n", KArray[i]);
427     }
428     MemFree(KArray);
429     MemFree(lengthArray);
430     FileClose(profilesFile);
431     FileClose(sequencesFile);
432     FileClose(matricesFile);
433     FileClose(auxiliaryFile);
434     compactSearchDestruct(compactSearch);
435     PSIMatrixFrequencyRatiosFree(posSearch->stdFreqRatios);
436     MemFree(posSearch);
437     BLAST_ScoreBlkDestruct(sbp);
438     return(0);
439 }
440 
441 #define NUMARG 8
442 
443 static Args myargs [NUMARG] = {
444     { "Database name for profile database",
445       "stdin", NULL, NULL, FALSE, 'P', ARG_FILE_IN, 0.0, 0, NULL},
446     { "Cost to open a gap",
447       "11", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL},
448     { "Cost to extend a gap",
449       "1", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL},
450     { "Underlying Matrix",
451       "BLOSUM62", NULL, NULL, FALSE, 'U', ARG_STRING, 0.0, 0, NULL},
452     { "Underlying sequence database used to make profiles",
453       "nr", NULL, NULL, FALSE, 'd', ARG_STRING, 0.0, 0, NULL},
454     { "Effective length of the profile database (0 for length of -d option)",
455       "0", NULL, NULL, FALSE, 'z', ARG_INT, 0.0, 0, NULL},
456     { "Scaling factor for  matrix outputs to avoid round-off problems",
457       "100.0", NULL, NULL, FALSE, 'S', ARG_FLOAT, 0.0, 0, NULL},
458     { "Print help; overrides all other arguments",
459       "F", NULL, NULL, FALSE, 'H', ARG_BOOLEAN, 0.0, 0, NULL}
460 };
461 
Main(void)462 Int2 Main(void)
463 
464 {
465     Char *profilesDatabase;
466     Char profilesFileName[MAX_NAME_LENGTH];
467     Char sequencesFileName[MAX_NAME_LENGTH];
468     Char matricesFileName[MAX_NAME_LENGTH];
469     Char auxiliaryFileName[MAX_NAME_LENGTH];
470     Char mmapFileName[MAX_NAME_LENGTH];
471     FILE *profilesFile, *sequencesFile, *matricesFile, *auxiliaryFile;
472     Int4 count; /*how many profiles*/
473     Int4 effSize; /*effective database size to use*/
474     Int4 retcode;
475     ReadDBFILEPtr rdpt=NULL;  /*holds result of attempt to read database*/
476     Boolean scaling; /*are score matrix values going to be scaled*/
477     Char *directoryPrefix; /*directory where profile library is kept, used
478                              to reach other directories indirectly*/
479 
480     if (! GetArgs ("makematrices", NUMARG, myargs)) {
481         return (1);
482     }
483 
484     if (! SeqEntryLoad())
485         return (1);
486 
487     UseLocalAsnloadDataAndErrMsg();
488     ErrSetLogLevel(SEV_WARNING);
489 
490     if ((Boolean) myargs[7].intvalue) {
491         IMPALAPrintHelp(FALSE, 80, "makemat", stdout);
492         return(1);
493     }
494     profilesDatabase = myargs[0].strvalue;
495     directoryPrefix = (Char *) MemNew(MAX_NAME_LENGTH *sizeof(char));
496     strcpy(directoryPrefix,profilesDatabase);
497     impalaMakeFileNames(profilesDatabase,auxiliaryFileName,
498                         mmapFileName,sequencesFileName,matricesFileName,
499                         profilesFileName,  directoryPrefix);
500 
501     if ((profilesFile = FileOpen(profilesFileName, "r")) == NULL) {
502 	ErrPostEx(SEV_FATAL, 1, 0, "Unable to open profiles file %s\n", profilesFileName);
503 	return (1);
504     }
505 
506     if ((sequencesFile = FileOpen(sequencesFileName, "r")) == NULL) {
507 	ErrPostEx(SEV_FATAL, 1, 0, "Unable to open sequences file %s\n", sequencesFileName);
508 	return (1);
509     }
510 
511     if ((matricesFile = FileOpen(matricesFileName, "w")) == NULL) {
512 	ErrPostEx(SEV_FATAL, 1, 0, "Unable to open matrices file %s\n", matricesFileName);
513 	return (1);
514     }
515 
516     if ((auxiliaryFile = FileOpen(auxiliaryFileName, "w")) == NULL) {
517 	ErrPostEx(SEV_FATAL, 1, 0, "Unable to open auxiliary file %s\n", auxiliaryFileName);
518 	return (1);
519     }
520 
521     effSize = myargs[5].intvalue;
522     if (0 == effSize) {
523         rdpt = readdb_new(myargs[4].strvalue, TRUE);
524         effSize = readdb_get_dblen(rdpt);
525         rdpt = readdb_destruct(rdpt);
526     }
527 
528     count = countProfiles(sequencesFile, profilesFile);
529     scaling = ((myargs[6].floatvalue < 0.99) ||
530                (myargs[6].floatvalue > 1.01));
531 
532     retcode = convertToMatrices(profilesFile, sequencesFile, matricesFile,
533                                 auxiliaryFile, count,  myargs[1].intvalue,
534                                 myargs[2].intvalue, effSize, myargs[3].strvalue,
535                                 scaling,  myargs[6].floatvalue, directoryPrefix);
536 
537     MemFree(directoryPrefix);
538     return retcode;
539 }
540 
541 
542