1 /* PSIPRED 2 - Neural Network Prediction of Secondary Structure */
2 
3 /* Copyright (C) 2000 David T. Jones - Created : January 2000 */
4 /* Original Neural Network code Copyright (C) 1990 David T. Jones */
5 
6 /* Average Prediction Module */
7 
8 #include <ctype.h>
9 #include <math.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <time.h>
14 
15 #include <QDir>
16 #include <QFile>
17 #include <QString>
18 #include <QTemporaryFile>
19 #include <QTextStream>
20 
21 #include <U2Core/AppContext.h>
22 #include <U2Core/AppSettings.h>
23 #include <U2Core/UserApplicationsSettings.h>
24 
25 #ifdef Q_OS_WIN
26 #    pragma warning(disable : 4996)
27 #endif
28 
29 #include "sspred_avpred.h"
30 #include "sspred_net.h"
31 #include "sspred_utils.h"
32 
33 //void           *calloc(), *malloc();
34 
35 // char           *wtfnm;
36 //
37 // int             nwtsum, fwt_to[TOTAL], lwt_to[TOTAL];
38 // float           activation[TOTAL], bias[TOTAL], *weight[TOTAL];
39 //
40 // int             profile[MAXSEQLEN][20];
41 //
42 // int             seqlen;
43 //
44 // char seq[MAXSEQLEN];
45 
46 enum aacodes {
47     ALA,
48     ARG,
49     ASN,
50     ASP,
51     CYS,
52     GLN,
53     GLU,
54     GLY,
55     HIS,
56     ILE,
57     LEU,
58     LYS,
59     MET,
60     PHE,
61     PRO,
62     SER,
63     THR,
64     TRP,
65     TYR,
66     VAL,
67     UNK
68 };
69 
PsiPassOne(QTemporaryFile * matFile,const QStringList & weightFiles)70 PsiPassOne::PsiPassOne(QTemporaryFile *matFile, const QStringList &weightFiles)
71     : matrixFile(matFile), weightFileNames(weightFiles) {
72     fwt_to = (int *)malloc(TOTAL * sizeof(int));
73     lwt_to = (int *)malloc(TOTAL * sizeof(int));
74     activation = (float *)malloc(TOTAL * sizeof(float));
75     bias = (float *)malloc(TOTAL * sizeof(float));
76     weight = (float **)malloc(TOTAL * sizeof(float *));
77 
78     profile = (int **)malloc(MAXSEQLEN * sizeof(int *));
79     for (int i = 0; i < MAXSEQLEN; i++) {
80         profile[i] = (int *)malloc(20 * sizeof(int));
81     }
82 }
83 
~PsiPassOne()84 PsiPassOne::~PsiPassOne() {
85     free(fwt_to);
86     free(lwt_to);
87     free(activation);
88     free(bias);
89     free(weight);
90 
91     for (int i = 0; i < MAXSEQLEN; i++) {
92         delete profile[i];
93     }
94     delete profile;
95 }
96 
compute_output(void)97 void PsiPassOne::compute_output(void) {
98     int i, j;
99     float netinp;
100 
101     for (i = NUM_IN; i < TOTAL; i++) {
102         netinp = bias[i];
103 
104         for (j = fwt_to[i]; j < lwt_to[i]; j++)
105             netinp += activation[j] * weight[i][j];
106 
107         /* Trigger neuron */
108         activation[i] = logistic(netinp);
109     }
110 }
111 
112 /*
113 * load weights - load all link weights from a disk file
114 */
load_wts(const char * fname)115 void PsiPassOne::load_wts(const char *fname) {
116     int i, j;
117     double t;
118 
119     QFile weightFile(fname);
120     if (!weightFile.open(QIODevice::ReadOnly)) {
121         return;
122     }
123 
124     QTextStream stream(&weightFile);
125 
126     /* Load input units to hidden layer weights */
127     for (i = NUM_IN; i < NUM_IN + NUM_HID; i++) {
128         for (j = fwt_to[i]; j < lwt_to[i]; j++) {
129             stream >> t;
130             weight[i][j] = t;
131         }
132     }
133 
134     /* Load hidden layer to output units weights */
135     for (i = NUM_IN + NUM_HID; i < TOTAL; i++) {
136         for (j = fwt_to[i]; j < lwt_to[i]; j++) {
137             stream >> t;
138             weight[i][j] = t;
139         }
140     }
141 
142     /* Load bias weights */
143     for (j = NUM_IN; j < TOTAL; j++) {
144         stream >> t;
145         bias[j] = t;
146     }
147 }
148 
149 /* Initialize network */
init(void)150 void PsiPassOne::init(void) {
151     int i;
152 
153     for (i = NUM_IN; i < TOTAL; i++)
154         if (!(weight[i] = (float *)calloc(TOTAL - NUM_OUT, sizeof(float))))
155             fail("init: Out of Memory!");
156 
157     /* Connect input units to hidden layer */
158     for (i = NUM_IN; i < NUM_IN + NUM_HID; i++) {
159         fwt_to[i] = 0;
160         lwt_to[i] = NUM_IN;
161     }
162 
163     /* Connect hidden units to output layer */
164     for (i = NUM_IN + NUM_HID; i < TOTAL; i++) {
165         fwt_to[i] = NUM_IN;
166         lwt_to[i] = NUM_IN + NUM_HID;
167     }
168 }
169 
170 /* Make 1st level prediction averaged over specified weight sets */
predict()171 void PsiPassOne::predict() {
172     //int             aa, i, j, k, n, winpos,ws;
173     int aa, j, winpos;
174     //char fname[80], predsst[MAXSEQLEN];
175     char *predsst;
176     //float   avout[MAXSEQLEN][3], conf, confsum[MAXSEQLEN];
177     float **avout, conf, *confsum;
178 
179     // Allocate buffers
180     // TODO: not good, memory is allocated in small chunks for avout
181     predsst = (char *)malloc(seqlen * sizeof(char));
182     avout = (float **)malloc(seqlen * sizeof(float *));
183     for (int i = 0; i < seqlen; ++i) {
184         avout[i] = (float *)malloc(3 * sizeof(float));
185     }
186     confsum = (float *)malloc(seqlen * sizeof(float));
187 
188     for (winpos = 0; winpos < seqlen; winpos++)
189         avout[winpos][0] = avout[winpos][1] = avout[winpos][2] = confsum[winpos] = 0.0F;
190 
191     foreach (const QString &wfName, weightFileNames) {
192         load_wts(qPrintable(wfName));
193 
194         for (winpos = 0; winpos < seqlen; winpos++) {
195             for (j = 0; j < NUM_IN; j++)
196                 activation[j] = 0.0;
197             for (j = WINL; j <= WINR; j++) {
198                 if (j + winpos >= 0 && j + winpos < seqlen)
199                     for (aa = 0; aa < 20; aa++)
200                         activation[(j - WINL) * 21 + aa] = profile[j + winpos][aa] / 1000.0;
201                 else
202                     activation[(j - WINL) * 21 + 20] = 1.0;
203             }
204 
205             compute_output();
206 
207             conf = (2 * MAX(MAX(activation[TOTAL - NUM_OUT], activation[TOTAL - NUM_OUT + 1]), activation[TOTAL - NUM_OUT + 2]) - (activation[TOTAL - NUM_OUT] + activation[TOTAL - NUM_OUT + 1] + activation[TOTAL - NUM_OUT + 2]) + MIN(MIN(activation[TOTAL - NUM_OUT], activation[TOTAL - NUM_OUT + 1]), activation[TOTAL - NUM_OUT + 2]));
208 
209             avout[winpos][0] += conf * activation[TOTAL - NUM_OUT];
210             avout[winpos][1] += conf * activation[TOTAL - NUM_OUT + 1];
211             avout[winpos][2] += conf * activation[TOTAL - NUM_OUT + 2];
212             confsum[winpos] += conf;
213         }
214     }
215 
216     for (winpos = 0; winpos < seqlen; winpos++) {
217         avout[winpos][0] /= confsum[winpos];
218         avout[winpos][1] /= confsum[winpos];
219         avout[winpos][2] /= confsum[winpos];
220         if (avout[winpos][0] >= MAX(avout[winpos][1], avout[winpos][2]))
221             predsst[winpos] = 'C';
222         else if (avout[winpos][2] >= MAX(avout[winpos][0], avout[winpos][1]))
223             predsst[winpos] = 'E';
224         else
225             predsst[winpos] = 'H';
226     }
227     QString pFilePath = U2::AppContext::getAppSettings()->getUserAppsSettings()->getUserTemporaryDirPath() + QDir::separator() + "output.ss";
228     FILE *pFile = fopen(pFilePath.toLatin1().constData(), "w");
229     if (!pFile) {
230         fail("failed opening file for writing");
231     }
232 
233     for (winpos = 0; winpos < seqlen; winpos++)
234         fprintf(pFile, "%4d %c %c  %6.3f %6.3f %6.3f\n", winpos + 1, seq.constData()[winpos], predsst[winpos], avout[winpos][0], avout[winpos][1], avout[winpos][2]);
235     fclose(pFile);
236 
237     // Deallocate buffers
238     free(predsst);
239     for (int i = 0; i < seqlen; ++i) {
240         free(avout[i]);
241     }
242     free(avout);
243     free(confsum);
244 }
245 
246 #define BUFSIZE 256
247 
248 /* Read PSI AA frequency data */
getmtx()249 int PsiPassOne::getmtx() {
250     int j, naa;
251 
252     QTextStream stream(matrixFile);
253     qDebug("%s", qPrintable(matrixFile->fileName()));
254 
255     stream >> naa;
256     if (naa == 0) {
257         fail("Bad mtx file - no sequence length!");
258     }
259 
260     if (naa > MAXSEQLEN)
261         fail("Input sequence too long!");
262 
263     stream >> seq;
264     if (seq.size() == 0) {
265         fail("Bad mtx file - no sequence!");
266     }
267 
268     while (!stream.atEnd()) {
269         QByteArray line;
270         line = stream.readLine().toLatin1();
271         char *buf = line.data();
272         if (!strncmp(buf, "-32768 ", 7)) {
273             for (j = 0; j < naa; j++) {
274                 if (sscanf(buf, "%*d%d%*d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%*d%d", &profile[j][ALA], &profile[j][CYS], &profile[j][ASP], &profile[j][GLU], &profile[j][PHE], &profile[j][GLY], &profile[j][HIS], &profile[j][ILE], &profile[j][LYS], &profile[j][LEU], &profile[j][MET], &profile[j][ASN], &profile[j][PRO], &profile[j][GLN], &profile[j][ARG], &profile[j][SER], &profile[j][THR], &profile[j][VAL], &profile[j][TRP], &profile[j][TYR]) != 20)
275                     fail("Bad mtx format!");
276                 line = stream.readLine().toLatin1();
277                 if (line.size() == 0)
278                     break;
279                 buf = line.data();
280             }
281         }
282     }
283 
284     return naa;
285 }
286 
runPsiPass()287 int PsiPassOne::runPsiPass() {
288     seqlen = getmtx();
289 
290     init();
291 
292     predict();
293 
294     return 0;
295 }
296