1 /* PSIPRED 2 - Neural Network Prediction of Secondary Structure */
2
3 /* Copyright (C) 2000 David T. Jones - Created : January 2000 */
4 /* Original Neural Network code Copyright (C) 1990 David T. Jones */
5
6 /* Average Prediction Module */
7
8 #include <ctype.h>
9 #include <math.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <time.h>
14
15 #include <QDir>
16 #include <QFile>
17 #include <QString>
18 #include <QTemporaryFile>
19 #include <QTextStream>
20
21 #include <U2Core/AppContext.h>
22 #include <U2Core/AppSettings.h>
23 #include <U2Core/UserApplicationsSettings.h>
24
25 #ifdef Q_OS_WIN
26 # pragma warning(disable : 4996)
27 #endif
28
29 #include "sspred_avpred.h"
30 #include "sspred_net.h"
31 #include "sspred_utils.h"
32
33 //void *calloc(), *malloc();
34
35 // char *wtfnm;
36 //
37 // int nwtsum, fwt_to[TOTAL], lwt_to[TOTAL];
38 // float activation[TOTAL], bias[TOTAL], *weight[TOTAL];
39 //
40 // int profile[MAXSEQLEN][20];
41 //
42 // int seqlen;
43 //
44 // char seq[MAXSEQLEN];
45
46 enum aacodes {
47 ALA,
48 ARG,
49 ASN,
50 ASP,
51 CYS,
52 GLN,
53 GLU,
54 GLY,
55 HIS,
56 ILE,
57 LEU,
58 LYS,
59 MET,
60 PHE,
61 PRO,
62 SER,
63 THR,
64 TRP,
65 TYR,
66 VAL,
67 UNK
68 };
69
PsiPassOne(QTemporaryFile * matFile,const QStringList & weightFiles)70 PsiPassOne::PsiPassOne(QTemporaryFile *matFile, const QStringList &weightFiles)
71 : matrixFile(matFile), weightFileNames(weightFiles) {
72 fwt_to = (int *)malloc(TOTAL * sizeof(int));
73 lwt_to = (int *)malloc(TOTAL * sizeof(int));
74 activation = (float *)malloc(TOTAL * sizeof(float));
75 bias = (float *)malloc(TOTAL * sizeof(float));
76 weight = (float **)malloc(TOTAL * sizeof(float *));
77
78 profile = (int **)malloc(MAXSEQLEN * sizeof(int *));
79 for (int i = 0; i < MAXSEQLEN; i++) {
80 profile[i] = (int *)malloc(20 * sizeof(int));
81 }
82 }
83
~PsiPassOne()84 PsiPassOne::~PsiPassOne() {
85 free(fwt_to);
86 free(lwt_to);
87 free(activation);
88 free(bias);
89 free(weight);
90
91 for (int i = 0; i < MAXSEQLEN; i++) {
92 delete profile[i];
93 }
94 delete profile;
95 }
96
compute_output(void)97 void PsiPassOne::compute_output(void) {
98 int i, j;
99 float netinp;
100
101 for (i = NUM_IN; i < TOTAL; i++) {
102 netinp = bias[i];
103
104 for (j = fwt_to[i]; j < lwt_to[i]; j++)
105 netinp += activation[j] * weight[i][j];
106
107 /* Trigger neuron */
108 activation[i] = logistic(netinp);
109 }
110 }
111
112 /*
113 * load weights - load all link weights from a disk file
114 */
load_wts(const char * fname)115 void PsiPassOne::load_wts(const char *fname) {
116 int i, j;
117 double t;
118
119 QFile weightFile(fname);
120 if (!weightFile.open(QIODevice::ReadOnly)) {
121 return;
122 }
123
124 QTextStream stream(&weightFile);
125
126 /* Load input units to hidden layer weights */
127 for (i = NUM_IN; i < NUM_IN + NUM_HID; i++) {
128 for (j = fwt_to[i]; j < lwt_to[i]; j++) {
129 stream >> t;
130 weight[i][j] = t;
131 }
132 }
133
134 /* Load hidden layer to output units weights */
135 for (i = NUM_IN + NUM_HID; i < TOTAL; i++) {
136 for (j = fwt_to[i]; j < lwt_to[i]; j++) {
137 stream >> t;
138 weight[i][j] = t;
139 }
140 }
141
142 /* Load bias weights */
143 for (j = NUM_IN; j < TOTAL; j++) {
144 stream >> t;
145 bias[j] = t;
146 }
147 }
148
149 /* Initialize network */
init(void)150 void PsiPassOne::init(void) {
151 int i;
152
153 for (i = NUM_IN; i < TOTAL; i++)
154 if (!(weight[i] = (float *)calloc(TOTAL - NUM_OUT, sizeof(float))))
155 fail("init: Out of Memory!");
156
157 /* Connect input units to hidden layer */
158 for (i = NUM_IN; i < NUM_IN + NUM_HID; i++) {
159 fwt_to[i] = 0;
160 lwt_to[i] = NUM_IN;
161 }
162
163 /* Connect hidden units to output layer */
164 for (i = NUM_IN + NUM_HID; i < TOTAL; i++) {
165 fwt_to[i] = NUM_IN;
166 lwt_to[i] = NUM_IN + NUM_HID;
167 }
168 }
169
170 /* Make 1st level prediction averaged over specified weight sets */
predict()171 void PsiPassOne::predict() {
172 //int aa, i, j, k, n, winpos,ws;
173 int aa, j, winpos;
174 //char fname[80], predsst[MAXSEQLEN];
175 char *predsst;
176 //float avout[MAXSEQLEN][3], conf, confsum[MAXSEQLEN];
177 float **avout, conf, *confsum;
178
179 // Allocate buffers
180 // TODO: not good, memory is allocated in small chunks for avout
181 predsst = (char *)malloc(seqlen * sizeof(char));
182 avout = (float **)malloc(seqlen * sizeof(float *));
183 for (int i = 0; i < seqlen; ++i) {
184 avout[i] = (float *)malloc(3 * sizeof(float));
185 }
186 confsum = (float *)malloc(seqlen * sizeof(float));
187
188 for (winpos = 0; winpos < seqlen; winpos++)
189 avout[winpos][0] = avout[winpos][1] = avout[winpos][2] = confsum[winpos] = 0.0F;
190
191 foreach (const QString &wfName, weightFileNames) {
192 load_wts(qPrintable(wfName));
193
194 for (winpos = 0; winpos < seqlen; winpos++) {
195 for (j = 0; j < NUM_IN; j++)
196 activation[j] = 0.0;
197 for (j = WINL; j <= WINR; j++) {
198 if (j + winpos >= 0 && j + winpos < seqlen)
199 for (aa = 0; aa < 20; aa++)
200 activation[(j - WINL) * 21 + aa] = profile[j + winpos][aa] / 1000.0;
201 else
202 activation[(j - WINL) * 21 + 20] = 1.0;
203 }
204
205 compute_output();
206
207 conf = (2 * MAX(MAX(activation[TOTAL - NUM_OUT], activation[TOTAL - NUM_OUT + 1]), activation[TOTAL - NUM_OUT + 2]) - (activation[TOTAL - NUM_OUT] + activation[TOTAL - NUM_OUT + 1] + activation[TOTAL - NUM_OUT + 2]) + MIN(MIN(activation[TOTAL - NUM_OUT], activation[TOTAL - NUM_OUT + 1]), activation[TOTAL - NUM_OUT + 2]));
208
209 avout[winpos][0] += conf * activation[TOTAL - NUM_OUT];
210 avout[winpos][1] += conf * activation[TOTAL - NUM_OUT + 1];
211 avout[winpos][2] += conf * activation[TOTAL - NUM_OUT + 2];
212 confsum[winpos] += conf;
213 }
214 }
215
216 for (winpos = 0; winpos < seqlen; winpos++) {
217 avout[winpos][0] /= confsum[winpos];
218 avout[winpos][1] /= confsum[winpos];
219 avout[winpos][2] /= confsum[winpos];
220 if (avout[winpos][0] >= MAX(avout[winpos][1], avout[winpos][2]))
221 predsst[winpos] = 'C';
222 else if (avout[winpos][2] >= MAX(avout[winpos][0], avout[winpos][1]))
223 predsst[winpos] = 'E';
224 else
225 predsst[winpos] = 'H';
226 }
227 QString pFilePath = U2::AppContext::getAppSettings()->getUserAppsSettings()->getUserTemporaryDirPath() + QDir::separator() + "output.ss";
228 FILE *pFile = fopen(pFilePath.toLatin1().constData(), "w");
229 if (!pFile) {
230 fail("failed opening file for writing");
231 }
232
233 for (winpos = 0; winpos < seqlen; winpos++)
234 fprintf(pFile, "%4d %c %c %6.3f %6.3f %6.3f\n", winpos + 1, seq.constData()[winpos], predsst[winpos], avout[winpos][0], avout[winpos][1], avout[winpos][2]);
235 fclose(pFile);
236
237 // Deallocate buffers
238 free(predsst);
239 for (int i = 0; i < seqlen; ++i) {
240 free(avout[i]);
241 }
242 free(avout);
243 free(confsum);
244 }
245
246 #define BUFSIZE 256
247
248 /* Read PSI AA frequency data */
getmtx()249 int PsiPassOne::getmtx() {
250 int j, naa;
251
252 QTextStream stream(matrixFile);
253 qDebug("%s", qPrintable(matrixFile->fileName()));
254
255 stream >> naa;
256 if (naa == 0) {
257 fail("Bad mtx file - no sequence length!");
258 }
259
260 if (naa > MAXSEQLEN)
261 fail("Input sequence too long!");
262
263 stream >> seq;
264 if (seq.size() == 0) {
265 fail("Bad mtx file - no sequence!");
266 }
267
268 while (!stream.atEnd()) {
269 QByteArray line;
270 line = stream.readLine().toLatin1();
271 char *buf = line.data();
272 if (!strncmp(buf, "-32768 ", 7)) {
273 for (j = 0; j < naa; j++) {
274 if (sscanf(buf, "%*d%d%*d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%*d%d", &profile[j][ALA], &profile[j][CYS], &profile[j][ASP], &profile[j][GLU], &profile[j][PHE], &profile[j][GLY], &profile[j][HIS], &profile[j][ILE], &profile[j][LYS], &profile[j][LEU], &profile[j][MET], &profile[j][ASN], &profile[j][PRO], &profile[j][GLN], &profile[j][ARG], &profile[j][SER], &profile[j][THR], &profile[j][VAL], &profile[j][TRP], &profile[j][TYR]) != 20)
275 fail("Bad mtx format!");
276 line = stream.readLine().toLatin1();
277 if (line.size() == 0)
278 break;
279 buf = line.data();
280 }
281 }
282 }
283
284 return naa;
285 }
286
runPsiPass()287 int PsiPassOne::runPsiPass() {
288 seqlen = getmtx();
289
290 init();
291
292 predict();
293
294 return 0;
295 }
296