1 /*
2 
3 This program was created at:  Fri Apr 17 14:59:53 2015
4 This program was created by:  Zev N. Kronenberg
5 
6 
7 Contact: zev.kronenber@gmail.com
8 
9 Organization: Unviersity of Utah
10     School of Medicine
11     Salt Lake City, Utah
12 
13 
14 The MIT License (MIT)
15 
16 Copyright (c) <2015> <Zev N. Kronenberg>
17 
18 Permission is hereby granted, free of charge, to any person obtaining a copy
19 of this software and associated documentation files (the "Software"), to deal
20 in the Software without restriction, including without limitation the rights
21 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22 copies of the Software, and to permit persons to whom the Software is
23 furnished to do so, subject to the following conditions:
24 
25 The above copyright notice and this permission notice shall be included in
26 all copies or substantial portions of the Software.
27 
28 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
33 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
34 THE SOFTWARE.
35 
36 
37 */
38 #include <fstream>
39 #include "split.h"
40 #include <vector>
41 #include <string>
42 #include <iostream>
43 #include <math.h>
44 #include <cmath>
45 #include <stdlib.h>
46 #include <time.h>
47 #include <stdio.h>
48 #include <unistd.h>
49 #include <stdlib.h>
50 #include "gpatInfo.hpp"
51 
52 struct options{
53   std::string file;
54   int npermutation;
55   int nsuc;
56 }globalOpts;
57 
58 static const char *optString = "f:n:s:";
59 
60 using namespace std;
61 
62 
63 //-------------------------------   OPTIONS   --------------------------------
parseOpts(int argc,char ** argv)64 int parseOpts(int argc, char** argv)
65     {
66     int opt = 0;
67     globalOpts.file = "NA";
68 
69     globalOpts.nsuc         = 1;
70     globalOpts.npermutation = 1000;
71 
72     opt = getopt(argc, argv, optString);
73     while(opt != -1){
74       switch(opt){
75       case 'f':
76 	{
77 	  globalOpts.file =  optarg;
78 	  break;
79 	}
80       case 'n':
81 	{
82 	  globalOpts.npermutation = atoi(((string)optarg).c_str());
83 	  cerr << "INFO: permuteGPAT++ will do N permutations: " << globalOpts.npermutation << endl;
84 	  break;
85 	}
86       case 's':
87 	{
88 	  globalOpts.nsuc = atoi(((string)optarg).c_str());
89 	  cerr << "INFO: permuteGPAT++ will stop permutations after N successes: " << globalOpts.nsuc << endl;
90 	  break;
91 	}
92       case '?':
93 	{
94 	  break;
95 	}
96       }
97 
98       opt = getopt( argc, argv, optString );
99     }
100     return 1;
101     }
102 //------------------------------- SUBROUTINE --------------------------------
103 /*
104  Function input  : NA
105 
106  Function does   : prints help
107 
108  Function returns: NA
109 
110 */
printHelp()111 void printHelp()
112 {
113 
114   cerr << endl << endl;
115   cerr << "INFO: help" << endl;
116   cerr << "INFO: description:" << endl;
117   cerr << "     permuteGPAT++ is a method for adding empirical p-values to a GPAT++ score." << endl ;
118   cerr << "     Currently permuteGPAT++ only supports wcFst, but will be extended.    " << endl ;
119   cerr << endl;
120   cerr << "OUTPUT: permuteGPAT++ will append three additional columns:" << endl;
121   cerr << "        1. The number of successes                         " << endl;
122   cerr << "        2. The number of trials                            " << endl;
123   cerr << "        3. The empirical p-value                           " << endl << endl;
124 
125   cerr << "INFO: usage:  permuteGPAT++ -f gpat.txt -n 5 -s 1 "<< endl;
126   cerr << endl;
127   cerr << "INFO: file:    f   -- argument: the input file     "<< endl;
128   cerr << "INFO: number:  n   -- argument: the number of permutations to run for each value [1000]" << endl;
129   cerr << "INFO: success: s   -- argument: stop permutations after \'s\' successes [1]"             << endl;
130 
131   cerr << endl;
132   printVersion();
133 
134 }
135 
136 //-------------------------------    MAIN     --------------------------------
137 /*
138  Comments:
139 */
140 
main(int argc,char ** argv)141 int main( int argc, char** argv)
142 {
143 int parse = parseOpts(argc, argv);
144 
145  if(globalOpts.file.compare("NA") == 0){
146    cerr << "FATAL: no file was provided" << endl;
147    printHelp();
148    exit(1);
149  }
150 
151 
152  vector<double> data;
153 
154  ifstream gpat (globalOpts.file.c_str());
155 
156  string line;
157 
158  if(gpat.is_open()){
159 
160    while(getline(gpat, line)){
161      vector<string> region = split(line, "\t");
162      // will change for other output
163      double fst = atof(region[4].c_str());
164 
165      if(fst < 0){
166        fst = 0;
167      }
168 
169      data.push_back(fst);
170    }
171  }
172  else{
173    cerr << "FATAL: could not open file: " << globalOpts.file << endl;
174    exit(1);
175  }
176 
177  gpat.clear();
178  gpat.seekg(0, gpat.beg);
179 
180  cerr << "INFO: read values to permute: " << data.size() << endl;
181 
182  srand (time(NULL));
183 
184  if(gpat.is_open()){
185 
186    while(getline(gpat, line)){
187      vector<string> region = split(line, "\t");
188 
189      double value = atof(region[4].c_str());
190 
191      if(value < 0){
192        value = 0;
193      }
194 
195 
196      double suc   = 0;
197      double per   = 0;
198      int    datas = data.size();
199      double pv = (1.0 / globalOpts.npermutation);
200 
201      while( suc < globalOpts.nsuc && per < globalOpts.npermutation){
202        per += 1.0;
203 
204        int r = rand() % datas;
205 
206        if(value < data[r]){
207 	 suc += 1;
208        }
209      }
210      if(suc > 0){
211        pv = suc / per;
212      }
213      cout << line << "\t" << suc << "\t" << per << "\t" << pv << endl;
214    }
215  }
216  else{
217    cerr << "FATAL: could not open file: " << globalOpts.file << endl;
218    exit(1);
219  }
220 
221 
222 return 0;
223 }
224