1 /*
2 
3 This program was created at:  Fri Apr 17 14:59:53 2015
4 This program was created by:  Zev N. Kronenberg
5 
6 
7 Contact: zev.kronenber@gmail.com
8 
9 Organization: Unviersity of Utah
10     School of Medicine
11     Salt Lake City, Utah
12 
13 
14 The MIT License (MIT)
15 
16 Copyright (c) <2015> <Zev N. Kronenberg>
17 
18 Permission is hereby granted, free of charge, to any person obtaining a copy
19 of this software and associated documentation files (the "Software"), to deal
20 in the Software without restriction, including without limitation the rights
21 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22 copies of the Software, and to permit persons to whom the Software is
23 furnished to do so, subject to the following conditions:
24 
25 The above copyright notice and this permission notice shall be included in
26 all copies or substantial portions of the Software.
27 
28 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
33 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
34 THE SOFTWARE.
35 
36 
37 */
38 #include <fstream>
39 #include "split.h"
40 #include <vector>
41 #include <string>
42 #include <iostream>
43 #include <math.h>
44 #include <cmath>
45 #include <stdlib.h>
46 #include <time.h>
47 #include <stdio.h>
48 #include <unistd.h>
49 #include <stdlib.h>
50 
51 struct options{
52   std::string file;
53   int npermutation;
54   int nsuc;
55 }globalOpts;
56 
57 static const char *optString = "f:n:s:";
58 
59 using namespace std;
60 
61 
62 //-------------------------------   OPTIONS   --------------------------------
parseOpts(int argc,char ** argv)63 int parseOpts(int argc, char** argv)
64     {
65     int opt = 0;
66     globalOpts.file = "NA";
67 
68     globalOpts.nsuc         = 1;
69     globalOpts.npermutation = 1000;
70 
71     opt = getopt(argc, argv, optString);
72     while(opt != -1){
73       switch(opt){
74       case 'f':
75 	{
76 	  globalOpts.file =  optarg;
77 	  break;
78 	}
79       case 'n':
80 	{
81 	  globalOpts.npermutation = atoi(((string)optarg).c_str());
82 	  cerr << "INFO: permuteGPAT++ will do N permutations: " << globalOpts.npermutation << endl;
83 	  break;
84 	}
85       case 's':
86 	{
87 	  globalOpts.nsuc = atoi(((string)optarg).c_str());
88 	  cerr << "INFO: permuteGPAT++ will stop permutations after N successes: " << globalOpts.nsuc << endl;
89 	  break;
90 	}
91       case '?':
92 	{
93 	  break;
94 	}
95       }
96 
97       opt = getopt( argc, argv, optString );
98     }
99     return 1;
100     }
101 //------------------------------- SUBROUTINE --------------------------------
102 /*
103  Function input  : NA
104 
105  Function does   : prints help
106 
107  Function returns: NA
108 
109 */
printHelp()110 void printHelp()
111 {
112 
113   cerr << endl << endl;
114   cerr << "INFO: help" << endl;
115   cerr << "INFO: description:" << endl;
116   cerr << "     permuteGPAT++ is a method for adding empirical p-values to a GPAT++ score." << endl ;
117   cerr << "     Currently permuteGPAT++ only supports wcFst, but will be extended.    " << endl ;
118   cerr << endl;
119   cerr << "OUTPUT: permuteGPAT++ will append three additional columns:" << endl;
120   cerr << "        1. The number of successes                         " << endl;
121   cerr << "        2. The number of trials                            " << endl;
122   cerr << "        3. The empirical p-value                           " << endl << endl;
123 
124   cerr << "INFO: usage:  permuteGPAT++ -f gpat.txt -n 5 -s 1 "<< endl;
125   cerr << endl;
126   cerr << "INFO: file:    f   -- argument: the input file     "<< endl;
127   cerr << "INFO: number:  n   -- argument: the number of permutations to run for each value [1000]" << endl;
128   cerr << "INFO: success: s   -- argument: stop permutations after \'s\' successes [1]"             << endl;
129 
130 
131   cerr << endl;
132 
133 }
134 
135 //-------------------------------    MAIN     --------------------------------
136 /*
137  Comments:
138 */
139 
main(int argc,char ** argv)140 int main( int argc, char** argv)
141 {
142 int parse = parseOpts(argc, argv);
143 
144  if(globalOpts.file.compare("NA") == 0){
145    cerr << "FATAL: no file was provided" << endl;
146    printHelp();
147    exit(1);
148  }
149 
150 
151  vector<double> data;
152 
153  ifstream gpat (globalOpts.file.c_str());
154 
155  string line;
156 
157  if(gpat.is_open()){
158 
159    while(getline(gpat, line)){
160      vector<string> region = split(line, "\t");
161      // will change for other output
162      double fst = atof(region[4].c_str());
163 
164      if(fst < 0){
165        fst = 0;
166      }
167 
168      data.push_back(fst);
169    }
170  }
171  else{
172    cerr << "FATAL: could not open file: " << globalOpts.file << endl;
173    exit(1);
174  }
175 
176  gpat.clear();
177  gpat.seekg(0, gpat.beg);
178 
179  cerr << "INFO: read values to permute: " << data.size() << endl;
180 
181  srand (time(NULL));
182 
183  if(gpat.is_open()){
184 
185    while(getline(gpat, line)){
186      vector<string> region = split(line, "\t");
187 
188      double value = atof(region[4].c_str());
189 
190      if(value < 0){
191        value = 0;
192      }
193 
194 
195      double suc   = 0;
196      double per   = 0;
197      int    datas = data.size();
198      double pv = (1.0 / globalOpts.npermutation);
199 
200      while( suc < globalOpts.nsuc && per < globalOpts.npermutation){
201        per += 1.0;
202 
203        int r = rand() % datas;
204 
205        if(value < data[r]){
206 	 suc += 1;
207        }
208      }
209      if(suc > 0){
210        pv = suc / per;
211      }
212      cout << line << "\t" << suc << "\t" << per << "\t" << pv << endl;
213    }
214 
215  }
216  else{
217    cerr << "FATAL: could not open file: " << globalOpts.file << endl;
218    exit(1);
219  }
220 
221 
222 return 0;
223 }
224