1 /*
2     vcflib C++ library for parsing and manipulating VCF files
3 
4     Copyright © 2010-2020 Erik Garrison
5     Copyright © 2020      Pjotr Prins
6 
7     This software is published under the MIT License. See the LICENSE file.
8 */
9 
10 /*
11 
12 This program was created at:  Fri Apr 17 14:59:53 2015
13 This program was created by:  Zev N. Kronenberg
14 
15 
16 Contact: zev.kronenber@gmail.com
17 
18 Organization: Unviersity of Utah
19     School of Medicine
20     Salt Lake City, Utah
21 
22 
23 The MIT License (MIT)
24 
25 Copyright (c) <2015> <Zev N. Kronenberg>
26 
27 Permission is hereby granted, free of charge, to any person obtaining a copy
28 of this software and associated documentation files (the "Software"), to deal
29 in the Software without restriction, including without limitation the rights
30 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
31 copies of the Software, and to permit persons to whom the Software is
32 furnished to do so, subject to the following conditions:
33 
34 The above copyright notice and this permission notice shall be included in
35 all copies or substantial portions of the Software.
36 
37 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
43 THE SOFTWARE.
44 
45 
46 */
47 #include <fstream>
48 #include "split.h"
49 #include <vector>
50 #include <string>
51 #include <iostream>
52 #include <math.h>
53 #include <cmath>
54 #include <stdlib.h>
55 #include <time.h>
56 #include <stdio.h>
57 #include <unistd.h>
58 #include <stdlib.h>
59 #include "gpatInfo.hpp"
60 
61 struct options{
62   std::string file;
63   int npermutation;
64   int nsuc;
65 }globalOpts;
66 
67 static const char *optString = "f:n:s:";
68 
69 using namespace std;
70 
71 
72 //-------------------------------   OPTIONS   --------------------------------
parseOpts(int argc,char ** argv)73 int parseOpts(int argc, char** argv)
74     {
75     int opt = 0;
76     globalOpts.file = "NA";
77 
78     globalOpts.nsuc         = 1;
79     globalOpts.npermutation = 1000;
80 
81     opt = getopt(argc, argv, optString);
82     while(opt != -1){
83       switch(opt){
84       case 'f':
85 	{
86 	  globalOpts.file =  optarg;
87 	  break;
88 	}
89       case 'n':
90 	{
91 	  globalOpts.npermutation = atoi(((string)optarg).c_str());
92 	  cerr << "INFO: permuteGPAT++ will do N permutations: " << globalOpts.npermutation << endl;
93 	  break;
94 	}
95       case 's':
96 	{
97 	  globalOpts.nsuc = atoi(((string)optarg).c_str());
98 	  cerr << "INFO: permuteGPAT++ will stop permutations after N successes: " << globalOpts.nsuc << endl;
99 	  break;
100 	}
101       case '?':
102 	{
103 	  break;
104 	}
105       }
106 
107       opt = getopt( argc, argv, optString );
108     }
109     return 1;
110     }
111 //------------------------------- SUBROUTINE --------------------------------
112 /*
113  Function input  : NA
114 
115  Function does   : prints help
116 
117  Function returns: NA
118 
119 */
printHelp()120 void printHelp()
121 {
122 
123   cerr << endl << endl;
124   cerr << "INFO: help" << endl;
125   cerr << "INFO: description:" << endl;
126   cerr << "     permuteGPAT++ is a method for adding empirical p-values to a GPAT++ score." << endl ;
127   cerr << "     Currently permuteGPAT++ only supports wcFst, but will be extended.    " << endl ;
128   cerr << endl;
129   cerr << "OUTPUT: permuteGPAT++ will append three additional columns:" << endl;
130   cerr << "        1. The number of successes                         " << endl;
131   cerr << "        2. The number of trials                            " << endl;
132   cerr << "        3. The empirical p-value                           " << endl << endl;
133 
134   cerr << "INFO: usage:  permuteGPAT++ -f gpat.txt -n 5 -s 1 "<< endl;
135   cerr << endl;
136   cerr << "INFO: file:    f   -- argument: the input file     "<< endl;
137   cerr << "INFO: number:  n   -- argument: the number of permutations to run for each value [1000]" << endl;
138   cerr << "INFO: success: s   -- argument: stop permutations after \'s\' successes [1]"             << endl;
139 
140   cerr << endl;
141   printVersion();
142 
143 }
144 
145 //-------------------------------    MAIN     --------------------------------
146 /*
147  Comments:
148 */
149 
main(int argc,char ** argv)150 int main( int argc, char** argv)
151 {
152 int parse = parseOpts(argc, argv);
153 
154  if(globalOpts.file.compare("NA") == 0){
155    cerr << "FATAL: no file was provided" << endl;
156    printHelp();
157    exit(1);
158  }
159 
160 
161  vector<double> data;
162 
163  ifstream gpat (globalOpts.file.c_str());
164 
165  string line;
166 
167  if(gpat.is_open()){
168 
169    while(getline(gpat, line)){
170      vector<string> region = split(line, "\t");
171      // will change for other output
172      double fst = atof(region[4].c_str());
173 
174      if(fst < 0){
175        fst = 0;
176      }
177 
178      data.push_back(fst);
179    }
180  }
181  else{
182    cerr << "FATAL: could not open file: " << globalOpts.file << endl;
183    exit(1);
184  }
185 
186  gpat.clear();
187  gpat.seekg(0, gpat.beg);
188 
189  cerr << "INFO: read values to permute: " << data.size() << endl;
190 
191  srand (time(NULL));
192 
193  if(gpat.is_open()){
194 
195    while(getline(gpat, line)){
196      vector<string> region = split(line, "\t");
197 
198      double value = atof(region[4].c_str());
199 
200      if(value < 0){
201        value = 0;
202      }
203 
204 
205      double suc   = 0;
206      double per   = 0;
207      int    datas = data.size();
208      double pv = (1.0 / globalOpts.npermutation);
209 
210      while( suc < globalOpts.nsuc && per < globalOpts.npermutation){
211        per += 1.0;
212 
213        int r = rand() % datas;
214 
215        if(value < data[r]){
216 	 suc += 1;
217        }
218      }
219      if(suc > 0){
220        pv = suc / per;
221      }
222      cout << line << "\t" << suc << "\t" << per << "\t" << pv << endl;
223    }
224  }
225  else{
226    cerr << "FATAL: could not open file: " << globalOpts.file << endl;
227    exit(1);
228  }
229 
230 
231 return 0;
232 }
233