1 /*
2     vcflib C++ library for parsing and manipulating VCF files
3 
4     Copyright © 2010-2020 Erik Garrison
5     Copyright © 2015      Zev N. Kronenberg
6     Copyright © 2020      Pjotr Prins
7 
8     This software is published under the MIT License. See the LICENSE file.
9 */
10 
11 /*
12 
13 This program was created at:  Fri Apr 17 14:59:53 2015
14 This program was created by:  Zev N. Kronenberg
15 
16 
17 Contact: zev.kronenber@gmail.com
18 
19 Organization: Unviersity of Utah
20     School of Medicine
21     Salt Lake City, Utah
22 
23 
24 The MIT License (MIT)
25 
26 Copyright (c) <2015> <Zev N. Kronenberg>
27 
28 Permission is hereby granted, free of charge, to any person obtaining a copy
29 of this software and associated documentation files (the "Software"), to deal
30 in the Software without restriction, including without limitation the rights
31 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
32 copies of the Software, and to permit persons to whom the Software is
33 furnished to do so, subject to the following conditions:
34 
35 The above copyright notice and this permission notice shall be included in
36 all copies or substantial portions of the Software.
37 
38 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
39 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
40 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
41 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
42 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
43 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
44 THE SOFTWARE.
45 
46 
47 */
48 #include <fstream>
49 #include "split.h"
50 #include <vector>
51 #include <string>
52 #include <iostream>
53 #include <math.h>
54 #include <cmath>
55 #include <stdlib.h>
56 #include <time.h>
57 #include <stdio.h>
58 #include <unistd.h>
59 #include <stdlib.h>
60 
61 struct options{
62   std::string file;
63   int npermutation;
64   int nsuc;
65 }globalOpts;
66 
67 static const char *optString = "f:n:s:";
68 
69 using namespace std;
70 
71 
72 //-------------------------------   OPTIONS   --------------------------------
parseOpts(int argc,char ** argv)73 int parseOpts(int argc, char** argv)
74     {
75     int opt = 0;
76     globalOpts.file = "NA";
77 
78     globalOpts.nsuc         = 1;
79     globalOpts.npermutation = 1000;
80 
81     opt = getopt(argc, argv, optString);
82     while(opt != -1){
83       switch(opt){
84       case 'f':
85 	{
86 	  globalOpts.file =  optarg;
87 	  break;
88 	}
89       case 'n':
90 	{
91 	  globalOpts.npermutation = atoi(((string)optarg).c_str());
92 	  cerr << "INFO: permuteGPAT++ will do N permutations: " << globalOpts.npermutation << endl;
93 	  break;
94 	}
95       case 's':
96 	{
97 	  globalOpts.nsuc = atoi(((string)optarg).c_str());
98 	  cerr << "INFO: permuteGPAT++ will stop permutations after N successes: " << globalOpts.nsuc << endl;
99 	  break;
100 	}
101       case '?':
102 	{
103 	  break;
104 	}
105       }
106 
107       opt = getopt( argc, argv, optString );
108     }
109     return 1;
110     }
111 //------------------------------- SUBROUTINE --------------------------------
112 /*
113  Function input  : NA
114 
115  Function does   : prints help
116 
117  Function returns: NA
118 
119 */
printHelp()120 void printHelp()
121 {
122 
123   cerr << endl << endl;
124   cerr << "INFO: help" << endl;
125   cerr << "INFO: description:" << endl;
126   cerr << "     permuteGPAT++ is a method for adding empirical p-values to a GPAT++ score." << endl << endl ;
127   cerr << "     Currently permuteGPAT++ only supports wcFst, but will be extended.    " << endl ;
128   cerr << endl;
129   cerr << "OUTPUT: permuteGPAT++ will append three additional columns:" << endl;
130   cerr << "        1. The number of successes                         " << endl;
131   cerr << "        2. The number of trials                            " << endl;
132   cerr << "        3. The empirical p-value                           " << endl << endl;
133 
134   cerr << "INFO: usage:  permuteGPAT++ -f gpat.txt -n 5 -s 1 "<< endl;
135   cerr << endl;
136   cerr << "INFO: file:    f   -- argument: the input file     "<< endl;
137   cerr << "INFO: number:  n   -- argument: the number of permutations to run for each value [1000]" << endl;
138   cerr << "INFO: success: s   -- argument: stop permutations after \'s\' successes [1]"             << endl;
139 
140   cerr << endl << "Type: phenotype" << endl << endl;
141   cerr << endl;
142 
143 }
144 
145 //-------------------------------    MAIN     --------------------------------
146 /*
147  Comments:
148 */
149 
main(int argc,char ** argv)150 int main( int argc, char** argv)
151 {
152   if (argc == 2) {
153     string h_flag = argv[1];
154 
155     if (argc == 2 && (h_flag == "-h" || h_flag == "--help")) {
156       printHelp();
157       exit(1);
158     }
159   }
160   int parse = parseOpts(argc, argv);
161  if(globalOpts.file.compare("NA") == 0){
162    printHelp();
163    exit(1);
164  }
165 
166 
167  vector<double> data;
168 
169  ifstream gpat (globalOpts.file.c_str());
170 
171  string line;
172 
173  if(gpat.is_open()){
174 
175    while(getline(gpat, line)){
176      vector<string> region = split(line, "\t");
177      // will change for other output
178      double fst = atof(region[4].c_str());
179 
180      if(fst < 0){
181        fst = 0;
182      }
183 
184      data.push_back(fst);
185    }
186  }
187  else{
188    cerr << "FATAL: could not open file: " << globalOpts.file << endl;
189    exit(1);
190  }
191 
192  gpat.clear();
193  gpat.seekg(0, gpat.beg);
194 
195  cerr << "INFO: read values to permute: " << data.size() << endl;
196 
197  srand (time(NULL));
198 
199  if(gpat.is_open()){
200 
201    while(getline(gpat, line)){
202      vector<string> region = split(line, "\t");
203 
204      double value = atof(region[4].c_str());
205 
206      if(value < 0){
207        value = 0;
208      }
209 
210 
211      double suc   = 0;
212      double per   = 0;
213      int    datas = data.size();
214      double pv = (1.0 / globalOpts.npermutation);
215 
216      while( suc < globalOpts.nsuc && per < globalOpts.npermutation){
217        per += 1.0;
218 
219        int r = rand() % datas;
220 
221        if(value < data[r]){
222 	 suc += 1;
223        }
224      }
225      if(suc > 0){
226        pv = suc / per;
227      }
228      cout << line << "\t" << suc << "\t" << per << "\t" << pv << endl;
229    }
230 
231  }
232  else{
233    cerr << "FATAL: could not open file: " << globalOpts.file << endl;
234    exit(1);
235  }
236 
237 
238 return 0;
239 }
240