1 /*
2 vcflib C++ library for parsing and manipulating VCF files
3
4 Copyright © 2010-2020 Erik Garrison
5 Copyright © 2020 Pjotr Prins
6
7 This software is published under the MIT License. See the LICENSE file.
8 */
9
10 /*
11
12 This program was created at: Fri Apr 17 14:59:53 2015
13 This program was created by: Zev N. Kronenberg
14
15
16 Contact: zev.kronenber@gmail.com
17
18 Organization: Unviersity of Utah
19 School of Medicine
20 Salt Lake City, Utah
21
22
23 The MIT License (MIT)
24
25 Copyright (c) <2015> <Zev N. Kronenberg>
26
27 Permission is hereby granted, free of charge, to any person obtaining a copy
28 of this software and associated documentation files (the "Software"), to deal
29 in the Software without restriction, including without limitation the rights
30 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
31 copies of the Software, and to permit persons to whom the Software is
32 furnished to do so, subject to the following conditions:
33
34 The above copyright notice and this permission notice shall be included in
35 all copies or substantial portions of the Software.
36
37 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
43 THE SOFTWARE.
44
45
46 */
47 #include <fstream>
48 #include "split.h"
49 #include <vector>
50 #include <string>
51 #include <iostream>
52 #include <math.h>
53 #include <cmath>
54 #include <stdlib.h>
55 #include <time.h>
56 #include <stdio.h>
57 #include <unistd.h>
58 #include <stdlib.h>
59 #include "gpatInfo.hpp"
60
61 struct options{
62 std::string file;
63 int npermutation;
64 int nsuc;
65 }globalOpts;
66
67 static const char *optString = "f:n:s:";
68
69 using namespace std;
70
71
72 //------------------------------- OPTIONS --------------------------------
parseOpts(int argc,char ** argv)73 int parseOpts(int argc, char** argv)
74 {
75 int opt = 0;
76 globalOpts.file = "NA";
77
78 globalOpts.nsuc = 1;
79 globalOpts.npermutation = 1000;
80
81 opt = getopt(argc, argv, optString);
82 while(opt != -1){
83 switch(opt){
84 case 'f':
85 {
86 globalOpts.file = optarg;
87 break;
88 }
89 case 'n':
90 {
91 globalOpts.npermutation = atoi(((string)optarg).c_str());
92 cerr << "INFO: permuteGPAT++ will do N permutations: " << globalOpts.npermutation << endl;
93 break;
94 }
95 case 's':
96 {
97 globalOpts.nsuc = atoi(((string)optarg).c_str());
98 cerr << "INFO: permuteGPAT++ will stop permutations after N successes: " << globalOpts.nsuc << endl;
99 break;
100 }
101 case '?':
102 {
103 break;
104 }
105 }
106
107 opt = getopt( argc, argv, optString );
108 }
109 return 1;
110 }
111 //------------------------------- SUBROUTINE --------------------------------
112 /*
113 Function input : NA
114
115 Function does : prints help
116
117 Function returns: NA
118
119 */
printHelp()120 void printHelp()
121 {
122
123 cerr << endl << endl;
124 cerr << "INFO: help" << endl;
125 cerr << "INFO: description:" << endl;
126 cerr << " permuteGPAT++ is a method for adding empirical p-values to a GPAT++ score." << endl ;
127 cerr << " Currently permuteGPAT++ only supports wcFst, but will be extended. " << endl ;
128 cerr << endl;
129 cerr << "OUTPUT: permuteGPAT++ will append three additional columns:" << endl;
130 cerr << " 1. The number of successes " << endl;
131 cerr << " 2. The number of trials " << endl;
132 cerr << " 3. The empirical p-value " << endl << endl;
133
134 cerr << "INFO: usage: permuteGPAT++ -f gpat.txt -n 5 -s 1 "<< endl;
135 cerr << endl;
136 cerr << "INFO: file: f -- argument: the input file "<< endl;
137 cerr << "INFO: number: n -- argument: the number of permutations to run for each value [1000]" << endl;
138 cerr << "INFO: success: s -- argument: stop permutations after \'s\' successes [1]" << endl;
139
140 cerr << endl;
141 printVersion();
142
143 }
144
145 //------------------------------- MAIN --------------------------------
146 /*
147 Comments:
148 */
149
main(int argc,char ** argv)150 int main( int argc, char** argv)
151 {
152 int parse = parseOpts(argc, argv);
153
154 if(globalOpts.file.compare("NA") == 0){
155 cerr << "FATAL: no file was provided" << endl;
156 printHelp();
157 exit(1);
158 }
159
160
161 vector<double> data;
162
163 ifstream gpat (globalOpts.file.c_str());
164
165 string line;
166
167 if(gpat.is_open()){
168
169 while(getline(gpat, line)){
170 vector<string> region = split(line, "\t");
171 // will change for other output
172 double fst = atof(region[4].c_str());
173
174 if(fst < 0){
175 fst = 0;
176 }
177
178 data.push_back(fst);
179 }
180 }
181 else{
182 cerr << "FATAL: could not open file: " << globalOpts.file << endl;
183 exit(1);
184 }
185
186 gpat.clear();
187 gpat.seekg(0, gpat.beg);
188
189 cerr << "INFO: read values to permute: " << data.size() << endl;
190
191 srand (time(NULL));
192
193 if(gpat.is_open()){
194
195 while(getline(gpat, line)){
196 vector<string> region = split(line, "\t");
197
198 double value = atof(region[4].c_str());
199
200 if(value < 0){
201 value = 0;
202 }
203
204
205 double suc = 0;
206 double per = 0;
207 int datas = data.size();
208 double pv = (1.0 / globalOpts.npermutation);
209
210 while( suc < globalOpts.nsuc && per < globalOpts.npermutation){
211 per += 1.0;
212
213 int r = rand() % datas;
214
215 if(value < data[r]){
216 suc += 1;
217 }
218 }
219 if(suc > 0){
220 pv = suc / per;
221 }
222 cout << line << "\t" << suc << "\t" << per << "\t" << pv << endl;
223 }
224 }
225 else{
226 cerr << "FATAL: could not open file: " << globalOpts.file << endl;
227 exit(1);
228 }
229
230
231 return 0;
232 }
233