1 /*
2 vcflib C++ library for parsing and manipulating VCF files
3
4 Copyright © 2010-2020 Erik Garrison
5 Copyright © 2015 Zev N. Kronenberg
6 Copyright © 2020 Pjotr Prins
7
8 This software is published under the MIT License. See the LICENSE file.
9 */
10
11 /*
12
13 This program was created at: Fri Apr 17 14:59:53 2015
14 This program was created by: Zev N. Kronenberg
15
16
17 Contact: zev.kronenber@gmail.com
18
19 Organization: Unviersity of Utah
20 School of Medicine
21 Salt Lake City, Utah
22
23
24 The MIT License (MIT)
25
26 Copyright (c) <2015> <Zev N. Kronenberg>
27
28 Permission is hereby granted, free of charge, to any person obtaining a copy
29 of this software and associated documentation files (the "Software"), to deal
30 in the Software without restriction, including without limitation the rights
31 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
32 copies of the Software, and to permit persons to whom the Software is
33 furnished to do so, subject to the following conditions:
34
35 The above copyright notice and this permission notice shall be included in
36 all copies or substantial portions of the Software.
37
38 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
39 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
40 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
41 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
42 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
43 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
44 THE SOFTWARE.
45
46
47 */
48 #include <fstream>
49 #include "split.h"
50 #include <vector>
51 #include <string>
52 #include <iostream>
53 #include <math.h>
54 #include <cmath>
55 #include <stdlib.h>
56 #include <time.h>
57 #include <stdio.h>
58 #include <unistd.h>
59 #include <stdlib.h>
60
61 struct options{
62 std::string file;
63 int npermutation;
64 int nsuc;
65 }globalOpts;
66
67 static const char *optString = "f:n:s:";
68
69 using namespace std;
70
71
72 //------------------------------- OPTIONS --------------------------------
parseOpts(int argc,char ** argv)73 int parseOpts(int argc, char** argv)
74 {
75 int opt = 0;
76 globalOpts.file = "NA";
77
78 globalOpts.nsuc = 1;
79 globalOpts.npermutation = 1000;
80
81 opt = getopt(argc, argv, optString);
82 while(opt != -1){
83 switch(opt){
84 case 'f':
85 {
86 globalOpts.file = optarg;
87 break;
88 }
89 case 'n':
90 {
91 globalOpts.npermutation = atoi(((string)optarg).c_str());
92 cerr << "INFO: permuteGPAT++ will do N permutations: " << globalOpts.npermutation << endl;
93 break;
94 }
95 case 's':
96 {
97 globalOpts.nsuc = atoi(((string)optarg).c_str());
98 cerr << "INFO: permuteGPAT++ will stop permutations after N successes: " << globalOpts.nsuc << endl;
99 break;
100 }
101 case '?':
102 {
103 break;
104 }
105 }
106
107 opt = getopt( argc, argv, optString );
108 }
109 return 1;
110 }
111 //------------------------------- SUBROUTINE --------------------------------
112 /*
113 Function input : NA
114
115 Function does : prints help
116
117 Function returns: NA
118
119 */
printHelp()120 void printHelp()
121 {
122
123 cerr << endl << endl;
124 cerr << "INFO: help" << endl;
125 cerr << "INFO: description:" << endl;
126 cerr << " permuteGPAT++ is a method for adding empirical p-values to a GPAT++ score." << endl << endl ;
127 cerr << " Currently permuteGPAT++ only supports wcFst, but will be extended. " << endl ;
128 cerr << endl;
129 cerr << "OUTPUT: permuteGPAT++ will append three additional columns:" << endl;
130 cerr << " 1. The number of successes " << endl;
131 cerr << " 2. The number of trials " << endl;
132 cerr << " 3. The empirical p-value " << endl << endl;
133
134 cerr << "INFO: usage: permuteGPAT++ -f gpat.txt -n 5 -s 1 "<< endl;
135 cerr << endl;
136 cerr << "INFO: file: f -- argument: the input file "<< endl;
137 cerr << "INFO: number: n -- argument: the number of permutations to run for each value [1000]" << endl;
138 cerr << "INFO: success: s -- argument: stop permutations after \'s\' successes [1]" << endl;
139
140 cerr << endl << "Type: phenotype" << endl << endl;
141 cerr << endl;
142
143 }
144
145 //------------------------------- MAIN --------------------------------
146 /*
147 Comments:
148 */
149
main(int argc,char ** argv)150 int main( int argc, char** argv)
151 {
152 if (argc == 2) {
153 string h_flag = argv[1];
154
155 if (argc == 2 && (h_flag == "-h" || h_flag == "--help")) {
156 printHelp();
157 exit(1);
158 }
159 }
160 int parse = parseOpts(argc, argv);
161 if(globalOpts.file.compare("NA") == 0){
162 printHelp();
163 exit(1);
164 }
165
166
167 vector<double> data;
168
169 ifstream gpat (globalOpts.file.c_str());
170
171 string line;
172
173 if(gpat.is_open()){
174
175 while(getline(gpat, line)){
176 vector<string> region = split(line, "\t");
177 // will change for other output
178 double fst = atof(region[4].c_str());
179
180 if(fst < 0){
181 fst = 0;
182 }
183
184 data.push_back(fst);
185 }
186 }
187 else{
188 cerr << "FATAL: could not open file: " << globalOpts.file << endl;
189 exit(1);
190 }
191
192 gpat.clear();
193 gpat.seekg(0, gpat.beg);
194
195 cerr << "INFO: read values to permute: " << data.size() << endl;
196
197 srand (time(NULL));
198
199 if(gpat.is_open()){
200
201 while(getline(gpat, line)){
202 vector<string> region = split(line, "\t");
203
204 double value = atof(region[4].c_str());
205
206 if(value < 0){
207 value = 0;
208 }
209
210
211 double suc = 0;
212 double per = 0;
213 int datas = data.size();
214 double pv = (1.0 / globalOpts.npermutation);
215
216 while( suc < globalOpts.nsuc && per < globalOpts.npermutation){
217 per += 1.0;
218
219 int r = rand() % datas;
220
221 if(value < data[r]){
222 suc += 1;
223 }
224 }
225 if(suc > 0){
226 pv = suc / per;
227 }
228 cout << line << "\t" << suc << "\t" << per << "\t" << pv << endl;
229 }
230
231 }
232 else{
233 cerr << "FATAL: could not open file: " << globalOpts.file << endl;
234 exit(1);
235 }
236
237
238 return 0;
239 }
240