1 /*
2
3 This program was created at: Fri Apr 17 14:59:53 2015
4 This program was created by: Zev N. Kronenberg
5
6
7 Contact: zev.kronenber@gmail.com
8
9 Organization: Unviersity of Utah
10 School of Medicine
11 Salt Lake City, Utah
12
13
14 The MIT License (MIT)
15
16 Copyright (c) <2015> <Zev N. Kronenberg>
17
18 Permission is hereby granted, free of charge, to any person obtaining a copy
19 of this software and associated documentation files (the "Software"), to deal
20 in the Software without restriction, including without limitation the rights
21 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22 copies of the Software, and to permit persons to whom the Software is
23 furnished to do so, subject to the following conditions:
24
25 The above copyright notice and this permission notice shall be included in
26 all copies or substantial portions of the Software.
27
28 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
33 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
34 THE SOFTWARE.
35
36
37 */
38 #include <fstream>
39 #include "split.h"
40 #include <vector>
41 #include <string>
42 #include <iostream>
43 #include <math.h>
44 #include <cmath>
45 #include <stdlib.h>
46 #include <time.h>
47 #include <stdio.h>
48 #include <unistd.h>
49 #include <stdlib.h>
50 #include "gpatInfo.hpp"
51
52 struct options{
53 std::string file;
54 int npermutation;
55 int nsuc;
56 }globalOpts;
57
58 static const char *optString = "f:n:s:";
59
60 using namespace std;
61
62
63 //------------------------------- OPTIONS --------------------------------
parseOpts(int argc,char ** argv)64 int parseOpts(int argc, char** argv)
65 {
66 int opt = 0;
67 globalOpts.file = "NA";
68
69 globalOpts.nsuc = 1;
70 globalOpts.npermutation = 1000;
71
72 opt = getopt(argc, argv, optString);
73 while(opt != -1){
74 switch(opt){
75 case 'f':
76 {
77 globalOpts.file = optarg;
78 break;
79 }
80 case 'n':
81 {
82 globalOpts.npermutation = atoi(((string)optarg).c_str());
83 cerr << "INFO: permuteGPAT++ will do N permutations: " << globalOpts.npermutation << endl;
84 break;
85 }
86 case 's':
87 {
88 globalOpts.nsuc = atoi(((string)optarg).c_str());
89 cerr << "INFO: permuteGPAT++ will stop permutations after N successes: " << globalOpts.nsuc << endl;
90 break;
91 }
92 case '?':
93 {
94 break;
95 }
96 }
97
98 opt = getopt( argc, argv, optString );
99 }
100 return 1;
101 }
102 //------------------------------- SUBROUTINE --------------------------------
103 /*
104 Function input : NA
105
106 Function does : prints help
107
108 Function returns: NA
109
110 */
printHelp()111 void printHelp()
112 {
113
114 cerr << endl << endl;
115 cerr << "INFO: help" << endl;
116 cerr << "INFO: description:" << endl;
117 cerr << " permuteGPAT++ is a method for adding empirical p-values to a GPAT++ score." << endl ;
118 cerr << " Currently permuteGPAT++ only supports wcFst, but will be extended. " << endl ;
119 cerr << endl;
120 cerr << "OUTPUT: permuteGPAT++ will append three additional columns:" << endl;
121 cerr << " 1. The number of successes " << endl;
122 cerr << " 2. The number of trials " << endl;
123 cerr << " 3. The empirical p-value " << endl << endl;
124
125 cerr << "INFO: usage: permuteGPAT++ -f gpat.txt -n 5 -s 1 "<< endl;
126 cerr << endl;
127 cerr << "INFO: file: f -- argument: the input file "<< endl;
128 cerr << "INFO: number: n -- argument: the number of permutations to run for each value [1000]" << endl;
129 cerr << "INFO: success: s -- argument: stop permutations after \'s\' successes [1]" << endl;
130
131 cerr << endl;
132 printVersion();
133
134 }
135
136 //------------------------------- MAIN --------------------------------
137 /*
138 Comments:
139 */
140
main(int argc,char ** argv)141 int main( int argc, char** argv)
142 {
143 int parse = parseOpts(argc, argv);
144
145 if(globalOpts.file.compare("NA") == 0){
146 cerr << "FATAL: no file was provided" << endl;
147 printHelp();
148 exit(1);
149 }
150
151
152 vector<double> data;
153
154 ifstream gpat (globalOpts.file.c_str());
155
156 string line;
157
158 if(gpat.is_open()){
159
160 while(getline(gpat, line)){
161 vector<string> region = split(line, "\t");
162 // will change for other output
163 double fst = atof(region[4].c_str());
164
165 if(fst < 0){
166 fst = 0;
167 }
168
169 data.push_back(fst);
170 }
171 }
172 else{
173 cerr << "FATAL: could not open file: " << globalOpts.file << endl;
174 exit(1);
175 }
176
177 gpat.clear();
178 gpat.seekg(0, gpat.beg);
179
180 cerr << "INFO: read values to permute: " << data.size() << endl;
181
182 srand (time(NULL));
183
184 if(gpat.is_open()){
185
186 while(getline(gpat, line)){
187 vector<string> region = split(line, "\t");
188
189 double value = atof(region[4].c_str());
190
191 if(value < 0){
192 value = 0;
193 }
194
195
196 double suc = 0;
197 double per = 0;
198 int datas = data.size();
199 double pv = (1.0 / globalOpts.npermutation);
200
201 while( suc < globalOpts.nsuc && per < globalOpts.npermutation){
202 per += 1.0;
203
204 int r = rand() % datas;
205
206 if(value < data[r]){
207 suc += 1;
208 }
209 }
210 if(suc > 0){
211 pv = suc / per;
212 }
213 cout << line << "\t" << suc << "\t" << per << "\t" << pv << endl;
214 }
215 }
216 else{
217 cerr << "FATAL: could not open file: " << globalOpts.file << endl;
218 exit(1);
219 }
220
221
222 return 0;
223 }
224