1#!/bin/bash
2## compare the Spectrum of the segregating sites, frequencies of observing a k mutation...
3
4###########################
5#!!!!! need to check is there a formula for the theoretical probability????
6
7mkdir test-spectrum
8cd test-spectrum
9
10mst=(10 20 50 100 10)
11
12msNsample=(3 7 10)
13
14msNsample=(3)
15
16rep=1000
17#npop=20000
18
19compareSPEC=compareSPEC
20
21rm ${compareSPEC}
22#echo -e "compare number of mutations for ${rep} replicates \n\t\t|\t\t\t|\t\t ms \t\t|\t\t scrm\nNsam\ttheta\t|\tmean\tstdv\t|\tmean\tstdv\tstd err\t|\tmean\tstdv \tstd err" >${compareSEG}
23
24for t in "${mst[@]}"
25	do
26	for nsam in "${msNsample[@]}"
27		do
28		prefix=${nsam}sample${t}mut
29		out=${prefix}out
30		nseg=${prefix}Seg
31		rm mscolsum*
32		find . -name "xx*" -print0 | xargs -0 rm
33
34		ms ${nsam} ${rep} -t ${t} | tail -n +4 | sed '/segsites/d' | sed '/positions/d' | gawk '/^\/\//{f="xx"++d} f{print > f} '
35
36		for file in $(seq 1 1 ${rep})
37				do
38				sed '/\/\//d' xx${file} | sed 's/.\{1\}/& /g' | awk '
39{ for(i=1;i<=NF;++i) t[i]+=$i
40  if(n<NF)n=NF
41}
42END {
43  printf t[1]
44  for(i=2;i<=n;++i) printf ","t[i]
45  printf "\n"
46}
47' >> mscolsumsOld
48				done
49		cat mscolsumsOld | tr '\n' ',' > mscolsums
50
51		rm scrmcolsum*
52		find . -name "yy*" -print0 | xargs -0 rm
53
54		scrm ${nsam} ${rep} -t ${t} | tail -n +4 | sed '/segsites/d' | sed '/positions/d' | gawk '/^\/\//{f="yy"++d} f{print > f} '
55
56        for file in $(seq 1 1 ${rep})
57				do
58				sed '/\/\//d' yy${file} | sed 's/.\{1\}/& /g' | awk '
59{ for(i=1;i<=NF;++i) t[i]+=$i
60  if(n<NF)n=NF
61}
62END {
63  printf t[1]
64  for(i=2;i<=n;++i) printf ","t[i]
65  printf "\n"
66}
67' >> scrmcolsumsOld
68				done
69		cat scrmcolsumsOld | tr '\n' ',' > scrmcolsums
70
71
72#echo  -e "Sample size = ${nsam}, theta = ${t}" >> ${compareSPEC}
73
74
75		echo "rm(list=ls());
76		#source(\"fun_src.r\");
77		msdata=as.numeric(read.table(\"mscolsums\",sep=\",\"));
78		msnum=length(msdata)-1
79		mstable=table(msdata)
80
81		a=as.numeric(read.table(\"scrmcolsums\",sep=\",\"));
82		scrmdata=a[!is.na(a)]
83		scrmdata=a[a>0]
84		scrmnum=length(scrmdata)-1
85		scrmtable=table(scrmdata)
86		test=chisq.test(cbind(mstable, scrmtable));
87		cat(paste(\"Sample size = ${nsam}, theta = ${t}, test statistics = \",
88format(test\$statistic,digits=4),\", p-value = \",format(test\$p.value,scientific = TRUE),
89sep=\"\"),file=\"${compareSPEC}\",append=TRUE);cat(\"\n\",file=\"${compareSPEC}\",append=TRUE);
90
91		for (i in (1:(${nsam}-1))){
92		cat(paste( paste(\"P(S=\",i,\")  \",sep=\"\") , \"|\", format(mstable[i]/msnum,scientific = TRUE), \"|\", format(scrmtable[i]/scrmnum,scientific = TRUE),
93sep=\"\t\"),file=\"${compareSPEC}\",append=TRUE);
94cat(\"\n\",file=\"${compareSPEC}\",append=TRUE);
95		}
96		" > dummy.r
97		R CMD BATCH dummy.r
98        find . -name "xx*" -print0 | xargs -0 rm
99        find . -name "yy*" -print0 | xargs -0 rm
100		#rm ms${out} ms${nseg} scrm${out} scrm${nseg}
101		done
102	done
103
104
105
106
107# make the transpose?
108#awk '
109#{
110    #for (i=1; i<=NF; i++)  {
111        #a[NR,i] = $i
112    #}
113#}
114#NF>p { p = NF }
115#END {
116    #for(j=1; j<=p; j++) {
117        #str=a[1,j]
118        #for(i=2; i<=NR; i++){
119            #str=str" "a[i,j];
120        #}
121        #print str
122    #}
123#}' > xx${file}transpose
124