1#!/bin/bash 2## compare the Spectrum of the segregating sites, frequencies of observing a k mutation... 3 4########################### 5#!!!!! need to check is there a formula for the theoretical probability???? 6 7mkdir test-spectrum 8cd test-spectrum 9 10mst=(10 20 50 100 10) 11 12msNsample=(3 7 10) 13 14msNsample=(3) 15 16rep=1000 17#npop=20000 18 19compareSPEC=compareSPEC 20 21rm ${compareSPEC} 22#echo -e "compare number of mutations for ${rep} replicates \n\t\t|\t\t\t|\t\t ms \t\t|\t\t scrm\nNsam\ttheta\t|\tmean\tstdv\t|\tmean\tstdv\tstd err\t|\tmean\tstdv \tstd err" >${compareSEG} 23 24for t in "${mst[@]}" 25 do 26 for nsam in "${msNsample[@]}" 27 do 28 prefix=${nsam}sample${t}mut 29 out=${prefix}out 30 nseg=${prefix}Seg 31 rm mscolsum* 32 find . -name "xx*" -print0 | xargs -0 rm 33 34 ms ${nsam} ${rep} -t ${t} | tail -n +4 | sed '/segsites/d' | sed '/positions/d' | gawk '/^\/\//{f="xx"++d} f{print > f} ' 35 36 for file in $(seq 1 1 ${rep}) 37 do 38 sed '/\/\//d' xx${file} | sed 's/.\{1\}/& /g' | awk ' 39{ for(i=1;i<=NF;++i) t[i]+=$i 40 if(n<NF)n=NF 41} 42END { 43 printf t[1] 44 for(i=2;i<=n;++i) printf ","t[i] 45 printf "\n" 46} 47' >> mscolsumsOld 48 done 49 cat mscolsumsOld | tr '\n' ',' > mscolsums 50 51 rm scrmcolsum* 52 find . -name "yy*" -print0 | xargs -0 rm 53 54 scrm ${nsam} ${rep} -t ${t} | tail -n +4 | sed '/segsites/d' | sed '/positions/d' | gawk '/^\/\//{f="yy"++d} f{print > f} ' 55 56 for file in $(seq 1 1 ${rep}) 57 do 58 sed '/\/\//d' yy${file} | sed 's/.\{1\}/& /g' | awk ' 59{ for(i=1;i<=NF;++i) t[i]+=$i 60 if(n<NF)n=NF 61} 62END { 63 printf t[1] 64 for(i=2;i<=n;++i) printf ","t[i] 65 printf "\n" 66} 67' >> scrmcolsumsOld 68 done 69 cat scrmcolsumsOld | tr '\n' ',' > scrmcolsums 70 71 72#echo -e "Sample size = ${nsam}, theta = ${t}" >> ${compareSPEC} 73 74 75 echo "rm(list=ls()); 76 #source(\"fun_src.r\"); 77 msdata=as.numeric(read.table(\"mscolsums\",sep=\",\")); 78 msnum=length(msdata)-1 79 mstable=table(msdata) 80 81 a=as.numeric(read.table(\"scrmcolsums\",sep=\",\")); 82 scrmdata=a[!is.na(a)] 83 scrmdata=a[a>0] 84 scrmnum=length(scrmdata)-1 85 scrmtable=table(scrmdata) 86 test=chisq.test(cbind(mstable, scrmtable)); 87 cat(paste(\"Sample size = ${nsam}, theta = ${t}, test statistics = \", 88format(test\$statistic,digits=4),\", p-value = \",format(test\$p.value,scientific = TRUE), 89sep=\"\"),file=\"${compareSPEC}\",append=TRUE);cat(\"\n\",file=\"${compareSPEC}\",append=TRUE); 90 91 for (i in (1:(${nsam}-1))){ 92 cat(paste( paste(\"P(S=\",i,\") \",sep=\"\") , \"|\", format(mstable[i]/msnum,scientific = TRUE), \"|\", format(scrmtable[i]/scrmnum,scientific = TRUE), 93sep=\"\t\"),file=\"${compareSPEC}\",append=TRUE); 94cat(\"\n\",file=\"${compareSPEC}\",append=TRUE); 95 } 96 " > dummy.r 97 R CMD BATCH dummy.r 98 find . -name "xx*" -print0 | xargs -0 rm 99 find . -name "yy*" -print0 | xargs -0 rm 100 #rm ms${out} ms${nseg} scrm${out} scrm${nseg} 101 done 102 done 103 104 105 106 107# make the transpose? 108#awk ' 109#{ 110 #for (i=1; i<=NF; i++) { 111 #a[NR,i] = $i 112 #} 113#} 114#NF>p { p = NF } 115#END { 116 #for(j=1; j<=p; j++) { 117 #str=a[1,j] 118 #for(i=2; i<=NR; i++){ 119 #str=str" "a[i,j]; 120 #} 121 #print str 122 #} 123#}' > xx${file}transpose 124