1#!/bin/sh 2 3 4# 5# This script does a binary search to look for the smallest input that will 6# break something. 7# 8 9# The folder for us to store our temporary files. There may be many of them. 10WORKDIR=tmp/search 11 12# This setting determines where we save the input we generate. 13# You should change it to something you have write access to. 14CLOUDINPUT=gs://jpmartin/hellbender-test-inputs/search_input.bam 15 16# file containing the known good output 17GOODOUTPUT=$WORKDIR/recaltable.correct 18 19# file containing the output under test. 20TESTOUTPUT=$WORKDIR/recaltable.cloud 21 22# 23# Set here the genomic interval you'd like to start from. 24# 25FIRST=1000250; 26LAST=1000312; 27 28 29# The code below assumes there's a "hb" script on the root that runs hellbender. 30# Its contents would be: 31# 32# #!/bin/sh 33# build/install/$HELLBENDER/bin/$HELLBENDER "$@" 34# 35# where you replace $HELLBENDER with the name of your hellbender folder 36# (typically just "hellbender") 37 38# 39# Generates the input to be tested. 40# It takes two arguments: the first and last element of the 41# interval. 42# The output will be saved to $CLOUDINPUT 43# 44genInput () { 45 INPUT=$WORKDIR/CEUTrio.test.ch20.$1-$2.bam 46 STDINPUT=$WORKDIR/search_input.bam 47 rm $STDINPUT 48 # keep a copy of every input that we generated 49 ./hb PrintReads -I src/test/resources/org/broadinstitute/hellbender/tools/BQSR/CEUTrio.HiSeq.WGS.b37.ch20.1m-1m1k.NA12878.bam -O $INPUT -L 20:$1-$2 2>&1 > $WORKDIR/inputgen.log || echo "input gen reports an error" 50 # Standard filename for the next stage 51 cp $INPUT $STDINPUT 52 samtools index $STDINPUT 53 # copy to $CLOUDINPUT so the next phases can get to it 54 gsutil cp $STDINPUT $CLOUDINPUT 55 gsutil acl set public-read $CLOUDINPUT 56 gsutil cp $STDINPUT.bai $CLOUDINPUT.bai 57 gsutil acl set public-read $CLOUDINPUT.bai 58} 59 60# 61# Generates the "known good" output. 62# 63genCorrectOutput () { 64 STDINPUT=$WORKDIR/search_input.bam 65 # change this line to run the known good version of the computation you're interested in. 66 ./hb BaseRecalibrator -R ../../sample-data/GRCh37.ch20.fixup.fa -I $STDINPUT --knownSites tstBQSR/dbsnp_132.b37.excluding_sites_after_129.chr17_69k_70k.vcf -sortAllCols --RECAL_TABLE_FILE $GOODOUTPUT 2>&1 > $WORKDIR/outputgen.log || echo "correct output gen reports an error" 67} 68 69# 70# Runs the computation under test. 71# 72genTestOutput () { 73 # change this line to run the computation you're testing. 74 ./hb BaseRecalibratorDataflow --runner=BLOCKING --numWorkers=1 -R gg://reference/EOSsjdnTicvzwAE -I $CLOUDINPUT --baseRecalibrationKnownVariants tstBQSR/dbsnp_132.b37.excluding_sites_after_129.chr17_69k_70k.vcf --project=genomics-pipelines --staging=gs://jpmartin/staging-jpmartin -sortAllCols --RECAL_TABLE_FILE $TESTOUTPUT 2>&1 > $WORKDIR/testgen.log || echo "test output gen reports an error" 75} 76 77# This tries to shrink a failing interval by removing less than half. 78# (It runs after the binary search, so we know that removing half is too much) 79rogner() { 80# try to move the left side by 1/5th at a time 81while true; do 82 MID=$(( $FIRST + ($LAST - $FIRST) / 5 )) 83 if [ $FIRST -eq $MID ]; then 84 echo "Done shrinking the left" 85 break 86 fi 87 rm -f $GOODOUTPUT $TESTOUTPUT 88 genInput $MID $LAST 89 genCorrectOutput 90 genTestOutput 91 if [ ! -f $GOODOUTPUT ]; then 92 echo "missing correct output for $MID-$LAST." 93 break 94 fi 95 if [ ! -f $TESTOUTPUT ]; then 96 echo "Output missing (test) for $MID-$LAST." 97 FIRST=$MID; 98 continue 99 fi 100 if ! `diff $WORKDIR/recaltable.correct $TESTOUTPUT`; then 101 # they differ, explore this branch 102 echo "Output is wrong for $MID-$LAST, going deeper" 103 FIRST=$MID; 104 else 105 # they are the same, try the other half 106 echo "Output matches for $MID-$LAST, going to the right side" 107 break 108 fi 109done 110# right side now, again 1/5th at a time 111while true; do 112 MID=$(( $LAST - ($LAST - $FIRST) / 5 )) 113 if [ $LAST -eq $MID ]; then 114 echo "Done shrinking the right" 115 break 116 fi 117 rm -f $GOODOUTPUT $TESTOUTPUT 118 genInput $FIRST $MID 119 genCorrectOutput 120 genTestOutput 121 if [ ! -f $GOODOUTPUT ]; then 122 echo "missing correct output for $FIRST-$MID." 123 break 124 fi 125 if [ ! -f $TESTOUTPUT ]; then 126 echo "Output missing (test) for $FIRST-$MID." 127 LAST=$MID; 128 continue 129 fi 130 if ! `diff $GOODOUTPUT $TESTOUTPUT`; then 131 # they differ, explore this branch 132 echo "Output is wrong for $FIRST-$MID, going deeper" 133 LAST=$MID; 134 else 135 # they are the same, try the other half 136 echo "Output matches for $FIRST-$MID, done" 137 break 138 fi 139done 140 141echo "Output is now $FIRST-$LAST" 142} 143 144 145 146# 147# This runs a binary search to look for the smallest interval there the output 148# of the test program differs from the output of the good program. 149# 150binarySearch () { 151while true; do 152 153 MID=$(( ( $FIRST + $LAST ) / 2 )) 154 if [ $FIRST -eq $MID ]; then 155 echo "We have narrowed it down to just $FIRST-$LAST, we're done now." 156 break 157 fi 158 rm -f $GOODOUTPUT $TESTOUTPUT 159 genInput $FIRST $MID 160 genCorrectOutput 161 genTestOutput 162 if [ ! -f $GOODOUTPUT ]; then 163 echo "missing correct output for $FIRST-$MID." 164 break 165 fi 166 if [ ! -f $TESTOUTPUT ]; then 167 echo "missing test output for $FIRST-$MID." 168 fi 169 if ! `diff $GOODOUTPUT $TESTOUTPUT`; then 170 # they differ, explore this branch 171 echo "Output is wrong for $FIRST-$MID, going deeper" 172 LAST=$MID; 173 else 174 # they are the same, try the other half 175 echo "Output is correct for $FIRST-$MID, trying the latter half" 176 rm -f $GOODOUTPUT $TESTOUTPUT 177 genInput $MID $LAST 178 genCorrectOutput 179 genTestOutput 180 if [ ! -f $GOODOUTPUT ]; then 181 echo "missing correct output for $MID-$LAST." 182 break 183 fi 184 if [ ! -f $TESTOUTPUT ]; then 185 echo "missing test output for $MID-$LAST." 186 fi 187 if ! `diff $GOODOUTPUT $TESTOUTPUT`; then 188 # they differ, explore this branch 189 echo "Output is wrong for $MID-$LAST, going deeper" 190 FIRST=$MID; 191 else 192 echo "Output is correct for $MID-$LAST too!" 193 # they are the same, too! 194 break 195 fi 196 fi 197 198done 199} 200 201 202# create the work directory if it's not already there 203[ -e $WORKDIR ] || mkdir -p $WORKDIR 204 205# shrink the range, halving every time 206binarySearch 207# if there is still something left, try removing 1/5th at a time 208rogner 209