1#!/usr/bin/env python 2 3import argparse 4import subprocess 5import sys 6import re 7import os 8 9#SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) 10BIN_DIR = #__# 11WORKING_DIR = os.getcwd() 12 13CONF_TEMPLATE = """DATA 14PE = pe %d %d %s 15END 16PARAMETERS 17STOP_AFTER_SUPERREADS=1 18NUM_THREADS=%d 19GRAPH_KMER_SIZE=auto 20END 21""" 22 23if __name__ == "__main__": 24 parser = argparse.ArgumentParser(description="Quantifies Super-Reads") 25 parser.add_argument("-1", "--short-pair1", required=False, type=str, default=None, help="Paired short read FASTQ. Must also specify '--short-pair2' and NOT '--short-unpaired'") 26 parser.add_argument("-2", "--short-pair2", required=False, type=str, default=None, help="Paired short read FASTQ. Must also specify '--short-pair1' and NOT '--short-unpaired'") 27 parser.add_argument("-U", "--short-unpaired", required=False, type=str, default=None, help="Unpaired short read FASTQ. Must not include '--short-pair1' or '--short-pair2'") 28 parser.add_argument("-w", "--work-dir", required=False, type=str, default=None, help="MaSuRCA super-read assembly 'work1' directory. Will perform super-read assembly if not included.") 29 parser.add_argument("-H", "--hisat-index", required=True, type=str, help="HISAT2 index prefix for aligning short reads") 30 parser.add_argument("-G", "--gmap-index", required=True, type=str, help="Gmap index for aligning super-reads") 31 parser.add_argument("-g", "--gmap-directory", required=False, type=str, default=None, help="Gmap directory to find gmap index") 32 parser.add_argument("-p", "--num-threads", required=False, type=int, default=1, help="Number of threads each tool can use") 33 parser.add_argument("-o", "--out-dir", required=False, type=str, default="", help="Output directory") 34 parser.add_argument("--frag-len", required=False, type=float, default=100, help="Paired fragment length for MaSuRCA to use. Will only be used if '--work-dir' not specify") 35 parser.add_argument("--frag-std", required=False, type=float, default=20, help="Paired fragment standard deviation for MaSuRCA to use. Will only be used if '--work-dir' not specify") 36 parser.add_argument("--gmap-cmd", required=False, type=str, default="gmap", help="Gmap executable") 37 parser.add_argument("--hisat2-cmd", required=False, type=str, default="hisat2", help="HISAT2 executable") 38 args = parser.parse_args() 39 40 if not os.path.isdir(BIN_DIR): 41 sys.stderr.write("Error: programs directory not found. Please run './install.sh''\n") 42 sys.exit(1) 43 44 if args.short_unpaired != None: 45 short_reads = os.path.abspath(args.short_unpaired) 46 paired = False 47 elif args.short_pair1 != None and args.short_pair2 != None: 48 short_reads = "%s %s" % (os.path.abspath(args.short_pair1), os.path.abspath(args.short_pair2)) 49 paired = True 50 else: 51 sys.stderr.write("Error: must specify short read input") 52 sys.exit(1) 53 54 55 if args.work_dir == None: 56 if not os.path.exists(args.out_dir): 57 try: 58 os.makedirs(args.out_dir) 59 except OSError, e: 60 sys.stderr.write("\nError creating directory %s (%s)" % (args.out_dir, e)); 61 sys.exit(1) 62 os.chdir(args.out_dir) 63 conf_str = CONF_TEMPLATE % ( 64 args.frag_len, 65 args.frag_std, 66 short_reads, 67 args.num_threads) 68 conf_file = open("super_reads.conf", "w") 69 conf_file.write(conf_str) 70 conf_file.close() 71 subprocess.call([os.path.join(BIN_DIR, "createSuperReads_RNA"), "super_reads.conf"]) 72 subprocess.call("./assemble.sh") 73 74 os.chdir(WORKING_DIR) 75 work1_dir = os.path.join(args.out_dir, "work1") 76 else: 77 work1_dir = args.work_dir 78 79 gmap_call = [args.gmap_cmd, 80 "-f", "samse", 81 "-t", str(args.num_threads), 82 "-d", args.gmap_index] 83 84 if args.gmap_directory != None: 85 gmap_call += ["--dir", args.gmap_directory] 86 gmap_call += ["%s/superReadSequences.fasta" % work1_dir] 87 88 gmap_out_name = os.path.join(args.out_dir, "sr.sam") 89 90 gmap_out = open(gmap_out_name, "w") 91 subprocess.call(gmap_call, stdout=gmap_out) 92 gmap_out.close() 93 94 cmd = [os.path.join(BIN_DIR, "assign_reads"), 95 "-p", str(args.num_threads), 96 "-w", work1_dir, 97 "-s", gmap_out_name, 98 "-o", os.path.join(args.out_dir, "sr_quant.sam")] 99 subprocess.call(cmd) 100 101 hisat2_call = [args.hisat2_cmd, 102 "-p", str(args.num_threads), 103 "-x", args.hisat_index] 104 105 if paired: 106 hisat2_call += ["-1", args.short_pair1, "-2", args.short_pair2] 107 else: 108 hisat2_call += ["-U", args.short_unpaired] 109 110 hisat2_call += [">", os.path.join(args.out_dir, "short.sam")] 111 112 subprocess.call(hisat2_call) 113 114 subprocess.call(["samtools", "merge", "-f", 115 "-@", str(args.num_threads), 116 os.path.join(args.out_dir, "sr_merge.bam"), 117 os.path.join(args.out_dir, "sr_quant.sam"), 118 os.path.join(args.out_dir, "short.sam")]) 119 120 subprocess.call(["samtools", "sort", 121 os.path.join(args.out_dir, "sr_merge.bam"), 122 "-o", os.path.join(args.out_dir, "sr_sort.bam")]) 123