1#!/usr/local/bin/python3.8 2from __future__ import with_statement 3 4import sys 5import argparse 6import os 7import textwrap 8from collections import namedtuple as nt 9import random as rnd 10rnd.seed(1982) 11import utils 12from Bio.Seq import Seq 13from Bio import SeqIO 14from Bio.SeqRecord import SeqRecord 15from Bio.SeqFeature import SeqFeature 16 17def read_params(args): 18 parser = argparse.ArgumentParser(description='') 19 arg = parser.add_argument 20 arg( 'inp_f', metavar='INPUT_FILE', nargs='?', default=None, type=str, 21 help="the input fna file [stdin if not present]") 22 arg( 'out_f', metavar='OUTPUT_FILE', nargs='?', default=None, type=str, 23 help="the output fna file [stdout if not present]") 24 arg( '-a', default=None, type=int, 25 help="number of char after the match to report") 26 arg( '-n', default=None, type=int, 27 help="number of matching primers") 28 29 parser.add_argument('-s', metavar='Subsequene to look for', required = True, type = str ) 30 31 return vars(parser.parse_args()) 32 33if __name__ == '__main__': 34 par = read_params(sys.argv) 35 36 ss = par['s'].lower() 37 ssr = Seq(par['s']).reverse_complement().lower() 38 f = os.path.basename(par['inp_f']).split(".")[0] 39 with utils.openw( par['out_f'] ) as outf: 40 for r in SeqIO.parse( utils.openr(par['inp_f']), "fasta"): 41 rl = r.seq.lower() 42 if ss in rl or ssr in rl: 43 if par['a']: 44 if ss in rl: 45 i = str(rl).index(str(ss)) 46 subs = rl[i:i+len(ss)+par['a']] if i+len(ss)+par['a'] < len(rl) else rl[i:] 47 else: 48 i = str(rl).index(str(ssr)) 49 subs = rl[i:i+len(ssr)+par['a']] if i+len(ssr)+par['a'] < len(rl) else rl[i:] 50 outf.write( f + "\t" + str(r.id) + "\t" + str(subs) + "\n" ) 51 else: 52 if par['n']: 53 n = str(rl).count(str(ss)) + str(rl).count(str(ssr)) 54 outf.write( f + "\t" + str(r.id) + "\t" + str(n) + "\n" ) 55 else: 56 outf.write( f + "\t" + str(r.id) + "\n" ) 57