#!/usr/bin/env python
# Copyright 2000 Brad Chapman.  All rights reserved.
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Run clustalw and parse the output.

Example code to show how to create a clustalw command line, run clustalw
and parse the results into an object that can be dealt with easily.

The script reads ``opuntia.fasta`` from the current directory, asks clustalw
to write the alignment to ``test.aln``, and then prints the alignment
together with some summary information about it.
"""

# standard library
import subprocess
import sys

# biopython
from Bio import AlignIO
from Bio.Align import AlignInfo
from Bio.Align.Applications import ClustalwCommandline

# create the command line to run clustalw
# this assumes you've got clustalw somewhere on your path, otherwise
# you need to pass the full path of the executable to this via cmd="..."
cline = ClustalwCommandline(infile="opuntia.fasta", outfile="test.aln")

# actually perform the alignment
# The command line is handed over as one string, so on non-Windows platforms
# it has to go through the shell to be split into program and arguments.
# The file names are hard-coded above, so no untrusted text reaches the shell.
return_code = subprocess.call(str(cline), shell=(sys.platform != "win32"))

# Check the exit status explicitly rather than with ``assert``: assertions
# are removed when Python runs with -O, which would let a failed clustalw run
# go unnoticed and leave us parsing a stale or missing output file.
if return_code != 0:
    raise RuntimeError("Calling ClustalW failed (return code %i)" % return_code)

# Parse the output
alignment = AlignIO.read("test.aln", "clustal")

print(alignment)

# show the first record of the alignment
print("first description: %s" % alignment[0].description)
print("first sequence: %s" % alignment[0].seq)

# get the length of the alignment
print("length %i" % alignment.get_alignment_length())

print(alignment)

# print out interesting information about the alignment
summary_align = AlignInfo.SummaryInfo(alignment)

consensus = summary_align.dumb_consensus()
print("consensus %s" % consensus)

# position specific score matrix built from the consensus, ignoring "N"
my_pssm = summary_align.pos_specific_score_matrix(consensus, chars_to_ignore=["N"])
print(my_pssm)

# expected frequency of each nucleotide, passed in as e_freq_table below
expect_freq = {"A": 0.3, "G": 0.2, "T": 0.3, "C": 0.2}

# NOTE(review): 5 and 30 are presumably the start and end positions of the
# region to score -- confirm against the SummaryInfo.information_content docs.
info_content = summary_align.information_content(
    5, 30, chars_to_ignore=["N"], e_freq_table=expect_freq
)

print("relative info content: %f" % info_content)