1#!/usr/bin/env python
2# Copyright 2000 Brad Chapman.  All rights reserved.
3#
4# This code is part of the Biopython distribution and governed by its
5# license.  Please see the LICENSE file that should have been included
6# as part of this package.
7
8"""Run clustalw and parse the output.
9
10Example code to show how to create a clustalw command line, run clustalw
11and parse the results into an object that can be dealt with easily.
12"""
13# standard library
14
15
16import sys
17import subprocess
18
19# biopython
20from Bio.Align.Applications import ClustalwCommandline
21from Bio import AlignIO
22from Bio.Align import AlignInfo
23
# Build the ClustalW command line.
# The executable is expected to be on PATH; if it lives elsewhere, pass the
# full path of the executable via cmd="..." when creating the command line.
cline = ClustalwCommandline(infile="opuntia.fasta", outfile="test.aln")

# Run the alignment. The command is handed over as a single string, so a
# shell is used on every platform except Windows.
return_code = subprocess.call(str(cline), shell=(sys.platform != "win32"))

# Check the exit status explicitly instead of with ``assert``: assertions are
# stripped when Python runs with -O, which would let a failed ClustalW run
# fall through to parsing a missing or stale output file.
if return_code != 0:
    raise RuntimeError("Calling ClustalW failed (exit status %i)" % return_code)
32
# Read the ClustalW output file back in as an alignment object
alignment = AlignIO.read("test.aln", "clustal")

print(alignment)

# Show the description and sequence of the first record
first_record = alignment[0]
print("first description:", first_record.description)
print("first sequence:", first_record.seq)

# Report the length of the alignment
print("length", alignment.get_alignment_length())

print(alignment)
45
# Summarise the alignment and print some interesting information about it
summary_align = AlignInfo.SummaryInfo(alignment)

# Consensus sequence
consensus = summary_align.dumb_consensus()
print("consensus", consensus)

# Position specific score matrix against the consensus, ignoring "N".
# A fresh ["N"] list is passed to each call rather than a shared one, so the
# calls stay independent even if the library modifies the list it receives.
my_pssm = summary_align.pos_specific_score_matrix(
    consensus,
    chars_to_ignore=["N"],
)
print(my_pssm)

# Expected background frequency of each nucleotide
expect_freq = dict(A=0.3, G=0.2, T=0.3, C=0.2)

# Information content computed for positions 5 to 30, ignoring "N"
info_content = summary_align.information_content(
    5,
    30,
    chars_to_ignore=["N"],
    e_freq_table=expect_freq,
)

print("relative info content: {:f}".format(info_content))
62