2 from ost
import settings, io, seq, LogError
6 def ClustalW(seq1, seq2=None, clustalw=None, keep_files=False, nopgap=False,
7 clustalw_option_string=
False):
9 Runs a clustalw multiple sequence alignment. The results are returned as a
10 :class:`~ost.seq.AlignmentHandle` instance.
12 There are two ways to use this function:
14 - align exactly two sequences:
16 :param seq1: sequence_one
17 :type seq1: :class:`~ost.seq.SequenceHandle` or :class:`str`
19 :param seq2: sequence_two
20 :type seq2: :class:`~ost.seq.SequenceHandle` or :class:`str`
22 The two sequences can be specified as two separate function parameters
23 (`seq1`, `seq2`). The type of both parameters can be either
24 :class:`~ost.seq.SequenceHandle` or :class:`str`, but must be the same for both parameters.
27 - align two or more sequences:
29 :param seq1: sequence_list
30 :type seq1: :class:`~ost.seq.SequenceList`
32 :param seq2: must be :class:`None`
34 Two or more sequences can be specified by using a
35 :class:`~ost.seq.SequenceList`. It is then passed as the first function
36 parameter (`seq1`). The second parameter (`seq2`) must be :class:`None`.
39 :param clustalw: path to clustalw executable (used in :func:`~ost.settings.Locate`)
40 :type clustalw: :class:`str`
41 :param nopgap: turn residue-specific gaps off
42 :type nopgap: :class:`bool`
43 :param clustalw_option_string: additional clustalw flags (see http://toolkit.tuebingen.mpg.de/clustalw/help_params)
44 :type clustalw_option_string: :class:`str`
45 :param keep_files: do not delete temporary files
46 :type keep_files: :class:`bool`
48 Note: ClustalW will convert lowercase to uppercase, and change all '.' to '-'.
49 OST will convert any '?' to 'X' before aligning sequences with ClustalW.
51 ClustalW will accept only IUB/IUPAC amino acid and nucleic acid codes:
53 ======= ======================= ======= ============================
54 Residue Name Residue Name
55 ======= ======================= ======= ============================
57 B aspartate or asparagine Q glutamine
60 E glutamate T threonine
61 F phenylalanine U selenocysteine
63 H histidine W tryptophan
64 I isoleucine Y tyrosine
65 K lysine Z glutamate or glutamine
67 M methionine \* translation stop
68 N asparagine \- gap of indeterminate length
69 ======= ======================= ======= ============================
72 clustalw_path=settings.Locate((
'clustalw',
'clustalw2'),
73 explicit_file_name=clustalw)
77 seq_list=seq.CreateSequenceList()
78 seq_list.AddSequence(seq1)
79 seq_list.AddSequence(seq2)
80 elif isinstance(seq1, str)
and isinstance(seq2, str):
81 seqh1=seq.CreateSequence(
"seq1", seq1)
82 seqh2=seq.CreateSequence(
"seq2", seq2)
83 seq_list=seq.CreateSequenceList()
84 seq_list.AddSequence(seqh1)
85 seq_list.AddSequence(seqh2)
87 LogError(
"WARNING: Specify at least two Sequences")
92 LogError(
"WARNING: Specify either two SequenceHandles or one SequenceList")
95 sequence_names = set()
97 sequence_names.add(s.GetName())
98 if len(sequence_names) < len(seq_list):
99 raise ValueError(
"ClustalW can only process sequences with unique identifiers!")
102 new_list = seq.CreateSequenceList()
105 for i,c
in enumerate(ss):
108 new_list.AddSequence(ss)
114 out=os.path.join(temp_dir.dirname,
'out.fasta')
115 command=
'%s -infile="%s" -output=fasta -outfile="%s"' % (clustalw_path,
120 if clustalw_option_string!=
False:
121 command=command+
" "+clustalw_option_string
123 ps=subprocess.Popen(command, shell=
True, stdout=subprocess.PIPE)
124 ps.stdout.readlines()
125 aln=io.LoadAlignment(out)
128 for sequence
in seq_list:
129 for seq_num,aln_seq
in enumerate(aln.sequences):
130 if aln_seq.GetName()==sequence.GetName():
132 aln.SetSequenceOffset(seq_num,sequence.offset)
133 if sequence.HasAttachedView():
134 aln.AttachView(seq_num,sequence.GetAttachedView().Copy())