1 from ._ost_seq_alg
import *
6 Checks if sequence in alignment has same connectivity as residues in chain.
7 This looks for connected stretches in both the sequence and the chain and
8 returns False if they don't match. This uses the connectivity of the protein
11 :param aln: Alignment of two sequences with second one expected to map to
13 :type aln: :class:`~ost.seq.AlignmentHandle`
14 :param chain: Source of the sequence.
15 :type chain: :class:`~ost.mol.ChainHandle`
17 :returns: True if all residues (beside gapped ones) are connected, False
20 from ost
import LogWarning
23 if aln.GetCount() != 2:
24 raise ValueError(
'Alignment contains more than 2 sequences!')
25 sequence = aln.GetSequence(1)
26 if len(sequence) == 0:
29 if sequence.HasAttachedView() ==
False:
30 raise ValueError(
"Alignment is missing an attached chain view.")
31 chain = sequence.GetAttachedView()
32 residues = chain.residues
42 for s
in sequence[j:]:
47 if r1.one_letter_code==
'?' or r2.one_letter_code==
'?':
50 if not mol.InSequence(r1.handle, r2.handle):
51 LogWarning(
'%s and %s are not connected by peptide bond' % (str(r1), str(r2)))
54 if mol.InSequence(r1.handle, r2.handle):
55 LogWarning(
'%s and %s are connected by peptide bond' % (str(r1), str(r2)))
60 def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
62 Aligns the residues of chain to the SEQRES sequence, inserting gaps where
63 needed. The function uses the connectivity of the protein backbone to find
64 consecutive peptide fragments. These fragments are then aligned to the SEQRES
67 All the non-ligand, peptide-linking residues of the chain must be listed in
68 SEQRES. If there are any additional residues in the chain, the function
71 :param chain: Source of the sequence
72 :type chain: :class:`~ost.mol.ChainHandle`
73 :param seqres: SEQRES sequence
74 :type seqres: :class:`str`
75 :param try_resnum_first: If set to True, this first builds an alignment using
76 residue numbers and checks if the one-letter-codes
77 match. If they all match, this alignment is used
78 (and possibly validated). Otherwise, it displays a
79 warning and falls back to the connectivity-based
81 :type try_resnum_first: :class:`bool`
82 :param validate: If set to True, the alignment is additionally checked by
83 :func:`~ost.seq.alg.ValidateSEQRESAlignment` and raises
84 a ValueError if the validation failed.
85 :type validate: :class:`bool`
87 :returns: The alignment of the residues in the chain and the SEQRES entries.
88 :rtype: :class:`~ost.seq.AlignmentHandle`
91 def IsEqual(olc1, olc2):
92 return olc1
in (
'X',
'?')
or olc2
in (
'X',
'?')
or olc1 == olc2
96 from ost
import LogWarning
98 residues=view.residues
100 return seq.CreateAlignment()
102 aln_seq = seq.CreateSequence(
'atoms',
'-'*len(seqres))
103 aligned_resnums = set()
105 if r1.number.num
in aligned_resnums:
106 LogWarning(
'Residue numbers must be unique. Already observed %i, ' \
107 'cannot align %s anymore.'%(r1.number.num, r1.qualified_name))
108 try_resnum_first =
False
110 if r1.number.num <= len(seqres)
and r1.number.num > 0:
111 if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code):
112 aln_seq[r1.number.num - 1] = r1.one_letter_code
113 aligned_resnums.add(r1.number.num)
115 msg =
'Sequence mismatch: chain'
116 if len(view.GetName().strip()):
117 msg += f
' ({view.GetName().strip()})'
118 msg += f
' has {r1.one_letter_code}, while SEQRES is '
119 msg += f
'{seqres[r1.number.num-1]} at the corresponding position.'
121 try_resnum_first =
False
124 warning =
'Residue with number %i is outside of the range covered by '\
125 'SEQRES [1, %i]'%(r1.number.num, len(seqres))
127 try_resnum_first =
False
129 if not try_resnum_first:
130 fragments=[residues[0].one_letter_code]
131 for r1, r2
in zip(residues[:-1], residues[1:]):
132 if not mol.InSequence(r1.handle, r2.handle):
134 fragments[-1]+=r2.one_letter_code
138 for frag
in fragments:
139 new_pos=ss.find(frag, pos)
141 raise ValueError(
'"%s" is not a substring of "%s"' % (frag, ss))
142 aln_seq+=
'-'*(new_pos-pos)+frag
143 pos=new_pos+len(frag)
144 aln_seq = seq.CreateSequence(
'atoms',
145 aln_seq+(
'-'*(len(seqres)-len(aln_seq))))
146 alignment = seq.CreateAlignment(seq.CreateSequence(
'SEQRES', str(seqres)),
149 raise ValueError(
"SEQRES cannot be aligned with its corresponding chain.")
154 view_seq_name='view'):
156 Creates and returns the sequence alignment of the given chain view to the
157 chain handle. The alignment contains two sequences, the first containing all
158 non-ligand peptide-linking residues, the second containing all non-ligand
159 peptide-linking residues that are part of the view.
161 :param chain: A valid chain
162 :type chain: :class:`~ost.mol.ChainView`
164 :param handle_seq_name: Name of the handle sequence in the output alignment
165 :param view_seq_name: Name of the view sequence in the output alignment
166 :returns: The alignment
167 :rtype: :class:`~ost.seq.AlignmentHandle`
171 v0=chain.handle.Select(
'ligand=false and peptide=true')
172 v1=chain.Select(
'ligand=false and peptide=true')
173 s0=seq.CreateSequence(handle_seq_name,
'')
174 s1=seq.CreateSequence(view_seq_name,
'')
180 while idx0<len(res0):
181 s0.Append(res0[idx0].one_letter_code)
182 if idx1<len(res1)
and res1[idx1].handle==res0[idx0].handle:
183 s1.Append(res1[idx1].one_letter_code)
188 return seq.CreateAlignment(s0, s1)
192 Predicts contacts from a multiple sequence alignment using a combination
193 of Mutual Information (*MI*) and the Contact Substitution Score (*CoEvoSc*).
194 MI is calculated with the APC and small number corrections as well as with a
195 transformation into Z-scores. The *CoEvoSc* is calculated using the default
196 PairSubstWeightMatrix (see seq.alg.LoadDefaultPairSubstWeightMatrix).
197 The final score for a pair of columns *(i,j)* of **ali** is obtained from:
199 Sc(i,j)=MI(i,j)exp(CoEvoSc(i,j)) if *(i,j)* >=0
201 Sc(i,j)=MI(i,j)exp(1-CoEvoSc(i,j)) if *(i,j)* <0
203 :param ali: The multiple sequence alignment
204 :type ali: :class:`~ost.seq.AlignmentHandle`
209 print(
"Parameter should be an AlignmentHandle")
214 for i
in range(ncol):
215 for j
in range(ncol):
216 if mi.matrix[i][j]>=0:
217 mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(CoEvoSc.matrix[i][j]))
219 mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(1.-CoEvoSc.matrix[i][j]))
220 mi.RefreshSortedIndices()
225 Calculate the probability of a predicted contact to be correct.
226 This simply transforms the score associated with a prediction into a probability.
228 :param cpred_res: A contact prediction result
229 :param method: The method which was used for contact prediction. Should be one
230 of "MI", "MIp", "MIpz", "cevoMI", "cevo"
232 :type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult`
233 :type method: :class:`str`
236 def _growth_function(x,K,x0,B,nu,Q):
237 p=K/(1+Q*math.exp(-B*(x-x0)))**(1/nu)
241 def _decay_function(x,A,k):
247 prob_params[
"MI"]=[0.05858455345122933, 0.8930350957023122]
248 prob_params[
"MIp"]=[0.10019621004607637, 0.9065429261332942]
249 prob_params[
"MIpz"]=[0.7368147563063437, 4.638820470770171, 0.6383539191372934, 1.1627300209863376, 19.63060874042289]
250 prob_params[
"cevoMI"]=[0.779979757231944, 1.642357937131157, 0.3267847173036033, 0.3742848849873358, 5.1816922372446]
251 prob_params[
"cevo"]=[0.7564532665623617, 0.5947472274271304, 3.8548775389879166, 0.46203017320927053, 1.6198602780123705]
253 if not method
in prob_params:
254 raise ValueError(
"method should be one of MI, MIp, MIpz, cevoMI, cevo")
255 params=prob_params[method]
256 cpred_res.RefreshSortedIndices()
257 nres=len(cpred_res.matrix)
258 probabilities=[[0
for i
in range(nres)]
for j
in range(nres)]
260 if len(params)==2:func=_decay_function
261 else:func=_growth_function
265 for k,(i,j)
in enumerate(cpred_res.sorted_indices):
266 p=_decay_function(math.log10(0.01+k/nres),*params)
267 probabilities[i][j]=p
268 probabilities[j][i]=p
270 for k,(i,j)
in enumerate(cpred_res.sorted_indices):
271 p=_growth_function(cpred_res.GetScore(i,j),*params)
272 probabilities[i][j]=p
273 probabilities[j][i]=p
274 cpred_res.probabilities=probabilities
representation of a multiple sequence alignemnt consisting of two or more sequences
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateContactSubstitutionScore(const AlignmentHandle &aln, int ref_seq_index=0, PairSubstWeightMatrix w=LoadDefaultPairSubstWeightMatrix())
def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True)
def ValidateSEQRESAlignment(aln, chain=None)
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateMutualInformation(const AlignmentHandle &aln, ContactWeightMatrix w=LoadConstantContactWeightMatrix(), bool apc_correction=true, bool zpx_transformation=true, float small_number_correction=0.05)
def CalculateContactProbability(cpred_res, method)
def AlignmentFromChainView(chain, handle_seq_name='handle', view_seq_name='view')