OpenStructure
Loading...
Searching...
No Matches
__init__.py
Go to the documentation of this file.
1from ._ost_seq_alg import *
2from ost.seq.alg.mat import *
3
4def ValidateSEQRESAlignment(aln, chain=None):
5 """
6 Checks if sequence in alignment has same connectivity as residues in chain.
7 This looks for connected stretches in both the sequence and the chain and
8 returns False if they don't match. This uses the connectivity of the protein
9 backbone.
10
11 :param aln: Alignment of two sequences with second one expected to map to
12 residues in *chain*.
13 :type aln: :class:`~ost.seq.AlignmentHandle`
14 :param chain: Source of the sequence.
15 :type chain: :class:`~ost.mol.ChainHandle`
16
17 :returns: True if all residues (beside gapped ones) are connected, False
18 otherwise.
19 """
20 from ost import LogWarning
21 from ost import seq
22 from ost import mol
23 if aln.GetCount() != 2:
24 raise ValueError('Alignment contains more than 2 sequences!')
25 sequence = aln.GetSequence(1)
26 if len(sequence) == 0:
27 return True
28 if chain == None:
29 if sequence.HasAttachedView() == False:
30 raise ValueError("Alignment is missing an attached chain view.")
31 chain = sequence.GetAttachedView()
32 residues = chain.residues
33 # eat up all beginning gaps
34 j = 1
35 for s in sequence:
36 if s != '-':
37 break
38 j += 1;
39 l = sequence[j-1]
40 i = 0
41 # run over sequence & alignment
42 for s in sequence[j:]:
43 if s != '-':
44 i += 1
45 r1 = residues[i-1]
46 r2 = residues[i]
47 if r1.one_letter_code=='?' or r2.one_letter_code=='?':
48 continue
49 if l != '-':
50 if not mol.InSequence(r1.handle, r2.handle):
51 LogWarning('%s and %s are not connected by peptide bond' % (str(r1), str(r2)))
52 return False
53 else:
54 if mol.InSequence(r1.handle, r2.handle):
55 LogWarning('%s and %s are connected by peptide bond' % (str(r1), str(r2)))
56 return False
57 l = s
58 return True
59
60def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
61 """
62 Aligns the residues of chain to the SEQRES sequence, inserting gaps where
63 needed. The function uses the connectivity of the protein backbone to find
64 consecutive peptide fragments. These fragments are then aligned to the SEQRES
65 sequence.
66
67 All the non-ligand, peptide-linking residues of the chain must be listed in
68 SEQRES. If there are any additional residues in the chain, the function
69 raises a ValueError.
70
71 :param chain: Source of the sequence
72 :type chain: :class:`~ost.mol.ChainHandle`
73 :param seqres: SEQRES sequence
74 :type seqres: :class:`str`
75 :param try_resnum_first: If set to True, this first builds an alignment using
76 residue numbers and checks if the one-letter-codes
77 match. If they all match, this alignment is used
78 (and possibly validated). Otherwise, it displays a
79 warning and falls back to the connectivity-based
80 alignment.
81 :type try_resnum_first: :class:`bool`
82 :param validate: If set to True, the alignment is additionally checked by
83 :func:`~ost.seq.alg.ValidateSEQRESAlignment` and raises
84 a ValueError if the validation failed.
85 :type validate: :class:`bool`
86
87 :returns: The alignment of the residues in the chain and the SEQRES entries.
88 :rtype: :class:`~ost.seq.AlignmentHandle`
89 """
90
91 def IsEqual(olc1, olc2):
92 return olc1 in ('X', '?') or olc2 in ('X', '?') or olc1 == olc2
93
94 from ost import seq
95 from ost import mol
96 from ost import LogWarning
97 view=chain
98 residues=view.residues
99 if len(residues)==0:
100 return seq.CreateAlignment()
101 if try_resnum_first:
102 aln_seq = seq.CreateSequence('atoms', '-'*len(seqres))
103 aligned_resnums = set()
104 for r1 in residues:
105 if r1.number.num in aligned_resnums:
106 LogWarning('Residue numbers must be unique. Already observed %i, ' \
107 'cannot align %s anymore.'%(r1.number.num, r1.qualified_name))
108 try_resnum_first = False
109 break
110 if r1.number.num <= len(seqres) and r1.number.num > 0:
111 if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code):
112 aln_seq[r1.number.num - 1] = r1.one_letter_code
113 aligned_resnums.add(r1.number.num)
114 else:
115 msg = 'Sequence mismatch: chain'
116 if len(view.GetName().strip()):
117 msg += f' ({view.GetName().strip()})'
118 msg += f' has {r1.one_letter_code}, while SEQRES is '
119 msg += f'{seqres[r1.number.num-1]} at the corresponding position.'
120 LogWarning(msg)
121 try_resnum_first = False
122 break
123 else:
124 warning = 'Residue with number %i is outside of the range covered by '\
125 'SEQRES [1, %i]'%(r1.number.num, len(seqres))
126 LogWarning(warning)
127 try_resnum_first = False
128 break
129 if not try_resnum_first:
130 fragments=[residues[0].one_letter_code]
131 for r1, r2 in zip(residues[:-1], residues[1:]):
132 if not mol.InSequence(r1.handle, r2.handle):
133 fragments.append('')
134 fragments[-1]+=r2.one_letter_code
135 ss=str(seqres)
136 pos=0
137 aln_seq=''
138 for frag in fragments:
139 new_pos=ss.find(frag, pos)
140 if new_pos==-1:
141 raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
142 aln_seq+='-'*(new_pos-pos)+frag
143 pos=new_pos+len(frag)
144 aln_seq = seq.CreateSequence('atoms',
145 aln_seq+('-'*(len(seqres)-len(aln_seq))))
146 alignment = seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
147 aln_seq)
148 if validate and not ValidateSEQRESAlignment(alignment, view):
149 raise ValueError("SEQRES cannot be aligned with its corresponding chain.")
150 return alignment
151
152
153def AlignmentFromChainView(chain, handle_seq_name='handle',
154 view_seq_name='view'):
155 """
156 Creates and returns the sequence alignment of the given chain view to the
157 chain handle. The alignment contains two sequences, the first containing all
158 non-ligand peptide-linking residues, the second containing all non-ligand
159 peptide-linking residues that are part of the view.
160
161 :param chain: A valid chain
162 :type chain: :class:`~ost.mol.ChainView`
163
164 :param handle_seq_name: Name of the handle sequence in the output alignment
165 :param view_seq_name: Name of the view sequence in the output alignment
166 :returns: The alignment
167 :rtype: :class:`~ost.seq.AlignmentHandle`
168
169 """
170 from ost import seq
171 v0=chain.handle.Select('ligand=false and peptide=true')
172 v1=chain.Select('ligand=false and peptide=true')
173 s0=seq.CreateSequence(handle_seq_name, '')
174 s1=seq.CreateSequence(view_seq_name, '')
175 s0.AttachView(v0)
176 s1.AttachView(v1)
177 res0=v0.residues
178 res1=v1.residues
179 idx0, idx1=(0, 0)
180 while idx0<len(res0):
181 s0.Append(res0[idx0].one_letter_code)
182 if idx1<len(res1) and res1[idx1].handle==res0[idx0].handle:
183 s1.Append(res1[idx1].one_letter_code)
184 idx1+=1
185 else:
186 s1.Append('-')
187 idx0+=1
188 return seq.CreateAlignment(s0, s1)
189
191 """
192 Predicts contacts from a multiple sequence alignment using a combination
193 of Mutual Information (*MI*) and the Contact Substitution Score (*CoEvoSc*).
194 MI is calculated with the APC and small number corrections as well as with a
195 transformation into Z-scores. The *CoEvoSc* is calculated using the default
196 PairSubstWeightMatrix (see seq.alg.LoadDefaultPairSubstWeightMatrix).
197 The final score for a pair of columns *(i,j)* of **ali** is obtained from:
198
199 Sc(i,j)=MI(i,j)exp(CoEvoSc(i,j)) if *(i,j)* >=0
200
201 Sc(i,j)=MI(i,j)exp(1-CoEvoSc(i,j)) if *(i,j)* <0
202
203 :param ali: The multiple sequence alignment
204 :type ali: :class:`~ost.seq.AlignmentHandle`
205 """
206 import math
207 from ost import seq
208 if not type(ali)==type(seq.AlignmentHandle()):
209 print("Parameter should be an AlignmentHandle")
210 return
213 ncol=ali.GetLength()
214 for i in range(ncol):
215 for j in range(ncol):
216 if mi.matrix[i][j]>=0:
217 mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(CoEvoSc.matrix[i][j]))
218 else:
219 mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(1.-CoEvoSc.matrix[i][j]))
220 mi.RefreshSortedIndices()
221 return mi
222
223def CalculateContactProbability(cpred_res,method):
224 """
225 Calculate the probability of a predicted contact to be correct.
226 This simply transforms the score associated with a prediction into a probability.
227
228 :param cpred_res: A contact prediction result
229 :param method: The method which was used for contact prediction. Should be one
230 of "MI", "MIp", "MIpz", "cevoMI", "cevo"
231
232 :type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult`
233 :type method: :class:`str`
234 """
235 import math
236 def _growth_function(x,K,x0,B,nu,Q):
237 p=K/(1+Q*math.exp(-B*(x-x0)))**(1/nu)
238 p=min(1,max(0,p))
239 return p
240
241 def _decay_function(x,A,k):
242 p=A*math.exp(-k*x)
243 p=min(1,max(0,p))
244 return p
245
246 prob_params={}
247 prob_params["MI"]=[0.05858455345122933, 0.8930350957023122]
248 prob_params["MIp"]=[0.10019621004607637, 0.9065429261332942]
249 prob_params["MIpz"]=[0.7368147563063437, 4.638820470770171, 0.6383539191372934, 1.1627300209863376, 19.63060874042289]
250 prob_params["cevoMI"]=[0.779979757231944, 1.642357937131157, 0.3267847173036033, 0.3742848849873358, 5.1816922372446]
251 prob_params["cevo"]=[0.7564532665623617, 0.5947472274271304, 3.8548775389879166, 0.46203017320927053, 1.6198602780123705]
252
253 if not method in prob_params:
254 raise ValueError("method should be one of MI, MIp, MIpz, cevoMI, cevo")
255 params=prob_params[method]
256 cpred_res.RefreshSortedIndices()
257 nres=len(cpred_res.matrix)
258 probabilities=[[0 for i in range(nres)] for j in range(nres)]
259
260 if len(params)==2:func=_decay_function
261 else:func=_growth_function
262
263 nres=float(nres)
264 if len(params)==2:
265 for k,(i,j) in enumerate(cpred_res.sorted_indices):
266 p=_decay_function(math.log10(0.01+k/nres),*params)
267 probabilities[i][j]=p
268 probabilities[j][i]=p
269 else:
270 for k,(i,j) in enumerate(cpred_res.sorted_indices):
271 p=_growth_function(cpred_res.GetScore(i,j),*params)
272 probabilities[i][j]=p
273 probabilities[j][i]=p
274 cpred_res.probabilities=probabilities
275 return cpred_res
276
277
278
279
representation of a multiple sequence alignemnt consisting of two or more sequences
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateMutualInformation(const AlignmentHandle &aln, ContactWeightMatrix w=LoadConstantContactWeightMatrix(), bool apc_correction=true, bool zpx_transformation=true, float small_number_correction=0.05)
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateContactSubstitutionScore(const AlignmentHandle &aln, int ref_seq_index=0, PairSubstWeightMatrix w=LoadDefaultPairSubstWeightMatrix())
ValidateSEQRESAlignment(aln, chain=None)
Definition __init__.py:4
PredictContacts(ali)
Definition __init__.py:190
CalculateContactProbability(cpred_res, method)
Definition __init__.py:223
AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True)
Definition __init__.py:60
AlignmentFromChainView(chain, handle_seq_name='handle', view_seq_name='view')
Definition __init__.py:154