1 from _ost_seq_alg
import *
6 Checks a sequence aligned to a SEQRES sequence to be free of strand breaks.
7 Residues divided by gaps are not considered as breakage but may also not be
11 :type aln: :class:`~ost.seq.AlignmentHandle`
12 :param chain: Source of the sequence
13 :type chain: :class:`~ost.mol.ChainHandle`
15 :returns: True if all residues (beside gapped ones) are connected, False
18 from ost
import LogWarning
21 if aln.GetCount() != 2:
22 raise ValueError(
'Alignment contains more than 2 sequences!')
23 sequence = aln.GetSequence(1)
24 if len(sequence) == 0:
27 if sequence.HasAttachedView() ==
False:
28 raise ValueError(
"Alignment is missing an attached chain view.")
29 chain = sequence.GetAttachedView()
30 residues = chain.residues
40 for s
in sequence[j:]:
45 if r1.one_letter_code==
'?' or r2.one_letter_code==
'?':
48 if not mol.InSequence(r1.handle, r2.handle):
49 LogWarning(
'%s and %s are not connected by peptide bond' % (str(r1), str(r2)))
52 if mol.InSequence(r1.handle, r2.handle):
53 LogWarning(
'%s and %s are connected by peptide bond' % (str(r1), str(r2)))
58 def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
60 Aligns the residues of chain to the SEQRES sequence, inserting gaps where
61 needed. The function uses the connectivity of the protein backbone to find
62 consecutive peptide fragments. These fragments are then aligned to the SEQRES
65 All the non-ligand, peptide-linking residues of the chain must be listed in
66 SEQRES. If there are any additional residues in the chain, the function
69 If 'try_resnum_first' is set, building the alignment following residue numbers
72 If 'validate' is set (default), the alignment is checked using
73 :func:`~ost.seq.alg.ValidateSEQRESAlignment`.
75 :param chain: Source of the sequence
76 :type chain: :class:`~ost.mol.ChainHandle`
77 :param seqres: SEQRES sequence
78 :type seqres: :class:`str`
79 :param try_resnum_first: Try to align by residue number
80 :type try_resnum_first: :class:`bool`
81 :param validate: Validate alignment by
82 :func:`~ost.seq.alg.ValidateSEQRESAlignment`
83 :type validate: :class:`bool`
85 :returns: The alignment of the residues in the chain and the SEQRES entries.
86 :rtype: :class:`~ost.seq.AlignmentHandle`
89 def IsEqual(olc1, olc2):
90 return olc1
in (
'X',
'?')
or olc2
in (
'X',
'?')
or olc1 == olc2
94 from ost
import LogWarning
96 residues=view.residues
98 return seq.CreateAlignment()
100 aln_seq = seq.CreateSequence(
'atoms',
'-'*len(seqres))
102 if r1.number.num <= len(seqres)
and r1.number.num > 0:
103 if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code):
104 aln_seq[r1.number.num - 1] = r1.one_letter_code
106 LogWarning(
'Sequence mismatch: chain has "' + r1.one_letter_code +
107 '", while SEQRES is "' + seqres[r1.number.num - 1] +
108 '" at the corresponding position.')
109 try_resnum_first =
False
111 if not try_resnum_first:
112 fragments=[residues[0].one_letter_code]
113 for r1, r2
in zip(residues[:-1], residues[1:]):
114 if not mol.InSequence(r1.handle, r2.handle):
116 fragments[-1]+=r2.one_letter_code
120 for frag
in fragments:
121 new_pos=ss.find(frag, pos)
123 raise ValueError(
'"%s" is not a substring of "%s"' % (frag, ss))
124 aln_seq+=
'-'*(new_pos-pos)+frag
125 pos=new_pos+len(frag)
126 aln_seq = seq.CreateSequence(
'atoms',
127 aln_seq+(
'-'*(len(seqres)-len(aln_seq))))
128 alignment = seq.CreateAlignment(seq.CreateSequence(
'SEQRES', str(seqres)),
131 raise ValueError(
"SEQRES cannot be aligned with its corresponding chain.")
136 view_seq_name=
'view'):
138 Creates and returns the sequence alignment of the given chain view to the
139 chain handle. The alignment contains two sequences, the first containing all
140 non-ligand peptide-linking residues, the second containing all non-ligand
141 peptide-linking residues that are part of the view.
143 :param chain: A valid chain
144 :type chain: :class:`~ost.mol.ChainView`
146 :param handle_seq_name: Name of the handle sequence in the output alignment
147 :param view_seq_name: Name of the view sequence in the output alignment
148 :returns: The alignment
149 :rtype: :class:`~ost.seq.AlignmentHandle`
153 v0=chain.handle.Select(
'ligand=false and peptide=true')
154 v1=chain.Select(
'ligand=false and peptide=true')
155 s0=seq.CreateSequence(handle_seq_name,
'')
156 s1=seq.CreateSequence(view_seq_name,
'')
162 while idx0<len(res0):
163 s0.Append(res0[idx0].one_letter_code)
164 if idx1<len(res1)
and res1[idx1].handle==res0[idx0].handle:
165 s1.Append(res1[idx1].one_letter_code)
170 return seq.CreateAlignment(s0, s1)
174 Predicts contacts from a multiple sequence alignment using a combination
175 of Mutual Information (*MI*) and the Contact Substitution Score (*CoEvoSc*).
176 MI is calculated with the APC and small number corrections as well as with a
177 transformation into Z-scores. The *CoEvoSc* is calculated using the default
178 PairSubstWeightMatrix (see seq.alg.LoadDefaultPairSubstWeightMatrix).
179 The final score for a pair of columns *(i,j)* of **ali** is obtained from:
181 Sc(i,j)=MI(i,j)exp(CoEvoSc(i,j)) if *(i,j)* >=0
183 Sc(i,j)=MI(i,j)exp(1-CoEvoSc(i,j)) if *(i,j)* <0
185 :param ali: The multiple sequence alignment
186 :type ali: :class:`~ost.seq.AlignmentHandle`
191 print "Parameter should be an AlignmentHandle"
196 for i
in range(ncol):
197 for j
in range(ncol):
198 if mi.matrix[i][j]>=0:
199 mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(CoEvoSc.matrix[i][j]))
201 mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(1.-CoEvoSc.matrix[i][j]))
202 mi.RefreshSortedIndices()
207 Calculate the probability of a predicted contact to be correct.
208 This simply transforms the score associated with a prediction into a probability.
210 :param cpred_res: A contact prediction result
211 :param method: The method which was used for contact prediction. Should be one
212 of "MI", "MIp", "MIpz", "cevoMI", "cevo"
214 :type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult`
215 :type method: :class:`str`
218 def _growth_function(x,K,x0,B,nu,Q):
219 p=K/(1+Q*math.exp(-B*(x-x0)))**(1/nu)
223 def _decay_function(x,A,k):
229 prob_params[
"MI"]=[0.05858455345122933, 0.8930350957023122]
230 prob_params[
"MIp"]=[0.10019621004607637, 0.9065429261332942]
231 prob_params[
"MIpz"]=[0.7368147563063437, 4.638820470770171, 0.6383539191372934, 1.1627300209863376, 19.63060874042289]
232 prob_params[
"cevoMI"]=[0.779979757231944, 1.642357937131157, 0.3267847173036033, 0.3742848849873358, 5.1816922372446]
233 prob_params[
"cevo"]=[0.7564532665623617, 0.5947472274271304, 3.8548775389879166, 0.46203017320927053, 1.6198602780123705]
235 if not method
in prob_params:
236 raise ValueError(
"method should be one of MI, MIp, MIpz, cevoMI, cevo")
237 params=prob_params[method]
238 cpred_res.RefreshSortedIndices()
239 nres=len(cpred_res.matrix)
240 probabilities=[[0
for i
in range(nres)]
for j
in range(nres)]
242 if len(params)==2:func=_decay_function
243 else:func=_growth_function
247 for k,(i,j)
in enumerate(cpred_res.sorted_indices):
248 p=_decay_function(math.log10(0.01+k/nres),*params)
249 probabilities[i][j]=p
250 probabilities[j][i]=p
252 for k,(i,j)
in enumerate(cpred_res.sorted_indices):
253 p=_growth_function(cpred_res.GetScore(i,j),*params)
254 probabilities[i][j]=p
255 probabilities[j][i]=p
256 cpred_res.probabilities=probabilities
def ValidateSEQRESAlignment
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateMutualInformation(const AlignmentHandle &aln, ContactWeightMatrix w=LoadConstantContactWeightMatrix(), bool apc_correction=true, bool zpx_transformation=true, float small_number_correction=0.05)
def CalculateContactProbability
ContactPredictionScoreResult DLLEXPORT_OST_SEQ_ALG CalculateContactSubstitutionScore(const AlignmentHandle &aln, int ref_seq_index=0, PairSubstWeightMatrix w=LoadDefaultPairSubstWeightMatrix())
representation of a multiple sequence alignemnt consisting of two or more sequences ...
def AlignmentFromChainView