00001 from _ost_seq_alg import *
00002 from ost.seq.alg.mat import *
00003
00004 def ValidateSEQRESAlignment(aln, chain=None):
00005 """
00006 Checks a sequence aligned to a SEQRES sequence to be free of strand breaks.
00007 Residues divided by gaps are not considered as breakage but may also not be
00008 connected.
00009
00010 :param aln: Alignment
00011 :type aln: :class:`~ost.seq.AlignmentHandle`
00012 :param chain: Source of the sequence
00013 :type chain: :class:`~ost.mol.ChainHandle`
00014
00015 :returns: True if all residues (beside gapped ones) are connected, False
00016 otherwise.
00017 """
00018 from ost import LogWarning
00019 from ost import seq
00020 from ost import mol
00021 if aln.GetCount() != 2:
00022 raise ValueError('Alignment contains more than 2 sequences!')
00023 sequence = aln.GetSequence(1)
00024 if len(sequence) == 0:
00025 return True
00026 if chain == None:
00027 if sequence.HasAttachedView() == False:
00028 raise ValueError("Alignment is missing an attached chain view.")
00029 chain = sequence.GetAttachedView()
00030 residues = chain.residues
00031
00032 j = 1
00033 for s in sequence:
00034 if s != '-':
00035 break
00036 j += 1;
00037 l = sequence[j-1]
00038 i = 0
00039
00040 for s in sequence[j:]:
00041 if s != '-':
00042 i += 1
00043 r1 = residues[i-1]
00044 r2 = residues[i]
00045 if r1.one_letter_code=='?' or r2.one_letter_code=='?':
00046 continue
00047 if l != '-':
00048 if not mol.InSequence(r1.handle, r2.handle):
00049 LogWarning('%s and %s are not connected by peptide bond' % (str(r1), str(r2)))
00050 return False
00051 else:
00052 if mol.InSequence(r1.handle, r2.handle):
00053 LogWarning('%s and %s are connected by peptide bond' % (str(r1), str(r2)))
00054 return False
00055 l = s
00056 return True
00057
00058 def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
00059 """
00060 Aligns the residues of chain to the SEQRES sequence, inserting gaps where
00061 needed. The function uses the connectivity of the protein backbone to find
00062 consecutive peptide fragments. These fragments are then aligned to the SEQRES
00063 sequence.
00064
00065 All the non-ligand, peptide-linking residues of the chain must be listed in
00066 SEQRES. If there are any additional residues in the chain, the function
00067 raises a ValueError.
00068
00069 If 'try_resnum_first' is set, building the alignment following residue numbers
00070 is tried first.
00071
00072 If 'validate' is set (default), the alignment is checked using
00073 :func:`~ost.seq.alg.ValidateSEQRESAlignment`.
00074
00075 :param chain: Source of the sequence
00076 :type chain: :class:`~ost.mol.ChainHandle`
00077 :param seqres: SEQRES sequence
00078 :type seqres: :class:`str`
00079 :param try_resnum_first: Try to align by residue number
00080 :type try_resnum_first: :class:`bool`
00081 :param validate: Validate alignment by
00082 :func:`~ost.seq.alg.ValidateSEQRESAlignment`
00083 :type validate: :class:`bool`
00084
00085 :returns: The alignment of the residues in the chain and the SEQRES entries.
00086 :rtype: :class:`~ost.seq.AlignmentHandle`
00087 """
00088
00089 def IsEqual(olc1, olc2):
00090 return olc1 in ('X', '?') or olc2 in ('X', '?') or olc1 == olc2
00091
00092 from ost import seq
00093 from ost import mol
00094 from ost import LogWarning
00095 view=chain
00096 residues=view.residues
00097 if len(residues)==0:
00098 return seq.CreateAlignment()
00099 if try_resnum_first:
00100 aln_seq = seq.CreateSequence('atoms', '-'*len(seqres))
00101 for r1 in residues:
00102 if r1.number.num <= len(seqres) and r1.number.num > 0:
00103 if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code):
00104 aln_seq[r1.number.num - 1] = r1.one_letter_code
00105 else:
00106 LogWarning('Sequence mismatch: chain has "' + r1.one_letter_code +
00107 '", while SEQRES is "' + seqres[r1.number.num - 1] +
00108 '" at the corresponding position.')
00109 try_resnum_first = False
00110 break
00111 if not try_resnum_first:
00112 fragments=[residues[0].one_letter_code]
00113 for r1, r2 in zip(residues[:-1], residues[1:]):
00114 if not mol.InSequence(r1.handle, r2.handle):
00115 fragments.append('')
00116 fragments[-1]+=r2.one_letter_code
00117 ss=str(seqres)
00118 pos=0
00119 aln_seq=''
00120 for frag in fragments:
00121 new_pos=ss.find(frag, pos)
00122 if new_pos==-1:
00123 raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
00124 aln_seq+='-'*(new_pos-pos)+frag
00125 pos=new_pos+len(frag)
00126 aln_seq = seq.CreateSequence('atoms',
00127 aln_seq+('-'*(len(seqres)-len(aln_seq))))
00128 alignment = seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
00129 aln_seq)
00130 if validate and not ValidateSEQRESAlignment(alignment, view):
00131 raise ValueError("SEQRES cannot be aligned with its corresponding chain.")
00132 return alignment
00133
00134
00135 def AlignmentFromChainView(chain, handle_seq_name='handle',
00136 view_seq_name='view'):
00137 """
00138 Creates and returns the sequence alignment of the given chain view to the
00139 chain handle. The alignment contains two sequences, the first containing all
00140 non-ligand peptide-linking residues, the second containing all non-ligand
00141 peptide-linking residues that are part of the view.
00142
00143 :param chain: A valid chain
00144 :type chain: :class:`~ost.mol.ChainView`
00145
00146 :param handle_seq_name: Name of the handle sequence in the output alignment
00147 :param view_seq_name: Name of the view sequence in the output alignment
00148 :returns: The alignment
00149 :rtype: :class:`~ost.seq.AlignmentHandle`
00150
00151 """
00152 from ost import seq
00153 v0=chain.handle.Select('ligand=false and peptide=true')
00154 v1=chain.Select('ligand=false and peptide=true')
00155 s0=seq.CreateSequence(handle_seq_name, '')
00156 s1=seq.CreateSequence(view_seq_name, '')
00157 s0.AttachView(v0)
00158 s1.AttachView(v1)
00159 res0=v0.residues
00160 res1=v1.residues
00161 idx0, idx1=(0, 0)
00162 while idx0<len(res0):
00163 s0.Append(res0[idx0].one_letter_code)
00164 if idx1<len(res1) and res1[idx1].handle==res0[idx0].handle:
00165 s1.Append(res1[idx1].one_letter_code)
00166 idx1+=1
00167 else:
00168 s1.Append('-')
00169 idx0+=1
00170 return seq.CreateAlignment(s0, s1)
00171
00172 def PredictContacts(ali):
00173 """
00174 Predicts contacts from a multiple sequence alignment using a combination
00175 of Mutual Information (*MI*) and the Contact Substitution Score (*CoEvoSc*).
00176 MI is calculated with the APC and small number corrections as well as with a
00177 transformation into Z-scores. The *CoEvoSc* is calculated using the default
00178 PairSubstWeightMatrix (see seq.alg.LoadDefaultPairSubstWeightMatrix).
00179 The final score for a pair of columns *(i,j)* of **ali** is obtained from:
00180
00181 Sc(i,j)=MI(i,j)exp(CoEvoSc(i,j)) if *(i,j)* >=0
00182
00183 Sc(i,j)=MI(i,j)exp(1-CoEvoSc(i,j)) if *(i,j)* <0
00184
00185 :param ali: The multiple sequence alignment
00186 :type ali: :class:`~ost.seq.AlignmentHandle`
00187 """
00188 import math
00189 from ost import seq
00190 if not type(ali)==type(seq.AlignmentHandle()):
00191 print "Parameter should be an AlignmentHandle"
00192 return
00193 mi=CalculateMutualInformation(ali)
00194 CoEvoSc=CalculateContactSubstitutionScore(ali)
00195 ncol=ali.GetLength()
00196 for i in range(ncol):
00197 for j in range(ncol):
00198 if mi.matrix[i][j]>=0:
00199 mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(CoEvoSc.matrix[i][j]))
00200 else:
00201 mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(1.-CoEvoSc.matrix[i][j]))
00202 mi.RefreshSortedIndices()
00203 return mi
00204
00205 def CalculateContactProbability(cpred_res,method):
00206 """
00207 Calculate the probability of a predicted contact to be correct.
00208 This simply transforms the score associated with a prediction into a probability.
00209
00210 :param cpred_res: A contact prediction result
00211 :param method: The method which was used for contact prediction. Should be one
00212 of "MI", "MIp", "MIpz", "cevoMI", "cevo"
00213
00214 :type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult`
00215 :type method: :class:`str`
00216 """
00217 import math
00218 def _growth_function(x,K,x0,B,nu,Q):
00219 p=K/(1+Q*math.exp(-B*(x-x0)))**(1/nu)
00220 p=min(1,max(0,p))
00221 return p
00222
00223 def _decay_function(x,A,k):
00224 p=A*math.exp(-k*x)
00225 p=min(1,max(0,p))
00226 return p
00227
00228 prob_params={}
00229 prob_params["MI"]=[0.05858455345122933, 0.8930350957023122]
00230 prob_params["MIp"]=[0.10019621004607637, 0.9065429261332942]
00231 prob_params["MIpz"]=[0.7368147563063437, 4.638820470770171, 0.6383539191372934, 1.1627300209863376, 19.63060874042289]
00232 prob_params["cevoMI"]=[0.779979757231944, 1.642357937131157, 0.3267847173036033, 0.3742848849873358, 5.1816922372446]
00233 prob_params["cevo"]=[0.7564532665623617, 0.5947472274271304, 3.8548775389879166, 0.46203017320927053, 1.6198602780123705]
00234
00235 if not method in prob_params:
00236 raise ValueError("method should be one of MI, MIp, MIpz, cevoMI, cevo")
00237 params=prob_params[method]
00238 cpred_res.RefreshSortedIndices()
00239 nres=len(cpred_res.matrix)
00240 probabilities=[[0 for i in range(nres)] for j in range(nres)]
00241
00242 if len(params)==2:func=_decay_function
00243 else:func=_growth_function
00244
00245 nres=float(nres)
00246 if len(params)==2:
00247 for k,(i,j) in enumerate(cpred_res.sorted_indices):
00248 p=_decay_function(math.log10(0.01+k/nres),*params)
00249 probabilities[i][j]=p
00250 probabilities[j][i]=p
00251 else:
00252 for k,(i,j) in enumerate(cpred_res.sorted_indices):
00253 p=_growth_function(cpred_res.GetScore(i,j),*params)
00254 probabilities[i][j]=p
00255 probabilities[j][i]=p
00256 cpred_res.probabilities=probabilities
00257 return cpred_res
00258
00259
00260
00261