OpenStructure: stage/lib64/python2.7/site-packages/ost/seq/alg/__init_

00001 from _ost_seq_alg import *
00002 from ost.seq.alg.mat import *
00003 
00004 def ValidateSEQRESAlignment(aln, chain=None):
00005   """
00006   Checks a sequence aligned to a SEQRES sequence to be free of strand breaks.
00007   Residues divided by gaps are not considered as breakage but may also not be
00008   connected.
00009 
00010   :param aln: Alignment
00011   :type aln: :class:`~ost.seq.AlignmentHandle`
00012   :param chain: Source of the sequence
00013   :type chain: :class:`~ost.mol.ChainHandle`
00014 
00015   :returns: True if all residues (beside gapped ones) are connected, False
00016             otherwise.
00017   """
00018   from ost import LogWarning
00019   from ost import seq
00020   from ost import mol
00021   if aln.GetCount() != 2:
00022     raise ValueError('Alignment contains more than 2 sequences!')
00023   sequence = aln.GetSequence(1)
00024   if len(sequence) == 0:
00025     return True
00026   if chain == None:
00027     if sequence.HasAttachedView() == False:
00028       raise ValueError("Alignment is missing an attached chain view.")
00029     chain = sequence.GetAttachedView()
00030   residues = chain.residues
00031   # eat up all beginning gaps
00032   j = 1
00033   for s in sequence:
00034     if s != '-':
00035       break
00036     j += 1;
00037   l = sequence[j-1]
00038   i = 0
00039   # run over sequence & alignment
00040   for s in sequence[j:]:
00041     if s != '-':
00042       i += 1
00043       r1 = residues[i-1]
00044       r2 = residues[i]
00045       if r1.one_letter_code=='?' or r2.one_letter_code=='?':
00046         continue
00047       if l != '-':
00048         if not mol.InSequence(r1.handle, r2.handle):
00049           LogWarning('%s and %s are not connected by peptide bond' % (str(r1), str(r2)))
00050           return False
00051       else:
00052         if mol.InSequence(r1.handle, r2.handle):
00053           LogWarning('%s and %s are connected by peptide bond' % (str(r1), str(r2)))
00054           return False
00055     l = s
00056   return True
00057 
00058 def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
00059   """
00060   Aligns the residues of chain to the SEQRES sequence, inserting gaps where 
00061   needed. The function uses the connectivity of the protein backbone to find 
00062   consecutive peptide fragments. These fragments are then aligned to the SEQRES 
00063   sequence.
00064   
00065   All the non-ligand, peptide-linking residues of the chain must be listed in 
00066   SEQRES. If there are any additional residues in the chain, the function
00067   raises a ValueError.
00068 
00069   If 'try_resnum_first' is set, building the alignment following residue numbers
00070   is tried first.
00071 
00072   If 'validate' is set (default), the alignment is checked using
00073   :func:`~ost.seq.alg.ValidateSEQRESAlignment`.
00074 
00075   :param chain: Source of the sequence
00076   :type chain: :class:`~ost.mol.ChainHandle`
00077   :param seqres: SEQRES sequence
00078   :type seqres: :class:`str`
00079   :param try_resnum_first: Try to align by residue number
00080   :type try_resnum_first: :class:`bool`
00081   :param validate: Validate alignment by
00082                    :func:`~ost.seq.alg.ValidateSEQRESAlignment`
00083   :type validate: :class:`bool`
00084 
00085   :returns: The alignment of the residues in the chain and the SEQRES entries.
00086   :rtype: :class:`~ost.seq.AlignmentHandle`
00087   """
00088 
00089   def IsEqual(olc1, olc2):
00090     return olc1 in ('X', '?') or olc2 in ('X', '?') or olc1 == olc2
00091 
00092   from ost import seq
00093   from ost import mol
00094   from ost import LogWarning
00095   view=chain
00096   residues=view.residues
00097   if len(residues)==0:
00098     return seq.CreateAlignment()
00099   if try_resnum_first:
00100     aln_seq = seq.CreateSequence('atoms', '-'*len(seqres))
00101     for r1 in residues:
00102       if r1.number.num <= len(seqres) and r1.number.num > 0:
00103         if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code):
00104           aln_seq[r1.number.num - 1] = r1.one_letter_code
00105         else:
00106           LogWarning('Sequence mismatch: chain has "' + r1.one_letter_code +
00107                      '", while SEQRES is "' + seqres[r1.number.num - 1] +
00108                      '" at the corresponding position.')
00109           try_resnum_first = False
00110           break
00111   if not try_resnum_first:
00112     fragments=[residues[0].one_letter_code]
00113     for r1, r2 in zip(residues[:-1], residues[1:]):
00114       if not mol.InSequence(r1.handle, r2.handle):
00115         fragments.append('')
00116       fragments[-1]+=r2.one_letter_code
00117     ss=str(seqres)
00118     pos=0
00119     aln_seq=''
00120     for frag in fragments:
00121       new_pos=ss.find(frag, pos)
00122       if new_pos==-1:
00123         raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
00124       aln_seq+='-'*(new_pos-pos)+frag
00125       pos=new_pos+len(frag)
00126     aln_seq = seq.CreateSequence('atoms',
00127                                  aln_seq+('-'*(len(seqres)-len(aln_seq))))
00128   alignment = seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)), 
00129                                   aln_seq)
00130   if validate and not ValidateSEQRESAlignment(alignment, view):
00131     raise ValueError("SEQRES cannot be aligned with its corresponding chain.")
00132   return alignment
00133 
00134 
00135 def AlignmentFromChainView(chain, handle_seq_name='handle', 
00136                            view_seq_name='view'):
00137   """
00138   Creates and returns the sequence alignment of the given chain view to the 
00139   chain handle. The alignment contains two sequences, the first containing all 
00140   non-ligand peptide-linking residues, the second containing all non-ligand 
00141   peptide-linking residues that are part of the view. 
00142   
00143   :param chain: A valid chain
00144   :type chain: :class:`~ost.mol.ChainView`
00145   
00146   :param handle_seq_name: Name of the handle sequence in the output alignment
00147   :param view_seq_name: Name of the view sequence in the output alignment
00148   :returns: The alignment
00149   :rtype: :class:`~ost.seq.AlignmentHandle`
00150   
00151   """
00152   from ost import seq
00153   v0=chain.handle.Select('ligand=false and peptide=true')
00154   v1=chain.Select('ligand=false and peptide=true')
00155   s0=seq.CreateSequence(handle_seq_name, '')
00156   s1=seq.CreateSequence(view_seq_name, '')
00157   s0.AttachView(v0)
00158   s1.AttachView(v1)
00159   res0=v0.residues
00160   res1=v1.residues
00161   idx0, idx1=(0, 0)
00162   while idx0<len(res0):
00163     s0.Append(res0[idx0].one_letter_code)
00164     if idx1<len(res1) and res1[idx1].handle==res0[idx0].handle:
00165       s1.Append(res1[idx1].one_letter_code)
00166       idx1+=1
00167     else:
00168       s1.Append('-')
00169     idx0+=1
00170   return seq.CreateAlignment(s0, s1)
00171 
00172 def PredictContacts(ali):
00173   """
00174   Predicts contacts from a multiple sequence alignment using a combination
00175   of Mutual Information (*MI*) and the Contact Substitution Score (*CoEvoSc*).
00176   MI is calculated with the APC and small number corrections as well as with a 
00177   transformation into Z-scores. The *CoEvoSc* is calculated using the default 
00178   PairSubstWeightMatrix (see seq.alg.LoadDefaultPairSubstWeightMatrix).
00179   The final score for a pair of columns *(i,j)* of **ali** is obtained from:
00180   
00181   Sc(i,j)=MI(i,j)exp(CoEvoSc(i,j))      if *(i,j)* >=0
00182   
00183   Sc(i,j)=MI(i,j)exp(1-CoEvoSc(i,j))    if *(i,j)* <0
00184 
00185   :param ali: The multiple sequence alignment
00186   :type ali: :class:`~ost.seq.AlignmentHandle`
00187   """
00188   import math
00189   from ost import seq
00190   if not type(ali)==type(seq.AlignmentHandle()):
00191     print "Parameter should be an AlignmentHandle"
00192     return
00193   mi=CalculateMutualInformation(ali)
00194   CoEvoSc=CalculateContactSubstitutionScore(ali)
00195   ncol=ali.GetLength()
00196   for i in range(ncol):
00197     for j in range(ncol):
00198       if mi.matrix[i][j]>=0:
00199         mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(CoEvoSc.matrix[i][j]))
00200       else:
00201         mi.matrix[i][j]=mi.matrix[i][j]*(math.exp(1.-CoEvoSc.matrix[i][j]))
00202   mi.RefreshSortedIndices()
00203   return mi
00204 
00205 def CalculateContactProbability(cpred_res,method):
00206   """
00207   Calculate the probability of a predicted contact to be correct.
00208   This simply transforms the score associated with a prediction into a probability.
00209 
00210   :param cpred_res: A contact prediction result
00211   :param method: The method which was used for contact prediction. Should be one
00212    of "MI", "MIp", "MIpz", "cevoMI", "cevo"
00213 
00214   :type cpred_res: :class:`ost.seq.alg.ContactPredictionScoreResult`
00215   :type method: :class:`str`
00216   """
00217   import math
00218   def _growth_function(x,K,x0,B,nu,Q):
00219     p=K/(1+Q*math.exp(-B*(x-x0)))**(1/nu)
00220     p=min(1,max(0,p))
00221     return p
00222   
00223   def _decay_function(x,A,k):
00224     p=A*math.exp(-k*x)
00225     p=min(1,max(0,p))
00226     return p
00227 
00228   prob_params={}
00229   prob_params["MI"]=[0.05858455345122933, 0.8930350957023122]
00230   prob_params["MIp"]=[0.10019621004607637, 0.9065429261332942]
00231   prob_params["MIpz"]=[0.7368147563063437, 4.638820470770171, 0.6383539191372934, 1.1627300209863376, 19.63060874042289]
00232   prob_params["cevoMI"]=[0.779979757231944, 1.642357937131157, 0.3267847173036033, 0.3742848849873358, 5.1816922372446]
00233   prob_params["cevo"]=[0.7564532665623617, 0.5947472274271304, 3.8548775389879166, 0.46203017320927053, 1.6198602780123705]
00234 
00235   if not method in prob_params:
00236     raise ValueError("method should be one of MI, MIp, MIpz, cevoMI, cevo")
00237   params=prob_params[method]
00238   cpred_res.RefreshSortedIndices()
00239   nres=len(cpred_res.matrix)
00240   probabilities=[[0 for i in range(nres)] for j in range(nres)]
00241 
00242   if len(params)==2:func=_decay_function
00243   else:func=_growth_function
00244 
00245   nres=float(nres)
00246   if len(params)==2:
00247     for k,(i,j) in enumerate(cpred_res.sorted_indices):
00248       p=_decay_function(math.log10(0.01+k/nres),*params)
00249       probabilities[i][j]=p
00250       probabilities[j][i]=p
00251   else:
00252     for k,(i,j) in enumerate(cpred_res.sorted_indices):
00253       p=_growth_function(cpred_res.GetScore(i,j),*params)
00254       probabilities[i][j]=p
00255       probabilities[j][i]=p
00256   cpred_res.probabilities=probabilities
00257   return cpred_res
00258 
00259 
00260 
00261
stage/lib64/python2.7/site-packages/ost/seq/alg/__init__.py

stage/lib64/python2.7/site-packages/ost/seq/alg/init.py