OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
__init__.py
Go to the documentation of this file.
1 from _ost_seq_alg import *
2 from ost.seq.alg.mat import *
3 
4 def ValidateSEQRESAlignment(aln, chain=None):
5  """
6  Checks a sequence aligned to a SEQRES sequence to be free of strand breaks.
7  Residues divided by gaps are not considered as breakage but may also not be
8  connected.
9 
10  :param aln: Alignment
11  :type aln: :class:`~ost.seq.AlignmentHandle`
12  :param chain: Source of the sequence
13  :type chain: :class:`~ost.mol.ChainHandle`
14 
15  :returns: True if all residues (beside gapped ones) are connected, False
16  otherwise.
17  """
18  from ost import LogWarning
19  from ost import seq
20  from ost import mol
21  if aln.GetCount() != 2:
22  raise ValueError('Alignment contains more than 2 sequences!')
23  sequence = aln.GetSequence(1)
24  if len(sequence) == 0:
25  return True
26  if chain == None:
27  if sequence.HasAttachedView() == False:
28  raise ValueError("Alignment is missing an attached chain view.")
29  chain = sequence.GetAttachedView()
30  residues = chain.residues
31  # eat up all beginning gaps
32  j = 1
33  for s in sequence:
34  if s != '-':
35  break
36  j += 1;
37  l = sequence[j-1]
38  i = 0
39  # run over sequence & alignment
40  for s in sequence[j:]:
41  if s != '-':
42  i += 1
43  r1 = residues[i-1]
44  r2 = residues[i]
45  if r1.one_letter_code=='?' or r2.one_letter_code=='?':
46  continue
47  if l != '-':
48  if not mol.InSequence(r1.handle, r2.handle):
49  LogWarning('%s and %s are not connected by peptide bond' % (str(r1), str(r2)))
50  return False
51  else:
52  if mol.InSequence(r1.handle, r2.handle):
53  LogWarning('%s and %s are connected by peptide bond' % (str(r1), str(r2)))
54  return False
55  l = s
56  return True
57 
58 def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
59  """
60  Aligns the residues of chain to the SEQRES sequence, inserting gaps where
61  needed. The function uses the connectivity of the protein backbone to find
62  consecutive peptide fragments. These fragments are then aligned to the SEQRES
63  sequence.
64 
65  All the non-ligand, peptide-linking residues of the chain must be listed in
66  SEQRES. If there are any additional residues in the chain, the function
67  raises a ValueError.
68 
69  If 'try_resnum_first' is set, building the alignment following residue numbers
70  is tried first.
71 
72  If 'validate' is set (default), the alignment is checked using
73  :func:`~ost.seq.alg.ValidateSEQRESAlignment`.
74 
75  :param chain: Source of the sequence
76  :type chain: :class:`~ost.mol.ChainHandle`
77  :param seqres: SEQRES sequence
78  :type seqres: :class:`str`
79  :param try_resnum_first: Try to align by residue number
80  :type try_resnum_first: :class:`bool`
81  :param validate: Validate alignment by
82  :func:`~ost.seq.alg.ValidateSEQRESAlignment`
83  :type validate: :class:`bool`
84 
85  :returns: The alignment of the residues in the chain and the SEQRES entries.
86  :rtype: :class:`~ost.seq.AlignmentHandle`
87  """
88 
89  def IsEqual(olc1, olc2):
90  return olc1 in ('X', '?') or olc2 in ('X', '?') or olc1 == olc2
91 
92  from ost import seq
93  from ost import mol
94  from ost import LogWarning
95  view=chain
96  residues=view.residues
97  if len(residues)==0:
98  return seq.CreateAlignment()
99  if try_resnum_first:
100  aln_seq = seq.CreateSequence('atoms', '-'*len(seqres))
101  for r1 in residues:
102  if r1.number.num <= len(seqres) and r1.number.num > 0:
103  if IsEqual(seqres[r1.number.num - 1], r1.one_letter_code):
104  aln_seq[r1.number.num - 1] = r1.one_letter_code
105  else:
106  LogWarning('Sequence mismatch: chain has "' + r1.one_letter_code +
107  '", while SEQRES is "' + seqres[r1.number.num - 1] +
108  '" at the corresponding position.')
109  try_resnum_first = False
110  break
111  if not try_resnum_first:
112  fragments=[residues[0].one_letter_code]
113  for r1, r2 in zip(residues[:-1], residues[1:]):
114  if not mol.InSequence(r1.handle, r2.handle):
115  fragments.append('')
116  fragments[-1]+=r2.one_letter_code
117  ss=str(seqres)
118  pos=0
119  aln_seq=''
120  for frag in fragments:
121  new_pos=ss.find(frag, pos)
122  if new_pos==-1:
123  raise ValueError('"%s" is not a substring of "%s"' % (frag, ss))
124  aln_seq+='-'*(new_pos-pos)+frag
125  pos=new_pos+len(frag)
126  aln_seq = seq.CreateSequence('atoms',
127  aln_seq+('-'*(len(seqres)-len(aln_seq))))
128  alignment = seq.CreateAlignment(seq.CreateSequence('SEQRES', str(seqres)),
129  aln_seq)
130  if validate and not ValidateSEQRESAlignment(alignment, view):
131  raise ValueError("SEQRES cannot be aligned with its corresponding chain.")
132  return alignment
133 
134 
135 def AlignmentFromChainView(chain, handle_seq_name='handle',
136  view_seq_name='view'):
137  """
138  Creates and returns the sequence alignment of the given chain view to the
139  chain handle. The alignment contains two sequences, the first containing all
140  non-ligand peptide-linking residues, the second containing all non-ligand
141  peptide-linking residues that are part of the view.
142 
143  :param chain: A valid chain
144  :type chain: :class:`~ost.mol.ChainView`
145 
146  :param handle_seq_name: Name of the handle sequence in the output alignment
147  :param view_seq_name: Name of the view sequence in the output alignment
148  :returns: The alignment
149  :rtype: :class:`~ost.seq.AlignmentHandle`
150 
151  """
152  from ost import seq
153  v0=chain.handle.Select('ligand=false and peptide=true')
154  v1=chain.Select('ligand=false and peptide=true')
155  s0=seq.CreateSequence(handle_seq_name, '')
156  s1=seq.CreateSequence(view_seq_name, '')
157  s0.AttachView(v0)
158  s1.AttachView(v1)
159  res0=v0.residues
160  res1=v1.residues
161  idx0, idx1=(0, 0)
162  while idx0<len(res0):
163  s0.Append(res0[idx0].one_letter_code)
164  if idx1<len(res1) and res1[idx1].handle==res0[idx0].handle:
165  s1.Append(res1[idx1].one_letter_code)
166  idx1+=1
167  else:
168  s1.Append('-')
169  idx0+=1
170  return seq.CreateAlignment(s0, s1)