OpenStructure
stereochemistry.py
Go to the documentation of this file.
1 """
2 .. note::
3 
4  This is a new implementation of the stereochemistry checks, introduced in
5  OpenStructure 2.4, with support for nucleotides. The
6  :doc:`previous stereochemistry checks <stereochemistry_deprecated>` that come
7  with `Mariani et al. <https://dx.doi.org/10.1093/bioinformatics/btt473>`_ are
8  considered deprecated.
9 """
10 
11 import os
12 import json
13 import datetime
14 
15 import numpy as np
16 
17 import ost
18 from ost import geom
19 from ost import mol
20 
21 
22 def _AtomToQualifiedName(a):
23  """ Returns string to uniquely identify atom
24 
25  format: <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
26  """
27  r = a.GetResidue()
28  ch = r.GetChain()
29  num = r.number.num
30  ins_code = r.number.ins_code.strip("\u0000")
31  return f"{ch.name}.{r.number.num}.{ins_code}.{a.name}"
32 
33 
34 def _PotentialDisulfid(a_one, a_two):
35  """ Returns whether two atoms can potentially build a disulfid bond
36 
37  Assumes that they're from two distinct residues
38  """
39  if a_one.GetName() == "SG" and a_two.GetName() == "SG":
40  if a_one.GetResidue().GetName() == "CYS":
41  if a_two.GetResidue().GetName() == "CYS":
42  return True
43  return False
44 
45 
46 def _GetAngles(bonds):
47  """ Returns list of angles based on bonds
48 
49  Returns list of tuples, each tuple has three atom handles
50  representing angles
51  """
52  angles = list()
53  done = set()
54  for bond in bonds:
55  h1 = bond.first.GetHashCode()
56  h2 = bond.second.GetHashCode()
57  for a in bond.first.GetBondPartners():
58  h0 = a.GetHashCode()
59  if h0 != h2:
60  if ((h0, h1, h2)) not in done and (h2, h1, h0) not in done:
61  angles.append((a, bond.first, bond.second))
62  done.add((h0, h1, h2))
63  for a in bond.second.GetBondPartners():
64  h3 = a.GetHashCode()
65  if h3 != h1:
66  if ((h1, h2, h3)) not in done and (h3, h2, h1) not in done:
67  angles.append((bond.first, bond.second, a))
68  done.add((h1, h2, h3))
69  return angles
70 
71 
72 def _GetResidueType(atoms):
73  """ Identifies type in StereoLinkData
74 
75  :param atoms: Atoms that define a bond or angle
76  :type atoms: :class:`list` of :class:`AtomHandle`
77  :returns: :class:`str` with which the respective parameters can be
78  accessed in default stereo link data, None if no match is found
79  """
80  residues = [a.GetResidue().handle for a in atoms]
81  chem_types = list(set([str(r.GetChemType()) for r in residues]))
82 
83  if len(chem_types) == 1 and chem_types[0] == 'N':
84  return "NA"
85  elif len(chem_types) == 1 and chem_types[0] == 'A':
86  # in both cases, bond or angle, there should be exactly two residues
87  # involved
88  tmp = list()
89  r_hashes = set()
90  for r in residues:
91  h = r.GetHashCode()
92  if h not in r_hashes:
93  r_hashes.add(h)
94  tmp.append(r)
95  residues = tmp
96  if len(residues) != 2:
97  return None
98 
99  # need to be sorted
100  if residues[0].GetNumber() > residues[1].GetNumber():
101  r0 = residues[1]
102  r1 = residues[0]
103  else:
104  r0 = residues[0]
105  r1 = residues[1]
106 
107  if r1.GetName() == "GLY":
108  return "GLY"
109  elif r1.GetName() == "PRO":
110  a = r0.FindAtom("CA")
111  b = r0.FindAtom("C")
112  c = r1.FindAtom("N")
113  d = r1.FindAtom("CA")
114  if a.IsValid() and b.IsValid() and c.IsValid() and d.IsValid():
115  omega = geom.DihedralAngle(a.GetPos(), b.GetPos(),
116  c.GetPos(), d.GetPos())
117  if abs(omega) < 1.57:
118  return "PRO_CIS"
119  else:
120  return "PRO_TRANS"
121  else:
122  return "PEPTIDE"
123 
124  return None
125 
126 
127 def _ParseBondData(doc):
128  """ Parse stereochemistry data for bonds
129 
130  That is expected distances and standard deviations from a
131  :class:`gemmi.Document`. Concatenates results form all loops with tags:
132  _chem_comp_bond.comp_id, _chem_comp_bond.atom_id_1,
133  _chem_comp_bond.atom_id_2, _chem_comp_bond.value_dist,
134  _chem_comp_bond.value_dist_esd
135 
136  :param doc: Gemmi doc representing cif file opened with
137  gemmi.cif.read_file(filepath)
138  :type doc: :class:`gemmi.Document`
139  :returns: :class:`dict` with one key per compound, the respective value
140  is again a dict with key f"{at_1}_{at_2}" and value
141  [dist, dist_std].
142  """
143  data = dict()
144  for block in doc:
145  comp_id = block.find_values("_chem_comp_bond.comp_id")
146  at_1 = block.find_values("_chem_comp_bond.atom_id_1")
147  at_2 = block.find_values("_chem_comp_bond.atom_id_2")
148  dist = block.find_values("_chem_comp_bond.value_dist")
149  dist_std = block.find_values("_chem_comp_bond.value_dist_esd")
150  if None not in [comp_id, at_1, at_2, dist, dist_std]:
151  for a, b, c, d, e in zip(comp_id, at_1, at_2, dist, dist_std):
152  if a not in data:
153  data[a] = dict()
154  key = '_'.join([b.strip('\"'), c.strip('\"')])
155  data[a][key] = [float(d), float(e)]
156  return data
157 
158 
159 def _ParseAngleData(doc):
160  """ Parse stereochemistry data for angles
161 
162  That is expected distances and standard deviations from a
163  :class:`gemmi.Document`. Concatenates results form all loops with tags:
164  _chem_comp_angle.comp_id, _chem_comp_angle.atom_id_1,
165  _chem_comp_angle.atom_id_2, _chem_comp_angle.atom_id_2,
166  _chem_comp_angle.value_angle, _chem_comp_angle.value_angle_esd
167 
168  :param doc: Gemmi doc representing cif file opened with
169  gemmi.cif.read_file(filepath)
170  :type doc: :class:`gemmi.Document`
171  :returns: :class:`dict` with one key per compound, the respective value
172  is again a dict with key f"{at_1}_{at_2}_{at_3}" and value
173  [angle, angle_std].
174  """
175  data = dict()
176  for block in doc:
177  comp_id = block.find_values("_chem_comp_angle.comp_id")
178  at_1 = block.find_values("_chem_comp_angle.atom_id_1")
179  at_2 = block.find_values("_chem_comp_angle.atom_id_2")
180  at_3 = block.find_values("_chem_comp_angle.atom_id_3")
181  angle = block.find_values("_chem_comp_angle.value_angle")
182  angle_std = block.find_values("_chem_comp_angle.value_angle_esd")
183  if None not in [comp_id, at_1, at_2, at_3, angle, angle_std]:
184  for a, b, c, d, e, f in zip(comp_id, at_1, at_2, at_3, angle,
185  angle_std):
186  if a not in data:
187  data[a] = dict()
188  key = '_'.join([b.strip('\"'), c.strip('\"'), d.strip('\"')])
189  data[a][key] = [float(e), float(f)]
190  return data
191 
192 
193 def StereoDataFromMON_LIB(mon_lib_path, compounds=None):
194  """ Parses stereochemistry parameters from CCP4 MON_LIB
195 
196  CCP4 `MON_LIB <https://www.ccp4.ac.uk/html/mon_lib.html>`_ contains
197  data on ideal bond lengths/angles for compounds.
198 
199  Original data (several updates in the meantime) come from:
200 
201  * Amino acid bond lengths and angles: Engh and Huber, Acta Cryst.
202  A47, 392-400 (1991).
203  * Purine and pyrimidine bond lengths and angles: O. Kennard & R. Taylor
204  (1982), J. Am. Soc. Chem. vol. 104, pp. 3209-3212.
205  * Sugar-phosphate backbone bond lengths and bond angles: W. Saenger’s
206  Principles of Nucleic Acid Structure (1983), Springer-Verlag, pp. 70,86.
207 
208  This function adds a dependency to the
209  `gemmi <https://github.com/project-gemmi/gemmi/>`_ library to read cif
210  files.
211 
212  :param mon_lib_path: Path to CCP4 MON_LIB
213  :type mon_lib_path: :class:`str`
214  :param compounds: Compounds to parse - parses proteinogenic amino acids
215  and nucleotides if not given.
216  :type compounds: :class:`list`
217  :returns: :class:`dict` with stereochemistry parameters
218  """
219  if compounds is None:
220  compounds = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY',
221  'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'MSE', 'PHE', 'PRO',
222  'SER', 'THR', 'TRP', 'TYR', 'VAL', 'DA', 'A', 'DC', 'C',
223  'DG', 'G', 'DU', 'U', 'DT', 'DI', 'I']
224 
225  cif_paths = list()
226  for c in compounds:
227  p = os.path.join(mon_lib_path, c[0].lower(), c + ".cif")
228  if not os.path.exists(p):
229  raise RuntimeError(f"Tried to find cif file for compound {c} "
230  f"in specified MON_LIB ({mon_lib_path})."
231  f"Expected file ({p}) does not exist.")
232  cif_paths.append(p)
233 
234  # hide import to avoid it as dependency for the whole module
235  from gemmi import cif
236  # construct return dict from first element and subsequently
237  # add the remainder
238  doc = cif.read_file(cif_paths[0])
239  data = {"bond_data": _ParseBondData(doc),
240  "angle_data": _ParseAngleData(doc)}
241  for cp in cif_paths[1:]:
242  doc = cif.read_file(cp)
243  bond_data = _ParseBondData(doc)
244  angle_data = _ParseAngleData(doc)
245  data["bond_data"].update(bond_data)
246  data["angle_data"].update(angle_data)
247 
248  # add license info
249  copying_str = f"This data has been derived from the CCP4 MON_LIB on "
250  copying_str += f"{datetime.datetime.now()}. MON_LIB is licensed under "
251  copying_str += f"GNU LESSER GENERAL PUBLIC LICENSE Version 3. Consult the "
252  copying_str += f"latest CCP4 for the full license text."
253  data["COPYING"] = copying_str
254 
255  return data
256 
257 
258 def GetBondParam(a1, a2, stereo_data = None, stereo_link_data = None):
259  """ Returns mean and standard deviation for bond
260 
261  :param a1: First atom that defines bond
262  :type a1: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
263  :param a2: Second atom that defines bond
264  :type a2: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
265  :param stereo_data: Stereochemistry data, use return value of
266  :func:`GetDefaultStereoData` if not given.
267  If you call this function repeatedly, you
268  really should provide *stereo_data*!
269  :type stereo_data: :class:`dict`
270  :param stereo_link_data: Stereochemistry data, use return value of
271  :func:`GetDefaultStereoLinkData` if not given.
272  If you call this function repeatedly, you
273  really should provide *stereo_link_data*!
274  :type stereo_link_data: :class:`dict`
275  :returns: :class:`tuple` with mean and standard deviation. Values are None
276  if respective bond is not found in *stereo_data*
277  """
278  if stereo_data is None:
279  stereo_data = GetDefaultStereoData()
280  if stereo_link_data is None:
281  stereo_link_data = GetDefaultStereoLinkData()
282 
283  residue_data = None
284  if a1.GetResidue().GetHashCode() == a2.GetResidue().GetHashCode():
285  # intra residue case
286  rname = a1.GetResidue().GetName()
287  if rname in stereo_data["bond_data"]:
288  residue_data = stereo_data["bond_data"][rname]
289  else:
290  # inter residue case
291  residue_type = _GetResidueType([a1, a2])
292  if residue_type is not None:
293  residue_data = stereo_link_data["bond_data"][residue_type]
294 
295  if residue_data is not None:
296  a1name = a1.GetName()
297  a2name = a2.GetName()
298  key = a1name + "_" + a2name
299  if key in residue_data:
300  return (residue_data[key][0], residue_data[key][1])
301  key = a2name + "_" + a1name
302  if key in residue_data:
303  return (residue_data[key][0], residue_data[key][1])
304 
305  return (None, None)
306 
307 
308 def GetAngleParam(a1, a2, a3, stereo_data = None, stereo_link_data = None):
309  """ Returns mean and standard deviation for angle
310 
311  :param a1: First atom that defines angle
312  :type a1: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
313  :param a2: Second atom that defines angle
314  :type a2: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
315  :param a3: Third atom that defines angle
316  :type a3: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
317  :param stereo_data: Stereochemistry data, use return value of
318  :func:`GetDefaultStereoData` if not given.
319  If you call this function repeatedly, you
320  really should provide *stereo_data*!
321  :type stereo_data: :class:`dict`
322  :param stereo_link_data: Stereochemistry data, use return value of
323  :func:`GetDefaultStereoLinkData` if not given.
324  If you call this function repeatedly, you
325  really should provide *stereo_link_data*!
326  :type stereo_link_data: :class:`dict`
327  :returns: :class:`tuple` with mean and standard deviation. Values are None
328  if respective angle is not found in *stereo_data*
329  """
330  if stereo_data is None:
331  stereo_data = GetDefaultStereoData()
332  if stereo_link_data is None:
333  stereo_link_data = GetDefaultStereoLinkData()
334  h1 = a1.GetResidue().handle.GetHashCode()
335  h2 = a2.GetResidue().handle.GetHashCode()
336  h3 = a3.GetResidue().handle.GetHashCode()
337  residue_data = None
338  if h1 == h2 and h2 == h3:
339  # intra residue case
340  rname = a1.GetResidue().GetName()
341  if rname in stereo_data["angle_data"]:
342  residue_data = stereo_data["angle_data"][rname]
343  else:
344  # inter residue case
345  residue_type = _GetResidueType([a1, a2, a3])
346  if residue_type in stereo_link_data["angle_data"]:
347  residue_data = stereo_link_data["angle_data"][residue_type]
348 
349  if residue_data is not None:
350  a1name = a1.GetName()
351  a2name = a2.GetName()
352  a3name = a3.GetName()
353  key = a1name + "_" + a2name + "_" + a3name
354  if key in residue_data:
355  return (residue_data[key][0], residue_data[key][1])
356  key = a3name + "_" + a2name + "_" + a1name
357  if key in residue_data:
358  return (residue_data[key][0], residue_data[key][1])
359  return (None, None)
360 
361 
362 class ClashInfo:
363  """ Object to hold info on clash
364 
365  Constructor arguments are available as attributes:
366 
367  * a1 (:class:`ost.mol.AtomHandle`)
368  * a2 (:class:`ost.mol.AtomHandle`)
369  * dist (:class:`float`)
370  * tolerated_dist (:class:`float`)
371  """
372  def __init__(self, a1, a2, dist, tolerated_dist):
373  self.a1a1 = a1
374  self.a2a2 = a2
375  self.distdist = dist
376  self.tolerated_disttolerated_dist = tolerated_dist
377 
378  def ToJSON(self, decimals = 3):
379  """ Return JSON serializable dict
380 
381  Atoms are represented by a string in format:
382  <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
383  """
384  return {"a1": _AtomToQualifiedName(self.a1a1),
385  "a2": _AtomToQualifiedName(self.a2a2),
386  "dist": round(self.distdist, decimals),
387  "tolerated_dist": round(self.tolerated_disttolerated_dist, decimals)}
388 
389 
391  """ Object to hold info on bond violation
392 
393  Constructor arguments are available as attributes:
394 
395  * a1 (:class:`ost.mol.AtomHandle`)
396  * a2 (:class:`ost.mol.AtomHandle`)
397  * length (:class:`float`)
398  * exp_length (:class:`float`)
399  * std (:class:`float`)
400  """
401  def __init__(self, a1, a2, length, exp_length, std):
402  self.a1a1 = a1
403  self.a2a2 = a2
404  self.lengthlength = length
405  self.exp_lengthexp_length = exp_length
406  self.stdstd = std
407 
408  def ToJSON(self, decimals = 3):
409  """ Return JSON serializable dict
410 
411  Atoms are represented by a string in format:
412  <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
413  """
414  return {"a1": _AtomToQualifiedName(self.a1a1),
415  "a2": _AtomToQualifiedName(self.a2a2),
416  "length": round(self.lengthlength, decimals),
417  "exp_length": round(self.exp_lengthexp_length, decimals),
418  "std": round(self.stdstd, decimals)}
419 
420 
422  """ Object to hold info on angle violation
423 
424  Constructor arguments are available as attributes:
425 
426  * a1 (:class:`ost.mol.AtomHandle`)
427  * a2 (:class:`ost.mol.AtomHandle`)
428  * a3 (:class:`ost.mol.AtomHandle`)
429  * angle (:class:`float`)
430  * exp_angle (:class:`float`)
431  * std (:class:`float`)
432  """
433  def __init__(self, a1, a2, a3, angle, exp_angle, std):
434  self.a1a1 = a1
435  self.a2a2 = a2
436  self.a3a3 = a3
437  self.angleangle = angle
438  self.exp_angleexp_angle = exp_angle
439  self.stdstd = std
440 
441  def ToJSON(self, decimals = 3):
442  """ Return JSON serializable dict
443 
444  Atoms are represented by a string in format:
445  <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
446  """
447  return {"a1": _AtomToQualifiedName(self.a1a1),
448  "a2": _AtomToQualifiedName(self.a2a2),
449  "a3": _AtomToQualifiedName(self.a3a3),
450  "angle": round(self.angleangle, decimals),
451  "exp_angle": round(self.exp_angleexp_angle, decimals),
452  "std": round(self.stdstd, decimals)}
453 
454 
455 def GetClashes(ent, vdw_radii = None, tolerance = 1.5, disulfid_dist = 2.03,
456  disulfid_tolerance = 1.0):
457  """ Identifies clashing atoms
458 
459  A clash between two non-bonded atoms is defined as their distance d being
460  below the sum of their vdw radii with some subtracted tolerance value.
461 
462  The default values are not very sensitive.
463 
464  :param ent: Entity for which you want to identify clashing atoms
465  :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
466  :param vdw_radii: Element based van der Waals radii. Only atoms of these
467  elements will be considered. If not given, default values
468  for all elements occuring in proteins/nucleotides are
469  used. Must be provided as :class:`dict`, where they key
470  are elements (capitalized) and value the respective radii
471  in Angstrom.
472  :type vdw_radii: :class:`dict`
473  :param tolerance: Tolerance value
474  :param disulfid_dist: Summed vdw radius that is used if two Sulfurs that can
475  potentially build a disulfid bond interact
476  :type disulfid_dist: :class:`float`
477  :param disulfid_tolerance: The respective tolerance
478  :type disulfid_dist: :class:`float`
479  :returns: A :class:`list` of :class:`ClashInfo`
480  """
481 
482  if vdw_radii is None:
483  vdw_radii = {"C": 1.70, "N": 1.55, "O": 1.52, "P": 1.80, "S": 1.80}
484 
485  for ele in vdw_radii.keys():
486  if ele.upper() != ele:
487  raise RuntimeError(f"Elements in vdw_radii must be upper case. "
488  f"Got {ele}")
489 
490  # it would be elegant to just do a selection by the ele property. However,
491  # thats case sensitive. So the element could be Cl but the vdw radii
492  # are all caps.
493  elements = set([ele.upper() for ele in vdw_radii.keys()])
494  for a in ent.atoms:
495  if a.GetElement().upper() in elements:
496  a.SetIntProp("clash_check", 1)
497  clash_ent = ent.Select("gaclash_check:0=1")
498 
499  max_radius = max(vdw_radii.values())
500  max_radius = max(max_radius, 0.5*disulfid_dist)
501  min_tolerance = min(tolerance, disulfid_tolerance)
502  radius = 2*max_radius-min_tolerance
503 
504  done = set()
505  return_list = list()
506  for a in clash_ent.atoms:
507  a_hash = a.handle.GetHashCode()
508  close_atoms = clash_ent.FindWithin(a.GetPos(), radius)
509  for ca in close_atoms:
510  ca_hash = ca.handle.GetHashCode()
511  if a_hash != ca_hash and not mol.BondExists(a.handle, ca.handle):
512  d = geom.Distance(a.GetPos(), ca.GetPos())
513  if _PotentialDisulfid(a, ca):
514  thresh = disulfid_dist - disulfid_tolerance
515  else:
516  thresh = vdw_radii[a.GetElement().upper()]
517  thresh += vdw_radii[ca.GetElement().upper()]
518  thresh -= tolerance
519  if d < thresh:
520  # check if already there, add if not
521  hash_pair = (min(a_hash, ca_hash), max(a_hash, ca_hash))
522  if hash_pair not in done:
523  done.add(hash_pair)
524  return_list.append(ClashInfo(a.handle, ca.handle, d,
525  thresh))
526  return return_list
527 
528 
529 def GetBadBonds(ent, stereo_data = None, stereo_link_data = None, tolerance=12):
530  """ Identify unrealistic bonds
531 
532  :param ent: Entity for which you want to identify unrealistic bonds
533  :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
534  :param stereo_data: Stereochemistry data, use return value of
535  :func:`GetDefaultStereoData` if not given.
536  :type stereo_data: :class:`dict`
537  :param stereo_link_data: Stereochemistry data, use return value of
538  :func:`GetDefaultStereoLinkData` if not given.
539  :type stereo_link_data: :class:`dict`
540  :param tolerance: Bonds that devaiate more than *tolerance* times standard
541  deviation from expected mean are considered bad
542  :type tolerance: :class:`int`
543  :returns: :class:`list` :class:`BondViolationInfo`
544 
545  """
546  if stereo_data is None:
547  stereo_data = GetDefaultStereoData()
548  if stereo_link_data is None:
549  stereo_link_data = GetDefaultStereoLinkData()
550  return_list = list()
551  for b in ent.bonds:
552  a1 = b.first
553  a2 = b.second
554  mean, std = GetBondParam(a1, a2, stereo_data = stereo_data,
555  stereo_link_data = stereo_link_data)
556  if None not in [mean, std]:
557  l = b.length
558  if abs(mean-l) > tolerance*std:
559  return_list.append(BondViolationInfo(a1, a2, l, mean, std))
560  return return_list
561 
562 
563 def GetBadAngles(ent, stereo_data = None, stereo_link_data = None,
564  tolerance = 12):
565  """ Identify unrealistic angles
566 
567  :param ent: Entity for which you want to identify unrealistic angles
568  :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
569  :param stereo_data: Stereochemistry data, use return value of
570  :func:`GetDefaultStereoData` if not given.
571  :type stereo_data: :class:`dict`
572  :param stereo_link_data: Stereochemistry data, use return value of
573  :func:`GetDefaultStereoLinkData` if not given.
574  :type stereo_link_data: :class:`dict`
575  :param tolerance: Angles that devaiate more than *tolerance* times standard
576  deviation from expected mean are considered bad
577  :type tolerance: :class:`int`
578  :returns: :class:`list` of :class:`AngleViolationInfo`
579  """
580  if stereo_data is None:
581  stereo_data = GetDefaultStereoData()
582  if stereo_link_data is None:
583  stereo_link_data = GetDefaultStereoLinkData()
584  return_list = list()
585  for a in _GetAngles(ent.bonds):
586  mean, std = GetAngleParam(a[0], a[1], a[2], stereo_data = stereo_data,
587  stereo_link_data = stereo_link_data)
588  if None not in [mean, std]:
589  angle = geom.Angle(a[0].GetPos() - a[1].GetPos(),
590  a[2].GetPos() - a[1].GetPos())
591  angle = angle/np.pi*180 # stereo params are in degrees
592  diff = abs(mean-angle)
593  if diff > tolerance*std:
594  return_list.append(AngleViolationInfo(a[0], a[1], a[2], angle,
595  mean, std))
596  return return_list
597 
598 
599 def StereoCheck(ent, stereo_data = None, stereo_link_data = None):
600  """ Remove atoms with stereochemical problems
601 
602  Selects for peptide/nucleotides and calls :func:`GetClashes`,
603  :func:`GetBadBonds` and :func:`GetBadAngles` with default
604  parameters.
605 
606  * Amino acids: Remove full residue if backbone atom is involved in
607  stereochemistry issue ("N", "CA", "C", "O"). Remove sidechain if any of
608  the sidechain atoms is involved in stereochemistry issues.
609  * Nucleotides: Remove full residue if backbone atom is involved in
610  stereochemistry issue ("P", "OP1", "OP2", "OP3", "O5'", "C5'", "C4'",
611  "C3'", "C2'", "C1'", "O4'", "O3'", "O2'"). Remove sidechain (base) if any
612  of the sidechain atoms is involved in stereochemistry issues.
613 
614  :param ent: Entity to be stereochecked
615  :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
616  :param stereo_data: Stereochemistry data, use return value of
617  :func:`GetDefaultStereoData` if not given.
618  :type stereo_data: :class:`dict`
619  :param stereo_link_data: Stereochemistry data, use return value of
620  :func:`GetDefaultStereoLinkData` if not given.
621  :type stereo_link_data: :class:`dict`
622  :returns: Tuple with four elements: 1) :class:`ost.mol.EntityView` of
623  *ent* processed as described above 2) Return value of
624  :func:`GetClashes` 3) return value of :func:`GetBadBonds`
625  4) return value of :func:`GetBadAngles`
626  """
627  if stereo_data is None:
628  stereo_data = GetDefaultStereoData()
629 
630  sel = ent.Select("peptide=true or nucleotide=true")
631  clashes = GetClashes(sel)
632  bad_bonds = GetBadBonds(sel, stereo_data = stereo_data)
633  bad_angles = GetBadAngles(sel, stereo_data = stereo_data)
634 
635  # set stereo problems as properties on an atom level
636  for clash in clashes:
637  clash.a1.SetIntProp("stereo_problem", 1)
638  clash.a2.SetIntProp("stereo_problem", 1)
639 
640  for bond in bad_bonds:
641  bond.a1.SetIntProp("stereo_problem", 1)
642  bond.a2.SetIntProp("stereo_problem", 1)
643 
644  for angle in bad_angles:
645  angle.a1.SetIntProp("stereo_problem", 1)
646  angle.a2.SetIntProp("stereo_problem", 1)
647  angle.a3.SetIntProp("stereo_problem", 1)
648 
649  # set stereo problems as properties on a residue level
650  bad_ent = ent.Select("gastereo_problem:0=1")
651  if len(bad_ent.residues) > 0:
652  pep_bb = set(["N", "CA", "C", "O"])
653  nuc_bb = set(["P", "OP1", "OP2", "OP3", "O5'", "C5'", "C4'", "C3'",
654  "C2'", "C1'", "O4'", "O3'", "O2'"])
655 
656  for r in bad_ent.residues:
657  bad_atoms = set([a.GetName() for a in r.atoms])
658  r.SetIntProp("stereo_problem", 1)
659  if r.GetChemType() == mol.ChemType.NUCLEOTIDES:
660  if len(nuc_bb.intersection(bad_atoms)) > 0:
661  r.SetIntProp("stereo_problem_bb", 1)
662  elif r.GetChemType() == mol.ChemType.AMINOACIDS:
663  if len(pep_bb.intersection(bad_atoms)) > 0:
664  r.SetIntProp("stereo_problem_bb", 1)
665 
666  # explicitely add " as OpenStructure query language would not
667  # understand ' otherwise
668  nuc_bb = [f"\"{name}\"" for name in nuc_bb]
669 
670  pep_query = f"(peptide=true and grstereo_problem:0=0) or "
671  pep_query += f"(peptide=true and grstereo_problem_bb:0=0 and "
672  pep_query += f"aname={','.join(pep_bb)})"
673  nuc_query = f"(nucleotide=true and grstereo_problem:0=0) or "
674  nuc_query += f"(nucleotide=true and grstereo_problem_bb:0=0 and "
675  nuc_query += f"aname={','.join(nuc_bb)})"
676  query = pep_query + " or " + nuc_query
677  return_view = sel.Select(query)
678  else:
679  return_view = sel
680 
681  return return_view, clashes, bad_bonds, bad_angles
682 
683 
685  """ Get default stereo data derived from CCP4 MON_LIB
686 
687  Used as default if not provided in :func:`GetBadBonds`, :func:`GetBadAngles`
688  and :func:`StereoCheck`.
689 
690  MON_LIB is licensed under GNU LESSER GENERAL PUBLIC LICENSE Version 3.
691  Consult the latest CCP4 for the full license text.
692  """
693  data_path = os.path.join(ost.GetSharedDataPath(), "stereo_data.json")
694  with open(data_path, 'r') as fh:
695  return json.load(fh)
696 
697 
699  """ Get default stereo data for links between compounds
700 
701  Hardcoded from arbitrary sources, see comments in the code.
702 
703  :returns: Data for peptide bonds, nucleotide links and disulfid bonds that
704  are used as default if not provided in :func:`GetBadBonds`,
705  :func:`GetBadAngles` and :func:`StereoCheck`.
706  """
707  data = {"bond_data": dict(),
708  "angle_data": dict()}
709 
710  # data for nucleotides - deliberately stolen from
711  # geostd (https://github.com/phenix-project/geostd) which is basically
712  # the Phenix equivalent for MON_LIB
713  # used file: $GEOSTD_DIR/rna_dna/chain_link_rna2p.cif
714  # Reason to not use the same data origin as peptides is that in CCP4
715  # there is a bit a more fine grained differentiation of NA types
716  # which makes things more complicated.
717  data["bond_data"]["NA"] = dict()
718  data["bond_data"]["NA"]["O3'_P"] = [1.607, 0.015]
719 
720  data["angle_data"]["NA"] = dict()
721  data["angle_data"]["NA"]["O3'_P_O5'"] = [104.000, 1.500]
722  data["angle_data"]["NA"]["O3'_P_OP1"] = [108.000, 3.000]
723  data["angle_data"]["NA"]["O3'_P_OP2"] = [108.000, 3.000]
724  data["angle_data"]["NA"]["C3'_O3'_P"] = [120.200, 1.500]
725 
726  # data for peptides - deliberately stolen from standard_geometry.cif file
727  # which is shipped with CCP4
728  # (_standard_geometry.version "Fri Feb 22 17:25:15 GMT 2013").
729  data["bond_data"]["PEPTIDE"] = dict()
730  data["bond_data"]["PEPTIDE"]["C_N"] = [1.336, 0.023]
731  data["bond_data"]["PEPTIDE"]["SG_SG"] = [2.033, 0.016]
732 
733  data["bond_data"]["GLY"] = dict()
734  data["bond_data"]["GLY"]["C_N"] = [1.326, 0.018]
735 
736  data["bond_data"]["PRO_CIS"] = dict()
737  data["bond_data"]["PRO_CIS"]["C_N"] = [1.338, 0.019]
738  data["bond_data"]["PRO_TRANS"] = dict()
739  data["bond_data"]["PRO_TRANS"]["C_N"] = [1.338, 0.019]
740 
741  data["angle_data"]["PEPTIDE"] = dict()
742  data["angle_data"]["PEPTIDE"]["CA_C_N"] = [117.2, 2.2]
743  data["angle_data"]["PEPTIDE"]["O_C_N"] = [122.7, 1.6]
744  data["angle_data"]["PEPTIDE"]["C_N_CA"] = [121.7, 2.5]
745 
746  data["angle_data"]["GLY"] = dict()
747  data["angle_data"]["GLY"]["CA_C_N"] = [116.2, 2.0]
748  data["angle_data"]["GLY"]["O_C_N"] = [123.2, 1.7]
749  data["angle_data"]["GLY"]["C_N_CA"] = [122.3, 2.1]
750 
751  data["angle_data"]["PRO_TRANS"] = dict()
752  data["angle_data"]["PRO_TRANS"]["CA_C_N"] = [117.1, 2.8]
753  data["angle_data"]["PRO_TRANS"]["O_C_N"] = [121.1, 1.9]
754  data["angle_data"]["PRO_TRANS"]["C_N_CA"] = [119.3, 1.5]
755  data["angle_data"]["PRO_TRANS"]["C_N_CD"] = [128.4, 2.1]
756 
757  data["angle_data"]["PRO_CIS"] = dict()
758  data["angle_data"]["PRO_CIS"]["CA_C_N"] = [117.1, 2.8]
759  data["angle_data"]["PRO_CIS"]["O_C_N"] = [121.1, 1.9]
760  data["angle_data"]["PRO_CIS"]["C_N_CA"] = [127.0, 2.4]
761  data["angle_data"]["PRO_CIS"]["C_N_CD"] = [120.6, 2.2]
762 
763  return data
def __init__(self, a1, a2, a3, angle, exp_angle, std)
def __init__(self, a1, a2, length, exp_length, std)
def __init__(self, a1, a2, dist, tolerated_dist)
Real DLLEXPORT_OST_GEOM Angle(const Line2 &l1, const Line2 &l2)
Real DLLEXPORT_OST_GEOM Distance(const Line2 &l, const Vec2 &v)
Real DihedralAngle(const Vec3 &p1, const Vec3 &p2, const Vec3 &p3, const Vec3 &p4)
Get dihedral angle for p1-p2-p3-p4.
Definition: vecmat3_op.hh:170
def GetAngleParam(a1, a2, a3, stereo_data=None, stereo_link_data=None)
def StereoCheck(ent, stereo_data=None, stereo_link_data=None)
def GetClashes(ent, vdw_radii=None, tolerance=1.5, disulfid_dist=2.03, disulfid_tolerance=1.0)
def GetBadBonds(ent, stereo_data=None, stereo_link_data=None, tolerance=12)
def GetBadAngles(ent, stereo_data=None, stereo_link_data=None, tolerance=12)
def StereoDataFromMON_LIB(mon_lib_path, compounds=None)
def GetBondParam(a1, a2, stereo_data=None, stereo_link_data=None)
String DLLEXPORT_OST_BASE GetSharedDataPath()