OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
stereochemistry.py
Go to the documentation of this file.
1 import os
2 import json
3 import datetime
4 
5 import numpy as np
6 
7 import ost
8 from ost import geom
9 from ost import mol
10 
11 
12 def _AtomToQualifiedName(a):
13  """ Returns string to uniquely identify atom
14 
15  format: <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
16  """
17  r = a.GetResidue()
18  ch = r.GetChain()
19  num = r.number.num
20  ins_code = r.number.ins_code.strip("\u0000")
21  return f"{ch.name}.{r.number.num}.{ins_code}.{a.name}"
22 
23 
24 def _PotentialDisulfid(a_one, a_two):
25  """ Returns whether two atoms can potentially build a disulfid bond
26 
27  Assumes that they're from two distinct residues
28  """
29  if a_one.GetName() == "SG" and a_two.GetName() == "SG":
30  if a_one.GetResidue().GetName() == "CYS":
31  if a_two.GetResidue().GetName() == "CYS":
32  return True
33  return False
34 
35 
36 def _GetAngles(bonds):
37  """ Returns list of angles based on bonds
38 
39  Returns list of tuples, each tuple has three atom handles
40  representing angles
41  """
42  angles = list()
43  done = set()
44  for bond in bonds:
45  h1 = bond.first.GetHashCode()
46  h2 = bond.second.GetHashCode()
47  for a in bond.first.GetBondPartners():
48  h0 = a.GetHashCode()
49  if h0 != h2:
50  if ((h0, h1, h2)) not in done and (h2, h1, h0) not in done:
51  angles.append((a, bond.first, bond.second))
52  done.add((h0, h1, h2))
53  for a in bond.second.GetBondPartners():
54  h3 = a.GetHashCode()
55  if h3 != h1:
56  if ((h1, h2, h3)) not in done and (h3, h2, h1) not in done:
57  angles.append((bond.first, bond.second, a))
58  done.add((h1, h2, h3))
59  return angles
60 
61 
62 def _GetResidueType(atoms):
63  """ Identifies type in StereoLinkData
64 
65  :param atoms: Atoms that define a bond or angle
66  :type atoms: :class:`list` of :class:`AtomHandle`
67  :returns: :class:`str` with which the respective parameters can be
68  accessed in default stereo link data, None if no match is found
69  """
70  residues = [a.GetResidue().handle for a in atoms]
71  chem_types = list(set([str(r.GetChemType()) for r in residues]))
72 
73  if len(chem_types) == 1 and chem_types[0] == 'N':
74  return "NA"
75  elif len(chem_types) == 1 and chem_types[0] == 'A':
76  # in both cases, bond or angle, there should be exactly two residues
77  # involved
78  tmp = list()
79  r_hashes = set()
80  for r in residues:
81  h = r.GetHashCode()
82  if h not in r_hashes:
83  r_hashes.add(h)
84  tmp.append(r)
85  residues = tmp
86  if len(residues) != 2:
87  return None
88 
89  # need to be sorted
90  if residues[0].GetNumber() > residues[1].GetNumber():
91  r0 = residues[1]
92  r1 = residues[0]
93  else:
94  r0 = residues[0]
95  r1 = residues[1]
96 
97  if r1.GetName() == "GLY":
98  return "GLY"
99  elif r1.GetName() == "PRO":
100  a = r0.FindAtom("CA")
101  b = r0.FindAtom("C")
102  c = r1.FindAtom("N")
103  d = r1.FindAtom("CA")
104  if a.IsValid() and b.IsValid() and c.IsValid() and d.IsValid():
105  omega = geom.DihedralAngle(a.GetPos(), b.GetPos(),
106  c.GetPos(), d.GetPos())
107  if abs(omega) < 1.57:
108  return "PRO_CIS"
109  else:
110  return "PRO_TRANS"
111  else:
112  return "PEPTIDE"
113 
114  return None
115 
116 
117 def _ParseBondData(doc):
118  """ Parse stereochemistry data for bonds
119 
120  That is expected distances and standard deviations from a
121  :class:`gemmi.Document`. Concatenates results form all loops with tags:
122  _chem_comp_bond.comp_id, _chem_comp_bond.atom_id_1,
123  _chem_comp_bond.atom_id_2, _chem_comp_bond.value_dist,
124  _chem_comp_bond.value_dist_esd
125 
126  :param doc: Gemmi doc representing cif file opened with
127  gemmi.cif.read_file(filepath)
128  :type doc: :class:`gemmi.Document`
129  :returns: :class:`dict` with one key per compound, the respective value
130  is again a dict with key f"{at_1}_{at_2}" and value
131  [dist, dist_std].
132  """
133  data = dict()
134  for block in doc:
135  comp_id = block.find_values("_chem_comp_bond.comp_id")
136  at_1 = block.find_values("_chem_comp_bond.atom_id_1")
137  at_2 = block.find_values("_chem_comp_bond.atom_id_2")
138  dist = block.find_values("_chem_comp_bond.value_dist")
139  dist_std = block.find_values("_chem_comp_bond.value_dist_esd")
140  if None not in [comp_id, at_1, at_2, dist, dist_std]:
141  for a, b, c, d, e in zip(comp_id, at_1, at_2, dist, dist_std):
142  if a not in data:
143  data[a] = dict()
144  key = '_'.join([b.strip('\"'), c.strip('\"')])
145  data[a][key] = [float(d), float(e)]
146  return data
147 
148 
149 def _ParseAngleData(doc):
150  """ Parse stereochemistry data for angles
151 
152  That is expected distances and standard deviations from a
153  :class:`gemmi.Document`. Concatenates results form all loops with tags:
154  _chem_comp_angle.comp_id, _chem_comp_angle.atom_id_1,
155  _chem_comp_angle.atom_id_2, _chem_comp_angle.atom_id_2,
156  _chem_comp_angle.value_angle, _chem_comp_angle.value_angle_esd
157 
158  :param doc: Gemmi doc representing cif file opened with
159  gemmi.cif.read_file(filepath)
160  :type doc: :class:`gemmi.Document`
161  :returns: :class:`dict` with one key per compound, the respective value
162  is again a dict with key f"{at_1}_{at_2}_{at_3}" and value
163  [angle, angle_std].
164  """
165  data = dict()
166  for block in doc:
167  comp_id = block.find_values("_chem_comp_angle.comp_id")
168  at_1 = block.find_values("_chem_comp_angle.atom_id_1")
169  at_2 = block.find_values("_chem_comp_angle.atom_id_2")
170  at_3 = block.find_values("_chem_comp_angle.atom_id_3")
171  angle = block.find_values("_chem_comp_angle.value_angle")
172  angle_std = block.find_values("_chem_comp_angle.value_angle_esd")
173  if None not in [comp_id, at_1, at_2, at_3, angle, angle_std]:
174  for a, b, c, d, e, f in zip(comp_id, at_1, at_2, at_3, angle,
175  angle_std):
176  if a not in data:
177  data[a] = dict()
178  key = '_'.join([b.strip('\"'), c.strip('\"'), d.strip('\"')])
179  data[a][key] = [float(e), float(f)]
180  return data
181 
182 
183 def StereoDataFromMON_LIB(mon_lib_path, compounds=None):
184  """ Parses stereochemistry parameters from CCP4 MON_LIB
185 
186  CCP4 `MON_LIB <https://www.ccp4.ac.uk/html/mon_lib.html>`_ contains
187  data on ideal bond lengths/angles for compounds.
188 
189  Original data (several updates in the meantime) come from:
190 
191  * Amino acid bond lengths and angles: Engh and Huber, Acta Cryst.
192  A47, 392-400 (1991).
193  * Purine and pyrimidine bond lengths and angles: O. Kennard & R. Taylor
194  (1982), J. Am. Soc. Chem. vol. 104, pp. 3209-3212.
195  * Sugar-phosphate backbone bond lengths and bond angles: W. Saenger’s
196  Principles of Nucleic Acid Structure (1983), Springer-Verlag, pp. 70,86.
197 
198  This function adds a dependency to the
199  `gemmi <https://github.com/project-gemmi/gemmi/>`_ library to read cif
200  files.
201 
202  :param mon_lib_path: Path to CCP4 MON_LIB
203  :type mon_lib_path: :class:`str`
204  :param compounds: Compounds to parse - parses proteinogenic amino acids
205  and nucleotides if not given.
206  :type compounds: :class:`list`
207  :returns: :class:`dict` with stereochemistry parameters
208  """
209  if compounds is None:
210  compounds = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY',
211  'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'MSE', 'PHE', 'PRO',
212  'SER', 'THR', 'TRP', 'TYR', 'VAL', 'DA', 'A', 'DC', 'C',
213  'DG', 'G', 'DU', 'U', 'DT', 'DI', 'I']
214 
215  cif_paths = list()
216  for c in compounds:
217  p = os.path.join(mon_lib_path, c[0].lower(), c + ".cif")
218  if not os.path.exists(p):
219  raise RuntimeError(f"Tried to find cif file for compound {c} "
220  f"in specified MON_LIB ({mon_lib_path})."
221  f"Expected file ({p}) does not exist.")
222  cif_paths.append(p)
223 
224  # hide import to avoid it as dependency for the whole module
225  from gemmi import cif
226  # construct return dict from first element and subsequently
227  # add the remainder
228  doc = cif.read_file(cif_paths[0])
229  data = {"bond_data": _ParseBondData(doc),
230  "angle_data": _ParseAngleData(doc)}
231  for cp in cif_paths[1:]:
232  doc = cif.read_file(cp)
233  bond_data = _ParseBondData(doc)
234  angle_data = _ParseAngleData(doc)
235  data["bond_data"].update(bond_data)
236  data["angle_data"].update(angle_data)
237 
238  # add license info
239  copying_str = f"This data has been derived from the CCP4 MON_LIB on "
240  copying_str += f"{datetime.datetime.now()}. MON_LIB is licensed under "
241  copying_str += f"GNU LESSER GENERAL PUBLIC LICENSE Version 3. Consult the "
242  copying_str += f"latest CCP4 for the full license text."
243  data["COPYING"] = copying_str
244 
245  return data
246 
247 
248 def GetBondParam(a1, a2, stereo_data = None, stereo_link_data = None):
249  """ Returns mean and standard deviation for bond
250 
251  :param a1: First atom that defines bond
252  :type a1: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
253  :param a2: Second atom that defines bond
254  :type a2: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
255  :param stereo_data: Stereochemistry data, use return value of
256  :func:`GetDefaultStereoData` if not given.
257  If you call this function repeatedly, you
258  really should provide *stereo_data*!
259  :type stereo_data: :class:`dict`
260  :param stereo_link_data: Stereochemistry data, use return value of
261  :func:`GetDefaultStereoLinkData` if not given.
262  If you call this function repeatedly, you
263  really should provide *stereo_link_data*!
264  :type stereo_link_data: :class:`dict`
265  :returns: :class:`tuple` with mean and standard deviation. Values are None
266  if respective bond is not found in *stereo_data*
267  """
268  if stereo_data is None:
269  stereo_data = GetDefaultStereoData()
270  if stereo_link_data is None:
271  stereo_link_data = GetDefaultStereoLinkData()
272 
273  residue_data = None
274  if a1.GetResidue().GetHashCode() == a2.GetResidue().GetHashCode():
275  # intra residue case
276  rname = a1.GetResidue().GetName()
277  if rname in stereo_data["bond_data"]:
278  residue_data = stereo_data["bond_data"][rname]
279  else:
280  # inter residue case
281  residue_type = _GetResidueType([a1, a2])
282  if residue_type is not None:
283  residue_data = stereo_link_data["bond_data"][residue_type]
284 
285  if residue_data is not None:
286  a1name = a1.GetName()
287  a2name = a2.GetName()
288  key = a1name + "_" + a2name
289  if key in residue_data:
290  return (residue_data[key][0], residue_data[key][1])
291  key = a2name + "_" + a1name
292  if key in residue_data:
293  return (residue_data[key][0], residue_data[key][1])
294 
295  return (None, None)
296 
297 
298 def GetAngleParam(a1, a2, a3, stereo_data = None, stereo_link_data = None):
299  """ Returns mean and standard deviation for angle
300 
301  :param a1: First atom that defines angle
302  :type a1: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
303  :param a2: Second atom that defines angle
304  :type a2: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
305  :param a3: Third atom that defines angle
306  :type a3: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
307  :param stereo_data: Stereochemistry data, use return value of
308  :func:`GetDefaultStereoData` if not given.
309  If you call this function repeatedly, you
310  really should provide *stereo_data*!
311  :type stereo_data: :class:`dict`
312  :param stereo_link_data: Stereochemistry data, use return value of
313  :func:`GetDefaultStereoLinkData` if not given.
314  If you call this function repeatedly, you
315  really should provide *stereo_link_data*!
316  :type stereo_link_data: :class:`dict`
317  :returns: :class:`tuple` with mean and standard deviation. Values are None
318  if respective angle is not found in *stereo_data*
319  """
320  if stereo_data is None:
321  stereo_data = GetDefaultStereoData()
322  if stereo_link_data is None:
323  stereo_link_data = GetDefaultStereoLinkData()
324  h1 = a1.GetResidue().handle.GetHashCode()
325  h2 = a2.GetResidue().handle.GetHashCode()
326  h3 = a3.GetResidue().handle.GetHashCode()
327  residue_data = None
328  if h1 == h2 and h2 == h3:
329  # intra residue case
330  rname = a1.GetResidue().GetName()
331  if rname in stereo_data["angle_data"]:
332  residue_data = stereo_data["angle_data"][rname]
333  else:
334  # inter residue case
335  residue_type = _GetResidueType([a1, a2, a3])
336  if residue_type in stereo_link_data["angle_data"]:
337  residue_data = stereo_link_data["angle_data"][residue_type]
338 
339  if residue_data is not None:
340  a1name = a1.GetName()
341  a2name = a2.GetName()
342  a3name = a3.GetName()
343  key = a1name + "_" + a2name + "_" + a3name
344  if key in residue_data:
345  return (residue_data[key][0], residue_data[key][1])
346  key = a3name + "_" + a2name + "_" + a1name
347  if key in residue_data:
348  return (residue_data[key][0], residue_data[key][1])
349  return (None, None)
350 
351 
352 class ClashInfo:
353  """ Object to hold info on clash
354 
355  Constructor arguments are available as attributes:
356 
357  * a1 (:class:`ost.mol.AtomHandle`)
358  * a2 (:class:`ost.mol.AtomHandle`)
359  * dist (:class:`float`)
360  * tolerated_dist (:class:`float`)
361  """
362  def __init__(self, a1, a2, dist, tolerated_dist):
363  self.a1 = a1
364  self.a2 = a2
365  self.dist = dist
366  self.tolerated_dist = tolerated_dist
367 
368  def ToJSON(self, decimals = 3):
369  """ Return JSON serializable dict
370 
371  Atoms are represented by a string in format:
372  <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
373  """
374  return {"a1": _AtomToQualifiedName(self.a1),
375  "a2": _AtomToQualifiedName(self.a2),
376  "dist": round(self.dist, decimals),
377  "tolerated_dist": round(self.tolerated_dist, decimals)}
378 
379 
381  """ Object to hold info on bond violation
382 
383  Constructor arguments are available as attributes:
384 
385  * a1 (:class:`ost.mol.AtomHandle`)
386  * a2 (:class:`ost.mol.AtomHandle`)
387  * length (:class:`float`)
388  * exp_length (:class:`float`)
389  * std (:class:`float`)
390  """
391  def __init__(self, a1, a2, length, exp_length, std):
392  self.a1 = a1
393  self.a2 = a2
394  self.length = length
395  self.exp_length = exp_length
396  self.std = std
397 
398  def ToJSON(self, decimals = 3):
399  """ Return JSON serializable dict
400 
401  Atoms are represented by a string in format:
402  <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
403  """
404  return {"a1": _AtomToQualifiedName(self.a1),
405  "a2": _AtomToQualifiedName(self.a2),
406  "length": round(self.length, decimals),
407  "exp_length": round(self.exp_length, decimals),
408  "std": round(self.std, decimals)}
409 
410 
412  """ Object to hold info on angle violation
413 
414  Constructor arguments are available as attributes:
415 
416  * a1 (:class:`ost.mol.AtomHandle`)
417  * a2 (:class:`ost.mol.AtomHandle`)
418  * a3 (:class:`ost.mol.AtomHandle`)
419  * angle (:class:`float`)
420  * exp_angle (:class:`float`)
421  * std (:class:`float`)
422  """
423  def __init__(self, a1, a2, a3, angle, exp_angle, std):
424  self.a1 = a1
425  self.a2 = a2
426  self.a3 = a3
427  self.angle = angle
428  self.exp_angle = exp_angle
429  self.std = std
430 
431  def ToJSON(self, decimals = 3):
432  """ Return JSON serializable dict
433 
434  Atoms are represented by a string in format:
435  <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
436  """
437  return {"a1": _AtomToQualifiedName(self.a1),
438  "a2": _AtomToQualifiedName(self.a2),
439  "a3": _AtomToQualifiedName(self.a3),
440  "angle": round(self.angle, decimals),
441  "exp_angle": round(self.exp_angle, decimals),
442  "std": round(self.std, decimals)}
443 
444 
445 def GetClashes(ent, vdw_radii = None, tolerance = 1.5, disulfid_dist = 2.03,
446  disulfid_tolerance = 1.0):
447  """ Identifies clashing atoms
448 
449  A clash between two non-bonded atoms is defined as their distance d being
450  below the sum of their vdw radii with some subtracted tolerance value.
451 
452  The default values are not very sensitive.
453 
454  :param ent: Entity for which you want to identify clashing atoms
455  :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
456  :param vdw_radii: Element based van der Waals radii. Only atoms of these
457  elements will be considered. If not given, default values
458  for all elements occuring in proteins/nucleotides are
459  used. Must be provided as :class:`dict`, where they key
460  are elements (capitalized) and value the respective radii
461  in Angstrom.
462  :type vdw_radii: :class:`dict`
463  :param tolerance: Tolerance value
464  :param disulfid_dist: Summed vdw radius that is used if two Sulfurs that can
465  potentially build a disulfid bond interact
466  :type disulfid_dist: :class:`float`
467  :param disulfid_tolerance: The respective tolerance
468  :type disulfid_dist: :class:`float`
469  :returns: A :class:`list` of :class:`ClashInfo`
470  """
471 
472  if vdw_radii is None:
473  vdw_radii = {"C": 1.70, "N": 1.55, "O": 1.52, "P": 1.80, "S": 1.80}
474 
475  for ele in vdw_radii.keys():
476  if ele.upper() != ele:
477  raise RuntimeError(f"Elements in vdw_radii must be upper case. "
478  f"Got {ele}")
479 
480  # it would be elegant to just do a selection by the ele property. However,
481  # thats case sensitive. So the element could be Cl but the vdw radii
482  # are all caps.
483  elements = set([ele.upper() for ele in vdw_radii.keys()])
484  for a in ent.atoms:
485  if a.GetElement().upper() in elements:
486  a.SetIntProp("clash_check", 1)
487  clash_ent = ent.Select("gaclash_check:0=1")
488 
489  max_radius = max(vdw_radii.values())
490  max_radius = max(max_radius, 0.5*disulfid_dist)
491  min_tolerance = min(tolerance, disulfid_tolerance)
492  radius = 2*max_radius-min_tolerance
493 
494  done = set()
495  return_list = list()
496  for a in clash_ent.atoms:
497  a_hash = a.handle.GetHashCode()
498  close_atoms = clash_ent.FindWithin(a.GetPos(), radius)
499  for ca in close_atoms:
500  ca_hash = ca.handle.GetHashCode()
501  if a_hash != ca_hash and not mol.BondExists(a.handle, ca.handle):
502  d = geom.Distance(a.GetPos(), ca.GetPos())
503  if _PotentialDisulfid(a, ca):
504  thresh = disulfid_dist - disulfid_tolerance
505  else:
506  thresh = vdw_radii[a.GetElement().upper()]
507  thresh += vdw_radii[ca.GetElement().upper()]
508  thresh -= tolerance
509  if d < thresh:
510  # check if already there, add if not
511  hash_pair = (min(a_hash, ca_hash), max(a_hash, ca_hash))
512  if hash_pair not in done:
513  done.add(hash_pair)
514  return_list.append(ClashInfo(a.handle, ca.handle, d,
515  thresh))
516  return return_list
517 
518 
519 def GetBadBonds(ent, stereo_data = None, stereo_link_data = None, tolerance=12):
520  """ Identify unrealistic bonds
521 
522  :param ent: Entity for which you want to identify unrealistic bonds
523  :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
524  :param stereo_data: Stereochemistry data, use return value of
525  :func:`GetDefaultStereoData` if not given.
526  :type stereo_data: :class:`dict`
527  :param stereo_link_data: Stereochemistry data, use return value of
528  :func:`GetDefaultStereoLinkData` if not given.
529  :type stereo_link_data: :class:`dict`
530  :param tolerance: Bonds that devaiate more than *tolerance* times standard
531  deviation from expected mean are considered bad
532  :type tolerance: :class:`int`
533  :returns: :class:`list` :class:`BondViolationInfo`
534 
535  """
536  if stereo_data is None:
537  stereo_data = GetDefaultStereoData()
538  if stereo_link_data is None:
539  stereo_link_data = GetDefaultStereoLinkData()
540  return_list = list()
541  for b in ent.bonds:
542  a1 = b.first
543  a2 = b.second
544  mean, std = GetBondParam(a1, a2, stereo_data = stereo_data,
545  stereo_link_data = stereo_link_data)
546  if None not in [mean, std]:
547  l = b.length
548  if abs(mean-l) > tolerance*std:
549  return_list.append(BondViolationInfo(a1, a2, l, mean, std))
550  return return_list
551 
552 
553 def GetBadAngles(ent, stereo_data = None, stereo_link_data = None,
554  tolerance = 12):
555  """ Identify unrealistic angles
556 
557  :param ent: Entity for which you want to identify unrealistic angles
558  :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
559  :param stereo_data: Stereochemistry data, use return value of
560  :func:`GetDefaultStereoData` if not given.
561  :type stereo_data: :class:`dict`
562  :param stereo_link_data: Stereochemistry data, use return value of
563  :func:`GetDefaultStereoLinkData` if not given.
564  :type stereo_link_data: :class:`dict`
565  :param tolerance: Angles that devaiate more than *tolerance* times standard
566  deviation from expected mean are considered bad
567  :type tolerance: :class:`int`
568  :returns: :class:`list` of :class:`AngleViolationInfo`
569  """
570  if stereo_data is None:
571  stereo_data = GetDefaultStereoData()
572  if stereo_link_data is None:
573  stereo_link_data = GetDefaultStereoLinkData()
574  return_list = list()
575  for a in _GetAngles(ent.bonds):
576  mean, std = GetAngleParam(a[0], a[1], a[2], stereo_data = stereo_data,
577  stereo_link_data = stereo_link_data)
578  if None not in [mean, std]:
579  angle = geom.Angle(a[0].GetPos() - a[1].GetPos(),
580  a[2].GetPos() - a[1].GetPos())
581  angle = angle/np.pi*180 # stereo params are in degrees
582  diff = abs(mean-angle)
583  if diff > tolerance*std:
584  return_list.append(AngleViolationInfo(a[0], a[1], a[2], angle,
585  mean, std))
586  return return_list
587 
588 
589 def StereoCheck(ent, stereo_data = None, stereo_link_data = None):
590  """ Remove atoms with stereochemical problems
591 
592  Selects for peptide/nucleotides and calls :func:`GetClashes`,
593  :func:`GetBadBonds` and :func:`GetBadAngles` with default
594  parameters.
595 
596  * Amino acids: Remove full residue if backbone atom is involved in
597  stereochemistry issue ("N", "CA", "C", "O"). Remove sidechain if any of
598  the sidechain atoms is involved in stereochemistry issues.
599  * Nucleotides: Remove full residue if backbone atom is involved in
600  stereochemistry issue ("P", "OP1", "OP2", "OP3", "O5'", "C5'", "C4'",
601  "C3'", "C2'", "C1'", "O4'", "O3'", "O2'"). Remove sidechain (base) if any
602  of the sidechain atoms is involved in stereochemistry issues.
603 
604  :param ent: Entity to be stereochecked
605  :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
606  :param stereo_data: Stereochemistry data, use return value of
607  :func:`GetDefaultStereoData` if not given.
608  :type stereo_data: :class:`dict`
609  :param stereo_link_data: Stereochemistry data, use return value of
610  :func:`GetDefaultStereoLinkData` if not given.
611  :type stereo_link_data: :class:`dict`
612  :returns: Tuple with four elements: 1) :class:`ost.mol.EntityView` of
613  *ent* processed as described above 2) Return value of
614  :func:`GetClashes` 3) return value of :func:`GetBadBonds`
615  4) return value of :func:`GetBadAngles`
616  """
617  if stereo_data is None:
618  stereo_data = GetDefaultStereoData()
619 
620  sel = ent.Select("peptide=true or nucleotide=true")
621  clashes = GetClashes(sel)
622  bad_bonds = GetBadBonds(sel, stereo_data = stereo_data)
623  bad_angles = GetBadAngles(sel, stereo_data = stereo_data)
624 
625  # set stereo problems as properties on an atom level
626  for clash in clashes:
627  clash.a1.SetIntProp("stereo_problem", 1)
628  clash.a2.SetIntProp("stereo_problem", 1)
629 
630  for bond in bad_bonds:
631  bond.a1.SetIntProp("stereo_problem", 1)
632  bond.a2.SetIntProp("stereo_problem", 1)
633 
634  for angle in bad_angles:
635  angle.a1.SetIntProp("stereo_problem", 1)
636  angle.a2.SetIntProp("stereo_problem", 1)
637  angle.a3.SetIntProp("stereo_problem", 1)
638 
639  # set stereo problems as properties on a residue level
640  bad_ent = ent.Select("gastereo_problem:0=1")
641  if len(bad_ent.residues) > 0:
642  pep_bb = set(["N", "CA", "C", "O"])
643  nuc_bb = set(["P", "OP1", "OP2", "OP3", "O5'", "C5'", "C4'", "C3'",
644  "C2'", "C1'", "O4'", "O3'", "O2'"])
645 
646  for r in bad_ent.residues:
647  bad_atoms = set([a.GetName() for a in r.atoms])
648  r.SetIntProp("stereo_problem", 1)
649  if r.GetChemType() == mol.ChemType.NUCLEOTIDES:
650  if len(nuc_bb.intersection(bad_atoms)) > 0:
651  r.SetIntProp("stereo_problem_bb", 1)
652  elif r.GetChemType() == mol.ChemType.AMINOACIDS:
653  if len(pep_bb.intersection(bad_atoms)) > 0:
654  r.SetIntProp("stereo_problem_bb", 1)
655 
656  # explicitely add " as OpenStructure query language would not
657  # understand ' otherwise
658  nuc_bb = [f"\"{name}\"" for name in nuc_bb]
659 
660  pep_query = f"(peptide=true and grstereo_problem:0=0) or "
661  pep_query += f"(peptide=true and grstereo_problem_bb:0=0 and "
662  pep_query += f"aname={','.join(pep_bb)})"
663  nuc_query = f"(nucleotide=true and grstereo_problem:0=0) or "
664  nuc_query += f"(nucleotide=true and grstereo_problem_bb:0=0 and "
665  nuc_query += f"aname={','.join(nuc_bb)})"
666  query = pep_query + " or " + nuc_query
667  return_view = sel.Select(query)
668  else:
669  return_view = sel
670 
671  return return_view, clashes, bad_bonds, bad_angles
672 
673 
675  """ Get default stereo data derived from CCP4 MON_LIB
676 
677  Used as default if not provided in :func:`GetBadBonds`, :func:`GetBadAngles`
678  and :func:`StereoCheck`.
679 
680  MON_LIB is licensed under GNU LESSER GENERAL PUBLIC LICENSE Version 3.
681  Consult the latest CCP4 for the full license text.
682  """
683  data_path = os.path.join(ost.GetSharedDataPath(), "stereo_data.json")
684  with open(data_path, 'r') as fh:
685  return json.load(fh)
686 
687 
689  """ Get default stereo data for links between compounds
690 
691  Hardcoded from arbitrary sources, see comments in the code.
692 
693  :returns: Data for peptide bonds, nucleotide links and disulfid bonds that
694  are used as default if not provided in :func:`GetBadBonds`,
695  :func:`GetBadAngles` and :func:`StereoCheck`.
696  """
697  data = {"bond_data": dict(),
698  "angle_data": dict()}
699 
700  # data for nucleotides - deliberately stolen from
701  # geostd (https://github.com/phenix-project/geostd) which is basically
702  # the Phenix equivalent for MON_LIB
703  # used file: $GEOSTD_DIR/rna_dna/chain_link_rna2p.cif
704  # Reason to not use the same data origin as peptides is that in CCP4
705  # there is a bit a more fine grained differentiation of NA types
706  # which makes things more complicated.
707  data["bond_data"]["NA"] = dict()
708  data["bond_data"]["NA"]["O3'_P"] = [1.607, 0.015]
709 
710  data["angle_data"]["NA"] = dict()
711  data["angle_data"]["NA"]["O3'_P_O5'"] = [104.000, 1.500]
712  data["angle_data"]["NA"]["O3'_P_OP1"] = [108.000, 3.000]
713  data["angle_data"]["NA"]["O3'_P_OP2"] = [108.000, 3.000]
714  data["angle_data"]["NA"]["C3'_O3'_P"] = [120.200, 1.500]
715 
716  # data for peptides - deliberately stolen from standard_geometry.cif file
717  # which is shipped with CCP4
718  # (_standard_geometry.version "Fri Feb 22 17:25:15 GMT 2013").
719  data["bond_data"]["PEPTIDE"] = dict()
720  data["bond_data"]["PEPTIDE"]["C_N"] = [1.336, 0.023]
721  data["bond_data"]["PEPTIDE"]["SG_SG"] = [2.033, 0.016]
722 
723  data["bond_data"]["GLY"] = dict()
724  data["bond_data"]["GLY"]["C_N"] = [1.326, 0.018]
725 
726  data["bond_data"]["PRO_CIS"] = dict()
727  data["bond_data"]["PRO_CIS"]["C_N"] = [1.338, 0.019]
728  data["bond_data"]["PRO_TRANS"] = dict()
729  data["bond_data"]["PRO_TRANS"]["C_N"] = [1.338, 0.019]
730 
731  data["angle_data"]["PEPTIDE"] = dict()
732  data["angle_data"]["PEPTIDE"]["CA_C_N"] = [117.2, 2.2]
733  data["angle_data"]["PEPTIDE"]["O_C_N"] = [122.7, 1.6]
734  data["angle_data"]["PEPTIDE"]["C_N_CA"] = [121.7, 2.5]
735 
736  data["angle_data"]["GLY"] = dict()
737  data["angle_data"]["GLY"]["CA_C_N"] = [116.2, 2.0]
738  data["angle_data"]["GLY"]["O_C_N"] = [123.2, 1.7]
739  data["angle_data"]["GLY"]["C_N_CA"] = [122.3, 2.1]
740 
741  data["angle_data"]["PRO_TRANS"] = dict()
742  data["angle_data"]["PRO_TRANS"]["CA_C_N"] = [117.1, 2.8]
743  data["angle_data"]["PRO_TRANS"]["O_C_N"] = [121.1, 1.9]
744  data["angle_data"]["PRO_TRANS"]["C_N_CA"] = [119.3, 1.5]
745  data["angle_data"]["PRO_TRANS"]["C_N_CD"] = [128.4, 2.1]
746 
747  data["angle_data"]["PRO_CIS"] = dict()
748  data["angle_data"]["PRO_CIS"]["CA_C_N"] = [117.1, 2.8]
749  data["angle_data"]["PRO_CIS"]["O_C_N"] = [121.1, 1.9]
750  data["angle_data"]["PRO_CIS"]["C_N_CA"] = [127.0, 2.4]
751  data["angle_data"]["PRO_CIS"]["C_N_CD"] = [120.6, 2.2]
752 
753  return data
Real DihedralAngle(const Vec3 &p1, const Vec3 &p2, const Vec3 &p3, const Vec3 &p4)
Get dihedral angle for p1-p2-p3-p4.
Definition: vecmat3_op.hh:170
Real DLLEXPORT_OST_GEOM Angle(const Vec4 &v1, const Vec4 &v2)
Real Distance(const Vec3 &p1, const Vec3 &p2)
returns the distance between two points
Definition: vecmat3_op.hh:199
String DLLEXPORT_OST_BASE GetSharedDataPath()