OpenStructure
Loading...
Searching...
No Matches
stereochemistry.py
Go to the documentation of this file.
1"""
2.. note::
3
4 This is a new implementation of the stereochemistry checks, introduced in
5 OpenStructure 2.4, with support for nucleotides. The
6 :doc:`previous stereochemistry checks <stereochemistry_deprecated>` that come
7 with `Mariani et al. <https://dx.doi.org/10.1093/bioinformatics/btt473>`_ are
8 considered deprecated.
9"""
10
11import os
12import json
13import datetime
14
15import numpy as np
16
17import ost
18from ost import geom
19from ost import mol
20
21
23 """ Returns string to uniquely identify atom
24
25 format: <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
26 """
27 r = a.GetResidue()
28 ch = r.GetChain()
29 num = r.number.num
30 ins_code = r.number.ins_code.strip("\u0000")
31 return f"{ch.name}.{r.number.num}.{ins_code}.{a.name}"
32
33
34def _PotentialDisulfid(a_one, a_two):
35 """ Returns whether two atoms can potentially build a disulfid bond
36
37 Assumes that they're from two distinct residues
38 """
39 if a_one.GetName() == "SG" and a_two.GetName() == "SG":
40 if a_one.GetResidue().GetName() == "CYS":
41 if a_two.GetResidue().GetName() == "CYS":
42 return True
43 return False
44
45
46def _GetAngles(bonds):
47 """ Returns list of angles based on bonds
48
49 Returns list of tuples, each tuple has three atom handles
50 representing angles
51 """
52 angles = list()
53 done = set()
54 for bond in bonds:
55 h1 = bond.first.GetHashCode()
56 h2 = bond.second.GetHashCode()
57 for a in bond.first.GetBondPartners():
58 h0 = a.GetHashCode()
59 if h0 != h2:
60 if ((h0, h1, h2)) not in done and (h2, h1, h0) not in done:
61 angles.append((a, bond.first, bond.second))
62 done.add((h0, h1, h2))
63 for a in bond.second.GetBondPartners():
64 h3 = a.GetHashCode()
65 if h3 != h1:
66 if ((h1, h2, h3)) not in done and (h3, h2, h1) not in done:
67 angles.append((bond.first, bond.second, a))
68 done.add((h1, h2, h3))
69 return angles
70
71
72def _GetResidueType(atoms):
73 """ Identifies type in StereoLinkData
74
75 :param atoms: Atoms that define a bond or angle
76 :type atoms: :class:`list` of :class:`AtomHandle`
77 :returns: :class:`str` with which the respective parameters can be
78 accessed in default stereo link data, None if no match is found
79 """
80 residues = [a.GetResidue().handle for a in atoms]
81 chem_types = list(set([str(r.GetChemType()) for r in residues]))
82
83 if len(chem_types) == 1 and chem_types[0] == 'N':
84 return "NA"
85 elif len(chem_types) == 1 and chem_types[0] == 'A':
86 # in both cases, bond or angle, there should be exactly two residues
87 # involved
88 tmp = list()
89 r_hashes = set()
90 for r in residues:
91 h = r.GetHashCode()
92 if h not in r_hashes:
93 r_hashes.add(h)
94 tmp.append(r)
95 residues = tmp
96 if len(residues) != 2:
97 return None
98
99 # need to be sorted
100 if residues[0].GetNumber() > residues[1].GetNumber():
101 r0 = residues[1]
102 r1 = residues[0]
103 else:
104 r0 = residues[0]
105 r1 = residues[1]
106
107 if r1.GetName() == "GLY":
108 return "GLY"
109 elif r1.GetName() == "PRO":
110 a = r0.FindAtom("CA")
111 b = r0.FindAtom("C")
112 c = r1.FindAtom("N")
113 d = r1.FindAtom("CA")
114 if a.IsValid() and b.IsValid() and c.IsValid() and d.IsValid():
115 omega = geom.DihedralAngle(a.GetPos(), b.GetPos(),
116 c.GetPos(), d.GetPos())
117 if abs(omega) < 1.57:
118 return "PRO_CIS"
119 else:
120 return "PRO_TRANS"
121 else:
122 return "PEPTIDE"
123
124 return None
125
126
128 """ Parse stereochemistry data for bonds
129
130 That is expected distances and standard deviations from a
131 :class:`gemmi.Document`. Concatenates results form all loops with tags:
132 _chem_comp_bond.comp_id, _chem_comp_bond.atom_id_1,
133 _chem_comp_bond.atom_id_2, _chem_comp_bond.value_dist,
134 _chem_comp_bond.value_dist_esd
135
136 :param doc: Gemmi doc representing cif file opened with
137 gemmi.cif.read_file(filepath)
138 :type doc: :class:`gemmi.Document`
139 :returns: :class:`dict` with one key per compound, the respective value
140 is again a dict with key f"{at_1}_{at_2}" and value
141 [dist, dist_std].
142 """
143 data = dict()
144 for block in doc:
145 comp_id = block.find_values("_chem_comp_bond.comp_id")
146 at_1 = block.find_values("_chem_comp_bond.atom_id_1")
147 at_2 = block.find_values("_chem_comp_bond.atom_id_2")
148 dist = block.find_values("_chem_comp_bond.value_dist")
149 dist_std = block.find_values("_chem_comp_bond.value_dist_esd")
150 if None not in [comp_id, at_1, at_2, dist, dist_std]:
151 for a, b, c, d, e in zip(comp_id, at_1, at_2, dist, dist_std):
152 if a not in data:
153 data[a] = dict()
154 key = '_'.join([b.strip('\"'), c.strip('\"')])
155 data[a][key] = [float(d), float(e)]
156 return data
157
158
160 """ Parse stereochemistry data for angles
161
162 That is expected distances and standard deviations from a
163 :class:`gemmi.Document`. Concatenates results form all loops with tags:
164 _chem_comp_angle.comp_id, _chem_comp_angle.atom_id_1,
165 _chem_comp_angle.atom_id_2, _chem_comp_angle.atom_id_2,
166 _chem_comp_angle.value_angle, _chem_comp_angle.value_angle_esd
167
168 :param doc: Gemmi doc representing cif file opened with
169 gemmi.cif.read_file(filepath)
170 :type doc: :class:`gemmi.Document`
171 :returns: :class:`dict` with one key per compound, the respective value
172 is again a dict with key f"{at_1}_{at_2}_{at_3}" and value
173 [angle, angle_std].
174 """
175 data = dict()
176 for block in doc:
177 comp_id = block.find_values("_chem_comp_angle.comp_id")
178 at_1 = block.find_values("_chem_comp_angle.atom_id_1")
179 at_2 = block.find_values("_chem_comp_angle.atom_id_2")
180 at_3 = block.find_values("_chem_comp_angle.atom_id_3")
181 angle = block.find_values("_chem_comp_angle.value_angle")
182 angle_std = block.find_values("_chem_comp_angle.value_angle_esd")
183 if None not in [comp_id, at_1, at_2, at_3, angle, angle_std]:
184 for a, b, c, d, e, f in zip(comp_id, at_1, at_2, at_3, angle,
185 angle_std):
186 if a not in data:
187 data[a] = dict()
188 key = '_'.join([b.strip('\"'), c.strip('\"'), d.strip('\"')])
189 data[a][key] = [float(e), float(f)]
190 return data
191
192
193def StereoDataFromMON_LIB(mon_lib_path, compounds=None):
194 """ Parses stereochemistry parameters from CCP4 MON_LIB
195
196 CCP4 `MON_LIB <https://www.ccp4.ac.uk/html/mon_lib.html>`_ contains
197 data on ideal bond lengths/angles for compounds.
198
199 Original data (several updates in the meantime) come from:
200
201 * Amino acid bond lengths and angles: Engh and Huber, Acta Cryst.
202 A47, 392-400 (1991).
203 * Purine and pyrimidine bond lengths and angles: O. Kennard & R. Taylor
204 (1982), J. Am. Soc. Chem. vol. 104, pp. 3209-3212.
205 * Sugar-phosphate backbone bond lengths and bond angles: W. Saenger’s
206 Principles of Nucleic Acid Structure (1983), Springer-Verlag, pp. 70,86.
207
208 This function adds a dependency to the
209 `gemmi <https://github.com/project-gemmi/gemmi/>`_ library to read cif
210 files.
211
212 :param mon_lib_path: Path to CCP4 MON_LIB
213 :type mon_lib_path: :class:`str`
214 :param compounds: Compounds to parse - parses proteinogenic amino acids
215 and nucleotides if not given.
216 :type compounds: :class:`list`
217 :returns: :class:`dict` with stereochemistry parameters
218 """
219 if compounds is None:
220 compounds = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY',
221 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'MSE', 'PHE', 'PRO',
222 'SER', 'THR', 'TRP', 'TYR', 'VAL', 'DA', 'A', 'DC', 'C',
223 'DG', 'G', 'DU', 'U', 'DT', 'DI', 'I']
224
225 cif_paths = list()
226 for c in compounds:
227 p = os.path.join(mon_lib_path, c[0].lower(), c + ".cif")
228 if not os.path.exists(p):
229 raise RuntimeError(f"Tried to find cif file for compound {c} "
230 f"in specified MON_LIB ({mon_lib_path})."
231 f"Expected file ({p}) does not exist.")
232 cif_paths.append(p)
233
234 # hide import to avoid it as dependency for the whole module
235 from gemmi import cif
236 # construct return dict from first element and subsequently
237 # add the remainder
238 doc = cif.read_file(cif_paths[0])
239 data = {"bond_data": _ParseBondData(doc),
240 "angle_data": _ParseAngleData(doc)}
241 for cp in cif_paths[1:]:
242 doc = cif.read_file(cp)
243 bond_data = _ParseBondData(doc)
244 angle_data = _ParseAngleData(doc)
245 data["bond_data"].update(bond_data)
246 data["angle_data"].update(angle_data)
247
248 # add license info
249 copying_str = f"This data has been derived from the CCP4 MON_LIB on "
250 copying_str += f"{datetime.datetime.now()}. MON_LIB is licensed under "
251 copying_str += f"GNU LESSER GENERAL PUBLIC LICENSE Version 3. Consult the "
252 copying_str += f"latest CCP4 for the full license text."
253 data["COPYING"] = copying_str
254
255 return data
256
257
258def GetBondParam(a1, a2, stereo_data = None, stereo_link_data = None):
259 """ Returns mean and standard deviation for bond
260
261 :param a1: First atom that defines bond
262 :type a1: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
263 :param a2: Second atom that defines bond
264 :type a2: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
265 :param stereo_data: Stereochemistry data, use return value of
266 :func:`GetDefaultStereoData` if not given.
267 If you call this function repeatedly, you
268 really should provide *stereo_data*!
269 :type stereo_data: :class:`dict`
270 :param stereo_link_data: Stereochemistry data, use return value of
271 :func:`GetDefaultStereoLinkData` if not given.
272 If you call this function repeatedly, you
273 really should provide *stereo_link_data*!
274 :type stereo_link_data: :class:`dict`
275 :returns: :class:`tuple` with mean and standard deviation. Values are None
276 if respective bond is not found in *stereo_data*
277 """
278 if stereo_data is None:
279 stereo_data = GetDefaultStereoData()
280 if stereo_link_data is None:
281 stereo_link_data = GetDefaultStereoLinkData()
282
283 residue_data = None
284 if a1.GetResidue().GetHashCode() == a2.GetResidue().GetHashCode():
285 # intra residue case
286 rname = a1.GetResidue().GetName()
287 if rname in stereo_data["bond_data"]:
288 residue_data = stereo_data["bond_data"][rname]
289 else:
290 # inter residue case
291 residue_type = _GetResidueType([a1, a2])
292 if residue_type is not None:
293 residue_data = stereo_link_data["bond_data"][residue_type]
294
295 if residue_data is not None:
296 a1name = a1.GetName()
297 a2name = a2.GetName()
298 key = a1name + "_" + a2name
299 if key in residue_data:
300 return (residue_data[key][0], residue_data[key][1])
301 key = a2name + "_" + a1name
302 if key in residue_data:
303 return (residue_data[key][0], residue_data[key][1])
304
305 return (None, None)
306
307
308def GetAngleParam(a1, a2, a3, stereo_data = None, stereo_link_data = None):
309 """ Returns mean and standard deviation for angle
310
311 :param a1: First atom that defines angle
312 :type a1: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
313 :param a2: Second atom that defines angle
314 :type a2: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
315 :param a3: Third atom that defines angle
316 :type a3: :class:`ost.mol.AtomView`/:class:`ost.mol.AtomHandle`
317 :param stereo_data: Stereochemistry data, use return value of
318 :func:`GetDefaultStereoData` if not given.
319 If you call this function repeatedly, you
320 really should provide *stereo_data*!
321 :type stereo_data: :class:`dict`
322 :param stereo_link_data: Stereochemistry data, use return value of
323 :func:`GetDefaultStereoLinkData` if not given.
324 If you call this function repeatedly, you
325 really should provide *stereo_link_data*!
326 :type stereo_link_data: :class:`dict`
327 :returns: :class:`tuple` with mean and standard deviation. Values are None
328 if respective angle is not found in *stereo_data*
329 """
330 if stereo_data is None:
331 stereo_data = GetDefaultStereoData()
332 if stereo_link_data is None:
333 stereo_link_data = GetDefaultStereoLinkData()
334 h1 = a1.GetResidue().handle.GetHashCode()
335 h2 = a2.GetResidue().handle.GetHashCode()
336 h3 = a3.GetResidue().handle.GetHashCode()
337 residue_data = None
338 if h1 == h2 and h2 == h3:
339 # intra residue case
340 rname = a1.GetResidue().GetName()
341 if rname in stereo_data["angle_data"]:
342 residue_data = stereo_data["angle_data"][rname]
343 else:
344 # inter residue case
345 residue_type = _GetResidueType([a1, a2, a3])
346 if residue_type in stereo_link_data["angle_data"]:
347 residue_data = stereo_link_data["angle_data"][residue_type]
348
349 if residue_data is not None:
350 a1name = a1.GetName()
351 a2name = a2.GetName()
352 a3name = a3.GetName()
353 key = a1name + "_" + a2name + "_" + a3name
354 if key in residue_data:
355 return (residue_data[key][0], residue_data[key][1])
356 key = a3name + "_" + a2name + "_" + a1name
357 if key in residue_data:
358 return (residue_data[key][0], residue_data[key][1])
359 return (None, None)
360
361
363 """ Object to hold info on clash
364
365 Constructor arguments are available as attributes:
366
367 * a1 (:class:`ost.mol.AtomHandle`)
368 * a2 (:class:`ost.mol.AtomHandle`)
369 * dist (:class:`float`)
370 * tolerated_dist (:class:`float`)
371 """
372 def __init__(self, a1, a2, dist, tolerated_dist):
373 self.a1 = a1
374 self.a2 = a2
375 self.dist = dist
376 self.tolerated_dist = tolerated_dist
377
378 def ToJSON(self, decimals = 3):
379 """ Return JSON serializable dict
380
381 Atoms are represented by a string in format:
382 <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
383 """
384 return {"a1": _AtomToQualifiedName(self.a1),
385 "a2": _AtomToQualifiedName(self.a2),
386 "dist": round(self.dist, decimals),
387 "tolerated_dist": round(self.tolerated_dist, decimals)}
388
389
391 """ Object to hold info on bond violation
392
393 Constructor arguments are available as attributes:
394
395 * a1 (:class:`ost.mol.AtomHandle`)
396 * a2 (:class:`ost.mol.AtomHandle`)
397 * length (:class:`float`)
398 * exp_length (:class:`float`)
399 * std (:class:`float`)
400 """
401 def __init__(self, a1, a2, length, exp_length, std):
402 self.a1 = a1
403 self.a2 = a2
404 self.length = length
405 self.exp_length = exp_length
406 self.std = std
407
408 def ToJSON(self, decimals = 3):
409 """ Return JSON serializable dict
410
411 Atoms are represented by a string in format:
412 <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
413 """
414 return {"a1": _AtomToQualifiedName(self.a1),
415 "a2": _AtomToQualifiedName(self.a2),
416 "length": round(self.length, decimals),
417 "exp_length": round(self.exp_length, decimals),
418 "std": round(self.std, decimals)}
419
420
422 """ Object to hold info on angle violation
423
424 Constructor arguments are available as attributes:
425
426 * a1 (:class:`ost.mol.AtomHandle`)
427 * a2 (:class:`ost.mol.AtomHandle`)
428 * a3 (:class:`ost.mol.AtomHandle`)
429 * angle (:class:`float`)
430 * exp_angle (:class:`float`)
431 * std (:class:`float`)
432 """
433 def __init__(self, a1, a2, a3, angle, exp_angle, std):
434 self.a1 = a1
435 self.a2 = a2
436 self.a3 = a3
437 self.angle = angle
438 self.exp_angle = exp_angle
439 self.std = std
440
441 def ToJSON(self, decimals = 3):
442 """ Return JSON serializable dict
443
444 Atoms are represented by a string in format:
445 <chain_name>.<resnum>.<resnum_inscode>.<atom_name>
446 """
447 return {"a1": _AtomToQualifiedName(self.a1),
448 "a2": _AtomToQualifiedName(self.a2),
449 "a3": _AtomToQualifiedName(self.a3),
450 "angle": round(self.angle, decimals),
451 "exp_angle": round(self.exp_angle, decimals),
452 "std": round(self.std, decimals)}
453
454
455def GetClashes(ent, vdw_radii = None, tolerance = 1.5, disulfid_dist = 2.03,
456 disulfid_tolerance = 1.0):
457 """ Identifies clashing atoms
458
459 A clash between two non-bonded atoms is defined as their distance d being
460 below the sum of their vdw radii with some subtracted tolerance value.
461
462 The default values are not very sensitive.
463
464 :param ent: Entity for which you want to identify clashing atoms
465 :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
466 :param vdw_radii: Element based van der Waals radii. Only atoms of these
467 elements will be considered. If not given, default values
468 for all elements occuring in proteins/nucleotides are
469 used. Must be provided as :class:`dict`, where they key
470 are elements (capitalized) and value the respective radii
471 in Angstrom.
472 :type vdw_radii: :class:`dict`
473 :param tolerance: Tolerance value
474 :param disulfid_dist: Summed vdw radius that is used if two Sulfurs that can
475 potentially build a disulfid bond interact
476 :type disulfid_dist: :class:`float`
477 :param disulfid_tolerance: The respective tolerance
478 :type disulfid_dist: :class:`float`
479 :returns: A :class:`list` of :class:`ClashInfo`
480 """
481
482 if vdw_radii is None:
483 vdw_radii = {"C": 1.70, "N": 1.55, "O": 1.52, "P": 1.80, "S": 1.80}
484
485 for ele in vdw_radii.keys():
486 if ele.upper() != ele:
487 raise RuntimeError(f"Elements in vdw_radii must be upper case. "
488 f"Got {ele}")
489
490 # it would be elegant to just do a selection by the ele property. However,
491 # thats case sensitive. So the element could be Cl but the vdw radii
492 # are all caps.
493 elements = set([ele.upper() for ele in vdw_radii.keys()])
494 for a in ent.atoms:
495 if a.GetElement().upper() in elements:
496 a.SetIntProp("clash_check", 1)
497 clash_ent = ent.Select("gaclash_check:0=1")
498
499 max_radius = max(vdw_radii.values())
500 max_radius = max(max_radius, 0.5*disulfid_dist)
501 min_tolerance = min(tolerance, disulfid_tolerance)
502 radius = 2*max_radius-min_tolerance
503
504 done = set()
505 return_list = list()
506 for a in clash_ent.atoms:
507 a_hash = a.handle.GetHashCode()
508 close_atoms = clash_ent.FindWithin(a.GetPos(), radius)
509 for ca in close_atoms:
510 ca_hash = ca.handle.GetHashCode()
511 if a_hash != ca_hash and not mol.BondExists(a.handle, ca.handle):
512 d = geom.Distance(a.GetPos(), ca.GetPos())
513 if _PotentialDisulfid(a, ca):
514 thresh = disulfid_dist - disulfid_tolerance
515 else:
516 thresh = vdw_radii[a.GetElement().upper()]
517 thresh += vdw_radii[ca.GetElement().upper()]
518 thresh -= tolerance
519 if d < thresh:
520 # check if already there, add if not
521 hash_pair = (min(a_hash, ca_hash), max(a_hash, ca_hash))
522 if hash_pair not in done:
523 done.add(hash_pair)
524 return_list.append(ClashInfo(a.handle, ca.handle, d,
525 thresh))
526 return return_list
527
528
529def GetBadBonds(ent, stereo_data = None, stereo_link_data = None, tolerance=12):
530 """ Identify unrealistic bonds
531
532 :param ent: Entity for which you want to identify unrealistic bonds
533 :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
534 :param stereo_data: Stereochemistry data, use return value of
535 :func:`GetDefaultStereoData` if not given.
536 :type stereo_data: :class:`dict`
537 :param stereo_link_data: Stereochemistry data, use return value of
538 :func:`GetDefaultStereoLinkData` if not given.
539 :type stereo_link_data: :class:`dict`
540 :param tolerance: Bonds that devaiate more than *tolerance* times standard
541 deviation from expected mean are considered bad
542 :type tolerance: :class:`int`
543 :returns: :class:`list` :class:`BondViolationInfo`
544
545 """
546 if stereo_data is None:
547 stereo_data = GetDefaultStereoData()
548 if stereo_link_data is None:
549 stereo_link_data = GetDefaultStereoLinkData()
550 return_list = list()
551 for b in ent.bonds:
552 a1 = b.first
553 a2 = b.second
554 mean, std = GetBondParam(a1, a2, stereo_data = stereo_data,
555 stereo_link_data = stereo_link_data)
556 if None not in [mean, std]:
557 l = b.length
558 if abs(mean-l) > tolerance*std:
559 return_list.append(BondViolationInfo(a1, a2, l, mean, std))
560 return return_list
561
562
563def GetBadAngles(ent, stereo_data = None, stereo_link_data = None,
564 tolerance = 12):
565 """ Identify unrealistic angles
566
567 :param ent: Entity for which you want to identify unrealistic angles
568 :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
569 :param stereo_data: Stereochemistry data, use return value of
570 :func:`GetDefaultStereoData` if not given.
571 :type stereo_data: :class:`dict`
572 :param stereo_link_data: Stereochemistry data, use return value of
573 :func:`GetDefaultStereoLinkData` if not given.
574 :type stereo_link_data: :class:`dict`
575 :param tolerance: Angles that devaiate more than *tolerance* times standard
576 deviation from expected mean are considered bad
577 :type tolerance: :class:`int`
578 :returns: :class:`list` of :class:`AngleViolationInfo`
579 """
580 if stereo_data is None:
581 stereo_data = GetDefaultStereoData()
582 if stereo_link_data is None:
583 stereo_link_data = GetDefaultStereoLinkData()
584 return_list = list()
585 for a in _GetAngles(ent.bonds):
586 mean, std = GetAngleParam(a[0], a[1], a[2], stereo_data = stereo_data,
587 stereo_link_data = stereo_link_data)
588 if None not in [mean, std]:
589 angle = geom.Angle(a[0].GetPos() - a[1].GetPos(),
590 a[2].GetPos() - a[1].GetPos())
591 angle = angle/np.pi*180 # stereo params are in degrees
592 diff = abs(mean-angle)
593 if diff > tolerance*std:
594 return_list.append(AngleViolationInfo(a[0], a[1], a[2], angle,
595 mean, std))
596 return return_list
597
598
599def StereoCheck(ent, stereo_data = None, stereo_link_data = None):
600 """ Remove atoms with stereochemical problems
601
602 Selects for peptide/nucleotides and calls :func:`GetClashes`,
603 :func:`GetBadBonds` and :func:`GetBadAngles` with default
604 parameters.
605
606 * Amino acids: Remove full residue if backbone atom is involved in
607 stereochemistry issue ("N", "CA", "C", "O"). Remove sidechain if any of
608 the sidechain atoms is involved in stereochemistry issues.
609 * Nucleotides: Remove full residue if backbone atom is involved in
610 stereochemistry issue ("P", "OP1", "OP2", "OP3", "O5'", "C5'", "C4'",
611 "C3'", "C2'", "C1'", "O4'", "O3'", "O2'"). Remove sidechain (base) if any
612 of the sidechain atoms is involved in stereochemistry issues.
613
614 :param ent: Entity to be stereochecked
615 :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
616 :param stereo_data: Stereochemistry data, use return value of
617 :func:`GetDefaultStereoData` if not given.
618 :type stereo_data: :class:`dict`
619 :param stereo_link_data: Stereochemistry data, use return value of
620 :func:`GetDefaultStereoLinkData` if not given.
621 :type stereo_link_data: :class:`dict`
622 :returns: Tuple with four elements: 1) :class:`ost.mol.EntityView` of
623 *ent* processed as described above 2) Return value of
624 :func:`GetClashes` 3) return value of :func:`GetBadBonds`
625 4) return value of :func:`GetBadAngles`
626 """
627 if stereo_data is None:
628 stereo_data = GetDefaultStereoData()
629
630 sel = ent.Select("peptide=true or nucleotide=true")
631 clashes = GetClashes(sel)
632 bad_bonds = GetBadBonds(sel, stereo_data = stereo_data)
633 bad_angles = GetBadAngles(sel, stereo_data = stereo_data)
634
635 # set stereo problems as properties on an atom level
636 for clash in clashes:
637 clash.a1.SetIntProp("stereo_problem", 1)
638 clash.a2.SetIntProp("stereo_problem", 1)
639
640 for bond in bad_bonds:
641 bond.a1.SetIntProp("stereo_problem", 1)
642 bond.a2.SetIntProp("stereo_problem", 1)
643
644 for angle in bad_angles:
645 angle.a1.SetIntProp("stereo_problem", 1)
646 angle.a2.SetIntProp("stereo_problem", 1)
647 angle.a3.SetIntProp("stereo_problem", 1)
648
649 # set stereo problems as properties on a residue level
650 bad_ent = ent.Select("gastereo_problem:0=1")
651 if len(bad_ent.residues) > 0:
652 pep_bb = set(["N", "CA", "C", "O"])
653 nuc_bb = set(["P", "OP1", "OP2", "OP3", "O5'", "C5'", "C4'", "C3'",
654 "C2'", "C1'", "O4'", "O3'", "O2'"])
655
656 for r in bad_ent.residues:
657 bad_atoms = set([a.GetName() for a in r.atoms])
658 r.SetIntProp("stereo_problem", 1)
659 if r.GetChemType() == mol.ChemType.NUCLEOTIDES:
660 if len(nuc_bb.intersection(bad_atoms)) > 0:
661 r.SetIntProp("stereo_problem_bb", 1)
662 elif r.GetChemType() == mol.ChemType.AMINOACIDS:
663 if len(pep_bb.intersection(bad_atoms)) > 0:
664 r.SetIntProp("stereo_problem_bb", 1)
665
666 # explicitely add " as OpenStructure query language would not
667 # understand ' otherwise
668 nuc_bb = [f"\"{name}\"" for name in nuc_bb]
669
670 pep_query = f"(peptide=true and grstereo_problem:0=0) or "
671 pep_query += f"(peptide=true and grstereo_problem_bb:0=0 and "
672 pep_query += f"aname={','.join(pep_bb)})"
673 nuc_query = f"(nucleotide=true and grstereo_problem:0=0) or "
674 nuc_query += f"(nucleotide=true and grstereo_problem_bb:0=0 and "
675 nuc_query += f"aname={','.join(nuc_bb)})"
676 query = pep_query + " or " + nuc_query
677 return_view = sel.Select(query)
678 else:
679 return_view = sel
680
681 return return_view, clashes, bad_bonds, bad_angles
682
683
685 """ Get default stereo data derived from CCP4 MON_LIB
686
687 Used as default if not provided in :func:`GetBadBonds`, :func:`GetBadAngles`
688 and :func:`StereoCheck`.
689
690 MON_LIB is licensed under GNU LESSER GENERAL PUBLIC LICENSE Version 3.
691 Consult the latest CCP4 for the full license text.
692 """
693 data_path = os.path.join(ost.GetSharedDataPath(), "stereo_data.json")
694 with open(data_path, 'r') as fh:
695 return json.load(fh)
696
697
699 """ Get default stereo data for links between compounds
700
701 Hardcoded from arbitrary sources, see comments in the code.
702
703 :returns: Data for peptide bonds, nucleotide links and disulfid bonds that
704 are used as default if not provided in :func:`GetBadBonds`,
705 :func:`GetBadAngles` and :func:`StereoCheck`.
706 """
707 data = {"bond_data": dict(),
708 "angle_data": dict()}
709
710 # data for nucleotides - deliberately stolen from
711 # geostd (https://github.com/phenix-project/geostd) which is basically
712 # the Phenix equivalent for MON_LIB
713 # used file: $GEOSTD_DIR/rna_dna/chain_link_rna2p.cif
714 # Reason to not use the same data origin as peptides is that in CCP4
715 # there is a bit a more fine grained differentiation of NA types
716 # which makes things more complicated.
717 data["bond_data"]["NA"] = dict()
718 data["bond_data"]["NA"]["O3'_P"] = [1.607, 0.015]
719
720 data["angle_data"]["NA"] = dict()
721 data["angle_data"]["NA"]["O3'_P_O5'"] = [104.000, 1.500]
722 data["angle_data"]["NA"]["O3'_P_OP1"] = [108.000, 3.000]
723 data["angle_data"]["NA"]["O3'_P_OP2"] = [108.000, 3.000]
724 data["angle_data"]["NA"]["C3'_O3'_P"] = [120.200, 1.500]
725
726 # data for peptides - deliberately stolen from standard_geometry.cif file
727 # which is shipped with CCP4
728 # (_standard_geometry.version "Fri Feb 22 17:25:15 GMT 2013").
729 data["bond_data"]["PEPTIDE"] = dict()
730 data["bond_data"]["PEPTIDE"]["C_N"] = [1.336, 0.023]
731 data["bond_data"]["PEPTIDE"]["SG_SG"] = [2.033, 0.016]
732
733 data["bond_data"]["GLY"] = dict()
734 data["bond_data"]["GLY"]["C_N"] = [1.326, 0.018]
735
736 data["bond_data"]["PRO_CIS"] = dict()
737 data["bond_data"]["PRO_CIS"]["C_N"] = [1.338, 0.019]
738 data["bond_data"]["PRO_TRANS"] = dict()
739 data["bond_data"]["PRO_TRANS"]["C_N"] = [1.338, 0.019]
740
741 data["angle_data"]["PEPTIDE"] = dict()
742 data["angle_data"]["PEPTIDE"]["CA_C_N"] = [117.2, 2.2]
743 data["angle_data"]["PEPTIDE"]["O_C_N"] = [122.7, 1.6]
744 data["angle_data"]["PEPTIDE"]["C_N_CA"] = [121.7, 2.5]
745
746 data["angle_data"]["GLY"] = dict()
747 data["angle_data"]["GLY"]["CA_C_N"] = [116.2, 2.0]
748 data["angle_data"]["GLY"]["O_C_N"] = [123.2, 1.7]
749 data["angle_data"]["GLY"]["C_N_CA"] = [122.3, 2.1]
750
751 data["angle_data"]["PRO_TRANS"] = dict()
752 data["angle_data"]["PRO_TRANS"]["CA_C_N"] = [117.1, 2.8]
753 data["angle_data"]["PRO_TRANS"]["O_C_N"] = [121.1, 1.9]
754 data["angle_data"]["PRO_TRANS"]["C_N_CA"] = [119.3, 1.5]
755 data["angle_data"]["PRO_TRANS"]["C_N_CD"] = [128.4, 2.1]
756
757 data["angle_data"]["PRO_CIS"] = dict()
758 data["angle_data"]["PRO_CIS"]["CA_C_N"] = [117.1, 2.8]
759 data["angle_data"]["PRO_CIS"]["O_C_N"] = [121.1, 1.9]
760 data["angle_data"]["PRO_CIS"]["C_N_CA"] = [127.0, 2.4]
761 data["angle_data"]["PRO_CIS"]["C_N_CD"] = [120.6, 2.2]
762
763 return data
__init__(self, a1, a2, a3, angle, exp_angle, std)
__init__(self, a1, a2, length, exp_length, std)
__init__(self, a1, a2, dist, tolerated_dist)
Real DLLEXPORT_OST_GEOM Angle(const Line2 &l1, const Line2 &l2)
Real DihedralAngle(const Vec3 &p1, const Vec3 &p2, const Vec3 &p3, const Vec3 &p4)
Get dihedral angle for p1-p2-p3-p4.
Real DLLEXPORT_OST_GEOM Distance(const Line2 &l, const Vec2 &v)
GetClashes(ent, vdw_radii=None, tolerance=1.5, disulfid_dist=2.03, disulfid_tolerance=1.0)
GetBadBonds(ent, stereo_data=None, stereo_link_data=None, tolerance=12)
GetBondParam(a1, a2, stereo_data=None, stereo_link_data=None)
StereoCheck(ent, stereo_data=None, stereo_link_data=None)
GetBadAngles(ent, stereo_data=None, stereo_link_data=None, tolerance=12)
StereoDataFromMON_LIB(mon_lib_path, compounds=None)
GetAngleParam(a1, a2, a3, stereo_data=None, stereo_link_data=None)
String DLLEXPORT_OST_BASE GetSharedDataPath()