OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
dockq.py
Go to the documentation of this file.
1 from ost import geom
2 from ost import mol
3 from ost import seq
4 
5 def _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
6  ch1_aln = None, ch2_aln = None):
7  """ Preprocesses *mdl* and *ref*
8 
9  Sets int properties to each residue in mdl_ch1, mdl_ch2 as well as
10  the respective reference chains.
11  dockq_mapped: 1 if a residues in mdl_ch1 is mapped to a residue in ref_ch1
12  and vice versa, 0 otherwise. Same is done for mdl_ch2 and
13  ref_ch2.
14  dockq_idx: If a pair of residue is mapped, the same index will be set
15  to both residues. The index is unique otherwise.
16 
17  By default, mapping happens with residue numbers but you can enforce a
18  mapping with an alignment. In the example of ch1_aln, the first sequence
19  corresponds to the ATOMSEQ of ref_ch1 in ref and the second sequence to
20  the ATOMSEQ of mdl_ch1 in mdl.
21  """
22 
23  # set default values for dockq_mapped and dockq_idx properties
24  # => makes sure that we have a clean slate if stuff has been set in
25  # previous runs
26  for cname in [ref_ch1, ref_ch2]:
27  ch = ref.FindChain(cname)
28  for r in ch.residues:
29  r.SetIntProp("dockq_mapped", 0)
30  r.SetIntProp("dockq_idx", -1)
31  for cname in [mdl_ch1, mdl_ch2]:
32  ch = mdl.FindChain(cname)
33  for r in ch.residues:
34  r.SetIntProp("dockq_mapped", 0)
35  r.SetIntProp("dockq_idx", -1)
36 
37  dockq_idx = 0
38  if ch1_aln is not None and ch2_aln is not None:
39  # there are potentially already views attached to the alns but
40  # we go for *mdl* and *ref* here
41  if ch1_aln.GetCount() != 2 or ch2_aln.GetCount() != 2:
42  raise RuntimeError("Expect exactly two sequences in alns provided "
43  "to DockQ!")
44  tmp = ch1_aln.GetSequence(0)
45  ref_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
46  ref_s1.SetOffset(tmp.GetOffset())
47  ref_s1.AttachView(ref.Select(f"cname={ref_ch1}"))
48  tmp = ch1_aln.GetSequence(1)
49  mdl_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
50  mdl_s1.SetOffset(tmp.GetOffset())
51  mdl_s1.AttachView(mdl.Select(f"cname={mdl_ch1}"))
52  new_ch1_aln = seq.CreateAlignment(ref_s1, mdl_s1)
53  for col in new_ch1_aln:
54  if col[0] != '-' and col[1] != '-':
55  ref_r = col.GetResidue(0)
56  mdl_r = col.GetResidue(1)
57  if not (ref_r.IsValid() and ref_r.one_letter_code == col[0]):
58  raise RuntimeError("DockQ: mismatch between provided "
59  "alignments and ATOMSEQ in structures")
60  if not (mdl_r.IsValid() and mdl_r.one_letter_code == col[1]):
61  raise RuntimeError("DockQ: mismatch between provided "
62  "alignments and ATOMSEQ in structures")
63  ref_r.SetIntProp("dockq_idx", dockq_idx)
64  mdl_r.SetIntProp("dockq_idx", dockq_idx)
65  ref_r.SetIntProp("dockq_mapped", 1)
66  mdl_r.SetIntProp("dockq_mapped", 1)
67  dockq_idx += 1
68 
69  tmp = ch2_aln.GetSequence(0)
70  ref_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
71  ref_s2.SetOffset(tmp.GetOffset())
72  ref_s2.AttachView(ref.Select(f"cname={ref_ch2}"))
73  tmp = ch2_aln.GetSequence(1)
74  mdl_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
75  mdl_s2.SetOffset(tmp.GetOffset())
76  mdl_s2.AttachView(mdl.Select(f"cname={mdl_ch2}"))
77  new_ch2_aln = seq.CreateAlignment(ref_s2, mdl_s2)
78  for col in new_ch2_aln:
79  if col[0] != '-' and col[1] != '-':
80  ref_r = col.GetResidue(0)
81  mdl_r = col.GetResidue(1)
82  if not (ref_r.IsValid() and ref_r.one_letter_code == col[0]):
83  raise RuntimeError("DockQ: mismatch between provided "
84  "alignments and ATOMSEQ in structures")
85  if not (mdl_r.IsValid() and mdl_r.one_letter_code == col[1]):
86  raise RuntimeError("DockQ: mismatch between provided "
87  "alignments and ATOMSEQ in structures")
88  ref_r.SetIntProp("dockq_idx", dockq_idx)
89  mdl_r.SetIntProp("dockq_idx", dockq_idx)
90  ref_r.SetIntProp("dockq_mapped", 1)
91  mdl_r.SetIntProp("dockq_mapped", 1)
92  dockq_idx += 1
93  else:
94  # go by residue numbers
95  for mdl_r in mdl.Select(f"cname={mdl_ch1}").residues:
96  ref_r = ref.FindResidue(ref_ch1, mdl_r.GetNumber())
97  if ref_r.IsValid():
98  ref_r.SetIntProp("dockq_idx", dockq_idx)
99  mdl_r.SetIntProp("dockq_idx", dockq_idx)
100  ref_r.SetIntProp("dockq_mapped", 1)
101  mdl_r.SetIntProp("dockq_mapped", 1)
102  dockq_idx += 1
103  for mdl_r in mdl.Select(f"cname={mdl_ch2}").residues:
104  ref_r = ref.FindResidue(ref_ch2, mdl_r.GetNumber())
105  if ref_r.IsValid():
106  ref_r.SetIntProp("dockq_idx", dockq_idx)
107  mdl_r.SetIntProp("dockq_idx", dockq_idx)
108  ref_r.SetIntProp("dockq_mapped", 1)
109  mdl_r.SetIntProp("dockq_mapped", 1)
110  dockq_idx += 1
111 
112  # set unique dockq_idx property for all residues that are still -1
113  for cname in [ref_ch1, ref_ch2]:
114  ch = ref.FindChain(cname)
115  for r in ch.residues:
116  if r.GetIntProp("dockq_idx") == -1:
117  r.SetIntProp("dockq_idx", dockq_idx)
118  dockq_idx += 1
119  for cname in [mdl_ch1, mdl_ch2]:
120  ch = mdl.FindChain(cname)
121  for r in ch.residues:
122  if r.GetIntProp("dockq_idx") == -1:
123  r.SetIntProp("dockq_idx", dockq_idx)
124  dockq_idx += 1
125 
126 def _GetContacts(ent, ch1, ch2, dist_thresh):
127  int1 = ent.Select(f"cname={ch1} and {dist_thresh} <> [cname={ch2}]")
128  int2 = ent.Select(f"cname={ch2} and {dist_thresh} <> [cname={ch1}]")
129  contacts = set()
130  int1_p = [geom.Vec3List([a.pos for a in r.atoms]) for r in int1.residues]
131  int2_p = [geom.Vec3List([a.pos for a in r.atoms]) for r in int2.residues]
132  for r1, p1 in zip(int1.residues, int1_p):
133  for r2, p2 in zip(int2.residues, int2_p):
134  if p1.IsWithin(p2, dist_thresh):
135  contacts.add((r1.GetIntProp("dockq_idx"),
136  r2.GetIntProp("dockq_idx")))
137  return contacts
138 
139 def _ContactScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
140  dist_thresh=5.0):
141  ref_contacts = _GetContacts(ref, ref_ch1, ref_ch2, dist_thresh)
142  mdl_contacts = _GetContacts(mdl, mdl_ch1, mdl_ch2, dist_thresh)
143 
144  nnat = len(ref_contacts)
145  nmdl = len(mdl_contacts)
146 
147  fnat = len(ref_contacts.intersection(mdl_contacts))
148  if nnat > 0:
149  fnat /= nnat
150 
151  fnonnat = len(mdl_contacts.difference(ref_contacts))
152  if len(mdl_contacts) > 0:
153  fnonnat /= len(mdl_contacts)
154 
155  return (nnat, nmdl, fnat, fnonnat)
156 
157 def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0):
158 
159  # backbone atoms used for superposition
160  sup_atoms = ['CA','C','N','O']
161 
162  # make mapped residues accessible by the dockq_idx property
163  mapped_mdl = mdl.Select(f"cname={mdl_ch1},{mdl_ch2} and grdockq_mapped=1")
164  mapped_ref = ref.Select(f"cname={ref_ch1},{ref_ch2} and grdockq_mapped=1")
165  ch = mapped_mdl.FindChain(mdl_ch1)
166  mdl_ch1_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
167  ch = mapped_mdl.FindChain(mdl_ch2)
168  mdl_ch2_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
169  ch = mapped_ref.FindChain(ref_ch1)
170  ref_ch1_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
171  ch = mapped_ref.FindChain(ref_ch2)
172  ref_ch2_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
173 
174  # iRMSD
175  #######
176  int1 = ref.Select(f"cname={ref_ch1} and {dist_thresh} <> [cname={ref_ch2}]")
177  int2 = ref.Select(f"cname={ref_ch2} and {dist_thresh} <> [cname={ref_ch1}]")
178  ref_pos = geom.Vec3List()
179  mdl_pos = geom.Vec3List()
180  for r in int1.residues:
181  idx = r.GetIntProp("dockq_idx")
182  if idx in ref_ch1_residues and idx in mdl_ch1_residues:
183  ref_r = ref_ch1_residues[idx]
184  mdl_r = mdl_ch1_residues[idx]
185  for aname in sup_atoms:
186  ref_a = ref_r.FindAtom(aname)
187  mdl_a = mdl_r.FindAtom(aname)
188  if ref_a.IsValid() and mdl_a.IsValid():
189  ref_pos.append(ref_a.pos)
190  mdl_pos.append(mdl_a.pos)
191  for r in int2.residues:
192  idx = r.GetIntProp("dockq_idx")
193  if idx in ref_ch2_residues and idx in mdl_ch2_residues:
194  ref_r = ref_ch2_residues[idx]
195  mdl_r = mdl_ch2_residues[idx]
196  for aname in sup_atoms:
197  ref_a = ref_r.FindAtom(aname)
198  mdl_a = mdl_r.FindAtom(aname)
199  if ref_a.IsValid() and mdl_a.IsValid():
200  ref_pos.append(ref_a.pos)
201  mdl_pos.append(mdl_a.pos)
202 
203  if len(mdl_pos) >= 3:
204  sup_result = mol.alg.SuperposeSVD(mdl_pos, ref_pos)
205  irmsd = sup_result.rmsd
206  else:
207  irmsd = 0.0
208 
209  # lRMSD
210  #######
211  # receptor is by definition the larger chain in ref
212  n_ch1 = len(ref.FindChain(ref_ch1).residues)
213  n_ch2 = len(ref.FindChain(ref_ch2).residues)
214  if n_ch1 > n_ch2:
215  ref_receptor_residues = ref_ch1_residues.values()
216  ref_ligand_residues = ref_ch2_residues.values()
217  mdl_receptor_residues = \
218  [mdl_ch1_residues[idx] for idx in ref_ch1_residues.keys()]
219  mdl_ligand_residues = \
220  [mdl_ch2_residues[idx] for idx in ref_ch2_residues.keys()]
221  else:
222  ref_receptor_residues = ref_ch2_residues.values()
223  ref_ligand_residues = ref_ch1_residues.values()
224  mdl_receptor_residues = \
225  [mdl_ch2_residues[idx] for idx in ref_ch2_residues.keys()]
226  mdl_ligand_residues = \
227  [mdl_ch1_residues[idx] for idx in ref_ch1_residues.keys()]
228  ref_receptor_positions = geom.Vec3List()
229  mdl_receptor_positions = geom.Vec3List()
230  ref_ligand_positions = geom.Vec3List()
231  mdl_ligand_positions = geom.Vec3List()
232  for ref_r, mdl_r in zip(ref_receptor_residues, mdl_receptor_residues):
233  for aname in sup_atoms:
234  ref_a = ref_r.FindAtom(aname)
235  mdl_a = mdl_r.FindAtom(aname)
236  if ref_a.IsValid() and mdl_a.IsValid():
237  ref_receptor_positions.append(ref_a.pos)
238  mdl_receptor_positions.append(mdl_a.pos)
239  for ref_r, mdl_r in zip(ref_ligand_residues, mdl_ligand_residues):
240  for aname in sup_atoms:
241  ref_a = ref_r.FindAtom(aname)
242  mdl_a = mdl_r.FindAtom(aname)
243  if ref_a.IsValid() and mdl_a.IsValid():
244  ref_ligand_positions.append(ref_a.pos)
245  mdl_ligand_positions.append(mdl_a.pos)
246 
247  if len(mdl_receptor_positions) >= 3:
248  sup_result = mol.alg.SuperposeSVD(mdl_receptor_positions,
249  ref_receptor_positions)
250  mdl_ligand_positions.ApplyTransform(sup_result.transformation)
251  lrmsd = mdl_ligand_positions.GetRMSD(ref_ligand_positions)
252  else:
253  lrmsd = 0.0
254 
255  return (irmsd, lrmsd)
256 
257 def _ScaleRMSD(rmsd, d):
258  return 1.0/(1+(rmsd/d)**2)
259 
260 def _DockQ(fnat, lrmsd, irmsd, d1, d2):
261  """ The final number chrunching as described in the DockQ manuscript
262  """
263  return (fnat + _ScaleRMSD(lrmsd, d1) + _ScaleRMSD(irmsd, d2))/3
264 
265 def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
266  ch1_aln=None, ch2_aln=None):
267  """ Computes DockQ for specified interface
268 
269  DockQ is described in: Sankar Basu and Bjoern Wallner (2016), "DockQ: A
270  Quality Measure for Protein-Protein Docking Models", PLOS one
271 
272  Residues are mapped based on residue numbers by default. If you provide
273  *ch1_aln* and *ch2_aln* you can enforce an arbitrary mapping.
274 
275  :param mdl: Model structure
276  :type mdl: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
277  :param ref: Reference structure, i.e. native structure
278  :type ref: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
279  :param mdl_ch1: Specifies chain in model constituting first part of
280  interface
281  :type mdl_ch1: :class:`str`
282  :param mdl_ch2: Specifies chain in model constituting second part of
283  interface
284  :type mdl_ch2: :class:`str`
285  :param ref_ch1: ref equivalent of mdl_ch1
286  :type ref_ch1: :class:`str`
287  :param ref_ch2: ref equivalent of mdl_ch2
288  :type ref_ch2: :class:`str`
289  :param ch1_aln: Alignment with two sequences to map *ref_ch1* and *mdl_ch1*.
290  The first sequence must match the sequence in *ref_ch1* and
291  the second to *mdl_ch1*.
292  :type ch1_aln: :class:`ost.seq.AlignmentHandle`
293  :param ch2_aln: Alignment with two sequences to map *ref_ch2* and *mdl_ch2*.
294  The first sequence must match the sequence in *ref_ch2* and
295  the second to *mdl_ch2*.
296  :type ch2_aln: :class:`ost.seq.AlignmentHandle`
297  :returns: :class:`dict` with keys nnat, nmdl, fnat, fnonnat, irmsd, lrmsd,
298  DockQ which corresponds to the equivalent values in the original
299  DockQ implementation.
300  """
301  _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
302  ch1_aln = ch1_aln, ch2_aln = ch2_aln)
303  nnat, nmdl, fnat, fnonnat = _ContactScores(mdl, ref, mdl_ch1, mdl_ch2,
304  ref_ch1, ref_ch2)
305  irmsd, lrmsd = _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2)
306  return {"nnat": nnat,
307  "nmdl": nmdl,
308  "fnat": fnat,
309  "fnonnat": fnonnat,
310  "irmsd": round(irmsd, 3),
311  "lrmsd": round(lrmsd, 3),
312  "DockQ": round(_DockQ(fnat, lrmsd, irmsd, 8.5, 1.5), 3)}