5 def _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
6 ch1_aln = None, ch2_aln = None):
7 """ Preprocesses *mdl* and *ref*
9 Sets int properties to each residue in mdl_ch1, mdl_ch2 as well as
10 the respective reference chains.
11 dockq_mapped: 1 if a residues in mdl_ch1 is mapped to a residue in ref_ch1
12 and vice versa, 0 otherwise. Same is done for mdl_ch2 and
14 dockq_idx: If a pair of residue is mapped, the same index will be set
15 to both residues. The index is unique otherwise.
17 By default, mapping happens with residue numbers but you can enforce a
18 mapping with an alignment. In the example of ch1_aln, the first sequence
19 corresponds to the ATOMSEQ of ref_ch1 in ref and the second sequence to
20 the ATOMSEQ of mdl_ch1 in mdl.
26 for cname
in [ref_ch1, ref_ch2]:
27 ch = ref.FindChain(cname)
29 r.SetIntProp(
"dockq_mapped", 0)
30 r.SetIntProp(
"dockq_idx", -1)
31 for cname
in [mdl_ch1, mdl_ch2]:
32 ch = mdl.FindChain(cname)
34 r.SetIntProp(
"dockq_mapped", 0)
35 r.SetIntProp(
"dockq_idx", -1)
38 if ch1_aln
is not None and ch2_aln
is not None:
41 if ch1_aln.GetCount() != 2
or ch2_aln.GetCount() != 2:
42 raise RuntimeError(
"Expect exactly two sequences in alns provided "
44 tmp = ch1_aln.GetSequence(0)
45 ref_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
46 ref_s1.SetOffset(tmp.GetOffset())
47 ref_s1.AttachView(ref.Select(f
"cname={mol.QueryQuoteName(ref_ch1)}"))
48 tmp = ch1_aln.GetSequence(1)
49 mdl_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
50 mdl_s1.SetOffset(tmp.GetOffset())
51 mdl_s1.AttachView(mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch1)}"))
52 new_ch1_aln = seq.CreateAlignment(ref_s1, mdl_s1)
53 for col
in new_ch1_aln:
54 if col[0] !=
'-' and col[1] !=
'-':
55 ref_r = col.GetResidue(0)
56 mdl_r = col.GetResidue(1)
57 if not (ref_r.IsValid()
and ref_r.one_letter_code == col[0]):
58 raise RuntimeError(
"DockQ: mismatch between provided "
59 "alignments and ATOMSEQ in structures")
60 if not (mdl_r.IsValid()
and mdl_r.one_letter_code == col[1]):
61 raise RuntimeError(
"DockQ: mismatch between provided "
62 "alignments and ATOMSEQ in structures")
63 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
64 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
65 ref_r.SetIntProp(
"dockq_mapped", 1)
66 mdl_r.SetIntProp(
"dockq_mapped", 1)
69 tmp = ch2_aln.GetSequence(0)
70 ref_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
71 ref_s2.SetOffset(tmp.GetOffset())
72 ref_s2.AttachView(ref.Select(f
"cname={mol.QueryQuoteName(ref_ch2)}"))
73 tmp = ch2_aln.GetSequence(1)
74 mdl_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
75 mdl_s2.SetOffset(tmp.GetOffset())
76 mdl_s2.AttachView(mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch2)}"))
77 new_ch2_aln = seq.CreateAlignment(ref_s2, mdl_s2)
78 for col
in new_ch2_aln:
79 if col[0] !=
'-' and col[1] !=
'-':
80 ref_r = col.GetResidue(0)
81 mdl_r = col.GetResidue(1)
82 if not (ref_r.IsValid()
and ref_r.one_letter_code == col[0]):
83 raise RuntimeError(
"DockQ: mismatch between provided "
84 "alignments and ATOMSEQ in structures")
85 if not (mdl_r.IsValid()
and mdl_r.one_letter_code == col[1]):
86 raise RuntimeError(
"DockQ: mismatch between provided "
87 "alignments and ATOMSEQ in structures")
88 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
89 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
90 ref_r.SetIntProp(
"dockq_mapped", 1)
91 mdl_r.SetIntProp(
"dockq_mapped", 1)
95 for mdl_r
in mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch1)}").residues:
96 ref_r = ref.FindResidue(ref_ch1, mdl_r.GetNumber())
98 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
99 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
100 ref_r.SetIntProp(
"dockq_mapped", 1)
101 mdl_r.SetIntProp(
"dockq_mapped", 1)
103 for mdl_r
in mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch2)}").residues:
104 ref_r = ref.FindResidue(ref_ch2, mdl_r.GetNumber())
106 ref_r.SetIntProp(
"dockq_idx", dockq_idx)
107 mdl_r.SetIntProp(
"dockq_idx", dockq_idx)
108 ref_r.SetIntProp(
"dockq_mapped", 1)
109 mdl_r.SetIntProp(
"dockq_mapped", 1)
113 for cname
in [ref_ch1, ref_ch2]:
114 ch = ref.FindChain(cname)
115 for r
in ch.residues:
116 if r.GetIntProp(
"dockq_idx") == -1:
117 r.SetIntProp(
"dockq_idx", dockq_idx)
119 for cname
in [mdl_ch1, mdl_ch2]:
120 ch = mdl.FindChain(cname)
121 for r
in ch.residues:
122 if r.GetIntProp(
"dockq_idx") == -1:
123 r.SetIntProp(
"dockq_idx", dockq_idx)
126 def _GetContacts(ent, ch1, ch2, dist_thresh):
127 int1 = ent.Select(f
"cname={mol.QueryQuoteName(ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch2)}]")
128 int2 = ent.Select(f
"cname={mol.QueryQuoteName(ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch1)}]")
130 int1_p = [
geom.Vec3List([a.pos
for a
in r.atoms])
for r
in int1.residues]
131 int2_p = [
geom.Vec3List([a.pos
for a
in r.atoms])
for r
in int2.residues]
132 for r1, p1
in zip(int1.residues, int1_p):
133 for r2, p2
in zip(int2.residues, int2_p):
134 if p1.IsWithin(p2, dist_thresh):
135 contacts.add((r1.GetIntProp(
"dockq_idx"),
136 r2.GetIntProp(
"dockq_idx")))
139 def _ContactScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
141 ref_contacts = _GetContacts(ref, ref_ch1, ref_ch2, dist_thresh)
142 mdl_contacts = _GetContacts(mdl, mdl_ch1, mdl_ch2, dist_thresh)
144 nnat = len(ref_contacts)
145 nmdl = len(mdl_contacts)
147 fnat = len(ref_contacts.intersection(mdl_contacts))
151 fnonnat = len(mdl_contacts.difference(ref_contacts))
152 if len(mdl_contacts) > 0:
153 fnonnat /= len(mdl_contacts)
155 return (nnat, nmdl, fnat, fnonnat)
157 def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0,
161 sup_atoms = [
"CA",
"C",
"N",
"O",
162 "P",
"OP1",
"OP2",
"O2'",
"O3'",
"O4'",
"O5'",
"C1'",
"C2'",
166 mapped_mdl = mdl.Select(f
"cname={mol.QueryQuoteName(mdl_ch1)},{mol.QueryQuoteName(mdl_ch2)} and grdockq_mapped=1")
167 mapped_ref = ref.Select(f
"cname={mol.QueryQuoteName(ref_ch1)},{mol.QueryQuoteName(ref_ch2)} and grdockq_mapped=1")
168 ch = mapped_mdl.FindChain(mdl_ch1)
169 mdl_ch1_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
170 ch = mapped_mdl.FindChain(mdl_ch2)
171 mdl_ch2_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
172 ch = mapped_ref.FindChain(ref_ch1)
173 ref_ch1_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
174 ch = mapped_ref.FindChain(ref_ch2)
175 ref_ch2_residues = {r.GetIntProp(
"dockq_idx"): r
for r
in ch.residues}
181 i_ref = ref.Select(
"aname=CB or (rname=GLY and aname=CA)")
182 int1 = i_ref.Select(f
"cname={mol.QueryQuoteName(ref_ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch2)}]")
183 int2 = i_ref.Select(f
"cname={mol.QueryQuoteName(ref_ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch1)}]")
186 for r
in int1.residues:
187 idx = r.GetIntProp(
"dockq_idx")
188 if idx
in ref_ch1_residues
and idx
in mdl_ch1_residues:
189 ref_r = ref_ch1_residues[idx]
190 mdl_r = mdl_ch1_residues[idx]
191 for aname
in sup_atoms:
192 ref_a = ref_r.FindAtom(aname)
193 mdl_a = mdl_r.FindAtom(aname)
194 if ref_a.IsValid()
and mdl_a.IsValid():
195 ref_pos.append(ref_a.pos)
196 mdl_pos.append(mdl_a.pos)
197 for r
in int2.residues:
198 idx = r.GetIntProp(
"dockq_idx")
199 if idx
in ref_ch2_residues
and idx
in mdl_ch2_residues:
200 ref_r = ref_ch2_residues[idx]
201 mdl_r = mdl_ch2_residues[idx]
202 for aname
in sup_atoms:
203 ref_a = ref_r.FindAtom(aname)
204 mdl_a = mdl_r.FindAtom(aname)
205 if ref_a.IsValid()
and mdl_a.IsValid():
206 ref_pos.append(ref_a.pos)
207 mdl_pos.append(mdl_a.pos)
209 if len(mdl_pos) >= 3:
210 sup_result = mol.alg.SuperposeSVD(mdl_pos, ref_pos)
211 irmsd = sup_result.rmsd
218 n_ch1 = len(ref.FindChain(ref_ch1).residues)
219 n_ch2 = len(ref.FindChain(ref_ch2).residues)
221 ref_receptor_residues = ref_ch1_residues.values()
222 ref_ligand_residues = ref_ch2_residues.values()
223 mdl_receptor_residues = \
224 [mdl_ch1_residues[idx]
for idx
in ref_ch1_residues.keys()]
225 mdl_ligand_residues = \
226 [mdl_ch2_residues[idx]
for idx
in ref_ch2_residues.keys()]
228 ref_receptor_residues = ref_ch2_residues.values()
229 ref_ligand_residues = ref_ch1_residues.values()
230 mdl_receptor_residues = \
231 [mdl_ch2_residues[idx]
for idx
in ref_ch2_residues.keys()]
232 mdl_ligand_residues = \
233 [mdl_ch1_residues[idx]
for idx
in ref_ch1_residues.keys()]
238 for ref_r, mdl_r
in zip(ref_receptor_residues, mdl_receptor_residues):
239 for aname
in sup_atoms:
240 ref_a = ref_r.FindAtom(aname)
241 mdl_a = mdl_r.FindAtom(aname)
242 if ref_a.IsValid()
and mdl_a.IsValid():
243 ref_receptor_positions.append(ref_a.pos)
244 mdl_receptor_positions.append(mdl_a.pos)
245 for ref_r, mdl_r
in zip(ref_ligand_residues, mdl_ligand_residues):
246 for aname
in sup_atoms:
247 ref_a = ref_r.FindAtom(aname)
248 mdl_a = mdl_r.FindAtom(aname)
249 if ref_a.IsValid()
and mdl_a.IsValid():
250 ref_ligand_positions.append(ref_a.pos)
251 mdl_ligand_positions.append(mdl_a.pos)
253 if len(mdl_receptor_positions) >= 3:
254 sup_result = mol.alg.SuperposeSVD(mdl_receptor_positions,
255 ref_receptor_positions)
256 mdl_ligand_positions.ApplyTransform(sup_result.transformation)
257 lrmsd = mdl_ligand_positions.GetRMSD(ref_ligand_positions)
261 return (irmsd, lrmsd)
263 def _ScaleRMSD(rmsd, d):
264 return 1.0/(1+(rmsd/d)**2)
266 def _DockQ(fnat, lrmsd, irmsd, d1, d2):
267 """ The final number chrunching as described in the DockQ manuscript
269 return (fnat + _ScaleRMSD(lrmsd, d1) + _ScaleRMSD(irmsd, d2))/3
271 def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
272 ch1_aln=None, ch2_aln=None, contact_dist_thresh = 5.0,
273 interface_dist_thresh = 10.0, interface_cb = False):
274 """ Computes DockQ for specified interface
276 DockQ is described in: Sankar Basu and Bjoern Wallner (2016), "DockQ: A
277 Quality Measure for Protein-Protein Docking Models", PLOS one
279 Residues are mapped based on residue numbers by default. If you provide
280 *ch1_aln* and *ch2_aln* you can enforce an arbitrary mapping.
282 :param mdl: Model structure
283 :type mdl: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
284 :param ref: Reference structure, i.e. native structure
285 :type ref: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
286 :param mdl_ch1: Specifies chain in model constituting first part of
288 :type mdl_ch1: :class:`str`
289 :param mdl_ch2: Specifies chain in model constituting second part of
291 :type mdl_ch2: :class:`str`
292 :param ref_ch1: ref equivalent of mdl_ch1
293 :type ref_ch1: :class:`str`
294 :param ref_ch2: ref equivalent of mdl_ch2
295 :type ref_ch2: :class:`str`
296 :param ch1_aln: Alignment with two sequences to map *ref_ch1* and *mdl_ch1*.
297 The first sequence must match the sequence in *ref_ch1* and
298 the second to *mdl_ch1*.
299 :type ch1_aln: :class:`ost.seq.AlignmentHandle`
300 :param ch2_aln: Alignment with two sequences to map *ref_ch2* and *mdl_ch2*.
301 The first sequence must match the sequence in *ref_ch2* and
302 the second to *mdl_ch2*.
303 :type ch2_aln: :class:`ost.seq.AlignmentHandle`
304 :param contact_dist_thresh: Residues with any atom within this threshold
305 are considered to be in contact. Affects contact
306 based scores fnat and fnonnat. CAPRI suggests
307 to lower the default of 5.0 to 4.0 for
308 protein-peptide interactions.
309 :type contact_dist_thresh: :class:`float`
310 :param interface_dist_thresh: Residues with any atom within this threshold
311 to another chain are considered interface
312 residues. Affects irmsd. CAPRI suggests to
313 lower the default of 10.0 to 8.0 in
314 combination with interface_cb=True for
315 protein-peptide interactions.
316 :type interface_dist_thresh: :class:`float`
317 :param interface_cb: Only use CB atoms (CA for GLY) to identify interface
318 residues for irmsd. CAPRI suggests to enable this
319 flag in combination with lowering
320 *interface_dist_thresh* to 8.0 for protein-peptide
322 :type interface_cb: :class:`bool`
323 :returns: :class:`dict` with keys nnat, nmdl, fnat, fnonnat, irmsd, lrmsd,
324 DockQ which corresponds to the equivalent values in the original
325 DockQ implementation.
327 _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
328 ch1_aln = ch1_aln, ch2_aln = ch2_aln)
329 nnat, nmdl, fnat, fnonnat = _ContactScores(mdl, ref, mdl_ch1, mdl_ch2,
331 dist_thresh = contact_dist_thresh)
332 irmsd, lrmsd = _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
333 dist_thresh = interface_dist_thresh,
334 cb_mode = interface_cb)
335 return {
"nnat": nnat,
339 "irmsd": round(irmsd, 3),
340 "lrmsd": round(lrmsd, 3),
341 "DockQ": round(_DockQ(fnat, lrmsd, irmsd, 8.5, 1.5), 3)}
def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, ch1_aln=None, ch2_aln=None, contact_dist_thresh=5.0, interface_dist_thresh=10.0, interface_cb=False)