9 """ Scoring helper - Returns copy of *ent* without hydrogens
11 Non-standard hydrogen naming can cause trouble in residue property
12 assignment which is done by the :class:`ost.conop.RuleBasedProcessor` when
13 loading. In fact, residue property assignment is not done for every residue
14 that has unknown atoms according to the chemical component dictionary. This
15 function therefore re-processes the entity after removing hydrogens.
17 :param ent: Entity to clean
18 :type ent: :class:`ost.mol.EntityHandle`/:class:`ost.mol.EntityView`
19 :param clib: Compound library to perform re-processing after hydrogen
21 :type clib: :class:`ost.conop.CompoundLib`
22 :returns: Cleaned and re-processed ent
24 cleaned_ent = mol.CreateEntityFromView(ent.Select(
25 "ele != H and ele != D"), include_exlusive_atoms=
False)
29 processor.Process(cleaned_ent)
33 def MMCIFPrep(mmcif_path, biounit=None, extract_nonpoly=False,
34 fault_tolerant=False, allow_heuristic_conn=False,
35 extract_seqres_mapping=False):
36 """ Scoring helper - Prepares input from mmCIF
38 Only performs gentle cleanup of hydrogen atoms. Further cleanup is delegated
41 Depending on input flags, the following outputs can be retrieved:
43 * poly_ent (:class:`ost.mol.EntityHandle`): An OpenStructure entity with only
45 * non_poly_entities (:class:`list` of :class:`ost.mol.EntityHandle`):
46 OpenStructure entities representing all non-polymer (ligand) entities.
47 * seqres (:class:`ost.seq.SequenceList`): Seqres sequences with entity id
48 as sequence names and the respective canonical seqres as sequence.
49 * trg_seqres_mapping (:class:`dict`): Dictionary with chain names in
50 poly_ent as keys and the respective entity ids as values.
52 :param mmcif_path: Path to mmCIF file that contains polymer and optionally
54 :type mmcif_path: :class:`str`
55 :param biounit: If given, construct specified biounit from mmCIF AU
56 :type biounit: :class:`str`
57 :param extract_nonpoly: Controls return value
58 :type extract_nonpoly: :class:`bool`
59 :param fault_tolerant: Passed as parameter to :func:`ost.io.LoadMMCIF`
60 :type fault_tolerant: :class:`bool`
61 :param allow_heuristic_conn: Only relevant if extract_nonpoly is True.
62 The chemical component dictionary is relevant
63 for connectivity information. By default, we
64 enforce the presence of each non-polymer in
65 the dictionary to ensure correct connectivity.
66 If you enable this flag, you allow the use
67 of a distance-based heuristic as fallback,
68 with all its consequences for ligand matching.
69 :type allow_heuristic_conn: :class:`bool`
70 :param extract_seqres_mapping: Controls return value
71 :type extract_seqres_mapping: :class:`bool`
72 :returns: poly_ent if *extract_nonpoly*/*extract_seqres_mapping* are False.
73 (poly_ent, non_poly_entities) if *extract_nonpoly* is True.
74 (poly_ent, seqres, trg_seqres_mapping) if *extract_seqres_mapping*
76 (poly_ent, non_poly_entities, seqres, trg_seqres_mapping) if both
79 clib = conop.GetDefaultLib()
81 ost.LogError(
"A compound library is required. "
82 "Please refer to the OpenStructure website: "
83 "https://openstructure.org/docs/conop/compoundlib/.")
84 raise RuntimeError(
"No compound library found")
88 non_poly_entities =
None
90 trg_seqres_mapping =
None
93 mmcif_entity, mmcif_seqres, mmcif_info = io.LoadMMCIF(mmcif_path, seqres=
True, info=
True,
94 fault_tolerant=fault_tolerant)
98 polymer_entity_ids = mmcif_info.GetEntityIdsOfType(
"polymer")
99 polymer_chain_names = list()
100 for ch
in mmcif_entity.chains:
101 if mmcif_info.GetMMCifEntityIdTr(ch.name)
in polymer_entity_ids:
102 polymer_chain_names.append(ch.name)
105 non_polymer_entity_ids = mmcif_info.GetEntityIdsOfType(
"non-polymer")
106 non_polymer_chain_names = list()
107 for ch
in mmcif_entity.chains:
108 if mmcif_info.GetMMCifEntityIdTr(ch.name)
in non_polymer_entity_ids:
109 non_polymer_chain_names.append(ch.name)
112 if biounit
is not None:
113 biounit_found =
False
114 for bu
in mmcif_info.biounits:
116 mmcif_entity = mol.alg.CreateBU(mmcif_entity, bu)
119 if not biounit_found:
120 raise RuntimeError(f
"Specified biounit '{biounit}' not in "
125 for ch
in mmcif_entity.chains:
127 if biounit
is not None:
131 dot_index = ch.name.find(
'.')
135 cname = ch.name[dot_index+1:]
139 if cname
in polymer_chain_names:
140 ch.SetIntProp(
"poly", 1)
141 if cname
in non_polymer_chain_names:
142 ch.SetIntProp(
"nonpolyid", non_poly_id)
145 poly_sel = mmcif_entity.Select(
"gcpoly:0=1")
146 poly_ent = mol.CreateEntityFromView(poly_sel,
True)
149 non_poly_sel = mmcif_entity.Select(
"gcnonpoly:0=1")
150 non_poly_entities = list()
151 for i
in range(non_poly_id):
152 view = mmcif_entity.Select(f
"gcnonpolyid:{non_poly_id}={i}")
153 if view.GetResidueCount() != 1:
154 raise RuntimeError(f
"Expect non-polymer entities in "
155 f
"{mmcif_path} to contain exactly 1 "
156 f
"residue. Got {ch.GetResidueCount()} "
157 f
"in chain {ch.name}")
158 if not allow_heuristic_conn:
159 compound = clib.FindCompound(view.residues[0].name)
161 raise RuntimeError(f
"Can only extract non-polymer entities if "
162 f
"respective residues are available in PDB "
163 f
"component dictionary. Can't find "
164 f
"\"{view.residues[0].name}\"")
166 non_poly_entities.append(mol.CreateEntityFromView(view,
True))
168 if extract_seqres_mapping:
172 seqres = seq.CreateSequenceList()
173 seqres_processed = set()
175 for s
in mmcif_seqres:
176 entity_id = mmcif_info.GetMMCifEntityIdTr(s.GetName())
177 if entity_id
not in seqres_processed:
178 seqres_processed.add(entity_id)
179 seqres.AddSequence(seq.CreateSequence(entity_id, s.GetGaplessString()))
181 trg_seqres_mapping = dict()
183 cnames = [ch.name
for ch
in poly_ent.chains]
185 trg_seqres_mapping[cname] = mmcif_info.GetMMCifEntityIdTr(cname)
187 bu_cnames = [ch.name
for ch
in poly_ent.chains]
189 for bu_cname
in bu_cnames:
190 dot_idx = bu_cname.index(
".")
191 au_cnames.append(bu_cname[dot_idx + 1 :])
192 for au_cname, bu_cname
in zip(au_cnames, bu_cnames):
193 trg_seqres_mapping[bu_cname] = mmcif_info.GetMMCifEntityIdTr(au_cname)
196 if extract_nonpoly
and extract_seqres_mapping:
197 return (poly_ent, non_poly_entities, seqres, trg_seqres_mapping)
198 elif extract_nonpoly:
199 return (poly_ent, non_poly_entities)
200 elif extract_seqres_mapping:
201 return (poly_ent, seqres, trg_seqres_mapping)
207 """ Scoring helper - Prepares scoring input from PDB
209 Only performs gentle cleanup of hydrogen atoms. Further cleanup is delegated
210 to scoring classes. There is no logic to extract ligands from PDB
211 files. Ligands must be provided separately as SDF files in these cases.
213 :param pdb_path: Path to PDB file that contains polymer entities
214 :type pdb_path: :class:`str`
215 :param fault_tolerant: Passed as parameter to :func:`ost.io.LoadPDB`
216 :type fault_tolerant: :class:`bool`
217 :returns: :class:`EntityHandle` from loaded file.
219 clib = conop.GetDefaultLib()
221 ost.LogError(
"A compound library is required. "
222 "Please refer to the OpenStructure website: "
223 "https://openstructure.org/docs/conop/compoundlib/.")
224 raise RuntimeError(
"No compound library found")
226 pdb_entity = io.LoadPDB(pdb_path, fault_tolerant=fault_tolerant)
# Names exported by a star-import of this module.
__all__ = (
    'CleanHydrogens',
    'MMCIFPrep',
    'PDBPrep',
)
def CleanHydrogens(ent, clib)
def PDBPrep(pdb_path, fault_tolerant=False)
def MMCIFPrep(mmcif_path, biounit=None, extract_nonpoly=False, fault_tolerant=False, allow_heuristic_conn=False, extract_seqres_mapping=False)