19 import os, tempfile, ftplib, httplib
22 from ost
import mol, geom, conop
31 return IOProfileRegistry.Instance().Get(key)
34 if isinstance(value, str):
36 IOProfileRegistry.Instance().Set(key, value)
40 return len(self.
_dict)
43 return self._dict.__iter__()
47 profiles[
'STRICT']=
IOProfile(dialect=
'PDB', fault_tolerant=
False,
48 strict_hydrogens=
False, quack_mode=
False)
49 profiles[
'SLOPPY']=
IOProfile(dialect=
'PDB', fault_tolerant=
True,
50 strict_hydrogens=
False, quack_mode=
True)
51 profiles[
'CHARMM']=
IOProfile(dialect=
'CHARMM', fault_tolerant=
True,
52 strict_hydrogens=
False, quack_mode=
False)
53 profiles[
'DEFAULT']=
'STRICT'
55 def _override(val1, val2):
61 def __GetModelFromPDB(model_id, output_dir, file_pattern='pdb%s.ent.gz'):
62 file_name = file_pattern % model_id
63 file_path = os.path.join(output_dir,file_name)
65 server=
"ftp.wwpdb.org"
66 ftp=ftplib.FTP(server,
"anonymous",
"user@")
67 ftp.cwd(
"pub/pdb/data/structures/all/pdb")
68 ftp_retrfile=open(file_path,
"wb")
69 ftp.retrbinary(
"RETR "+file_name,ftp_retrfile.write)
72 conn=httplib.HTTPConnection(
'www.pdb.org')
73 conn.request(
'GET',
'/pdb/files/%s.pdb.gz' % model_id )
74 response=conn.getresponse()
75 if response.status==200:
77 f=open(os.path.join(output_dir, file_pattern % model_id),
'w+')
83 return os.path.getsize(file_path) > 0
85 def LoadPDB(filename, restrict_chains="", no_hetatms=None,
86 fault_tolerant=
None, load_multi=
False, quack_mode=
None,
87 join_spread_atom_records=
None, calpha_only=
None,
88 profile=
'DEFAULT', remote=
False, dialect=
None,
89 strict_hydrogens=
None, seqres=
False, bond_feasibility_check=
None):
91 Load PDB file from disk and return one or more entities. Several options
92 allow to customize the exact behaviour of the PDB import. For more information
93 on these options, see :doc:`profile`.
95 Residues are flagged as ligand if they are mentioned in a HET record.
97 :param restrict_chains: If not an empty string, only chains listed in the
98 string will be imported.
100 :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides
101 the value of :attr:`IOProfile.fault_tolerant`.
103 :param no_hetatms: If set to True, HETATM records will be ignored. Overrides
104 the value of :attr:`IOProfile.no_hetatms`
106 :param load_multi: If set to True, a list of entities will be returned instead
107 of only the first. This is useful when dealing with multi-PDB files.
109 :param join_spread_atom_records: If set, overrides the value of
110 :attr:`IOProfile.join_spread_atom_records`.
112 :param remote: If set to True, the method tries to load the pdb from the
113 remote pdb repository www.pdb.org. The filename is then interpreted as the
116 :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is
119 :param dialect: Specifies the particular dialect to use. If set, overrides
120 the value of :attr:`IOProfile.dialect`
122 :param seqres: Whether to read SEQRES records. If set to True, the loaded
123 entity and seqres entry will be returned as a tuple.
125 :type dialect: :class:`str`
127 :param strict_hydrogens: If set, overrides the value of
128 :attr:`IOProfile.strict_hydrogens`.
130 :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous or
133 def _override(val1, val2):
138 if isinstance(profile, str):
139 prof=profiles[profile].Copy()
140 elif isinstance(profile, IOProfile):
143 raise TypeError(
'profile must be of type string or IOProfile, '+\
144 'instead of %s'%type(profile))
145 if dialect
not in (
None,
'PDB',
'CHARMM',):
146 raise ValueError(
'dialect must be PDB or CHARMM')
147 prof.calpha_only=_override(prof.calpha_only, calpha_only)
148 prof.no_hetatms=_override(prof.no_hetatms, no_hetatms)
149 prof.dialect=_override(prof.dialect, dialect)
150 prof.quack_mode=_override(prof.quack_mode, quack_mode)
151 prof.strict_hydrogens=_override(prof.strict_hydrogens, strict_hydrogens)
152 prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)
153 prof.bond_feasibility_check=_override(prof.bond_feasibility_check, bond_feasibility_check)
154 prof.join_spread_atom_records=_override(prof.join_spread_atom_records,
155 join_spread_atom_records)
158 output_dir = tempfile.gettempdir()
159 if __GetModelFromPDB(filename, output_dir):
160 filename = os.path.join(output_dir,
'pdb%s.ent.gz' % filename)
162 raise IOError(
'Can not load PDB %s from www.pdb.org'%filename)
164 conop_inst=conop.Conopology.Instance()
165 builder=conop_inst.GetBuilder(
"DEFAULT")
166 if prof.dialect==
'PDB':
167 builder.dialect=conop.PDB_DIALECT
168 elif prof.dialect==
'CHARMM':
169 builder.dialect=conop.CHARMM_DIALECT
170 builder.strict_hydrogens=prof.strict_hydrogens
171 builder.bond_feasibility_check=prof.bond_feasibility_check
173 reader.read_seqres=seqres
177 while reader.HasNext():
178 ent=mol.CreateEntity()
179 reader.Import(ent, restrict_chains)
180 conop_inst.ConnectAll(builder, ent, 0)
183 raise IOError(
"File '%s' doesn't contain any entities" % filename)
186 ent=mol.CreateEntity()
188 reader.Import(ent, restrict_chains)
189 conop_inst.ConnectAll(builder, ent, 0)
191 raise IOError(
"File '%s' doesn't contain any entities" % filename)
193 return ent, reader.seqres
198 def SavePDB(models, filename, dialect=None, pqr=False, profile='DEFAULT'):
200 Save entity or list of entities to disk. If a list of entities is supplied
201 the PDB file will be saved as a multi PDB file. Each of the entities is
202 wrapped into a MODEL/ENDMDL pair.
204 If the atom number exceeds 99999, '*****' is used.
206 :param models: The entity or list of entities (handles or views) to be saved
207 :param filename: The filename
208 :type filename: string
210 if not getattr(models,
'__len__',
None):
212 if isinstance(profile, str):
213 profile=profiles[profile].Copy()
214 elif isinstance(profile, IOProfile):
217 raise TypeError(
'profile must be of type string or IOProfile, '+\
218 'instead of %s'%type(profile))
219 profile.dialect=_override(profile.dialect, dialect)
223 writer.write_multi_model=
True
240 image_list.append(image)
243 LoadMapList=LoadImageList
246 lazy_load=
False, stride=1,
247 dialect=
None, detect_swap=
True,swap_bytes=
False):
249 Load CHARMM trajectory file.
251 :param crd: EntityHandle or filename of the (PDB) file containing the
252 structure. The structure must have the same number of atoms as the
254 :param dcd_file: The filename of the DCD file. If not set, and crd is a
255 string, the filename is set to the <crd>.dcd
256 :param layz_load: Whether the trajectory should be loaded on demand. Instead
257 of loading the complete trajectory into memory, the trajectory frames are
258 loaded from disk when requested.
259 :param stride: The spacing of the frames to load. When set to 2, for example,
260 every second frame is loaded from the trajectory. By default, every frame
262 :param dialect: The dialect for the PDB file to use. See :func:`LoadPDB`. If
263 set, overrides the value of the profile
264 :param profile: The IO profile to use for loading the PDB file. See
266 :param detect_swap: if True (the default), then automatic detection of endianess
267 is attempted, otherwise the swap_bytes parameter is used
268 :param swap_bytes: is detect_swap is False, this flag determines whether bytes
269 are swapped upon loading or not
273 dcd_file=
'%s.dcd' % os.path.splitext(crd)[0]
274 crd=
LoadPDB(crd, profile=profile, dialect=dialect)
278 raise ValueError(
"No DCD filename given")
279 return LoadCHARMMTraj_(crd, dcd_file, stride, lazy_load, detect_swap, swap_bytes)
281 def LoadMMCIF(filename, restrict_chains="", fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, strict_hydrogens=None, seqres=False, info=False):
283 Load MMCIF file from disk and return one or more entities. Several options
284 allow to customize the exact behaviour of the MMCIF import. For more
285 information on these options, see :doc:`profile`.
287 Residues are flagged as ligand if they are mentioned in a HET record.
289 :param restrict_chains: If not an empty string, only chains listed in the
290 string will be imported.
292 :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides
293 the value of :attr:`IOProfile.fault_tolerant`.
295 :param remote: If set to True, the method tries to load the pdb from the
296 remote pdb repository www.pdb.org. The filename is then interpreted as the
299 :rtype: :class:`~ost.mol.EntityHandle`.
301 :param strict_hydrogens: If set, overrides the value of
302 :attr:`IOProfile.strict_hydrogens`.
304 :param seqres: Whether to read SEQRES records. If set to True, the loaded
305 entity and seqres entry will be returned as second item.
307 :param info: Whether to return an info container with the other output.
308 Returns a :class:`MMCifInfo` object as last item.
310 :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous
313 def _override(val1, val2):
318 if isinstance(profile, str):
319 prof = profiles[profile].Copy()
321 prof = profile.Copy()
323 prof.calpha_only=_override(prof.calpha_only, calpha_only)
324 prof.strict_hydrogens=_override(prof.strict_hydrogens, strict_hydrogens)
325 prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)
328 output_dir = tempfile.gettempdir()
329 if __GetModelFromPDB(filename, output_dir):
330 filename = os.path.join(output_dir,
'pdb%s.ent.gz' % filename)
332 raise IOError(
'Can not load PDB %s from www.pdb.org'%filename)
334 conop_inst = conop.Conopology.Instance()
335 builder = conop_inst.GetBuilder(
"DEFAULT")
337 builder.strict_hydrogens = prof.strict_hydrogens
340 ent = mol.CreateEntity()
342 reader.read_seqres = seqres
345 conop_inst.ConnectAll(builder, ent, 0)
349 return ent, reader.seqres, reader.info
351 return ent, reader.seqres
353 return ent, reader.info
364 def _PDBize(biounit, asu, seqres=None, min_polymer_size=10):
365 def _CopyAtoms(src_res, dst_res, edi, trans=geom.Mat4()):
366 for atom
in src_res.atoms:
368 new_atom=edi.InsertAtom(dst_res, atom.name,
geom.Vec3(trans*tmp_pos),
369 element=atom.element,
370 occupancy=atom.occupancy,
371 b_factor=atom.b_factor,
372 is_hetatm=atom.is_hetatom)
374 chain_names=
'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz'
379 operations = biounit.GetOperations()
380 trans_matrices = list()
381 if len(operations) > 0:
382 for op
in operations[0]:
384 rot.PasteRotation(op.rotation)
386 trans.PasteTranslation(op.translation)
389 trans_matrices.append(tr)
390 for op_n
in range(1, len(operations)):
392 for o
in operations[op_n]:
394 rot.PasteRotation(o.rotation)
396 trans.PasteTranslation(o.translation)
399 for t_o
in trans_matrices:
402 trans_matrices = tmp_ops
404 assu = asu.Select(
'cname=' +
','.join(biounit.GetChainList()))
407 pdb_bu = mol.CreateEntity()
408 edi = pdb_bu.EditXCS(mol.BUFFERED_EDIT)
412 for tr
in trans_matrices:
414 for chain
in assu.chains:
415 residue_count = len(chain.residues)
417 seqres_chain = seqres.FindSequence(chain.name)
418 if seqres_chain.IsValid():
419 residue_count = len(seqres_chain)
420 if chain.is_polymer
and residue_count >= min_polymer_size:
421 if len(chain_names) == cur_chain_name:
422 raise RuntimeError(
'Running out of chain names')
423 new_chain = edi.InsertChain(chain_names[cur_chain_name])
425 edi.SetChainDescription(new_chain, chain.description)
426 edi.SetChainType(new_chain, chain.type)
427 new_chain.SetStringProp(
'original_name', chain.name)
428 if chain.HasProp(
"pdb_auth_chain_name"):
429 new_chain.SetStringProp(
"pdb_auth_chain_name",
430 chain.GetStringProp(
"pdb_auth_chain_name"))
431 for res
in chain.residues:
432 new_res = edi.AppendResidue(new_chain, res.name, res.number)
433 _CopyAtoms(res, new_res, edi, tr)
434 elif chain.type == mol.CHAINTYPE_WATER:
435 if not water_chain.IsValid():
437 water_chain = edi.InsertChain(
'-')
438 edi.SetChainDescription(water_chain, chain.description)
439 edi.SetChainType(water_chain, chain.type)
440 for res
in chain.residues:
441 new_res = edi.AppendResidue(water_chain, res.name)
442 new_res.SetStringProp(
'type', mol.StringFromChainType(chain.type))
443 new_res.SetStringProp(
'description', chain.description)
444 _CopyAtoms(res, new_res, edi, tr)
446 if not ligand_chain.IsValid():
448 ligand_chain = edi.InsertChain(
'_')
451 last_rnum = ligand_chain.residues[-1].number.num
452 residues=chain.residues
456 for res
in chain.residues:
457 new_res = edi.AppendResidue(ligand_chain, res.name,
459 new_res.SetStringProp(
'description', chain.description)
460 new_res.SetStringProp(
'type', mol.StringFromChainType(chain.type))
461 new_res.SetStringProp(
"original_name", chain.name)
462 if chain.HasProp(
"pdb_auth_chain_name"):
463 new_res.SetStringProp(
"pdb_auth_chain_name",
464 chain.GetStringProp(
"pdb_auth_chain_name"))
465 ins_code = chr(ord(ins_code)+1)
466 _CopyAtoms(res, new_res, edi, tr)
467 conop.ConnectAll(pdb_bu)
470 MMCifInfoBioUnit.PDBize = _PDBize