OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
__init__.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2011 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 import os, tempfile, ftplib, httplib
20 
21 from _ost_io import *
22 from ost import mol, geom, conop, seq
23 
profiles=None

class IOProfiles:
  """
  Dictionary-like front end to the IO profile registry.

  Item lookup and item assignment are forwarded to
  ``IOProfileRegistry.Instance()``. Keys assigned through this object are
  additionally recorded locally; ``len()`` and iteration report that local
  record.
  """
  def __init__(self):
    # keys (and values) that were stored through __setitem__
    self._dict = dict()

  def __len__(self):
    return len(self._dict)

  def __iter__(self):
    return iter(self._dict)

  def __getitem__(self, key):
    registry = IOProfileRegistry.Instance()
    return registry.Get(key)

  def __setitem__(self, key, value):
    # A string value names an already registered profile: a copy of that
    # profile is stored under the new key.
    if isinstance(value, str):
      value = self[value].Copy()
    registry = IOProfileRegistry.Instance()
    registry.Set(key, value)
    self._dict[key] = value
44 
# Set up the predefined IO profiles if that has not happened yet.
if not profiles:
  profiles = IOProfiles()
  # Use the rule-based processor when a default compound library is
  # available, the heuristic processor otherwise.
  processor = (conop.RuleBasedProcessor(conop.GetDefaultLib())
               if conop.GetDefaultLib() else conop.HeuristicProcessor())
  # Each profile gets its own copy of the processor.
  profiles['STRICT'] = IOProfile(dialect='PDB',
                                 fault_tolerant=False,
                                 quack_mode=False,
                                 processor=processor.Copy())
  profiles['SLOPPY'] = IOProfile(dialect='PDB',
                                 fault_tolerant=True,
                                 quack_mode=True,
                                 processor=processor.Copy())
  profiles['CHARMM'] = IOProfile(dialect='CHARMM',
                                 fault_tolerant=True,
                                 quack_mode=False,
                                 processor=processor.Copy())
  # Assigning a profile name registers a copy of that profile.
  profiles['DEFAULT'] = 'STRICT'
58 
59 def _override(val1, val2):
60  if val2!=None:
61  return val2
62  else:
63  return val1
64 
def LoadPDB(filename, restrict_chains="", no_hetatms=None,
            fault_tolerant=None, load_multi=False, quack_mode=None,
            join_spread_atom_records=None, calpha_only=None,
            profile='DEFAULT', remote=False, remote_repo='pdb',
            dialect=None, seqres=False, bond_feasibility_check=None):
  """
  Load PDB file from disk and return one or more entities. Several options
  allow to customize the exact behaviour of the PDB import. For more information
  on these options, see :doc:`profile`.

  Residues are flagged as ligand if they are mentioned in a HET record.

  :param filename: Path of the PDB file, or the entry id if *remote* is True.

  :param restrict_chains: If not an empty string, only chains listed in the
     string will be imported.

  :param no_hetatms: If set to True, HETATM records will be ignored. Overrides
      the value of :attr:`IOProfile.no_hetatms`

  :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides
     the value of :attr:`IOProfile.fault_tolerant`.

  :param load_multi: If set to True, a list of entities will be returned instead
      of only the first. This is useful when dealing with multi-PDB files.

  :param quack_mode: If set, overrides the value of
      :attr:`IOProfile.quack_mode`.

  :param join_spread_atom_records: If set, overrides the value of
      :attr:`IOProfile.join_spread_atom_records`.

  :param calpha_only: If set, overrides the value of
      :attr:`IOProfile.calpha_only`.

  :param profile: Name of a registered profile or an :class:`IOProfile`. The
      import works on a copy, so the overrides given here do not modify the
      profile that was passed in.

  :param remote: If set to True, the method tries to load the pdb from the
     remote repository given as *remote_repo*. The filename is then
     interpreted as the entry id as further specified for the *remote_repo*
     parameter.

  :param remote_repo: Remote repository to fetch structure if *remote* is True.
                      Must be one in ['pdb', 'smtl', 'pdb_redo']. In case of
                      'pdb' and 'pdb_redo', the entry must be given as lower
                      case pdb id, which loads the deposited asymmetric unit
                      (e.g. '1ake'). In case of 'smtl', the entry must also
                      specify the desired biounit (e.g. '1ake.1').

  :param dialect: Specifies the particular dialect to use. If set, overrides
    the value of :attr:`IOProfile.dialect`. Must be 'PDB' or 'CHARMM'.

  :type dialect: :class:`str`

  :param seqres: Whether to read SEQRES records. If set to True, the loaded
                 entity and seqres entry will be returned as a tuple. With
                 *load_multi* set, only the list of entities is returned.

  :param bond_feasibility_check: If set and the profile has a processor,
      overrides the processor's ``check_bond_feasibility`` attribute.

  :rtype: :class:`~ost.mol.EntityHandle` or a list thereof if `load_multi` is
      True.

  :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous or
      inexistent file. :exc:`TypeError` if *profile* is neither a string nor
      an :class:`IOProfile`, :exc:`ValueError` for an unknown *dialect*,
      :exc:`IOError` for an unknown *remote_repo* or if the file contains no
      entities.
  """
  if isinstance(profile, str):
    prof=profiles[profile].Copy()
  elif isinstance(profile, IOProfile):
    prof=profile.Copy()
  else:
    raise TypeError('profile must be of type string or IOProfile, '+\
                    'instead of %s'%type(profile))
  if dialect not in (None, 'PDB', 'CHARMM',):
    raise ValueError('dialect must be PDB or CHARMM')

  # explicitly given arguments take precedence over the profile settings
  prof.calpha_only=_override(prof.calpha_only, calpha_only)
  prof.no_hetatms=_override(prof.no_hetatms, no_hetatms)
  prof.dialect=_override(prof.dialect, dialect)
  prof.quack_mode=_override(prof.quack_mode, quack_mode)
  if prof.processor:
    prof.processor.check_bond_feasibility=_override(
        prof.processor.check_bond_feasibility, bond_feasibility_check)
  prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)
  prof.join_spread_atom_records=_override(prof.join_spread_atom_records,
                                          join_spread_atom_records)

  # Keep a reference to whatever RemoteGet returns until the function is done,
  # so it cannot go out of scope while the file is being read.
  tmp_file = None
  if remote:
    if remote_repo not in ['pdb', 'smtl', 'pdb_redo']:
      raise IOError("remote_repo must be in ['pdb', 'smtl', 'pdb_redo']")
    from ost.io.remote import RemoteGet
    tmp_file = RemoteGet(filename, from_repo=remote_repo)
    filename = tmp_file.name

  # NOTE(review): the returned instance is not used here; the call is kept in
  # case Instance() has initialisation side effects -- confirm and drop.
  conop.Conopology.Instance()

  # the processor has to use the same dialect as the reader
  if prof.processor:
    if prof.dialect=='PDB':
      prof.processor.dialect=conop.PDB_DIALECT
    elif prof.dialect=='CHARMM':
      prof.processor.dialect=conop.CHARMM_DIALECT

  reader=PDBReader(filename, prof)
  reader.read_seqres=seqres

  if load_multi:
    # import every model the reader can deliver
    ent_list=[]
    while reader.HasNext():
      ent=mol.CreateEntity()
      reader.Import(ent, restrict_chains)
      if prof.processor:
        prof.processor.Process(ent)
      ent_list.append(ent)
    if len(ent_list)==0:
      raise IOError("File '%s' doesn't contain any entities" % filename)
    return ent_list

  # single entity: only the first model is imported
  ent=mol.CreateEntity()
  if not reader.HasNext():
    raise IOError("File '%s' doesn't contain any entities" % filename)
  reader.Import(ent, restrict_chains)
  if prof.processor:
    prof.processor.Process(ent)
  if seqres:
    return ent, reader.seqres
  return ent
185 
def SavePDB(models, filename, dialect=None, pqr=False, profile='DEFAULT'):
  """
  Save entity or list of entities to disk. If a list of entities is supplied
  the PDB file will be saved as a multi PDB file. Each of the entities is
  wrapped into a MODEL/ENDMDL pair.

  If the atom number exceeds 99999, '*****' is used.

  :param models: The entity or list of entities (handles or views) to be saved
  :param filename: The filename
  :type filename: string
  :param dialect: If set, overrides the dialect of the profile.
  :param pqr: Passed on to :meth:`PDBWriter.SetIsPQR`.
  :param profile: Name of a registered profile or an :class:`IOProfile`. The
                  writer works on a copy, the profile passed in is left
                  untouched.
  :raises: IOException if the restrictions of the PDB format are not satisfied
           (with the exception of atom numbers, see above):

             * Chain names with more than one character
             * Atom positions with coordinates outside range [-999.99, 9999.99]
             * Residue names longer than three characters
             * Atom names longer than four characters
             * Numeric part of :class:`ost.mol.ResNum` outside range [-999, 9999]
             * Alternative atom indicators longer than one character

           :exc:`TypeError` if *profile* is neither a string nor an
           :class:`IOProfile`.
  """
  # a single entity (anything without __len__) is treated as a one-item list
  if not getattr(models, '__len__', None):
    models=[models]
  if isinstance(profile, str):
    profile=profiles[profile].Copy()
  elif isinstance(profile, IOProfile):
    # Bind the copy: otherwise the dialect override below would modify the
    # caller's profile object.
    profile=profile.Copy()
  else:
    raise TypeError('profile must be of type string or IOProfile, '+\
                    'instead of %s'%type(profile))
  profile.dialect=_override(profile.dialect, dialect)
  writer=PDBWriter(filename, profile)
  writer.SetIsPQR(pqr)
  if len(models)>1:
    writer.write_multi_model=True
  for model in models:
    writer.Write(model)
223 
# LoadMap/SaveMap are only made available as aliases of LoadImage/SaveImage
# when the img module can be imported.
try:
  from ost import img
except ImportError:
  pass
else:
  LoadMap = LoadImage
  SaveMap = SaveImage
230 
## Loads several images and puts them in an ImageList.
# \sa \example fft_li.py "View Fourier Transform Example"
def LoadImageList (files):
  """
  Load every entry of *files* with LoadImage and collect the results.

  :param files: Iterable of items, each of which is passed to LoadImage.
  :returns: An ``img.ImageList`` holding the loaded images in the order of
            *files*.
  """
  loaded = img.ImageList()
  for entry in files:
    loaded.append(LoadImage(entry))
  return loaded

LoadMapList=LoadImageList
241 
def LoadCHARMMTraj(crd, dcd_file=None, profile='CHARMM',
                   lazy_load=False, stride=1,
                   dialect=None, detect_swap=True,swap_bytes=False):
  """
  Load CHARMM trajectory file.

  :param crd: EntityHandle or filename of the (PDB) file containing the
      structure. The structure must have the same number of atoms as the
      trajectory
  :param dcd_file: The filename of the DCD file. If not set, and crd is a
      string, the filename is derived from crd by replacing its extension
      with ``.dcd``
  :param profile: The IO profile to use for loading the PDB file. See
      :doc:`profile`.
  :param lazy_load: Whether the trajectory should be loaded on demand. Instead
      of loading the complete trajectory into memory, the trajectory frames are
      loaded from disk when requested.
  :param stride: The spacing of the frames to load. When set to 2, for example,
      every second frame is loaded from the trajectory. By default, every frame
      is loaded.
  :param dialect: The dialect for the PDB file to use. See :func:`LoadPDB`. If
      set, overrides the value of the profile
  :param detect_swap: if True (the default), then automatic detection of endianess
      is attempted, otherwise the swap_bytes parameter is used
  :param swap_bytes: if detect_swap is False, this flag determines whether bytes
      are swapped upon loading or not
  :raises: :exc:`ValueError` if *crd* is an EntityHandle and no *dcd_file* is
      given.
  """
  if not isinstance(crd, mol.EntityHandle):
    # crd is a filename: derive the trajectory name if needed, then load the
    # structure from it
    if dcd_file is None:
      dcd_file='%s.dcd' % os.path.splitext(crd)[0]
    crd=LoadPDB(crd, profile=profile, dialect=dialect)
  elif not dcd_file:
    # with an entity there is no filename the DCD name could be derived from
    raise ValueError("No DCD filename given")
  return LoadCHARMMTraj_(crd, dcd_file, stride, lazy_load, detect_swap, swap_bytes)
277 
def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, seqres=False, info=False):
  """
  Load MMCIF file from disk and return the entity. Several options
  allow to customize the exact behaviour of the MMCIF import. For more
  information on these options, see :doc:`profile`.

  Residues are flagged as ligand if they are mentioned in a HET record.

  :param filename: Path of the mmCIF file, or the pdb id if *remote* is True.

  :param fault_tolerant: Enable/disable fault-tolerant import. If set, overrides
     the value of :attr:`IOProfile.fault_tolerant`.

  :param calpha_only: If set, overrides the value of
     :attr:`IOProfile.calpha_only`.

  :param profile: Name of a registered profile, or a profile object. The
     import works on a copy, so the overrides given here do not modify the
     profile that was passed in.

  :param remote: If set to True, the file is fetched with
     :func:`~ost.io.remote.RemoteGet` from the 'cif' repository. The filename
     is then interpreted as the pdb id.

  :rtype: :class:`~ost.mol.EntityHandle` (or tuple if *seqres* or *info* are
          True).

  :param seqres: Whether to read SEQRES records. If True, a
                 :class:`~ost.seq.SequenceList` object is returned as the second
                 item. The sequences in the list are named according to the
                 mmCIF chain name.
                 This feature requires a default
                 :class:`compound library <ost.conop.CompoundLib>`
                 to be defined and accessible via
                 :func:`~ost.conop.GetDefaultLib` or an empty list is returned.

  :param info: Whether to return an info container with the other output.
               If True, a :class:`MMCifInfo` object is returned as last item.

  :raises: :exc:`~ost.io.IOException` if the import fails due to an erroneous
    or non-existent file.
  """
  if isinstance(profile, str):
    prof = profiles[profile].Copy()
  else:
    prof = profile.Copy()

  # explicitly given arguments take precedence over the profile settings
  prof.calpha_only=_override(prof.calpha_only, calpha_only)
  prof.fault_tolerant=_override(prof.fault_tolerant, fault_tolerant)

  # Keep a reference to whatever RemoteGet returns until the function is done,
  # so it cannot go out of scope while the file is being parsed.
  tmp_file = None
  if remote:
    from ost.io.remote import RemoteGet
    tmp_file = RemoteGet(filename, from_repo='cif')
    filename = tmp_file.name

  ent = mol.CreateEntity()
  reader = MMCifReader(filename, ent, prof)
  reader.read_seqres = seqres

  # NOTE: to speed up things, we could introduce a restrict_chains parameter
  #       similar to the one in LoadPDB. Here, it would have to be a list/set
  #       of chain-name-strings.
  reader.Parse()
  if prof.processor:
    prof.processor.Process(ent)

  # the entity always comes first, seqres (if requested) second, info last
  if seqres and info:
    return ent, reader.seqres, reader.info
  if seqres:
    return ent, reader.seqres
  if info:
    return ent, reader.info
  return ent
353 
# this function uses a dirty trick: should be a member of MMCifInfoBioUnit
# which is totally C++, but we want the method in Python... so we define it
# here (__init__) and add it as a member to the class. With this, the first
# argument is the usual 'self'.
# documentation for this function was moved to mmcif.rst,
# MMCifInfoBioUnit.PDBize, since this function is not included in SPHINX.
def _PDBize(biounit, asu, seqres=None, min_polymer_size=10,
            transformation=False):
  # biounit:          the biounit the method is called on ('self')
  # asu:              entity the chains of the biounit are selected from
  # seqres:           sequence list handed to the PDBizer; a new list from
  #                   seq.CreateSequenceList() is used if none is given
  # min_polymer_size: forwarded to mol.alg.PDBize
  # transformation:   forwarded to PDBize.Finish; if True, the result is
  #                   returned together with its transformation matrix
  def _as_matrix(operation):
    # one Mat4 per operation, built as translation * rotation
    rot = geom.Mat4()
    rot.PasteRotation(operation.rotation)
    trans = geom.Mat4()
    trans.PasteTranslation(operation.translation)
    return trans * rot

  pdbizer = mol.alg.PDBize(min_polymer_size=min_polymer_size)

  chains = biounit.GetChainList()
  c_intvls = biounit.GetChainIntervalList()
  o_intvls = biounit.GetOperationsIntervalList()
  ss = seqres
  if not ss:
    ss = seq.CreateSequenceList()
  # create list of operations
  # for cartesian products, operations are stored in a list, multiplied with
  # the next list of operations and re-stored... until all lists of operations
  # are multiplied in an all-against-all manner.
  operations = biounit.GetOperations()
  for i in range(0,len(c_intvls)):
    trans_matrices = geom.Mat4List()
    l_operations = operations[o_intvls[i][0]:o_intvls[i][1]]
    if len(l_operations) > 0:
      # the first list of operations seeds the matrix list
      for op in l_operations[0]:
        trans_matrices.append(_as_matrix(op))
      # every further list is multiplied onto all matrices collected so far
      for op_n in range(1, len(l_operations)):
        tmp_ops = geom.Mat4List()
        for o in l_operations[op_n]:
          tr = _as_matrix(o)
          for t_o in trans_matrices:
            tmp_ops.append(t_o * tr)
        trans_matrices = tmp_ops
    # select chains into a view as basis for each transformation
    chain_names = chains[c_intvls[i][0]:c_intvls[i][1]]
    assu = asu.Select('cname='+','.join(mol.QueryQuoteName(name) \
                                        for name in chain_names))
    pdbizer.Add(assu, trans_matrices, ss)
  pdb_bu = pdbizer.Finish(transformation)
  if transformation:
    return pdb_bu, pdb_bu.GetTransformationMatrix()
  return pdb_bu

MMCifInfoBioUnit.PDBize = _PDBize
def RemoteGet
Definition: remote.py:86
DLLEXPORT_OST_IO img::ImageHandle LoadImage(const boost::filesystem::path &loc)
Function that loads an image from a file.
Protein or molecule.
std::vector< Mat4 > Mat4List
Definition: mat4.hh:141
mol::CoordGroupHandle DLLEXPORT_OST_IO LoadCHARMMTraj(const mol::EntityHandle &ent, const String &trj_filename, unsigned int stride=1, bool lazy_load=false, bool detect_swap=true, bool byte_swap=false)
import a CHARMM trajectory in dcd format with an existing entity requires the existing entity and the...
def LoadImageList
Definition: __init__.py:233
def LoadMMCIF
Definition: __init__.py:278
reader for the mmcif file format
Definition: mmcif_reader.hh:59
Manages a collection of images.
Definition: image_list.hh:43
def SavePDB
Definition: __init__.py:186
def LoadPDB
Definition: __init__.py:69