OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
lga.py
Go to the documentation of this file.
1 """
2 Interface to LGA
3 
4 Author: Marco Biasini
5 """
6 import tempfile
7 import os
8 from ost import io
9 import subprocess
10 from ost import geom
11 from ost import settings
12 
13 def _FindLGABinary(lga_bin):
14  return settings.Locate('lga', explicit_file_name=lga_bin,
15  env_name='LGA_BINARY')
16 
17 def _PrepareInput(pdb1, pdb2, output_pdb):
18  """
19  Deal with the brain-dead input requirements of LGA.
20  """
21  mol1=os.path.basename(os.path.splitext(pdb1)[0])
22  mol2=os.path.basename(os.path.splitext(pdb2)[0])
23  os.system('echo MOLECULE %s > %s' % (mol1, output_pdb))
24  os.system('cat %s >> %s' % (pdb1, output_pdb))
25  os.system('echo "\n" >> %s' % (output_pdb))
26  os.system('echo MOLECULE %s >> %s' % (mol2, output_pdb))
27  os.system('cat %s >> %s' % (pdb2, output_pdb))
28 
29 class GDTResult:
30  def __init__(self, rotation, shift, gdt_ts, gdt_ha):
31  self.rotation=rotation
32  self.shift=shift
33  self.gdt_ts=gdt_ts
34  self.gdt_ha=gdt_ha
35  def GetTransform(self):
36  transform=geom.Mat4()
37  transform.PasteTranslation(self.shift)
38  return transform*geom.Mat4(self.rotation)
39 
40 def _ParseRotationAndShift(lines):
41  t=[l.split() for l in lines]
42  rot=geom.Mat3()
43  shift=geom.Vec3()
44  for i, x in enumerate(t):
45  rot[(i, 0)]=+float(x[2])
46  rot[(i, 1)]=+float(x[6])
47  rot[(i, 2)]=+float(x[10])
48  shift[i]=float(x[14])
49  return rot, shift
50 
51 def _ParseGDTSection(section, residue_count):
52  cutoffs=[float(e) for e in section[0].split()[2:]]
53  num_ca=[int(e) for e in section[1].split()[2:]]
54  gdt_ts=[float(e) for e in section[2].split()[2:]]
55  scores=dict(zip(cutoffs, gdt_ts))
56  numbers=dict(zip(cutoffs, num_ca))
57  factor=(1.0/(4*residue_count))*100
58  ts_cutoffs=(1.0, 2.0, 4.0, 8.0)
59  ha_cutoffs=(0.5, 1.0, 2.0, 4.0)
60  gdt_ts=(sum([numbers[c] for c in ts_cutoffs]))*factor
61  gdt_ha=(sum([numbers[c] for c in ha_cutoffs]))*factor
62  return gdt_ts, gdt_ha
63 
64 def _ParseLGAOutput(output, residue_count):
65  result=GDTResult(geom.Mat3(), geom.Vec3(), 0.0, 0.0)
66  found_gdt_section=False
67  found_transform_section=False
68  for index, line in enumerate(output):
69  if line.startswith('GLOBAL_DISTANCE_TEST'):
70  next_lines=output[index+1:index+5]
71  result.gdt_ts, result.gdt_ha=_ParseGDTSection(next_lines, residue_count)
72  found_gdt_section=True
73  if line.startswith('Unitary ROTATION matrix'):
74  next_lines=output[index+1:index+4]
75  result.rotation, result.shift=_ParseRotationAndShift(next_lines)
76  found_transform_section=True
77  break
78  assert found_transform_section and found_gdt_section
79  return result
80 
81 def GDT(pdb1, pdb2, chain1='', chain2='', reference_length=None, lga_bin=None):
82  """
83  Calculate GDT value between pdb1 and pdb2. It is assumed that the
84  corresponding residues in pdb1 and pdb2 have the same residue numbers.
85  """
86  lga_bin=_FindLGABinary(lga_bin)
87  temp_d=tempfile.mkdtemp(prefix='lga_gdt_ts_')
88  for d in ('MOL2', 'TMP', 'RESULTS'):
89  os.mkdir(os.path.join(temp_d, d))
90 
91  if not chain1:
92  chain1=pdb1.chains[0].name
93  if not chain2:
94  chain2=pdb2.chains[0].name
95  pdb_one_name=os.path.join(temp_d, 'MOL2', 'one.pdb')
96  pdb_two_name=os.path.join(temp_d, 'MOL2', 'two.pdb')
97  io.SaveEntity(pdb1, pdb_one_name)
98  io.SaveEntity(pdb2, pdb_two_name)
99  _PrepareInput(pdb_one_name, pdb_two_name,
100  os.path.join(temp_d, 'MOL2', 'input.pdb'))
101  output_file=os.path.join(temp_d, 'out.txt')
102  ch1, ch2=('', '')
103  if len(chain1.strip()): ch1=' -ch1:%s ' % chain1
104  if len(chain2.strip()): ch2=' -ch2:%s ' % chain2
105 
106  params=(temp_d, lga_bin, 'input.pdb', ch1, ch2)
107  command='cd %s; %s %s %s%s-ie -3 -d:4 -sda'
108 
109  expanded_cmd=command % params
110  lga_proc=subprocess.Popen(expanded_cmd, shell=True,
111  stdout=subprocess.PIPE)
112  length=reference_length or max(pdb1.residue_count, pdb2.residue_count)
113  result=_ParseLGAOutput(lga_proc.stdout.readlines(), reference_length)
114  os.system('rm -r %s' % temp_d)
115  return result
Three dimensional vector class, using Real precision.
Definition: vec3.hh:42