OpenStructure
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
profile_handle.hh
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // This file is part of the OpenStructure project <www.openstructure.org>
3 //
4 // Copyright (C) 2008-2015 by the OpenStructure authors
5 //
6 // This library is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU Lesser General Public License as published by the Free
8 // Software Foundation; either version 3.0 of the License, or (at your option)
9 // any later version.
10 // This library is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with this library; if not, write to the Free Software Foundation, Inc.,
17 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 //------------------------------------------------------------------------------
19 
20 /*
21  Author: Gerardo Tauriello, Gabriel Studer
22  */
23 
24 #ifndef OST_SEQ_PROFILE_HANDLE_HH
25 #define OST_SEQ_PROFILE_HANDLE_HH
26 
27 #include <ost/base.hh>
28 #include <ost/stdint.hh>
29 #include <ost/message.hh>
30 #include <ost/seq/module_config.hh>
31 
32 #include <string.h> // for memcpy, etc
33 #include <vector>
34 #include <map>
35 #include <fstream>
36 #include <boost/shared_ptr.hpp>
37 
38 namespace ost { namespace seq {
39 
40 class ProfileHandle;
41 class ProfileColumn;
42 class ProfileDB;
43 typedef boost::shared_ptr<ProfileHandle> ProfileHandlePtr;
44 typedef std::vector<ProfileHandlePtr> ProfileHandleList;
45 typedef boost::shared_ptr<ProfileDB> ProfileDBPtr;
46 typedef std::vector<ProfileColumn> ProfileColumnList;
47 
53 public:
54 
57  memset(freq_, 0, sizeof(freq_));
58  }
59 
61  memcpy(freq_, rhs.freq_, sizeof(freq_));
62  }
63  ProfileColumn& operator= (const ProfileColumn& rhs) {
64  memcpy(freq_, rhs.freq_, sizeof(freq_));
65  return *this;
66  }
67 
68  static ProfileColumn BLOSUMNullModel();
69  static ProfileColumn HHblitsNullModel();
70 
72  static int GetIndex(char ch);
73 
74  Real GetFreq(char ch) const;
75 
76  void SetFreq(char ch, Real freq);
77 
78  bool operator==(const ProfileColumn& rhs) const {
79  return !memcmp(freq_, rhs.freq_, sizeof(freq_));
80  }
81  bool operator!=(const ProfileColumn& rhs) const { return !(rhs == (*this)); }
82 
83  Real* freqs_begin() { return freq_; }
84  Real* freqs_end() { return freq_ + 20; }
85  const Real* freqs_begin() const { return freq_; }
86  const Real* freqs_end() const { return freq_ + 20; }
87 
89  Real GetEntropy() const;
90 
92  Real GetScore(const ProfileColumn& other,
93  const ProfileColumn& null_model) const;
94 
95  // functions to feed streams with limited accuracy of internal data
96  // not intended for python export
97 
98  friend std::ofstream& operator<<(std::ofstream& os, ProfileColumn& col) {
99  int16_t data[20];
100  //transform aa_freq
101  for (uint i = 0; i < 20; ++i) {
102  data[i] = static_cast<int16_t>(col.freq_[i]*10000);
103  }
104  os.write(reinterpret_cast<char*>(data), sizeof(data));
105  return os;
106  }
107 
108  friend std::ifstream& operator>>(std::ifstream& is, ProfileColumn& col) {
109  int16_t data[20];
110  is.read(reinterpret_cast<char*>(data), sizeof(data));
111  //transform aa_freq
112  for (uint i = 0; i < 20; ++i) {
113  col.freq_[i] = data[i] * 0.0001;
114  }
115  return is;
116  }
117 
118 private:
119  Real freq_[20];
120 };
121 
130 public:
132  ProfileHandle(): null_model_(ProfileColumn::HHblitsNullModel()) {}
133 
134  // uses compiler-generated copy- and assignment operators (work here!)
135 
136  const std::vector<ProfileColumn>& GetColumns() const { return columns_; }
137 
138  const ProfileColumn& GetNullModel() const { return null_model_; }
139 
140  void SetNullModel(const ProfileColumn& null_model) {
141  null_model_ = null_model;
142  }
143 
144  String GetSequence() const { return seq_; }
145 
146  void SetSequence(const String& seq) {
147  if (seq.length() != columns_.size()) {
148  throw Error("ProfileHandle - Inconsistency between number of columns and "
149  " seq. length.");
150  }
151  seq_ = seq;
152  }
153 
157  ProfileHandlePtr Extract(uint from, uint to);
158 
160  Real GetAverageEntropy() const;
161 
165  Real GetAverageScore(const ProfileHandle& other, uint offset = 0) const;
166 
167  // \brief Can only add column with an associated olc
168  void AddColumn(const ProfileColumn& c, char olc='X') {
169  columns_.push_back(c);
170  seq_ += olc;
171  }
172 
173  // some functions to make it behave like a vector
174 
175  void clear() { seq_ = ""; columns_.clear(); }
176 
177  size_t size() const { return columns_.size(); }
178 
179  bool empty() const { return columns_.empty(); }
180 
181  ProfileColumn& operator[](size_t index) { return columns_[index]; }
182 
183  const ProfileColumn& operator[](size_t index) const { return columns_[index]; }
184 
185  ProfileColumn& at(size_t index) { return columns_.at(index); }
186 
187  const ProfileColumn& at(size_t index) const { return columns_.at(index); }
188 
189  bool operator==(const ProfileHandle& other) const {
190  return seq_ == other.seq_ &&
191  columns_ == other.columns_ &&
192  null_model_ == other.null_model_;
193  }
194 
195  bool operator!=(const ProfileHandle& other) const {
196  return !(other == (*this));
197  }
198 
199  ProfileColumnList::iterator columns_begin() { return columns_.begin(); }
200  ProfileColumnList::iterator columns_end() { return columns_.end(); }
201  ProfileColumnList::const_iterator columns_begin() const {
202  return columns_.begin();
203  }
204  ProfileColumnList::const_iterator columns_end() const {
205  return columns_.end();
206  }
207 
208  // functions to feed streams with limited accuracy of internal data
209  // not intended for python export
210 
211  friend std::ofstream& operator<<(std::ofstream& os, ProfileHandle& prof) {
212  // null model
213  os << prof.null_model_;
214  // num. columns/residues
215  uint32_t size = prof.size();
216  os.write(reinterpret_cast<char*>(&size), sizeof(uint32_t));
217  // sequence
218  if (prof.seq_.length() != size) {
219  throw Error("ProfileHandle - Inconsistency between number of columns and "
220  " seq. length.");
221  }
222  os.write(prof.seq_.c_str(), size);
223  // columns
224  for(uint i = 0; i < size; ++i){
225  os << prof.columns_[i];
226  }
227 
228  return os;
229  }
230 
231  friend std::ifstream& operator>>(std::ifstream& is, ProfileHandle& prof) {
232  // null model
233  is >> prof.null_model_;
234  // num. columns/residues
235  uint32_t size;
236  is.read(reinterpret_cast<char*>(&size), sizeof(uint32_t));
237  // sequence
238  std::vector<char> tmp_buf(size);
239  is.read(&tmp_buf[0], size);
240  prof.seq_.assign(&tmp_buf[0], size);
241  // columns
242  prof.columns_.resize(size);
243  for(uint i = 0; i < size; ++i){
244  is >> prof.columns_[i];
245  }
246 
247  return is;
248  }
249 
250 private:
251  String seq_;
252  ProfileColumn null_model_;
253  ProfileColumnList columns_;
254 };
255 
258 public:
261  void Save(const String& filename) const;
262 
263  static ProfileDBPtr Load(const String& filename);
264 
265  void AddProfile(const String& name, ProfileHandlePtr prof);
266 
267  ProfileHandlePtr GetProfile(const String& name) const;
268 
269  size_t size() const { return data_.size(); }
270 
271  std::vector<String> GetNames() const;
272 
273 private:
274  std::map<String, ProfileHandlePtr> data_;
275 };
276 
277 }}
278 
279 #endif
#define DLLEXPORT_OST_SEQ
bool operator==(const ProfileHandle &other) const
void SetSequence(const String &seq)
const ProfileColumn & operator[](size_t index) const
size_t size() const
ProfileColumn & operator[](size_t index)
void AddColumn(const ProfileColumn &c, char olc='X')
std::string String
Definition: base.hh:54
float Real
Definition: base.hh:44
ProfileColumnList::const_iterator columns_end() const
ProfileColumn(const ProfileColumn &rhs)
const ProfileColumn & GetNullModel() const
signed short int16_t
Definition: stdint_msc.hh:76
const std::vector< ProfileColumn > & GetColumns() const
unsigned int uint32_t
Definition: stdint_msc.hh:80
friend std::ifstream & operator>>(std::ifstream &is, ProfileHandle &prof)
String GetSequence() const
ProfileColumnList::iterator columns_begin()
ProfileColumn()
Construct a profile with all frequencies set to 0.
T Extract(const info::InfoGroup &g, const String &n)
Convenience function to extract a value from an item.
Definition: info_item.hh:128
std::vector< ProfileColumn > ProfileColumnList
ProfileHandle()
Constructs an empty profile handle (sequence = '', 0 columns).
ProfileColumn & at(size_t index)
friend std::ifstream & operator>>(std::ifstream &is, ProfileColumn &col)
Provides a profile for a sequence.
void SetNullModel(const ProfileColumn &null_model)
ProfileColumnList::const_iterator columns_begin() const
boost::shared_ptr< ProfileDB > ProfileDBPtr
bool operator==(const ProfileColumn &rhs) const
ProfileColumnList::iterator columns_end()
std::vector< ProfileHandlePtr > ProfileHandleList
boost::shared_ptr< ProfileHandle > ProfileHandlePtr
bool operator!=(const ProfileHandle &other) const
friend std::ofstream & operator<<(std::ofstream &os, ProfileColumn &col)
const ProfileColumn & at(size_t index) const
bool operator!=(const ProfileColumn &rhs) const
Defines profile of 20 frequencies for one residue.
Contains a DB of profiles (identified by a unique name (String)).
const Real * freqs_begin() const
const Real * freqs_end() const
friend std::ofstream & operator<<(std::ofstream &os, ProfileHandle &prof)
unsigned int uint
Definition: base.hh:29