OpenStructure
Loading...
Searching...
No Matches
profile_handle.hh
Go to the documentation of this file.
1//------------------------------------------------------------------------------
2// This file is part of the OpenStructure project <www.openstructure.org>
3//
4// Copyright (C) 2008-2020 by the OpenStructure authors
5//
6// This library is free software; you can redistribute it and/or modify it under
7// the terms of the GNU Lesser General Public License as published by the Free
8// Software Foundation; either version 3.0 of the License, or (at your option)
9// any later version.
10// This library is distributed in the hope that it will be useful, but WITHOUT
11// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13// details.
14//
15// You should have received a copy of the GNU Lesser General Public License
16// along with this library; if not, write to the Free Software Foundation, Inc.,
17// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18//------------------------------------------------------------------------------
19
20/*
21 Author: Gerardo Tauriello, Gabriel Studer
22 */
23
24#ifndef OST_SEQ_PROFILE_HANDLE_HH
25#define OST_SEQ_PROFILE_HANDLE_HH
26
27#include <ost/base.hh>
28#include <ost/stdint.hh>
29#include <ost/message.hh>
31
32#include <string.h> // for memcpy, etc
33#include <vector>
34#include <map>
35#include <fstream>
36#include <boost/shared_ptr.hpp>
37
38namespace ost { namespace seq {
39
40class ProfileHandle;
41class ProfileColumn;
42class HMMData;
43class ProfileDB;
44typedef boost::shared_ptr<ProfileHandle> ProfileHandlePtr;
45typedef boost::shared_ptr<HMMData> HMMDataPtr;
46typedef std::vector<ProfileHandlePtr> ProfileHandleList;
47typedef boost::shared_ptr<ProfileDB> ProfileDBPtr;
48typedef std::vector<ProfileColumn> ProfileColumnList;
49
50typedef enum {
51 HMM_M2M = 0, HMM_M2I = 1, HMM_M2D = 2,
52 HMM_I2M = 3, HMM_I2I = 4,
55
57public:
59 memset(trans_, 0, 7*sizeof(Real));
60 trans_[HMM_M2M] = 1.0;
61 trans_[HMM_I2M] = 1.0;
62 trans_[HMM_D2M] = 1.0;
63 neff_ = 1.0;
64 neff_i_ = 1.0;
65 neff_d_ = 1.0;
66 }
67
68 HMMData(const HMMData& rhs) {
69 memcpy(trans_, rhs.trans_, 7*sizeof(Real));
70 neff_ = rhs.neff_;
71 neff_i_ = rhs.neff_i_;
72 neff_d_ = rhs.neff_d_;
73 }
74
75 Real GetProb(HMMTransition transition) const {
76 return trans_[transition];
77 }
78
79 void SetProb(HMMTransition transition, Real prob) {
80 trans_[transition] = prob;
81 }
82
83 Real GetNeff() const {
84 return neff_;
85 }
86
87 void SetNeff(Real neff) {
88 neff_ = neff;
89 }
90
91 Real GetNeff_I() const {
92 return neff_i_;
93 }
94
95 void SetNeff_I(Real neff) {
96 neff_i_ = neff;
97 }
98
99 Real GetNeff_D() const {
100 return neff_d_;
101 }
102
103 void SetNeff_D(Real neff) {
104 neff_d_ = neff;
105 }
106
107 bool operator==(const HMMData& rhs) const {
108 return (!memcmp(trans_, rhs.trans_, sizeof(trans_)) &&
109 neff_ == rhs.neff_ &&
110 neff_i_ == rhs.neff_i_ &&
111 neff_d_ == rhs.neff_d_);
112 }
113 bool operator!=(const HMMData& rhs) const { return !(rhs == (*this)); }
114
115 HMMData& operator=(const HMMData& rhs) {
116 memcpy(trans_, rhs.trans_, 7*sizeof(Real));
117 neff_ = rhs.neff_;
118 neff_i_ = rhs.neff_i_;
119 neff_d_ = rhs.neff_d_;
120 return *this;
121 }
122
123 friend std::ofstream& operator<<(std::ofstream& os, HMMData& dat) {
124
125 int16_t p_data[7];
126 for (uint i = 0; i < 7; ++i) {
127 p_data[i] = static_cast<int16_t>(dat.trans_[i]*10000);
128 }
129 os.write(reinterpret_cast<char*>(p_data), 7*sizeof(int16_t));
130
131 float neff_data[3];
132 neff_data[0] = dat.neff_;
133 neff_data[1] = dat.neff_i_;
134 neff_data[2] = dat.neff_d_;
135 os.write(reinterpret_cast<char*>(neff_data), 3*sizeof(float));
136
137 return os;
138 }
139
140 friend std::ifstream& operator>>(std::ifstream& is, HMMData& dat) {
141
142 int16_t p_data[7];
143 is.read(reinterpret_cast<char*>(p_data), 7*sizeof(int16_t));
144 for (uint i = 0; i < 7; ++i) {
145 dat.trans_[i] = p_data[i] * 0.0001;
146 }
147
148 float neff_data[3];
149 is.read(reinterpret_cast<char*>(neff_data), 3*sizeof(float));
150 dat.neff_ = neff_data[0];
151 dat.neff_i_ = neff_data[1];
152 dat.neff_d_ = neff_data[2];
153
154 return is;
155 }
156
157private:
158 Real trans_[7];
159 Real neff_;
160 Real neff_i_;
161 Real neff_d_;
162};
163
169public:
170
173 memset(freq_, 0, sizeof(freq_));
174 }
175
177 memcpy(freq_, rhs.freq_, sizeof(freq_));
178 if(rhs.hmm_data_) {
179 // do deep copy
180 hmm_data_ = HMMDataPtr(new HMMData(*rhs.hmm_data_));
181 }
182 }
183 ProfileColumn& operator= (const ProfileColumn& rhs) {
184 memcpy(freq_, rhs.freq_, sizeof(freq_));
185 if(rhs.hmm_data_) {
186 // do deep copy
187 hmm_data_ = HMMDataPtr(new HMMData(*rhs.hmm_data_));
188 } else if(hmm_data_) {
189 hmm_data_ = HMMDataPtr();
190 }
191 return *this;
192 }
193
196
198 hmm_data_ = p;
199 }
200
202 if(!hmm_data_) {
203 throw Error("ProfileColumn has no HMM data set!");
204 }
205 return hmm_data_;
206 }
207
208 Real GetTransProb(HMMTransition transition) const {
209 if(!hmm_data_) {
210 throw Error("ProfileColumn has no HMM data set!");
211 }
212 return hmm_data_->GetProb(transition);
213 }
214
216 static int GetIndex(char ch);
217
218 Real GetFreq(char ch) const;
219
220 void SetFreq(char ch, Real freq);
221
222 bool operator==(const ProfileColumn& rhs) const {
223 return !memcmp(freq_, rhs.freq_, sizeof(freq_));
224 }
225 bool operator!=(const ProfileColumn& rhs) const { return !(rhs == (*this)); }
226
227 Real* freqs_begin() { return freq_; }
228 Real* freqs_end() { return freq_ + 20; }
229 const Real* freqs_begin() const { return freq_; }
230 const Real* freqs_end() const { return freq_ + 20; }
231
234
237 const ProfileColumn& null_model) const;
238
239 // functions to feed streams with limited accuracy of internal data
240 // not intended for python export
241
242 friend std::ofstream& operator<<(std::ofstream& os, ProfileColumn& col) {
243 int16_t data[20];
244 //transform aa_freq
245 for (uint i = 0; i < 20; ++i) {
246 data[i] = static_cast<int16_t>(col.freq_[i]*10000);
247 }
248 os.write(reinterpret_cast<char*>(data), sizeof(data));
249
250 if(col.hmm_data_) {
251 bool has_hmm_data = true;
252 os.write(reinterpret_cast<char*>(&has_hmm_data), 1);
253 os << *col.hmm_data_;
254 } else {
255 bool has_hmm_data = false;
256 os.write(reinterpret_cast<char*>(&has_hmm_data), 1);
257 }
258
259 return os;
260 }
261
262 friend std::ifstream& operator>>(std::ifstream& is, ProfileColumn& col) {
263 int16_t data[20];
264 is.read(reinterpret_cast<char*>(data), sizeof(data));
265 //transform aa_freq
266 for (uint i = 0; i < 20; ++i) {
267 col.freq_[i] = data[i] * 0.0001;
268 }
269
270 bool has_hmm_data;
271 is.read(reinterpret_cast<char*>(&has_hmm_data), 1);
272 if(has_hmm_data) {
273 is >> *col.hmm_data_;
274 }
275
276 return is;
277 }
278
279private:
280 Real freq_[20];
281 HMMDataPtr hmm_data_;
282};
283
292public:
294 ProfileHandle(): null_model_(ProfileColumn::HHblitsNullModel()), neff_(1.0) {}
295
296 // uses compiler-generated copy- and assignment operators (work here!)
297
298 const std::vector<ProfileColumn>& GetColumns() const { return columns_; }
299
300 const ProfileColumn& GetNullModel() const { return null_model_; }
301
302 void SetNullModel(const ProfileColumn& null_model) {
303 null_model_ = null_model;
304 }
305
306 String GetSequence() const { return seq_; }
307
308 void SetSequence(const String& seq) {
309 if (seq.length() != columns_.size()) {
310 throw Error("ProfileHandle - Inconsistency between number of columns and "
311 " seq. length.");
312 }
313 seq_ = seq;
314 }
315
316 Real GetNeff() const { return neff_; }
317
318 void SetNeff(Real neff) { neff_ = neff; }
319
324
327
331 Real GetAverageScore(const ProfileHandle& other, uint offset = 0) const;
332
333 // \brief Can only add column with an associated olc
334 void AddColumn(const ProfileColumn& c, char olc='X') {
335 columns_.push_back(c);
336 seq_ += olc;
337 }
338
339 // some functions to make it behave like a vector
340
341 void clear() { seq_ = ""; columns_.clear(); }
342
343 size_t size() const { return columns_.size(); }
344
345 bool empty() const { return columns_.empty(); }
346
347 ProfileColumn& operator[](size_t index) { return columns_[index]; }
348
349 const ProfileColumn& operator[](size_t index) const { return columns_[index]; }
350
351 ProfileColumn& at(size_t index) { return columns_.at(index); }
352
353 const ProfileColumn& at(size_t index) const { return columns_.at(index); }
354
355 ProfileColumn& back() { return columns_.back(); }
356
357 const ProfileColumn& back() const { return columns_.back(); }
358
359 bool operator==(const ProfileHandle& other) const {
360 return seq_ == other.seq_ &&
361 columns_ == other.columns_ &&
362 null_model_ == other.null_model_;
363 }
364
365 bool operator!=(const ProfileHandle& other) const {
366 return !(other == (*this));
367 }
368
369 ProfileColumnList::iterator columns_begin() { return columns_.begin(); }
370 ProfileColumnList::iterator columns_end() { return columns_.end(); }
371 ProfileColumnList::const_iterator columns_begin() const {
372 return columns_.begin();
373 }
374 ProfileColumnList::const_iterator columns_end() const {
375 return columns_.end();
376 }
377
378 // functions to feed streams with limited accuracy of internal data
379 // not intended for python export
380
381 friend std::ofstream& operator<<(std::ofstream& os, ProfileHandle& prof) {
382 // null model
383 os << prof.null_model_;
384 // num. columns/residues
385 uint32_t size = prof.size();
386 os.write(reinterpret_cast<char*>(&size), sizeof(uint32_t));
387 // sequence
388 if (prof.seq_.length() != size) {
389 throw Error("ProfileHandle - Inconsistency between number of columns and "
390 " seq. length.");
391 }
392 os.write(prof.seq_.c_str(), size);
393 // columns
394 for(uint i = 0; i < size; ++i){
395 os << prof.columns_[i];
396 }
397
398 return os;
399 }
400
401 friend std::ifstream& operator>>(std::ifstream& is, ProfileHandle& prof) {
402 // null model
403 is >> prof.null_model_;
404 // num. columns/residues
405 uint32_t size;
406 is.read(reinterpret_cast<char*>(&size), sizeof(uint32_t));
407 // sequence
408 std::vector<char> tmp_buf(size);
409 is.read(&tmp_buf[0], size);
410 prof.seq_.assign(&tmp_buf[0], size);
411 // columns
412 prof.columns_.resize(size);
413 for(uint i = 0; i < size; ++i){
414 is >> prof.columns_[i];
415 }
416
417 return is;
418 }
419
420private:
421 String seq_;
422 ProfileColumn null_model_;
423 ProfileColumnList columns_;
424 Real neff_;
425};
426
429public:
432 void Save(const String& filename) const;
433
434 static ProfileDBPtr Load(const String& filename);
435
436 void AddProfile(const String& name, ProfileHandlePtr prof);
437
439
440 size_t size() const { return data_.size(); }
441
442 std::vector<String> GetNames() const;
443
444private:
445 std::map<String, ProfileHandlePtr> data_;
446};
447
448}}
449
450#endif
void SetNeff(Real neff)
HMMData(const HMMData &rhs)
void SetNeff_D(Real neff)
Real GetNeff_D() const
bool operator==(const HMMData &rhs) const
Real GetProb(HMMTransition transition) const
bool operator!=(const HMMData &rhs) const
HMMData & operator=(const HMMData &rhs)
void SetNeff_I(Real neff)
Real GetNeff() const
friend std::ofstream & operator<<(std::ofstream &os, HMMData &dat)
void SetProb(HMMTransition transition, Real prob)
Real GetNeff_I() const
friend std::ifstream & operator>>(std::ifstream &is, HMMData &dat)
Defines profile of 20 frequencies for one residue.
const Real * freqs_begin() const
Real GetScore(const ProfileColumn &other, const ProfileColumn &null_model) const
Get column score as in Soeding-2005.
friend std::ofstream & operator<<(std::ofstream &os, ProfileColumn &col)
static ProfileColumn BLOSUMNullModel()
static ProfileColumn HHblitsNullModel()
ProfileColumn(const ProfileColumn &rhs)
void SetHMMData(HMMDataPtr p)
Real GetFreq(char ch) const
static int GetIndex(char ch)
Translate one-letter-code to index (0-indexing).
bool operator==(const ProfileColumn &rhs) const
void SetFreq(char ch, Real freq)
HMMDataPtr GetHMMData() const
ProfileColumn()
Construct a profile with all frequencies set to 0.
friend std::ifstream & operator>>(std::ifstream &is, ProfileColumn &col)
Real GetTransProb(HMMTransition transition) const
const Real * freqs_end() const
bool operator!=(const ProfileColumn &rhs) const
Real GetEntropy() const
Get entropy for this column.
Contains a DB of profiles (identified by a unique name (String)).
static ProfileDBPtr Load(const String &filename)
void AddProfile(const String &name, ProfileHandlePtr prof)
ProfileHandlePtr GetProfile(const String &name) const
void Save(const String &filename) const
Saves all profiles in DB with limited accuracy of internal data. Binary format with fixed-width integ...
std::vector< String > GetNames() const
Provides a profile for a sequence.
const ProfileColumn & operator[](size_t index) const
void AddColumn(const ProfileColumn &c, char olc='X')
const ProfileColumn & back() const
friend std::ifstream & operator>>(std::ifstream &is, ProfileHandle &prof)
ProfileColumnList::iterator columns_end()
bool operator==(const ProfileHandle &other) const
ProfileColumn & operator[](size_t index)
void SetNullModel(const ProfileColumn &null_model)
const ProfileColumn & GetNullModel() const
bool operator!=(const ProfileHandle &other) const
ProfileColumnList::iterator columns_begin()
const std::vector< ProfileColumn > & GetColumns() const
Real GetAverageEntropy() const
Compute average entropy over all columns.
friend std::ofstream & operator<<(std::ofstream &os, ProfileHandle &prof)
ProfileColumn & at(size_t index)
ProfileColumnList::const_iterator columns_begin() const
ProfileColumnList::const_iterator columns_end() const
Real GetAverageScore(const ProfileHandle &other, uint offset=0) const
Compute score comparing columns other[i] and this->at(i+offset) Column score as in Soeding-2005,...
void SetSequence(const String &seq)
ProfileHandle()
Constructs an empty profile handle (sequence = '', 0 columns).
ProfileHandlePtr Extract(uint from, uint to) const
Extract subset of profile for columns from until to-1 (0-indexing). Null model is copied from this pr...
const ProfileColumn & at(size_t index) const
unsigned int uint
Definition base.hh:29
float Real
Definition base.hh:44
std::string String
Definition base.hh:54
boost::shared_ptr< HMMData > HMMDataPtr
std::vector< ProfileColumn > ProfileColumnList
boost::shared_ptr< ProfileHandle > ProfileHandlePtr
boost::shared_ptr< ProfileDB > ProfileDBPtr
std::vector< ProfileHandlePtr > ProfileHandleList
Definition base.dox:1
#define DLLEXPORT_OST_SEQ
signed short int16_t
Definition stdint_msc.hh:76
unsigned int uint32_t
Definition stdint_msc.hh:80