00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef OST_SEQ_PROFILE_HANDLE_HH
00025 #define OST_SEQ_PROFILE_HANDLE_HH
00026
00027 #include <ost/base.hh>
00028 #include <ost/stdint.hh>
00029 #include <ost/message.hh>
00030 #include <ost/seq/module_config.hh>
00031
00032 #include <string.h>
00033 #include <vector>
00034 #include <map>
00035 #include <fstream>
00036 #include <boost/shared_ptr.hpp>
00037
00038 namespace ost { namespace seq {
00039
00040 class ProfileHandle;
00041 class ProfileColumn;
00042 class ProfileDB;
00043 typedef boost::shared_ptr<ProfileHandle> ProfileHandlePtr;
00044 typedef std::vector<ProfileHandlePtr> ProfileHandleList;
00045 typedef boost::shared_ptr<ProfileDB> ProfileDBPtr;
00046 typedef std::vector<ProfileColumn> ProfileColumnList;
00047
00052 class DLLEXPORT_OST_SEQ ProfileColumn {
00053 public:
00054
00056 ProfileColumn() {
00057 memset(freq_, 0, sizeof(freq_));
00058 }
00059
00060 ProfileColumn(const ProfileColumn& rhs) {
00061 memcpy(freq_, rhs.freq_, sizeof(freq_));
00062 }
00063 ProfileColumn& operator= (const ProfileColumn& rhs) {
00064 memcpy(freq_, rhs.freq_, sizeof(freq_));
00065 return *this;
00066 }
00067
00068 static ProfileColumn BLOSUMNullModel();
00069 static ProfileColumn HHblitsNullModel();
00070
00072 static int GetIndex(char ch);
00073
00074 Real GetFreq(char ch) const;
00075
00076 void SetFreq(char ch, Real freq);
00077
00078 bool operator==(const ProfileColumn& rhs) const {
00079 return !memcmp(freq_, rhs.freq_, sizeof(freq_));
00080 }
00081 bool operator!=(const ProfileColumn& rhs) const { return !(rhs == (*this)); }
00082
00083 Real* freqs_begin() { return freq_; }
00084 Real* freqs_end() { return freq_ + 20; }
00085 const Real* freqs_begin() const { return freq_; }
00086 const Real* freqs_end() const { return freq_ + 20; }
00087
00089 Real GetEntropy() const;
00090
00092 Real GetScore(const ProfileColumn& other,
00093 const ProfileColumn& null_model) const;
00094
00095
00096
00097
00098 friend std::ofstream& operator<<(std::ofstream& os, ProfileColumn& col) {
00099 int16_t data[20];
00100
00101 for (uint i = 0; i < 20; ++i) {
00102 data[i] = static_cast<int16_t>(col.freq_[i]*10000);
00103 }
00104 os.write(reinterpret_cast<char*>(data), sizeof(data));
00105 return os;
00106 }
00107
00108 friend std::ifstream& operator>>(std::ifstream& is, ProfileColumn& col) {
00109 int16_t data[20];
00110 is.read(reinterpret_cast<char*>(data), sizeof(data));
00111
00112 for (uint i = 0; i < 20; ++i) {
00113 col.freq_[i] = data[i] * 0.0001;
00114 }
00115 return is;
00116 }
00117
00118 private:
00119 Real freq_[20];
00120 };
00121
00129 class DLLEXPORT_OST_SEQ ProfileHandle {
00130 public:
00132 ProfileHandle(): null_model_(ProfileColumn::HHblitsNullModel()) {}
00133
00134
00135
00136 const std::vector<ProfileColumn>& GetColumns() const { return columns_; }
00137
00138 const ProfileColumn& GetNullModel() const { return null_model_; }
00139
00140 void SetNullModel(const ProfileColumn& null_model) {
00141 null_model_ = null_model;
00142 }
00143
00144 String GetSequence() const { return seq_; }
00145
00146 void SetSequence(const String& seq) {
00147 if (seq.length() != columns_.size()) {
00148 throw Error("ProfileHandle - Inconsistency between number of columns and "
00149 " seq. length.");
00150 }
00151 seq_ = seq;
00152 }
00153
00157 ProfileHandlePtr Extract(uint from, uint to);
00158
00160 Real GetAverageEntropy() const;
00161
00165 Real GetAverageScore(const ProfileHandle& other, uint offset = 0) const;
00166
00167
00168 void AddColumn(const ProfileColumn& c, char olc='X') {
00169 columns_.push_back(c);
00170 seq_ += olc;
00171 }
00172
00173
00174
00175 void clear() { seq_ = ""; columns_.clear(); }
00176
00177 size_t size() const { return columns_.size(); }
00178
00179 bool empty() const { return columns_.empty(); }
00180
00181 ProfileColumn& operator[](size_t index) { return columns_[index]; }
00182
00183 const ProfileColumn& operator[](size_t index) const { return columns_[index]; }
00184
00185 ProfileColumn& at(size_t index) { return columns_.at(index); }
00186
00187 const ProfileColumn& at(size_t index) const { return columns_.at(index); }
00188
00189 bool operator==(const ProfileHandle& other) const {
00190 return seq_ == other.seq_ &&
00191 columns_ == other.columns_ &&
00192 null_model_ == other.null_model_;
00193 }
00194
00195 bool operator!=(const ProfileHandle& other) const {
00196 return !(other == (*this));
00197 }
00198
00199 ProfileColumnList::iterator columns_begin() { return columns_.begin(); }
00200 ProfileColumnList::iterator columns_end() { return columns_.end(); }
00201 ProfileColumnList::const_iterator columns_begin() const {
00202 return columns_.begin();
00203 }
00204 ProfileColumnList::const_iterator columns_end() const {
00205 return columns_.end();
00206 }
00207
00208
00209
00210
00211 friend std::ofstream& operator<<(std::ofstream& os, ProfileHandle& prof) {
00212
00213 os << prof.null_model_;
00214
00215 uint32_t size = prof.size();
00216 os.write(reinterpret_cast<char*>(&size), sizeof(uint32_t));
00217
00218 if (prof.seq_.length() != size) {
00219 throw Error("ProfileHandle - Inconsistency between number of columns and "
00220 " seq. length.");
00221 }
00222 os.write(prof.seq_.c_str(), size);
00223
00224 for(uint i = 0; i < size; ++i){
00225 os << prof.columns_[i];
00226 }
00227
00228 return os;
00229 }
00230
00231 friend std::ifstream& operator>>(std::ifstream& is, ProfileHandle& prof) {
00232
00233 is >> prof.null_model_;
00234
00235 uint32_t size;
00236 is.read(reinterpret_cast<char*>(&size), sizeof(uint32_t));
00237
00238 std::vector<char> tmp_buf(size);
00239 is.read(&tmp_buf[0], size);
00240 prof.seq_.assign(&tmp_buf[0], size);
00241
00242 prof.columns_.resize(size);
00243 for(uint i = 0; i < size; ++i){
00244 is >> prof.columns_[i];
00245 }
00246
00247 return is;
00248 }
00249
00250 private:
00251 String seq_;
00252 ProfileColumn null_model_;
00253 ProfileColumnList columns_;
00254 };
00255
00257 class DLLEXPORT_OST_SEQ ProfileDB {
00258 public:
00261 void Save(const String& filename) const;
00262
00263 static ProfileDBPtr Load(const String& filename);
00264
00265 void AddProfile(const String& name, ProfileHandlePtr prof);
00266
00267 ProfileHandlePtr GetProfile(const String& name) const;
00268
00269 size_t size() const { return data_.size(); }
00270
00271 std::vector<String> GetNames() const;
00272
00273 private:
00274 std::map<String, ProfileHandlePtr> data_;
00275 };
00276
00277 }}
00278
00279 #endif