00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef OST_SEQ_PROFILE_HANDLE_HH
00025 #define OST_SEQ_PROFILE_HANDLE_HH
00026
00027 #include <ost/base.hh>
00028 #include <ost/stdint.hh>
00029 #include <ost/message.hh>
00030 #include <ost/seq/module_config.hh>
00031
00032 #include <string.h>
00033 #include <vector>
00034 #include <map>
00035 #include <fstream>
00036 #include <boost/shared_ptr.hpp>
00037
00038 namespace ost { namespace seq {
00039
00040 class ProfileHandle;
00041 class ProfileColumn;
00042 class ProfileDB;
00043 typedef boost::shared_ptr<ProfileHandle> ProfileHandlePtr;
00044 typedef boost::shared_ptr<ProfileDB> ProfileDBPtr;
00045 typedef std::vector<ProfileColumn> ProfileColumnList;
00046
00051 class DLLEXPORT_OST_SEQ ProfileColumn {
00052 public:
00053
00055 ProfileColumn() {
00056 memset(freq_, 0, sizeof(freq_));
00057 }
00058
00059 ProfileColumn(const ProfileColumn& rhs) {
00060 memcpy(freq_, rhs.freq_, sizeof(freq_));
00061 }
00062 ProfileColumn& operator= (const ProfileColumn& rhs) {
00063 memcpy(freq_, rhs.freq_, sizeof(freq_));
00064 return *this;
00065 }
00066
00067 static ProfileColumn BLOSUMNullModel();
00068
00070 static int GetIndex(char ch);
00071
00072 Real GetFreq(char ch) const;
00073
00074 void SetFreq(char ch, Real freq);
00075
00076 bool operator==(const ProfileColumn& rhs) const {
00077 return !memcmp(freq_, rhs.freq_, sizeof(freq_));
00078 }
00079 bool operator!=(const ProfileColumn& rhs) const { return !(rhs == (*this)); }
00080
00081 Real* freqs_begin() { return freq_; }
00082 Real* freqs_end() { return freq_ + 20; }
00083 const Real* freqs_begin() const { return freq_; }
00084 const Real* freqs_end() const { return freq_ + 20; }
00085
00087 Real GetEntropy() const;
00088
00089
00090
00091
00092 friend std::ofstream& operator<<(std::ofstream& os, ProfileColumn& col) {
00093 int16_t data[20];
00094
00095 for (uint i = 0; i < 20; ++i) {
00096 data[i] = static_cast<int16_t>(col.freq_[i]*10000);
00097 }
00098 os.write(reinterpret_cast<char*>(data), sizeof(data));
00099 return os;
00100 }
00101
00102 friend std::ifstream& operator>>(std::ifstream& is, ProfileColumn& col) {
00103 int16_t data[20];
00104 is.read(reinterpret_cast<char*>(data), sizeof(data));
00105
00106 for (uint i = 0; i < 20; ++i) {
00107 col.freq_[i] = data[i] * 0.0001;
00108 }
00109 return is;
00110 }
00111
00112 private:
00113 Real freq_[20];
00114 };
00115
00123 class DLLEXPORT_OST_SEQ ProfileHandle {
00124 public:
00126 ProfileHandle() {}
00127
00128
00129
00130 const std::vector<ProfileColumn>& GetColumns() const { return columns_; }
00131
00132 const ProfileColumn& GetNullModel() const { return null_model_; }
00133
00134 void SetNullModel(const ProfileColumn& null_model) { null_model_ = null_model; }
00135
00136 String GetSequence() const { return seq_; }
00137
00138 void SetSequence(const String& seq) { seq_ = seq; }
00139
00143 ProfileHandlePtr Extract(uint from, uint to);
00144
00146 Real GetAverageEntropy() const;
00147
00148
00149
00150 void clear() { seq_ = ""; columns_.clear(); }
00151
00152 size_t size() const { return columns_.size(); }
00153
00154 bool empty() const { return columns_.empty(); }
00155
00156 void push_back(const ProfileColumn& c) { columns_.push_back(c); }
00157
00158 ProfileColumn& operator[](size_t index) { return columns_[index]; }
00159
00160 const ProfileColumn& operator[](size_t index) const { return columns_[index]; }
00161
00162 ProfileColumn& at(size_t index) { return columns_.at(index); }
00163
00164 const ProfileColumn& at(size_t index) const { return columns_.at(index); }
00165
00166 bool operator==(const ProfileHandle& other) const {
00167 return seq_ == other.seq_ &&
00168 columns_ == other.columns_ &&
00169 null_model_ == other.null_model_;
00170 }
00171
00172 bool operator!=(const ProfileHandle& other) const { return !(other == (*this)); }
00173
00174 ProfileColumnList::const_iterator columns_end() const { return columns_.end(); }
00175 ProfileColumnList::iterator columns_end() { return columns_.end(); }
00176 ProfileColumnList::const_iterator columns_begin() const { return columns_.begin(); }
00177 ProfileColumnList::iterator columns_begin() { return columns_.begin(); }
00178
00179
00180
00181
00182 friend std::ofstream& operator<<(std::ofstream& os, ProfileHandle& prof) {
00183
00184 os << prof.null_model_;
00185
00186 uint32_t size = prof.size();
00187 os.write(reinterpret_cast<char*>(&size), sizeof(uint32_t));
00188
00189 if (prof.seq_.length() != size) {
00190 throw Error("ProfileHandle - Inconsistency between number of columns and "
00191 " seq. length.");
00192 }
00193 os.write(prof.seq_.c_str(), size);
00194
00195 for(uint i = 0; i < size; ++i){
00196 os << prof.columns_[i];
00197 }
00198
00199 return os;
00200 }
00201
00202 friend std::ifstream& operator>>(std::ifstream& is, ProfileHandle& prof) {
00203
00204 is >> prof.null_model_;
00205
00206 uint32_t size;
00207 is.read(reinterpret_cast<char*>(&size), sizeof(uint32_t));
00208
00209 std::vector<char> tmp_buf(size);
00210 is.read(&tmp_buf[0], size);
00211 prof.seq_.assign(&tmp_buf[0], size);
00212
00213 prof.columns_.resize(size);
00214 for(uint i = 0; i < size; ++i){
00215 is >> prof.columns_[i];
00216 }
00217
00218 return is;
00219 }
00220
00221 private:
00222 String seq_;
00223 ProfileColumn null_model_;
00224 ProfileColumnList columns_;
00225 };
00226
00228 class DLLEXPORT_OST_SEQ ProfileDB {
00229 public:
00232 void Save(const String& filename) const;
00233
00234 static ProfileDBPtr Load(const String& filename);
00235
00236 void AddProfile(const String& name, ProfileHandlePtr prof);
00237
00238 ProfileHandlePtr GetProfile(const String& name) const;
00239
00240 size_t size() const { return data_.size(); }
00241
00242 std::vector<String> GetNames() const;
00243
00244 private:
00245 std::map<String, ProfileHandlePtr> data_;
00246 };
00247
00248 }}
00249
00250 #endif