OpenStructure
Loading...
Searching...
No Matches
hmm_pseudo_counts.hh
Go to the documentation of this file.
1//------------------------------------------------------------------------------
2// This file is part of the OpenStructure project <www.openstructure.org>
3//
4// Copyright (C) 2008-2020 by the OpenStructure authors
5//
6// This library is free software; you can redistribute it and/or modify it under
7// the terms of the GNU Lesser General Public License as published by the Free
8// Software Foundation; either version 3.0 of the License, or (at your option)
9// any later version.
10// This library is distributed in the hope that it will be useful, but WITHOUT
11// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13// details.
14//
15// You should have received a copy of the GNU Lesser General Public License
16// along with this library; if not, write to the Free Software Foundation, Inc.,
17// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18//------------------------------------------------------------------------------
19
20#ifndef OST_SEQ_ALG_HMM_PSEUDO_COUNTS_HH
21#define OST_SEQ_ALG_HMM_PSEUDO_COUNTS_HH
22
24
25namespace ost{ namespace seq{ namespace alg{
26
27
28class ContextProfileDB;
29typedef boost::shared_ptr<ContextProfileDB> ContextProfileDBPtr;
30
31
33
34public:
35
36ContextProfile(int length): length_(length),
37 data_(ContextProfile::DataSize(length), 0.0) { }
38
39ContextProfile(int length, Real* data): length_(length),
40 data_(ContextProfile::DataSize(length), 0.0) {
41 memcpy(&data_[0], data, data_.size() * sizeof(Real));
42}
43
44void SetWeight(int pos, char olc, Real weight) {
45 if(pos >= length_) {
46 throw Error("Tried to access invalid pos in ContextProfile");
47 }
48 int olc_idx = ProfileColumn::GetIndex(olc);
49 if(olc_idx != -1) {
50 data_[pos*20 + olc_idx] = weight;
51 } else {
52 throw Error("Invalid one letter code in ContextProfile");
53 }
54}
55
56void SetPseudoCount(char olc, Real count) {
57 int olc_idx = ProfileColumn::GetIndex(olc);
58 if(olc_idx != -1) {
59 data_[length_*20 + olc_idx] = count;
60 } else {
61 throw Error("Invalid one letter code in ContextProfile");
62 }
63}
64
65void SetBias(Real bias) { data_.back() = bias; }
66
67const Real* GetWeights(int pos) const{
68 if(pos >= length_) {
69 throw Error("Tried to access invalid pos in ContextProfile");
70 }
71 return &data_[pos*20];
72}
73
74Real GetWeight(int pos, char olc) {
75 if(pos >= length_) {
76 throw Error("Tried to access invalid pos in ContextProfile");
77 }
78 int olc_idx = ProfileColumn::GetIndex(olc);
79 if(olc_idx != -1) {
80 return data_[pos*20 + olc_idx];
81 } else {
82 throw Error("Invalid one letter code in ContextProfile");
83 }
84}
85
86const Real* GetPseudoCounts() const { return &data_[length_*20]; }
87
89 int olc_idx = ProfileColumn::GetIndex(olc);
90 if(olc_idx != -1) {
91 return data_[length_*20 + olc_idx];
92 } else {
93 throw Error("Invalid one letter code in ContextProfile");
94 }
95}
96
97Real GetBias() const { return data_.back(); }
98
99const std::vector<Real>& GetData() const { return data_; }
100
101int GetLength() const { return length_; }
102
103static int DataSize(int length) { return (length+1)*20+1; }
104
105private:
106int length_;
107// data organisation:
108// context weights in chunks of 20 (length_ chunks)
109// followed by 20 elements representing the context pseudo counts
110// last element is the bias
111std::vector<Real> data_;
112};
113
114
116
117public:
118
120
121void Save(const String& filename) const;
122
123static ContextProfileDBPtr Load(const String& filename);
124
125static ContextProfileDBPtr FromCRF(const String& filename);
126
127void AddProfile(const ContextProfile& profile){
128
129 // enforce same length for all profiles
130 if(!profiles_.empty()) {
131 if(profile.GetLength() != profiles_[0].GetLength()) {
132 throw Error("Require all profiles to be of same length");
133 }
134 }
135 profiles_.push_back(profile);
136}
137
138const ContextProfile& operator [](int idx) const {
139 return profiles_[idx];
140}
141
142const ContextProfile& at(int idx) const {
143 return profiles_.at(idx);
144}
145
146size_t size() const {
147 return profiles_.size();
148}
149
150size_t profile_length() const {
151 if(profiles_.empty()) {
152 throw Error("DB must contain profiles to get profile length");
153 }
154 return profiles_[0].GetLength();
155}
156
157private:
158std::vector<ContextProfile> profiles_;
159};
160
162 Real gapb = 1.0, Real gabd = 0.15,
163 Real gape = 1.0);
164
166 Real a = 1.0, Real b = 1.5, Real c = 1.0);
167
169 const ContextProfileDB& db,
170 Real a = 0.9, Real b = 4.0, Real c = 1.0);
171
173
174}}} // ns
175
176#endif
static int GetIndex(char ch)
Translate one-letter-code to index (0-indexing).
Provides a profile for a sequence.
const ContextProfile & operator[](int idx) const
void AddProfile(const ContextProfile &profile)
static ContextProfileDBPtr FromCRF(const String &filename)
void Save(const String &filename) const
const ContextProfile & at(int idx) const
static ContextProfileDBPtr Load(const String &filename)
ContextProfile(int length, Real *data)
void SetWeight(int pos, char olc, Real weight)
Real GetWeight(int pos, char olc)
const std::vector< Real > & GetData() const
void SetPseudoCount(char olc, Real count)
const Real * GetPseudoCounts() const
const Real * GetWeights(int pos) const
static int DataSize(int length)
float Real
Definition base.hh:44
std::string String
Definition base.hh:54
void AddNullPseudoCounts(ost::seq::ProfileHandle &profile)
void AddTransitionPseudoCounts(ost::seq::ProfileHandle &profile, Real gapb=1.0, Real gabd=0.15, Real gape=1.0)
void AddAAPseudoCounts(ost::seq::ProfileHandle &profile, Real a=1.0, Real b=1.5, Real c=1.0)
boost::shared_ptr< ContextProfileDB > ContextProfileDBPtr
Definition base.dox:1