Direct Graphical Models  v.1.7.0
SparseDictionary.cpp
#include "SparseDictionary.h"
#include "macroses.h"
#include "DGM/parallel.h"
#include "DGM/random.h"

namespace DirectGraphicalModels { namespace fex
{
// =================================================================================== public
void CSparseDictionary::train(const Mat &X, word nWords, dword batch, unsigned int nIt, float lRate, const std::string &fileName)
{
    const dword nSamples  = X.rows;
    const int   sampleLen = X.cols;

    // Assertions
    DGM_ASSERT_MSG((X.depth() == CV_8U) || (X.depth() == CV_16U), "The depth of argument X is not supported");
    if (batch > nSamples) {
        DGM_WARNING("The batch size %u exceeds the length of the training data %u", batch, nSamples);
        batch = nSamples;
    }

    // 1. Initialize the dictionary D randomly
    if (!m_D.empty()) m_D.release();
    m_D = random::N(cv::Size(sampleLen, nWords), CV_32FC1, 0.0f, 0.3f);

    Mat _W, W;      // weights matrix (Size: nSamples x nWords)
    float cost;

    // 2. Repeat until convergence
    for (unsigned int i = 0; i < nIt; i++) {    // iterations
#ifdef DEBUG_PRINT_INFO
        if (i == 0) printf("\n");
        printf("--- It: %u ---\n", i);
#endif
        // 2.1 Select a random mini-batch of batch patches (2000 by default)
        dword rndRow     = random::u<dword>(0, MAX(1, nSamples - batch) - 1);
        int   normalizer = (X.depth() == CV_8U) ? 255 : 65535;
        Mat _X = X(cv::Rect(0, rndRow, sampleLen, batch));
        _X.convertTo(_X, CV_32FC1, 1.0 / normalizer);

        // 2.2 Initialize W
        parallel::gemm(m_D, _X.t(), 1.0, Mat(), 0.0, _W);   // _W = (D x _X^T)
        W = _W.t();                                         // W  = (D x _X^T)^T
        for (word w = 0; w < W.cols; w++)
            W.col(w) /= norm(m_D.row(w), NORM_L2);

#ifdef DEBUG_PRINT_INFO
        printf("Cost: ");
        cost = calculateCost(_X, m_D, W, SC_LAMBDA, SC_EPSILON, SC_GAMMA);
        printf("%f -> ", cost);
#endif

        // 2.3 Find the W that minimizes J(D, W) for the D found in the previous step
        //     argmin J(W) = ||W x D - X||^{2}_{2} + \lambda||W||_1
        calculate_W(_X, m_D, W, SC_LAMBDA, SC_EPSILON, 800);
#ifdef DEBUG_PRINT_INFO
        cost = calculateCost(_X, m_D, W, SC_LAMBDA, SC_EPSILON, SC_GAMMA);
        printf("%f -> ", cost);
#endif

        // 2.4 Solve for the D that minimizes J(D, W) for the W found in the previous step
        //     argmin J(D) = ||W x D - X||^{2}_{2} + \gamma||D||^{2}_{2}
        calculate_D(_X, m_D, W, SC_GAMMA, 800, lRate);
        cost = calculateCost(_X, m_D, W, SC_LAMBDA, SC_EPSILON, SC_GAMMA);
#ifdef DEBUG_PRINT_INFO
        printf("%f\n", cost);
#endif
        DGM_ASSERT_MSG(!std::isnan(cost), "Training is unstable. Try reducing the learning rate for the dictionary.");

        // 2.5 Save the intermediate dictionary every 5 iterations
        if (!fileName.empty() && (i % 5 == 0))
            save(fileName + std::to_string(i / 5) + ".dic");
    } // i
}

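/*
 * Usage sketch (illustrative only, not part of the library): a typical training
 * pipeline built from the public API in this file. The image path, the block
 * size of 7 and the 256 words are assumptions, not values mandated by DGM:
 *
 *     using namespace DirectGraphicalModels;
 *
 *     Mat img = cv::imread("train.jpg", cv::IMREAD_GRAYSCALE);   // hypothetical 8-bit image
 *     Mat X   = fex::CSparseDictionary::img2data(img, 7);        // all 7x7 patches as row-vectors
 *     fex::CSparseDictionary sparseDictionary;
 *     sparseDictionary.train(X, 256);                            // 256 words; default batch, nIt, lRate
 *     sparseDictionary.save("dictionary.dic");
 */
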
void CSparseDictionary::save(const std::string &fileName) const
{
    FILE *pFile = fopen(fileName.c_str(), "wb");
    DGM_ASSERT_MSG(pFile, "Can't save data to %s", fileName.c_str());
    fwrite(&m_D.rows, sizeof(int), 1, pFile);   // nWords
    fwrite(&m_D.cols, sizeof(int), 1, pFile);   // sampleLen
    fwrite(m_D.data, sizeof(float), m_D.rows * m_D.cols, pFile);
    fclose(pFile);
}

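// On-disk layout produced by save() and consumed by load():
//     int   nWords                     - number of dictionary words (m_D.rows)
//     int   sampleLen                  - length of one word (m_D.cols)
//     float data[nWords * sampleLen]   - the dictionary entries, row-major
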
void CSparseDictionary::load(const std::string &fileName)
{
    int sampleLen;
    int nWords;

    FILE *pFile = fopen(fileName.c_str(), "rb");
    DGM_ASSERT_MSG(pFile, "Can't load data from %s", fileName.c_str());

    fread(&nWords, sizeof(int), 1, pFile);
    fread(&sampleLen, sizeof(int), 1, pFile);

    if (!m_D.empty()) m_D.release();
    m_D = Mat(nWords, sampleLen, CV_32FC1);

    fread(m_D.data, sizeof(float), nWords * sampleLen, pFile);

    fclose(pFile);
}

#ifdef DEBUG_MODE    // --- Debugging ---
Mat CSparseDictionary::TEST_decode(const Mat &X, cv::Size imgSize) const
{
    DGM_ASSERT_MSG(!m_D.empty(), "The dictionary must be trained or loaded before using this function");

    const int blockSize  = getBlockSize();
    const int dataWidth  = imgSize.width  - blockSize + 1;
    const int dataHeight = imgSize.height - blockSize + 1;
    const int sampleType = X.type();
    const int normalizer = (X.depth() == CV_8U) ? 255 : 65535;

    const float lambda  = 5e-5f;    // L1-regularisation parameter (on features)
    const float epsilon = 1e-5f;    // L1-regularisation epsilon: |x| ~ sqrt(x^2 + epsilon)

    Mat res(imgSize, CV_32FC1, Scalar(0));
    Mat cover(imgSize, CV_32FC1, Scalar(0));

#ifdef ENABLE_PPL
    concurrency::parallel_for(0, dataHeight, blockSize, [&](int y) {
#else
    for (int y = 0; y < dataHeight; y += blockSize) {
#endif
        Mat _W, W;
        Mat tmp;
        for (int x = 0; x < dataWidth; x += blockSize) {
            int s = y * dataWidth + x;      // sample index
            Mat sample = X.row(s);          // sample as a row-vector
            sample.convertTo(sample, CV_32FC1, 1.0 / normalizer);

            gemm(m_D, sample.t(), 1.0, Mat(), 0.0, _W);     // _W = (D x sample^T)
            W = _W.t();                                     // W  = (D x sample^T)^T
            for (int w = 0; w < W.cols; w++)
                W.col(w) /= norm(m_D.row(w), NORM_L2);

            // argmin J(W) = ||W x D - X||^{2}_{2} + \lambda||W||_1
            calculate_W(sample, m_D, W, lambda, epsilon, 800);

            gemm(W, m_D, 1.0, Mat(), 0.0, tmp);             // tmp = W x D
            tmp = tmp.reshape(0, blockSize);

            res(Rect(x, y, blockSize, blockSize))   += tmp;
            cover(Rect(x, y, blockSize, blockSize)) += 1.0;
        }
    }
#ifdef ENABLE_PPL
    );
#endif
    res /= cover;
    res.convertTo(res, sampleType, normalizer);
    return res;
}
#endif // --- --------- ---

// =================================================================================== static

Mat CSparseDictionary::img2data(const Mat &img, int blockSize, float varianceThreshold)
{
    DGM_IF_WARNING(blockSize % 2 == 0, "The block size is even");

    varianceThreshold = sqrtf(varianceThreshold);   // the threshold is compared against the standard deviation below

    // Convert the image to one channel
    Mat I;
    if (img.channels() != 1) cvtColor(img, I, cv::ColorConversionCodes::COLOR_RGB2GRAY);
    else img.copyTo(I);

    const int dataHeight = img.rows - blockSize + 1;
    const int dataWidth  = img.cols - blockSize + 1;

    Mat res;
    Mat sample;

    for (int y = 0; y < dataHeight; y++)
        for (int x = 0; x < dataWidth; x++) {
            sample = I(cv::Rect(x, y, blockSize, blockSize)).clone().reshape(0, 1);     // sample as a row-vector

            Scalar stddev;
            meanStdDev(sample, Mat(), stddev);
            float deviation = (float) stddev[0];    // the standard deviation of the patch

            // Keep the sample only if the patch is not (nearly) flat
            if (deviation >= varianceThreshold)
                res.push_back(sample);
        } // x
    return res;
}

Mat CSparseDictionary::data2img(const Mat &X, cv::Size imgSize)
{
    Mat res(imgSize, CV_32FC1, cv::Scalar(0));
    Mat cover(imgSize, CV_32FC1, cv::Scalar(0));

    const int blockSize = static_cast<int>(sqrt(X.cols));
    const int dataWidth = res.cols - blockSize + 1;
//  const int dataHeight = res.rows - blockSize + 1;

    for (int s = 0; s < X.rows; s++) {
        Mat sample = X.row(s);                      // sample as a row-vector
        sample = sample.reshape(0, blockSize);      // square sample - data patch

        int y = s / dataWidth;
        int x = s % dataWidth;

        res(cv::Rect(x, y, blockSize, blockSize))   += sample;
        cover(cv::Rect(x, y, blockSize, blockSize)) += 1.0;
    }
    res /= cover;

    res.convertTo(res, X.type(), 1);
    return res;
}

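/*
 * Round-trip sketch (illustrative only). img2data() slides a blockSize x blockSize
 * window over the image with stride 1; data2img() recovers each patch position from
 * its row index s and averages the overlapping pixels via the coverage matrix. The
 * pair therefore inverts only when the variance threshold is 0, so that no patches
 * were dropped. The conversion to CV_32FC1 matches the float accumulators:
 *
 *     Mat img = cv::imread("test.jpg", cv::IMREAD_GRAYSCALE);    // hypothetical input
 *     Mat X   = fex::CSparseDictionary::img2data(img, 7, 0.0f);  // all 7x7 patches
 *     X.convertTo(X, CV_32FC1);
 *     Mat rec = fex::CSparseDictionary::data2img(X, img.size()); // rec approximates img
 */
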
// =================================================================================== protected

// J(W) = ||W x D - X||^{2}_{2} + \lambda||W||_1
void CSparseDictionary::calculate_W(const Mat &X, const Mat &D, Mat &W, float lambda, float epsilon, unsigned int nIt, float lRate)
{
    // Define the velocity vector
    Mat gradient;
    Mat increment(W.size(), W.type(), cv::Scalar(0));

    for (unsigned int i = 0; i < nIt; i++) {
        float momentum = (i <= 10) ? 0.5f : 0.9f;
        gradient  = calculateGradient(GRAD_W, X, D, W, lambda, epsilon, 0);
        increment = momentum * increment + lRate * (gradient - 2e-4f * W);
        W -= increment;
    } // i
}

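// Both calculate_W() and calculate_D() minimise their objective by gradient descent
// with momentum. With velocity v, momentum coefficient m and learning rate \eta, one
// step for the parameter matrix P (here W or D) reads:
//     v <- m * v + \eta * (\nabla J(P) - 2e-4 * P)
//     P <- P - v
// The momentum coefficient ramps from 0.5 to 0.9 after the first 10 iterations, a
// common warm-up heuristic for momentum-based SGD.
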
// J(D) = ||W x D - X||^{2}_{2} + \gamma||D||^{2}_{2}
void CSparseDictionary::calculate_D(const Mat &X, Mat &D, const Mat &W, float gamma, unsigned int nIt, float lRate)
{
    // Define the velocity vector
    Mat gradient;
    Mat increment(D.size(), D.type(), cv::Scalar(0));

    for (unsigned int i = 0; i < nIt; i++) {
        float momentum = (i <= 10) ? 0.5f : 0.9f;
        gradient  = calculateGradient(GRAD_D, X, D, W, 0, 0, gamma);
        increment = momentum * increment + lRate * (gradient - 2e-4f * D);
        D -= increment;
    } // i
}

Mat CSparseDictionary::calculateGradient(grad_type gType, const Mat &X, const Mat &D, const Mat &W, float lambda, float epsilon, float gamma)
{
    const int nSamples = X.rows;

    Mat temp;
    parallel::gemm(W, D, 2.0f / nSamples, X, -2.0f / nSamples, temp);   // temp = (2.0 / nSamples) * (W x D - X)
    Mat gradient;
    Mat sparsityMatrix;

    switch (gType) {
        case GRAD_W:    // 2 * (W x D - X) x D^T / nSamples + lambda * W / sqrt(W^2 + epsilon)
            multiply(W, W, sparsityMatrix);
            sparsityMatrix += epsilon;
            sqrt(sparsityMatrix, sparsityMatrix);   // sparsityMatrix = sqrt(W^2 + epsilon)
            parallel::gemm(temp, D.t(), 1.0, W / sparsityMatrix, lambda, gradient);
            break;
        case GRAD_D:    // 2 * W^T x (W x D - X) / nSamples + 2 * gamma * D
            parallel::gemm(W.t(), temp, 1.0, D, 2 * gamma, gradient);
            break;
    }

    return gradient;
}

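// Derivation sketch: with the data term J(D, W) = (1 / nSamples) ||W x D - X||^{2}_{2},
// the partial derivatives are
//     dJ/dW = (2 / nSamples) (W x D - X) x D^T
//     dJ/dD = (2 / nSamples) W^T x (W x D - X)
// which is why both branches above reuse temp = (2 / nSamples) * (W x D - X). The
// terms \lambda W / sqrt(W^2 + \epsilon) and 2 \gamma D are the gradients of the
// smoothed L1 penalty \lambda sqrt(W^2 + \epsilon) and of the L2 penalty
// \gamma ||D||^{2}_{2}, respectively.
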
float CSparseDictionary::calculateCost(const Mat &X, const Mat &D, const Mat &W, float lambda, float epsilon, float gamma)
{
    Mat temp;

    parallel::gemm(W, D, 1.0, X, -1.0, temp);           // temp = W x D - X
    reduce(temp, temp, 0, cv::ReduceTypes::REDUCE_AVG);
    multiply(temp, temp, temp);                         // temp = (W x D - X)^2
    float J1 = static_cast<float>(sum(temp)[0]);

    multiply(W, W, temp);                               // temp = W^2
    temp += epsilon;                                    // temp = W^2 + epsilon
    sqrt(temp, temp);                                   // temp = sqrt(W^2 + epsilon)
    reduce(temp, temp, 0, cv::ReduceTypes::REDUCE_AVG);
    float J2 = lambda * static_cast<float>(sum(temp)[0]);

    multiply(D, D, temp);                               // temp = D^2
    float J3 = gamma * static_cast<float>(sum(temp)[0]);

    float cost = J1 + J2 + J3;
    return cost;
}

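// Note on the terms above: J1 first averages the residual (W x D - X) over the
// samples (rows) and then squares and sums it; J2 applies the same per-column
// averaging to the smoothed L1 term sqrt(W^2 + \epsilon); J3 = \gamma * sum(D^2)
// is the L2 penalty on the dictionary. train() uses this value for progress
// reporting and for its NaN stability check.
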
} }