1 #include "SparseDictionary.h" 3 #include "DGM/parallel.h" 4 #include "DGM/random.h" 9 void CSparseDictionary::train(
const Mat &X, word nWords, dword batch,
unsigned int nIt,
float lRate,
const std::string &fileName)
11 const dword nSamples = X.rows;
12 const int sampleLen = X.cols;
15 DGM_ASSERT_MSG((X.depth() == CV_8U) || (X.depth() == CV_16U),
"The depth of argument X is not supported");
16 if (batch > nSamples) {
17 DGM_WARNING(
"The batch number %d exceeds the length of the training data %d", batch, nSamples);
22 if (!
m_D.empty())
m_D.release();
23 m_D =
random::N(cv::Size(sampleLen, nWords), CV_32FC1, 0.0f, 0.3f);
29 for (
unsigned int i = 0; i < nIt; i++) {
30 #ifdef DEBUG_PRINT_INFO 31 if (i == 0) printf(
"\n");
32 printf(
"--- It: %d ---\n", i);
35 dword rndRow = random::u<dword>(0, MAX(1, nSamples - batch) - 1);
36 int normalizer = (X.depth() == CV_8U) ? 255 : 65535;
37 Mat _X = X(cv::Rect(0, rndRow, sampleLen, batch));
38 _X.convertTo(_X, CV_32FC1, 1.0 / normalizer);
43 for (word w = 0; w < W.cols; w++)
44 W.col(w) /= norm(
m_D.row(w), NORM_L2);
46 #ifdef DEBUG_PRINT_INFO 49 printf(
"%f -> ", cost);
55 #ifdef DEBUG_PRINT_INFO 57 printf(
"%f -> ", cost);
64 #ifdef DEBUG_PRINT_INFO 67 DGM_ASSERT_MSG(!std::isnan(cost),
"Training is unstable. Try reducing the learning rate for dictionary.");
70 if (!fileName.empty()) {
71 std::string str = fileName + std::to_string(i / 5);
73 if (i % 5 == 0)
save(str);
80 FILE *pFile = fopen(fileName.c_str(),
"wb");
81 fwrite(&
m_D.rows,
sizeof(
int), 1, pFile);
82 fwrite(&
m_D.cols,
sizeof(
int), 1, pFile);
83 fwrite(
m_D.data,
sizeof(
float),
m_D.rows *
m_D.cols, pFile);
92 FILE *pFile = fopen(fileName.c_str(),
"rb");
93 DGM_ASSERT_MSG(pFile,
"Can't load data from %s", fileName.c_str());
95 fread(&nWords,
sizeof(
int), 1, pFile);
96 fread(&sampleLen,
sizeof(
int), 1, pFile);
98 if (!
m_D.empty())
m_D.release();
99 m_D = Mat(nWords, sampleLen, CV_32FC1);
101 fread(
m_D.data,
sizeof(
float), nWords * sampleLen, pFile);
#ifdef DEBUG_MODE   // --- Debugging ---
// NOTE(review): reconstructed from a mangled extraction. The fragment showed
// BOTH a concurrency::parallel_for header and a serial for-loop over y, which
// is the usual ENABLE_PPL dual-path pattern — confirm the guard macro name
// against the repository. Also fixed the "must me trained" typo in the
// assertion message.
//
/// @brief Decodes data X back into an image of size imgSize with the trained
/// dictionary: for each block-aligned sample the weights W are estimated and
/// the block reconstructed as W x D; overlapping areas are averaged.
Mat CSparseDictionary::TEST_decode(const Mat &X, cv::Size imgSize) const
{
    DGM_ASSERT_MSG(!m_D.empty(), "The dictionary must be trained or loaded before using this function");

    const int blockSize  = getBlockSize();
    const int dataWidth  = imgSize.width  - blockSize + 1;
    const int dataHeight = imgSize.height - blockSize + 1;
    const int sampleType = X.type();
    const int normalizer = (X.depth() == CV_8U) ? 255 : 65535;

    const float lambda  = 5e-5f;    // L1-regularisation parameter (on features)
    const float epsilon = 1e-5f;    // L1-regularisation epsilon

    Mat res(imgSize, CV_32FC1, Scalar(0));      // accumulated reconstruction
    Mat cover(imgSize, CV_32FC1, Scalar(0));    // per-pixel overlap counter

#ifdef ENABLE_PPL
    concurrency::parallel_for(0, dataHeight, blockSize, [&](int y) {
#else
    for (int y = 0; y < dataHeight; y += blockSize) {
#endif
        for (int x = 0; x < dataWidth; x += blockSize) {
            int s = y * dataWidth + x;          // row index of the sample in X
            Mat sample = X.row(s);
            sample.convertTo(sample, CV_32FC1, 1.0 / normalizer);

            // Initial weights: _W = D x sample^T, transposed and normalised per word
            Mat W, _W;
            gemm(m_D, sample.t(), 1.0, Mat(), 0.0, _W);
            W = _W.t();
            for (int w = 0; w < W.cols; w++)
                W.col(w) /= norm(m_D.row(w), NORM_L2);

            // Refine the weights for this single sample
            calculate_W(sample, m_D, W, lambda, epsilon);

            // Reconstruct the block: tmp = W x D, reshaped to blockSize x blockSize
            Mat tmp;
            gemm(W, m_D, 1.0, Mat(), 0.0, tmp);
            tmp = tmp.reshape(0, blockSize);

            res(Rect(x, y, blockSize, blockSize))   += tmp;
            cover(Rect(x, y, blockSize, blockSize)) += 1.0;
        }
    }
#ifdef ENABLE_PPL
    );
#endif
    res /= cover;                               // average overlapping blocks
    res.convertTo(res, sampleType, normalizer); // back to the input depth
    return res;
}
#endif  // --- --------- ---
"The block size is even");
165 varianceThreshold = sqrtf(varianceThreshold);
168 if (img.channels() != 1) cvtColor(img, I, cv::ColorConversionCodes::COLOR_RGB2GRAY);
171 const int dataHeight = img.rows - blockSize + 1;
172 const int dataWidth = img.cols - blockSize + 1;
177 for (
int y = 0; y < dataHeight; y++)
178 for (
int x = 0; x < dataWidth; x++) {
179 sample = I(cv::Rect(x, y, blockSize, blockSize)).clone().reshape(0, 1);
182 meanStdDev(sample, Mat(), stddev);
183 float variance = (float) stddev[0];
186 if (variance >= varianceThreshold)
187 res.push_back(sample);
194 Mat res(imgSize, CV_32FC1, cv::Scalar(0));
195 Mat cover(imgSize, CV_32FC1, cv::Scalar(0));
197 const int blockSize =
static_cast<int>(sqrt(X.cols));
198 const int dataWidth = res.cols - blockSize + 1;
201 for (
int s = 0; s < X.rows; s++) {
202 Mat sample = X.row(s);
203 sample = sample.reshape(0, blockSize);
205 int y = s / dataWidth;
206 int x = s % dataWidth;
208 res(cv::Rect(x, y, blockSize, blockSize)) += sample;
209 cover(cv::Rect(x, y, blockSize, blockSize)) += 1.0;
213 res.convertTo(res, X.type(), 1);
224 Mat incriment(W.size(), W.type(), cv::Scalar(0));
226 for (
unsigned int i = 0; i < nIt; i++) {
227 float momentum = (i <= 10) ? 0.5f : 0.9f;
229 incriment = momentum * incriment + lRate * (gradient - 2e-4f * W);
239 Mat incriment(D.size(), D.type(), cv::Scalar(0));
241 for (
unsigned int i = 0; i < nIt; i++) {
242 float momentum = (i <= 10) ? 0.5f : 0.9f;
244 incriment = momentum * incriment + lRate * (gradient - 2e-4f * D);
// --- NOTE(review): fragment of CSparseDictionary::calculateGradient() ---
// Per the trailer its signature is: static Mat calculateGradient(grad_type
// gType, const Mat &X, const Mat &D, const Mat &W, float lambda, float
// epsilon, float gamma). The extraction lost the signature, the declarations
// of `temp` / `sparsityMatrix` / `gradient`, the branch on gType (only the
// W-gradient branch survives) and the return — left byte-identical below.
251 const int nSamples = X.rows;
// L1 smoothing: sqrt(W.^2 + epsilon) approximates |W| (matches SC_EPSILON,
// ": L1-regularisation epsilon" in the trailer)
260 multiply(W, W, sparsityMatrix);
261 sparsityMatrix += epsilon;
262 sqrt(sparsityMatrix, sparsityMatrix);
// gradient = temp x D^T + lambda * (W / sqrt(W.^2 + epsilon));
// `temp` is presumably the reconstruction residual (W x D - X) — confirm
263 parallel::gemm(temp, D.t(), 1.0, W / sparsityMatrix, lambda, gradient);
// --- NOTE(review): fragment of CSparseDictionary::calculateCost() ---
// Per the trailer its signature is: static float calculateCost(const Mat &X,
// const Mat &D, const Mat &W, float lambda, float epsilon, float gamma).
// The setup of `temp` (presumably the reconstruction residual W x D - X)
// and the return statement were lost in extraction — code left byte-identical.
// J1: reconstruction-error term (column-averaged, then squared and summed)
278 reduce(temp, temp, 0, cv::ReduceTypes::REDUCE_AVG);
279 multiply(temp, temp, temp);
280 float J1 =
static_cast<float>(sum(temp)[0]);
// J2: sparsity penalty on the weights, weighted by lambda.
// NOTE(review): the epsilon-smoothed form sqrt(W.^2 + epsilon) may have been
// lost between original lines 282 and 285 — confirm against the repository.
282 multiply(W, W, temp);
285 reduce(temp, temp, 0, cv::ReduceTypes::REDUCE_AVG);
286 float J2 = lambda *
static_cast<float>(sum(temp)[0]);
// J3: L2 penalty on the dictionary words, weighted by gamma
288 multiply(D, D, temp);
289 float J3 = gamma *
static_cast<float>(sum(temp)[0]);
// Total cost (presumably returned by the lost tail of the function)
291 float cost = J1 + J2 + J3;
static void calculate_W(const Mat &X, const Mat &D, Mat &W, float lambda, float epsilon, unsigned int nIt=800, float lRate=SC_LRATE_W)
Evaluates the weighting coefficients matrix W.
const float SC_LRATE_W
Learning rate (speed) for weights .
const float SC_EPSILON
: L1-regularisation epsilon
static Mat calculateGradient(grad_type gType, const Mat &X, const Mat &D, const Mat &W, float lambda, float epsilon, float gamma)
Calculates the gradient matrices dJ/dD and dJ/dW.
static float calculateCost(const Mat &X, const Mat &D, const Mat &W, float lambda, float epsilon, float gamma)
Calculates the value of the sparse-coding cost function J(D, W).
static Mat data2img(const Mat &X, cv::Size imgSize)
Converts data into an image.
void save(const std::string &fileName) const
Saves dictionary into a binary file.
void load(const std::string &fileName)
Loads dictionary from the file.
static void calculate_D(const Mat &X, Mat &D, const Mat &W, float gamma, unsigned int nIt=800, float lRate=SC_LRATE_D)
Evaluates the dictionary D.
const float SC_GAMMA
: L2-regularisation parameter (on dictionary words)
void gemm(const Mat &A, const Mat &B, float alpha, const Mat &C, float beta, Mat &res)
Fast generalized matrix multiplication.
static Mat img2data(const Mat &img, int blockSize, float varianceThreshold=0.0f)
Converts an image into data X.
Mat m_D
The dictionary : Mat(size: nWords x sampleLen; type: CV_32FC1);.
void train(const Mat &X, word nWords, dword batch=2000, unsigned int nIt=1000, float lRate=SC_LRATE_D, const std::string &fileName=std::string())
Trains the dictionary D.
const float SC_LAMBDA
: L1-regularisation parameter (on features)
int getBlockSize(void) const
Returns the size of the block, i.e. sqrt(sampleLen).
T N(T mu=0, T sigma=1)
Returns a floating-point random number with normal distribution.