Engauge Digitizer  2
GridClassifier.cpp
Go to the documentation of this file.
1 /******************************************************************************************************
2  * (C) 2014 markummitchell@github.com. This file is part of Engauge Digitizer, which is released *
3  * under GNU General Public License version 2 (GPLv2) or (at your option) any later version. See file *
4  * LICENSE or go to gnu.org/licenses for details. Distribution requires prior written permission. *
5  ******************************************************************************************************/
6 
7 #include "ColorFilter.h"
8 #include "Correlation.h"
9 #include "DocumentModelCoords.h"
10 #include "EngaugeAssert.h"
11 #include "gnuplot.h"
12 #include "GridClassifier.h"
13 #include <iostream>
14 #include "Logger.h"
15 #include <QDebug>
16 #include <QFile>
17 #include <QImage>
18 #include <qmath.h>
19 #include "QtToString.h"
20 #include "Transformation.h"
21 
22 int GridClassifier::NUM_PIXELS_PER_HISTOGRAM_BINS = 1;
23 double GridClassifier::PEAK_HALF_WIDTH = 4;
24 int GridClassifier::MIN_STEP_PIXELS = qFloor (4 * GridClassifier::PEAK_HALF_WIDTH); // Step includes down ramp, flat part, up ramp
25 const QString GNUPLOT_DELIMITER ("\t");
26 
27 // We set up the picket fence with binStart arbitrarily set close to zero. Peak is
28 // not exactly at zero since we want to include the left side of the first peak.
29 int GridClassifier::BIN_START_UNSHIFTED = qFloor (GridClassifier::PEAK_HALF_WIDTH);
30 
31 using namespace std;
32 
34 {
35 }
36 
37 int GridClassifier::binFromCoordinate (double coord,
38  double coordMin,
39  double coordMax) const
40 {
41  ENGAUGE_ASSERT (coordMin < coordMax);
42  ENGAUGE_ASSERT (coordMin <= coord);
43  ENGAUGE_ASSERT (coord <= coordMax);
44 
45  int bin = qFloor (0.5 + (m_numHistogramBins - 1.0) * (coord - coordMin) / (coordMax - coordMin));
46 
47  return bin;
48 }
49 
50 void GridClassifier::classify (bool isGnuplot,
51  const QPixmap &originalPixmap,
52  const Transformation &transformation,
53  int &countX,
54  double &startX,
55  double &stepX,
56  int &countY,
57  double &startY,
58  double &stepY)
59 {
60  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::classify";
61 
62  QImage image = originalPixmap.toImage ();
63 
64  m_numHistogramBins = image.width() / NUM_PIXELS_PER_HISTOGRAM_BINS;
65  ENGAUGE_ASSERT (m_numHistogramBins > 1);
66 
67  double xMin, xMax, yMin, yMax;
68  double binStartX, binStepX, binStartY, binStepY;
69 
70  m_binsX = new double [unsigned (m_numHistogramBins)];
71  m_binsY = new double [unsigned (m_numHistogramBins)];
72 
73  computeGraphCoordinateLimits (image,
74  transformation,
75  xMin,
76  xMax,
77  yMin,
78  yMax);
79  initializeHistogramBins ();
80  populateHistogramBins (image,
81  transformation,
82  xMin,
83  xMax,
84  yMin,
85  yMax);
86  searchStartStepSpace (isGnuplot,
87  m_binsX,
88  "x",
89  xMin,
90  xMax,
91  startX,
92  stepX,
93  binStartX,
94  binStepX);
95  searchStartStepSpace (isGnuplot,
96  m_binsY,
97  "y",
98  yMin,
99  yMax,
100  startY,
101  stepY,
102  binStartY,
103  binStepY);
104  searchCountSpace (m_binsX,
105  binStartX,
106  binStepX,
107  countX);
108  searchCountSpace (m_binsY,
109  binStartY,
110  binStepY,
111  countY);
112 
113  delete [] m_binsX;
114  delete [] m_binsY;
115 }
116 
117 void GridClassifier::computeGraphCoordinateLimits (const QImage &image,
118  const Transformation &transformation,
119  double &xMin,
120  double &xMax,
121  double &yMin,
122  double &yMax)
123 {
124  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::computeGraphCoordinateLimits";
125 
126  // Project every pixel onto the x axis, and onto the y axis. One set of histogram bins will be
127  // set up along each of the axes. The range of bins will encompass every pixel in the image, and no more.
128 
129  QPointF posGraphTL, posGraphTR, posGraphBL, posGraphBR;
130  transformation.transformScreenToRawGraph (QPointF (0, 0) , posGraphTL);
131  transformation.transformScreenToRawGraph (QPointF (image.width(), 0) , posGraphTR);
132  transformation.transformScreenToRawGraph (QPointF (0, image.height()) , posGraphBL);
133  transformation.transformScreenToRawGraph (QPointF (image.width(), image.height()), posGraphBR);
134 
135  // Compute x and y ranges for setting up the histogram bins
136  if (transformation.modelCoords().coordsType() == COORDS_TYPE_CARTESIAN) {
137 
138  // For affine cartesian coordinates, we only need to look at the screen corners
139  xMin = qMin (qMin (qMin (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
140  xMax = qMax (qMax (qMax (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
141  yMin = qMin (qMin (qMin (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
142  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
143 
144  } else {
145 
146  // For affine polar coordinates, easiest approach is to assume the full circle
147  xMin = 0.0;
148  xMax = transformation.modelCoords().thetaPeriod();
149  yMin = transformation.modelCoords().originRadius();
150  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
151 
152  }
153 
154  ENGAUGE_ASSERT (xMin < xMax);
155  ENGAUGE_ASSERT (yMin < yMax);
156 }
157 
158 double GridClassifier::coordinateFromBin (int bin,
159  double coordMin,
160  double coordMax) const
161 {
162  ENGAUGE_ASSERT (1 < m_numHistogramBins);
163  ENGAUGE_ASSERT (coordMin < coordMax);
164 
165  return coordMin + (coordMax - coordMin) * double (bin) / (double (m_numHistogramBins) - 1.0);
166 }
167 
168 void GridClassifier::copyVectorToVector (const double from [],
169  double to []) const
170 {
171  for (int bin = 0; bin < m_numHistogramBins; bin++) {
172  to [bin] = from [bin];
173  }
174 }
175 
176 void GridClassifier::dumpGnuplotCoordinate (const QString &coordinateLabel,
177  double corr,
178  const double *bins,
179  double coordinateMin,
180  double coordinateMax,
181  int binStart,
182  int binStep) const
183 {
184  QString filename = QString ("gridclassifier_%1_corr%2_startMax%3_stepMax%4.gnuplot")
185  .arg (coordinateLabel)
186  .arg (corr, 8, 'f', 3, '0')
187  .arg (binStart)
188  .arg (binStep);
189 
190  cout << GNUPLOT_FILE_MESSAGE.toLatin1().data() << filename.toLatin1().data() << "\n";
191 
192  QFile fileDump (filename);
193  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
194  QTextStream strDump (&fileDump);
195 
196  int bin;
197 
198  // For consistent scaling, get the max bin count
199  int binCountMax = 0;
200  for (bin = 0; bin < m_numHistogramBins; bin++) {
201  if (bins [bin] > binCountMax) {
202  binCountMax = qMax (signed (binCountMax),
203  signed (bins [bin]));
204  }
205  }
206 
207  // Get picket fence
208  double *picketFence = new double [unsigned (m_numHistogramBins)];
209  loadPicketFence (picketFence,
210  binStart,
211  binStep,
212  0,
213  false);
214 
215  // Header
216  strDump << "bin"
217  << GNUPLOT_DELIMITER << "coordinate"
218  << GNUPLOT_DELIMITER << "binCount"
219  << GNUPLOT_DELIMITER << "startStep"
220  << GNUPLOT_DELIMITER << "picketFence" << "\n";
221 
222  // Data, with one row per coordinate value
223  for (bin = 0; bin < m_numHistogramBins; bin++) {
224 
225  double coordinate = coordinateFromBin (bin,
226  coordinateMin,
227  coordinateMax);
228  double startStepValue (((bin - binStart) % binStep == 0) ? 1 : 0);
229  strDump << bin
230  << GNUPLOT_DELIMITER << coordinate
231  << GNUPLOT_DELIMITER << bins [bin]
232  << GNUPLOT_DELIMITER << binCountMax * startStepValue
233  << GNUPLOT_DELIMITER << binCountMax * picketFence [bin] << "\n";
234  }
235 
236  delete [] picketFence;
237 }
238 
239 void GridClassifier::dumpGnuplotCorrelations (const QString &coordinateLabel,
240  double valueMin,
241  double valueMax,
242  const double signalA [],
243  const double signalB [],
244  const double correlations [])
245 {
246  QString filename = QString ("gridclassifier_%1_correlations.gnuplot")
247  .arg (coordinateLabel);
248 
249  cout << GNUPLOT_FILE_MESSAGE.toLatin1().data() << filename.toLatin1().data() << "\n";
250 
251  QFile fileDump (filename);
252  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
253  QTextStream strDump (&fileDump);
254 
255  int bin;
256 
257  // Compute max values so curves can be normalized to the same heights
258  double signalAMax = 1, signalBMax = 1, correlationsMax = 1;
259  for (bin = 0; bin < m_numHistogramBins; bin++) {
260  if (bin == 0 || signalA [bin] > signalAMax) {
261  signalAMax = signalA [bin];
262  }
263  if (bin == 0 || signalB [bin] > signalBMax) {
264  signalBMax = signalB [bin];
265  }
266  if (bin == 0 || correlations [bin] > correlationsMax) {
267  correlationsMax = correlations [bin];
268  }
269  }
270 
271  // Prevent divide by zero error
272  if (qAbs (signalAMax) <= 0) {
273  signalAMax = 1.0;
274  }
275  if (qAbs (signalBMax) <= 0) {
276  signalBMax = 1.0;
277  }
278 
279  // Output normalized curves
280  for (int bin = 0; bin < m_numHistogramBins; bin++) {
281 
282  strDump << coordinateFromBin (bin,
283  valueMin,
284  valueMax)
285  << GNUPLOT_DELIMITER << signalA [bin] / signalAMax
286  << GNUPLOT_DELIMITER << signalB [bin] / signalBMax
287  << GNUPLOT_DELIMITER << correlations [bin] / correlationsMax << "\n";
288  }
289 }
290 
291 void GridClassifier::initializeHistogramBins ()
292 {
293  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::initializeHistogramBins";
294 
295  for (int bin = 0; bin < m_numHistogramBins; bin++) {
296  m_binsX [bin] = 0;
297  m_binsY [bin] = 0;
298  }
299 }
300 
301 void GridClassifier::loadPicketFence (double picketFence [],
302  int binStart,
303  int binStep,
304  int count,
305  bool isCount) const
306 {
307  const double PEAK_HEIGHT = 1.0; // Arbitrary height, since normalization will counteract any change to this value
308 
309  // Count argument is optional. Note that binStart already excludes PEAK_HALF_WIDTH bins at start,
310  // and we also exclude PEAK_HALF_WIDTH bins at the end
311  ENGAUGE_ASSERT (binStart >= PEAK_HALF_WIDTH);
312  ENGAUGE_ASSERT (binStep != 0);
313  if (!isCount) {
314  count = qFloor (1 + (m_numHistogramBins - binStart - PEAK_HALF_WIDTH) / binStep);
315  }
316 
317  // Bins that we need to worry about
318  int binStartMinusHalfWidth = qFloor (binStart - PEAK_HALF_WIDTH);
319  int binStopPlusHalfWidth = qFloor ((binStart + (count - 1) * binStep) + PEAK_HALF_WIDTH);
320 
321  // To normalize, we compute the area under the picket fence. Constraint is
322  // areaUnnormalized + NUM_HISTOGRAM_BINS * normalizationOffset = 0
323  double areaUnnormalized = count * PEAK_HEIGHT * PEAK_HALF_WIDTH;
324  double normalizationOffset = -1.0 * areaUnnormalized / m_numHistogramBins;
325 
326  for (int bin = 0; bin < m_numHistogramBins; bin++) {
327 
328  // Outside of the peaks, bin is small negative so correlation with unit function is zero. In other
329  // words, the function is normalized
330  picketFence [bin] = normalizationOffset;
331 
332  if ((binStartMinusHalfWidth <= bin) &&
333  (bin <= binStopPlusHalfWidth)) {
334 
335  // Closest peak
336  int ordinalClosestPeak = qFloor ((bin - binStart + binStep / 2) / binStep);
337  int binClosestPeak = binStart + ordinalClosestPeak * binStep;
338 
339  // Distance from closest peak is used to define an isosceles triangle
340  int distanceToClosestPeak = qAbs (bin - binClosestPeak);
341 
342  if (distanceToClosestPeak < PEAK_HALF_WIDTH) {
343 
344  // Map 0 to PEAK_HALF_WIDTH to 1 to 0
345  picketFence [bin] = 1.0 - double (distanceToClosestPeak) / PEAK_HALF_WIDTH + normalizationOffset;
346 
347  }
348  }
349  }
350 }
351 
352 void GridClassifier::populateHistogramBins (const QImage &image,
353  const Transformation &transformation,
354  double xMin,
355  double xMax,
356  double yMin,
357  double yMax)
358 {
359  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::populateHistogramBins";
360 
361  ColorFilter filter;
362  QRgb rgbBackground = filter.marginColor (&image);
363 
364  for (int x = 0; x < image.width(); x++) {
365  for (int y = 0; y < image.height(); y++) {
366 
367  QColor pixel = image.pixel (x, y);
368 
369  // Skip pixels with background color
370  if (!filter.colorCompare (rgbBackground,
371  pixel.rgb ())) {
372 
373  // Add this pixel to histograms
374  QPointF posGraph;
375  transformation.transformScreenToRawGraph (QPointF (x, y), posGraph);
376 
377  if (transformation.modelCoords().coordsType() == COORDS_TYPE_POLAR) {
378 
379  // If out of the 0 to period range, the theta value must shifted by the period to get into that range
380  while (posGraph.x() < xMin) {
381  posGraph.setX (posGraph.x() + transformation.modelCoords().thetaPeriod());
382  }
383  while (posGraph.x() > xMax) {
384  posGraph.setX (posGraph.x() - transformation.modelCoords().thetaPeriod());
385  }
386  }
387 
388  int binX = binFromCoordinate (posGraph.x(), xMin, xMax);
389  int binY = binFromCoordinate (posGraph.y(), yMin, yMax);
390 
391  ENGAUGE_ASSERT (0 <= binX);
392  ENGAUGE_ASSERT (0 <= binY);
393  ENGAUGE_ASSERT (binX < m_numHistogramBins);
394  ENGAUGE_ASSERT (binY < m_numHistogramBins);
395 
396  // Roundoff error in log scaling may let bin go just outside legal range
397  binX = qMin (binX, m_numHistogramBins - 1);
398  binY = qMin (binY, m_numHistogramBins - 1);
399 
400  ++m_binsX [binX];
401  ++m_binsY [binY];
402  }
403  }
404  }
405 }
406 
407 void GridClassifier::searchCountSpace (double bins [],
408  double binStart,
409  double binStep,
410  int &countMax)
411 {
412  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchCountSpace"
413  << " start=" << binStart
414  << " step=" << binStep;
415 
416  // Loop though the space of possible counts
417  Correlation correlation (m_numHistogramBins);
418  double *picketFence = new double [unsigned (m_numHistogramBins)];
419  double corr, corrMax = 0;
420  bool isFirst = true;
421  int countStop = qFloor (1 + (m_numHistogramBins - binStart) / binStep);
422  for (int count = 2; count <= countStop; count++) {
423 
424  loadPicketFence (picketFence,
425  qFloor (binStart),
426  qFloor (binStep),
427  count,
428  true);
429 
430  correlation.correlateWithoutShift (m_numHistogramBins,
431  bins,
432  picketFence,
433  corr);
434  if (isFirst || (corr > corrMax)) {
435  countMax = count;
436  corrMax = corr;
437  }
438 
439  isFirst = false;
440  }
441 
442  delete [] picketFence;
443 }
444 
445 void GridClassifier::searchStartStepSpace (bool isGnuplot,
446  double bins [],
447  const QString &coordinateLabel,
448  double valueMin,
449  double valueMax,
450  double &start,
451  double &step,
452  double &binStartMax,
453  double &binStepMax)
454 {
455  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchStartStepSpace";
456 
457  // Correlations are tracked for logging
458  double *signalA = new double [unsigned (m_numHistogramBins)];
459  double *signalB = new double [unsigned (m_numHistogramBins)];
460  double *correlations = new double [unsigned (m_numHistogramBins)];
461  double *correlationsMax = new double [unsigned (m_numHistogramBins)];
462 
463  // Loop though the space of possible gridlines using the independent variables (start,step).
464  Correlation correlation (m_numHistogramBins);
465  double *picketFence = new double [unsigned (m_numHistogramBins)];
466  int binStart;
467  double corr = 0, corrMax = 0;
468  bool isFirst = true;
469 
470  // We do not explicitly search(=loop) through binStart here, since Correlation::correlateWithShift will take
471  // care of that for us
472 
473  // Step search starts out small, and stops at value that gives count substantially greater than 2. Freakishly small
474  // images need to have MIN_STEP_PIXELS overridden so the loop iterates at least once
475  binStartMax = BIN_START_UNSHIFTED + 1; // In case search below ever fails
476  binStepMax = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); // In case search below ever fails
477  for (int binStep = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); binStep < m_numHistogramBins / 4; binStep++) {
478 
479  loadPicketFence (picketFence,
480  BIN_START_UNSHIFTED,
481  binStep,
482  qFloor (PEAK_HALF_WIDTH),
483  false);
484 
485  correlation.correlateWithShift (m_numHistogramBins,
486  bins,
487  picketFence,
488  binStart,
489  corr,
490  correlations);
491  if (isFirst || (corr > corrMax)) {
492 
493  int binStartMaxNext = binStart + BIN_START_UNSHIFTED + 1; // Compensate for the shift performed inside loadPicketFence
494 
495  // Make sure binStartMax never goes out of bounds
496  if (binStartMaxNext < m_numHistogramBins) {
497 
498  binStartMax = binStartMaxNext;
499  binStepMax = binStep;
500  corrMax = corr;
501  copyVectorToVector (bins, signalA);
502  copyVectorToVector (picketFence, signalB);
503  copyVectorToVector (correlations, correlationsMax);
504 
505  // Output a gnuplot file. We should see the correlation values consistently increasing
506  if (isGnuplot) {
507 
508  dumpGnuplotCoordinate(coordinateLabel,
509  corr,
510  bins,
511  valueMin,
512  valueMax,
513  binStart,
514  binStep);
515  }
516  }
517  }
518 
519  isFirst = false;
520  }
521 
522  // Convert from bins back to graph coordinates
523  start = coordinateFromBin (qFloor (binStartMax),
524  valueMin,
525  valueMax);
526  if (binStartMax + binStepMax < m_numHistogramBins) {
527 
528  // Normal case where a reasonable step value is being calculated
529  double next = coordinateFromBin (qFloor (binStartMax + binStepMax),
530  valueMin,
531  valueMax);
532  step = next - start;
533  } else {
534 
535  // Pathological case where step jumps to outside the legal range. We bring the step back into range
536  double next = coordinateFromBin (m_numHistogramBins - 1,
537  valueMin,
538  valueMax);
539  step = next - start;
540  }
541 
542  if (isGnuplot) {
543  dumpGnuplotCorrelations (coordinateLabel,
544  valueMin,
545  valueMax,
546  signalA,
547  signalB,
548  correlationsMax);
549  }
550 
551  delete [] signalA;
552  delete [] signalB;
553  delete [] correlations;
554  delete [] correlationsMax;
555  delete [] picketFence;
556 }
QRgb marginColor(const QImage *image) const
Identify the margin color of the image, which is defined as the most common color in the four margins...
Definition: ColorFilter.cpp:78
bool colorCompare(QRgb rgb1, QRgb rgb2) const
See if the two color values are close enough to be considered to be the same.
Definition: ColorFilter.cpp:30
double thetaPeriod() const
Return the period of the theta value for polar coordinates, consistent with CoordThetaUnits.
#define LOG4CPP_INFO_S(logger)
Definition: convenience.h:18
Fast cross correlation between two functions.
Definition: Correlation.h:14
Class for filtering image to remove unimportant information.
Definition: ColorFilter.h:20
DocumentModelCoords modelCoords() const
Get method for DocumentModelCoords.
const QString GNUPLOT_FILE_MESSAGE
Affine transformation between screen and graph coordinates, based on digitized axis points...
GridClassifier()
Single constructor.
log4cpp::Category * mainCat
Definition: Logger.cpp:14
CoordsType coordsType() const
Get method for coordinates type.
void transformScreenToRawGraph(const QPointF &coordScreen, QPointF &coordGraph) const
Transform from cartesian pixel screen coordinates to cartesian/polar graph coordinates.
double originRadius() const
Get method for origin radius in polar mode.
const QString GNUPLOT_DELIMITER("\)
void classify(bool isGnuplot, const QPixmap &originalPixmap, const Transformation &transformation, int &countX, double &startX, double &stepX, int &countY, double &startY, double &stepY)
Classify the specified image, and return the most probably x and y grid settings. ...
#define ENGAUGE_ASSERT(cond)
Drop in replacement for Q_ASSERT if defined(QT_NO_DEBUG) && !defined(QT_FORCE_ASSERTS) define ENGAUGE...
Definition: EngaugeAssert.h:20