I am building a program to search for, identify, and mark the location of a graph of integer values in a simple two-dimensional array.
I hand traced the first example and it appeared to work out accurately. With that said I either wrote code that doesn’t do what I think it does or my hand tracing was inaccurate.
I think my code is close and I’m looking for some debugging assistance and any thoughts on general style, etc.
Eventually this algorithm will be modified to find graphs of the pixels of characters for OCR. I simply want to prove that my algorithm implementation is accurate prior to complicating things with the code for processing images.
The input array might look like this:
0 0 0 0 0 0
0 0 0 0 0 0
0 0 1 1 0 0
0 0 1 1 0 0
0 0 0 0 0 0
0 0 0 0 0 0
and the expected outcome is this:
3 3 3 3 3 3
3 0 0 0 0 3
3 0 2 2 0 3
3 0 2 2 0 3
3 0 0 0 0 3
3 3 3 3 3 3
another similar possibility is:
in:
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 1 1 0 0 0 0 0 0 0
0 0 0 1 1 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 1 1 1 1 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
out:
0 3 3 3 3 3 3 0 0 0 0 0
0 3 0 0 0 0 3 0 0 0 0 0
0 3 0 2 2 0 3 0 0 0 0 0
0 3 0 2 2 0 3 0 0 0 0 0
0 3 0 0 0 0 3 0 0 0 0 0
0 3 3 3 3 3 3 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
0 0 3 3 3 3 3 3 3 3 3 0
0 0 3 0 0 0 0 0 0 0 3 0
0 0 3 0 2 2 2 2 2 0 3 0
0 0 3 0 0 0 0 0 0 0 3 0
0 0 3 3 3 3 3 3 3 3 3 0
Basic rules:
- The array size of the input file must match the GS defined in the .cpp file (H equals W equals GS).
- A graph is defined as one or more “1” values adjacent to each other.
- The search is performed using a basic BFS technique using a simple queue.
- When a graph is located its values will be updated from “1” to “2”.
- When the final value in the graph is determined a bounding box of “3” values will be drawn around the graph. The smallest X of the box equals the smallest X of the graph minus two, the smallest Y of the box equals the smallest Y of the graph minus two. The largest X of the box equals the largest X of the graph plus two, the largest Y of the box equals the largest Y of the graph plus two. Assume all graphs have a buffer of at least two rows/columns from the border to allow a box to be drawn.
The latest attempt of processing this array:
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 1 1 0 0 0
0 0 0 1 1 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
yields this output:
0 0 0 0 0 0 0 0
0 3 3 3 3 3 0 0
0 3 3 3 3 3 3 0
0 3 3 2 1 3 3 0
0 3 3 2 2 3 3 0
0 3 3 3 3 3 3 0
0 3 3 3 3 3 3 0
0 0 0 0 0 0 0 0
while a single digit graph works well:
0 0 0 0 0
0 0 0 0 0
0 0 1 0 0
0 0 0 0 0
0 0 0 0 0
yields output:
3 3 3 3 3
3 0 0 0 3
3 0 2 0 3
3 0 0 0 3
3 3 3 3 3
Here is my code:
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include "queue.h"
#define GS 8 /* GRID SIZE */
using namespace std;
void processCmdArgs (ifstream& input, int argc, char* argv[]);
void drawBoundingBox (int arr[][GS], int xLo, int yLo, int xHi, int yHi);
void checkNeighbors (int arr[][GS], bool vis[][GS], queue Q, point* p);
void print (int arr[][GS]);
int main( int argc, char* argv[] ) {
int xLo = 0;
int xHi = GS - 1;
int yLo = 0;
int yHi = GS - 1;
ifstream input; /* filestream to read in file to parse */
int arr[GS][GS]; /* declare array of vals to check for graph */
bool visited[GS][GS]; /* array of bools to track progress */
int count = 0; /* number of graphs found */
processCmdArgs(input, argc, argv);
/* populate array */
for (int i = 0; i < GS; i++) {
for (int j = 0; j < GS; j++) {
input >> arr[i][j];
}
}
input.close();
/*init visited */
for (int y = yLo; y < GS; y++) {
for (int x = xLo; x < GS; x++) {
visited[x][y] = false;
}
}
/* print array */
cout << "The array to find a graph is:\n";
print(arr);
/* find graph(s) in array */
queue Q;
for (int j = yLo; j < GS; j++) {
for (int k = xLo; k < GS; k++) {
if (arr[k][j] == 1) {
count++;
xLo = xHi = k;
yLo = yHi = j;
point *p = new point(k, j);
Q.insert(p);
delete p;
visited[k][j] = true;
while (!Q.isEmpty()) {
*p = Q.del(); /* does this really work? */
int x = p->getx();
int y = p->gety();
arr[x][y] = 2;
if (x < xLo) xLo = x;
if (y < yLo) yLo = y;
if (x > xHi) xHi = x;
if (y > yHi) yHi = y;
checkNeighbors(arr, visited, Q, p);
}
drawBoundingBox(arr, xLo, yLo, xHi, yHi);
}
else {
visited[k][j] = true;
}
}
}
cout << "The updated array is:\n";
print(arr);
cout << "The number of graphs in arr is " << count << endl;
return 0;
}
/*** END OF MAIN ***/
/*** START OF FUNCTIONS ***/
void processCmdArgs(ifstream& input, int argc, char* argv[]) {
/* Check command-line args first to avoid accessing nonexistent memory */
if (argc != 2) {
cerr << "Error: this program takes one command-line argument.\n";
exit(1);
}
/* Try to open the file using the provided filename */
input.open(argv[1]);
/* Exit with error if it doesn't open */
if (input.fail()) {
cerr << "Error: could not open " << argv[1] << ".\n";
exit(1);
}
}
void drawBoundingBox (int arr[][GS], int xLo, int yLo, int xHi, int yHi) {
// draw a box with (lowx-2,lowy-2) as NW and
// (highx + 2, highy + 2) as SE boundary
/* draw top and bottom of box */
for (int x = xLo - 2; x <= xHi + 2; x++) {
arr[x][yLo - 2] = 3;
arr[x][yHi + 2] = 3;
}
/* draw sides of box */
for (int y = yLo - 1; y <= yHi + 1; y++) {
arr[xLo - 2][y] = 3;
arr[xHi + 2][y] = 3;
}
}
void checkNeighbors (int arr[][GS], bool vis[][GS], queue Q, point* p) {
int pX = p->getx();
int pY = p->gety();
for (int y = pY - 1; y <= pY + 1; y++) {
for (int x = pX - 1; x <= pX + 1; x++) {
if (x == pX && y == pY) {/* easier than opposite boolean logic */ }
else {
if (vis[x][y] == false) vis[x][y] = true;
if (arr[x][y] == 1) {
point *n = new point(x, y);
Q.insert(n);
delete n;
}
}
}
}
}
/*
 * Writes the GS x GS grid to standard output, one array row per line.
 * Each value is followed by a single space and each line ends with endl.
 */
void print (int arr[][GS]) {
    for (int row = 0; row != GS; ++row) {
        const int* cells = arr[row];
        int col = 0;
        while (col != GS) {
            std::cout << cells[col] << " ";
            ++col;
        }
        std::cout << std::endl;
    }
}
/*** END OF FUNCTIONS ***/
/*** START of QUEUE CLASS ***/
const int MSIZE = 1000;  /* maximum number of points a queue can hold */

/* A pair of integer grid coordinates. */
class point {
private:
    int x;
    int y;
public:
    /* Point at (0, 0); lets points be stored in plain arrays. */
    point() : x(0), y(0) {}
    point(int p, int q) : x(p), y(q) {}
    int getx() const {
        return x;
    }
    int gety() const {
        return y;
    }
};

/*
 * Fixed-capacity FIFO queue of points, implemented as a ring buffer.
 *
 * Points are stored by value: insert() copies the point it is given, so the
 * caller may destroy or reuse its own object as soon as insert() returns.
 */
class queue {
private:
    point Q[MSIZE];
    int front;  /* slot of the most recently inserted point */
    int rear;   /* slot just before the oldest stored point */
    int size;   /* number of points currently stored */
public:
    /* Creates an empty queue. */
    queue() : front(0), rear(0), size(0) {}

    /*
     * Appends a copy of x. Returns true when the point was stored and false
     * when the queue already holds MSIZE points (the point is then dropped).
     */
    bool insert(const point& x) {
        if (size == MSIZE) {
            return false;
        }
        front = (front + 1) % MSIZE;
        Q[front] = x;
        ++size;
        return true;
    }

    /*
     * Pointer form kept for existing callers: appends a copy of *x.
     * A null pointer is ignored and reported as false.
     */
    bool insert(point* x) {
        if (x == 0) {
            return false;
        }
        return insert(*x);
    }

    /*
     * Removes and returns the oldest point.
     * Throws std::out_of_range when the queue is empty.
     */
    point del() {
        if (size == 0) {
            throw std::out_of_range("queue::del() called on an empty queue");
        }
        rear = (rear + 1) % MSIZE;
        --size;
        return Q[rear];
    }

    /*
     * Prints the stored points to standard output, most recently inserted
     * first, followed by an "end of queue" line.
     */
    void print() const {
        for (int j = 0; j < size; ++j) {
            /* step backwards from the newest slot, wrapping around the ring */
            const int i = (front - j + MSIZE) % MSIZE;
            std::cout << "x = " << Q[i].getx() << " y = " << Q[i].gety() << '\n';
        }
        std::cout << "end of queue" << '\n';
    }

    /* True when no points are stored. */
    bool isEmpty() const {
        return (size == 0);
    }
};
/*** END of QUEUE CLASS ***/
Now let’s look for bugs…
And now it works, for one graph. I’m not going to try it with two.
EDIT:
I have to eat some of my words: you don’t index with
[j][k], I was just confused by your use of (k,j) <=> (x,y) and got it mixed up with an actual bug elsewhere. And now I see what you’re doing with the queue, but seriously you should look into the STL. The really serious bug is in the signature of
checkNeighbors(...). You’re passing Q by value, not by reference. Fix that, and the code works for multiple graphs.
EDIT:
Yep, another bug:
queue stores pointers to points, not points, for no particular reason (see “6”, above), and somehow it’s fouling them up. Rather than hunt down the exact bug, I changed queue to handle points, and got the correct result for the complicated graph.