* scribo/sandbox/icdar_13_table/src/new.cc: Here.
* scribo/sandbox/icdar_13_table/src/disjoint_set.hh: New.
---
scribo/sandbox/icdar_13_table/src/disjoint_set.hh | 54 +++++++++++++++++++++
scribo/sandbox/icdar_13_table/src/new.cc | 36 ++++++++++++++
2 files changed, 90 insertions(+), 0 deletions(-)
create mode 100644 scribo/sandbox/icdar_13_table/src/disjoint_set.hh
diff --git a/scribo/sandbox/icdar_13_table/src/disjoint_set.hh
b/scribo/sandbox/icdar_13_table/src/disjoint_set.hh
new file mode 100644
index 0000000..b8ae409
--- /dev/null
+++ b/scribo/sandbox/icdar_13_table/src/disjoint_set.hh
@@ -0,0 +1,54 @@
+#ifndef DISJOINT_SET_HH
+# define DISJOINT_SET_HH
+
+/// \file
+/// Implementation of a disjoint-set forest.
+
+# include <limits>
+# include <vector>
+
+template <typename T>
+struct disjoint_set
+{
+ disjoint_set(size_t size)
+ // The maximum value of T is considered an ``invalid value''.
+ : parent(size, std::numeric_limits<T>::max())
+ {
+ for (unsigned i = 0; i < parent.size(); ++i)
+ make_set(i);
+ }
+
+ /// Create initial sets (singletons).
+ void make_set(T x)
+ {
+ parent[x] = x;
+ }
+
+ bool is_root(T x)
+ {
+ return parent[x] == x;
+ }
+
+ /// Find the root of the set containing \a x.
+ T find(T x)
+ {
+ if (!is_root(x))
+ // Path compression.
+ parent[x] = find(parent[x]);
+ return parent[x];
+ }
+
+ /// Merge the set containing \a y into the set containing \a x.
+ void make_union(T x, T y)
+ {
+ T x_root = find(x);
+ T y_root = find(y);
+ parent[y_root] = x_root;
+ }
+
+ /// Parent relationship. A node with is its own parent is a
+ /// root (representative of a component).
+ std::vector<T> parent;
+};
+
+#endif // ! DISJOINT_SET_HH
diff --git a/scribo/sandbox/icdar_13_table/src/new.cc
b/scribo/sandbox/icdar_13_table/src/new.cc
index 2c41e31..bcf127e 100644
--- a/scribo/sandbox/icdar_13_table/src/new.cc
+++ b/scribo/sandbox/icdar_13_table/src/new.cc
@@ -54,6 +54,8 @@
#include <scribo/text/extract_paragraphs.hh>
#include <scribo/text/merging.hh>
+#include "disjoint_set.hh"
+
using namespace mln;
// Draw weighted boxes (red < orange < cyan < green)
@@ -558,6 +560,39 @@ int main(int argc, char** argv)
}
}
+ // Build the columns.
+ // First pass.
+ disjoint_set<node_id> columns_set(groups.nelements());
+ for (unsigned i = 0; i < groups.nelements(); ++i)
+ {
+ const group_set& successors = nodes_below[i];
+ for (group_set::const_iterator j = successors.begin();
+ j != successors.end(); ++j)
+ columns_set.make_union(i, *j);
+ }
+ // Second pass: assign labels. Label 0 is unused and means
+ // ``default''.
+ std::vector<group_set> descendants(groups.nelements());
+ for (unsigned i = 0; i < groups.nelements(); ++i)
+ {
+ // Process groups in reverse order.
+ unsigned j = groups.nelements() - 1 - i;
+ if (!columns_set.is_root(j))
+ descendants[columns_set.find(j)].insert(j);
+ }
+
+ // Visualization: columns.
+ image2d<value::rgb8> ima_columns = data::convert(value::rgb8(), bin_merged);
+ for (unsigned i = 0; i < groups.nelements(); ++i)
+ if (!descendants[i].empty())
+ {
+ box2d column_box = groups(i).bbox();
+ for (group_set::const_iterator j = descendants[i].begin();
+ j != descendants[i].end(); ++j)
+ column_box.merge(groups(*j).bbox());
+ draw::box(ima_columns, column_box, literal::red);
+ }
+
// Write images and close XML
std::ostringstream path;
unsigned number = 0;
@@ -572,6 +607,7 @@ int main(int argc, char** argv)
write_image(ima_links, "components", page, number, path);
write_image(ima_groups, "groups", page, number, path);
write_image(ima_valid, "valid", page, number, path);
+ write_image(ima_columns, "columns", page, number, path);
}
delete xml;
--
1.7.2.5
Show replies by date