olena-2.0-400-g023cbf5 Build the set of detected columns during table recognition.

* scribo/sandbox/icdar_13_table/src/new.cc: Here. * scribo/sandbox/icdar_13_table/src/disjoint_set.hh: New. --- scribo/sandbox/icdar_13_table/src/disjoint_set.hh | 54 +++++++++++++++++++++ scribo/sandbox/icdar_13_table/src/new.cc | 36 ++++++++++++++ 2 files changed, 90 insertions(+), 0 deletions(-) create mode 100644 scribo/sandbox/icdar_13_table/src/disjoint_set.hh diff --git a/scribo/sandbox/icdar_13_table/src/disjoint_set.hh b/scribo/sandbox/icdar_13_table/src/disjoint_set.hh new file mode 100644 index 0000000..b8ae409 --- /dev/null +++ b/scribo/sandbox/icdar_13_table/src/disjoint_set.hh @@ -0,0 +1,54 @@ +#ifndef DISJOINT_SET_HH +# define DISJOINT_SET_HH + +/// \file +/// Implementation of a disjoint-set forest. + +# include <limits> +# include <vector> + +template <typename T> +struct disjoint_set +{ + disjoint_set(size_t size) + // The maximum value of T is considered an ``invalid value''. + : parent(size, std::numeric_limits<T>::max()) + { + for (unsigned i = 0; i < parent.size(); ++i) + make_set(i); + } + + /// Create initial sets (singletons). + void make_set(T x) + { + parent[x] = x; + } + + bool is_root(T x) + { + return parent[x] == x; + } + + /// Find the root of the set containing \a x. + T find(T x) + { + if (!is_root(x)) + // Path compression. + parent[x] = find(parent[x]); + return parent[x]; + } + + /// Merge the set containing \a y into the set containing \a x. + void make_union(T x, T y) + { + T x_root = find(x); + T y_root = find(y); + parent[y_root] = x_root; + } + + /// Parent relationship. A node with is its own parent is a + /// root (representative of a component). + std::vector<T> parent; +}; + +#endif // ! DISJOINT_SET_HH diff --git a/scribo/sandbox/icdar_13_table/src/new.cc b/scribo/sandbox/icdar_13_table/src/new.cc index 2c41e31..bcf127e 100644 --- a/scribo/sandbox/icdar_13_table/src/new.cc +++ b/scribo/sandbox/icdar_13_table/src/new.cc @@ -54,6 +54,8 @@ #include <scribo/text/extract_paragraphs.hh> #include <scribo/text/merging.hh> +#include "disjoint_set.hh" + using namespace mln; // Draw weighted boxes (red < orange < cyan < green) @@ -558,6 +560,39 @@ int main(int argc, char** argv) } } + // Build the columns. + // First pass. + disjoint_set<node_id> columns_set(groups.nelements()); + for (unsigned i = 0; i < groups.nelements(); ++i) + { + const group_set& successors = nodes_below[i]; + for (group_set::const_iterator j = successors.begin(); + j != successors.end(); ++j) + columns_set.make_union(i, *j); + } + // Second pass: assign labels. Label 0 is unused and means + // ``default''. + std::vector<group_set> descendants(groups.nelements()); + for (unsigned i = 0; i < groups.nelements(); ++i) + { + // Process groups in reverse order. + unsigned j = groups.nelements() - 1 - i; + if (!columns_set.is_root(j)) + descendants[columns_set.find(j)].insert(j); + } + + // Visualization: columns. + image2d<value::rgb8> ima_columns = data::convert(value::rgb8(), bin_merged); + for (unsigned i = 0; i < groups.nelements(); ++i) + if (!descendants[i].empty()) + { + box2d column_box = groups(i).bbox(); + for (group_set::const_iterator j = descendants[i].begin(); + j != descendants[i].end(); ++j) + column_box.merge(groups(*j).bbox()); + draw::box(ima_columns, column_box, literal::red); + } + // Write images and close XML std::ostringstream path; unsigned number = 0; @@ -572,6 +607,7 @@ int main(int argc, char** argv) write_image(ima_links, "components", page, number, path); write_image(ima_groups, "groups", page, number, path); write_image(ima_valid, "valid", page, number, path); + write_image(ima_columns, "columns", page, number, path); } delete xml; -- 1.7.2.5
participants (1)
-
Roland Levillain