* scribo/core/tag/component.hh: New tag.
* scribo/postprocessing/images_to_drop_capital.hh: New routine.
---
scribo/ChangeLog | 8 +++
scribo/scribo/core/tag/component.hh | 7 ++-
.../images_to_drop_capital.hh} | 66 +++++++++++++-------
3 files changed, 58 insertions(+), 23 deletions(-)
copy scribo/scribo/{filter/images_in_paragraph.hh =>
postprocessing/images_to_drop_capital.hh} (64%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index c8b3503..b10058f 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,13 @@
2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Handle drop capital components.
+
+ * scribo/core/tag/component.hh: New tag.
+
+ * scribo/postprocessing/images_to_drop_capital.hh: New routine.
+
+2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Save component outlines instead of bboxes.
* scribo/io/img/internal/debug_img_visitor.hh,
diff --git a/scribo/scribo/core/tag/component.hh b/scribo/scribo/core/tag/component.hh
index dc9db90..d5afb36 100644
--- a/scribo/scribo/core/tag/component.hh
+++ b/scribo/scribo/core/tag/component.hh
@@ -60,7 +60,8 @@ namespace scribo
WhitespaceSeparator,
Noise,
Punctuation,
- Image
+ Image,
+ DropCapital
};
@@ -135,6 +136,9 @@ namespace scribo
break;
case Image:
str = "Image";
+ break;
+ case DropCapital:
+ str = "DropCapital";
break;
}
@@ -159,6 +162,8 @@ namespace scribo
return Punctuation;
else if (str == "Image")
return Image;
+ else if (str == "DropCapital")
+ return DropCapital;
return Undefined;
}
diff --git a/scribo/scribo/filter/images_in_paragraph.hh
b/scribo/scribo/postprocessing/images_to_drop_capital.hh
similarity index 64%
copy from scribo/scribo/filter/images_in_paragraph.hh
copy to scribo/scribo/postprocessing/images_to_drop_capital.hh
index 3cf64e1..ca76609 100644
--- a/scribo/scribo/filter/images_in_paragraph.hh
+++ b/scribo/scribo/postprocessing/images_to_drop_capital.hh
@@ -23,12 +23,12 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_FILTER_IMAGES_IN_PARAGRAPH_HH
-# define SCRIBO_FILTER_IMAGES_IN_PARAGRAPH_HH
+#ifndef SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH
+# define SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH
/// \file
///
-/// Invalidate false positive separators.
+/// Set type for specific images to Drop Capital component.
/// \fixme Share same test canvas as text::merging.
@@ -40,34 +40,31 @@
namespace scribo
{
- namespace filter
+ namespace postprocessing
{
using namespace mln;
- /// Invalidate false positive separators.
+ /// Set type for specific images to Drop Capital component.
///
/// \param[in] separators A paragraph set.
///
- /// \return A doc with invalidated separators.
- ///
- /// Warning: it does not remove separators from separator
- /// image. It only invalidate separator components in their
- /// respective component_set.
- ///
+ /// \return A doc with images tagged as drop capital if such
+ /// images have been found.
+ ///
template <typename L>
void
- images_in_paragraph(document<L>& doc);
+ images_to_drop_capital(document<L>& doc);
# ifndef MLN_INCLUDE_ONLY
template <typename L>
void
- images_in_paragraph(document<L>& doc)
+ images_to_drop_capital(document<L>& doc)
{
- trace::entering("scribo::filter::images_in_paragraph");
+ trace::entering("scribo::postprocessing::images_to_drop_capital");
mln_precondition(doc.is_valid());
@@ -82,6 +79,8 @@ namespace scribo
if (doc.paragraphs()(p).is_valid())
mln::draw::box_plain(billboard, doc.paragraphs()(p).bbox(), true);
+ float min_img_size = 0.2 * (doc.image().domain().width()
+ + doc.image().domain().height());
component_set<L> elts = doc.elements();
for_all_comps(c, elts)
@@ -91,29 +90,52 @@ namespace scribo
const bool tl = billboard(b_.pmin());
const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mb = billboard.at_(b_.pmax().row(), b_.pcenter().col());
const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
const bool br = billboard(b_.pmax());
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl
&& br)
- elts(c).update_tag(component::Ignored);
+ typedef mln::util::set<int> set_t;
+ set_t s;
+ s.insert(tl);
+ s.insert(tr);
+ s.insert(mb);
+ s.insert(mc);
+ s.insert(mr);
+ s.insert(bl);
+ s.insert(br);
+
+ if (s.nelements() > 2 || (s.nelements() == 2 && !s.has(0)))
+ continue;
+
+ float elt_size = elts(c).bbox().width() + elts(c).bbox().height();
+ for_all_elements(e, s)
+ if (s[e] != 0
+ && (mc != 0 && mc == s[e]
+ && ((tl == mc && bl == mc)
+ || (tr == mc && br == mc)
+ || (bl == mc && br == mc)
+ || (tl == mc && tr == mc)
+ || (br == mc && mr == mc && mb == mc)))
+ && (elt_size < min_img_size))
+ {
+ elts(c).update_type(component::DropCapital);
+ break;
+ }
}
// FIXME: warning this call may produce inconsistent data
// Ignored components are still in the separator image...
doc.set_elements(elts);
- trace::exiting("scribo::filter::images_in_paragraph");
+ trace::exiting("scribo::postprocessing::images_to_drop_capital");
}
# endif // ! MLN_INCLUDE_ONLY
- } // end of namespace scribo::filter
+ } // end of namespace scribo::postprocessing
} // end of namespace scribo
-#endif // ! SCRIBO_FILTER_IMAGES_IN_PARAGRAPH_HH
+#endif // ! SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH
--
1.5.6.5