mirror of
https://github.com/harfbuzz/harfbuzz.git
synced 2025-04-13 08:42:59 +00:00
[repacker] Rework how ClassDef sizes are estimated during splitting.
The old approach considered only one class at a time, which in some cases can generate the wrong answer. This change updates the estimation to consider how all classes in the current split would end up encoded in a single ClassDef table. Additionally, compute whether glyphs are consecutive only for the current split (instead of for the full mapping).
This commit is contained in:
parent
d6431dc7e6
commit
252a926fbf
3 changed files with 149 additions and 37 deletions
|
@ -134,20 +134,17 @@ struct ClassDef : public OT::ClassDef
|
|||
|
||||
struct class_def_size_estimator_t
|
||||
{
|
||||
// TODO(garretrieger): for coverage take the same approach as class def (compute the running total for each format).
|
||||
template<typename It>
|
||||
class_def_size_estimator_t (It glyph_and_class)
|
||||
: gids_consecutive (true), num_ranges_per_class (), glyphs_per_class ()
|
||||
: num_ranges_per_class (), glyphs_per_class ()
|
||||
{
|
||||
unsigned last_gid = (unsigned) -1;
|
||||
reset();
|
||||
for (auto p : + glyph_and_class)
|
||||
{
|
||||
unsigned gid = p.first;
|
||||
unsigned klass = p.second;
|
||||
|
||||
if (last_gid != (unsigned) -1 && gid != last_gid + 1)
|
||||
gids_consecutive = false;
|
||||
last_gid = gid;
|
||||
|
||||
hb_set_t* glyphs;
|
||||
if (glyphs_per_class.has (klass, &glyphs) && glyphs) {
|
||||
glyphs->add (gid);
|
||||
|
@ -177,6 +174,13 @@ struct class_def_size_estimator_t
|
|||
}
|
||||
}
|
||||
|
||||
void reset() {
|
||||
class_def_1_size = 4;
|
||||
class_def_2_size = 4;
|
||||
included_glyphs.clear();
|
||||
included_classes.clear();
|
||||
}
|
||||
|
||||
// Incremental increase in the Coverage table size
|
||||
// (worst case) if all glyphs associated with 'klass' were added.
|
||||
unsigned incremental_coverage_size (unsigned klass) const
|
||||
|
@ -185,20 +189,40 @@ struct class_def_size_estimator_t
|
|||
return 2 * glyphs_per_class.get (klass).get_population ();
|
||||
}
|
||||
|
||||
// Incremental increase in the Coverage and ClassDef table size
|
||||
// (worst case) if all glyphs associated with 'klass' were added.
|
||||
unsigned incremental_class_def_size (unsigned klass) const
|
||||
// Compute the new size of the ClassDef table if all glyphs associated with 'klass' were added.
|
||||
unsigned class_def_size (unsigned klass)
|
||||
{
|
||||
// ClassDef takes 6 bytes per range
|
||||
unsigned class_def_2_size = 6 * num_ranges_per_class.get (klass);
|
||||
if (gids_consecutive)
|
||||
{
|
||||
// ClassDef1 takes 2 bytes per glyph, but only can be used
|
||||
// when gids are consecutive.
|
||||
return hb_min (2 * glyphs_per_class.get (klass).get_population (), class_def_2_size);
|
||||
if (!included_classes.has(klass)) {
|
||||
// ClassDef 1 takes 2 bytes per glyph.
|
||||
class_def_1_size += 2 * glyphs_per_class.get (klass).get_population ();
|
||||
// ClassDef 2 takes 6 bytes per range.
|
||||
class_def_2_size += 6 * num_ranges_per_class.get (klass);
|
||||
|
||||
hb_set_t* glyphs = nullptr;
|
||||
if (glyphs_per_class.has(klass, &glyphs)) {
|
||||
included_glyphs.union_(*glyphs);
|
||||
}
|
||||
|
||||
included_classes.add(klass);
|
||||
}
|
||||
|
||||
return class_def_2_size;
|
||||
if (!gids_consecutive())
|
||||
return class_def_2_size;
|
||||
|
||||
// ClassDef1 can only be used when gids are consecutive.
|
||||
return hb_min (class_def_1_size, class_def_2_size);
|
||||
}
|
||||
|
||||
bool gids_consecutive() const {
|
||||
hb_codepoint_t start = HB_SET_VALUE_INVALID;
|
||||
hb_codepoint_t end = HB_SET_VALUE_INVALID;
|
||||
|
||||
unsigned count = 0;
|
||||
while (included_glyphs.next_range (&start, &end)) {
|
||||
count++;
|
||||
if (count > 1) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool in_error ()
|
||||
|
@ -214,9 +238,12 @@ struct class_def_size_estimator_t
|
|||
}
|
||||
|
||||
private:
|
||||
bool gids_consecutive;
|
||||
hb_hashmap_t<unsigned, unsigned> num_ranges_per_class;
|
||||
hb_hashmap_t<unsigned, hb_set_t> glyphs_per_class;
|
||||
hb_set_t included_classes;
|
||||
hb_set_t included_glyphs;
|
||||
unsigned class_def_1_size = 4;
|
||||
unsigned class_def_2_size = 4;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -232,7 +232,7 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
|
|||
|
||||
unsigned accumulated = base_size;
|
||||
unsigned coverage_size = 4;
|
||||
unsigned class_def_1_size = 4;
|
||||
unsigned class_def_1_size = 0;
|
||||
unsigned max_coverage_size = coverage_size;
|
||||
unsigned max_class_def_1_size = class_def_1_size;
|
||||
|
||||
|
@ -248,7 +248,7 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
|
|||
{
|
||||
unsigned accumulated_delta = class1_record_size;
|
||||
coverage_size += estimator.incremental_coverage_size (i);
|
||||
class_def_1_size += estimator.incremental_class_def_size (i);
|
||||
class_def_1_size = estimator.class_def_size (i);
|
||||
max_coverage_size = hb_max (max_coverage_size, coverage_size);
|
||||
max_class_def_1_size = hb_max (max_class_def_1_size, class_def_1_size);
|
||||
|
||||
|
@ -280,8 +280,10 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
|
|||
split_points.push (i);
|
||||
// split does not include i, so add the size for i when we reset the size counters.
|
||||
accumulated = base_size + accumulated_delta;
|
||||
|
||||
estimator.reset();
|
||||
coverage_size = 4 + estimator.incremental_coverage_size (i);
|
||||
class_def_1_size = 4 + estimator.incremental_class_def_size (i);
|
||||
class_def_1_size = estimator.class_def_size(i);
|
||||
visited.clear (); // node sharing isn't allowed between splits.
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ static bool incremental_size_is (const gid_and_class_list_t& list, unsigned klas
|
|||
return false;
|
||||
}
|
||||
|
||||
result = estimator.incremental_class_def_size (klass);
|
||||
result = estimator.class_def_size (klass);
|
||||
if (result != class_def_expected)
|
||||
{
|
||||
printf ("FAIL: class def expected size %u but was %u\n", class_def_expected, result);
|
||||
|
@ -55,15 +55,16 @@ static bool incremental_size_is (const gid_and_class_list_t& list, unsigned klas
|
|||
|
||||
static void test_class_and_coverage_size_estimates ()
|
||||
{
|
||||
// TODO(garretrieger): test against the actual serialized sizes of class def tables
|
||||
gid_and_class_list_t empty = {
|
||||
};
|
||||
assert (incremental_size_is (empty, 0, 0, 0));
|
||||
assert (incremental_size_is (empty, 1, 0, 0));
|
||||
assert (incremental_size_is (empty, 0, 0, 4));
|
||||
assert (incremental_size_is (empty, 1, 0, 4));
|
||||
|
||||
gid_and_class_list_t class_zero = {
|
||||
{5, 0},
|
||||
};
|
||||
assert (incremental_size_is (class_zero, 0, 2, 0));
|
||||
assert (incremental_size_is (class_zero, 0, 2, 4));
|
||||
|
||||
gid_and_class_list_t consecutive = {
|
||||
{4, 0},
|
||||
|
@ -75,25 +76,25 @@ static void test_class_and_coverage_size_estimates ()
|
|||
{10, 2},
|
||||
{11, 2},
|
||||
};
|
||||
assert (incremental_size_is (consecutive, 0, 4, 0));
|
||||
assert (incremental_size_is (consecutive, 1, 4, 4));
|
||||
assert (incremental_size_is (consecutive, 2, 8, 6));
|
||||
assert (incremental_size_is (consecutive, 0, 4, 4));
|
||||
assert (incremental_size_is (consecutive, 1, 4, 8));
|
||||
assert (incremental_size_is (consecutive, 2, 8, 10));
|
||||
|
||||
gid_and_class_list_t non_consecutive = {
|
||||
{4, 0},
|
||||
{5, 0},
|
||||
{6, 0},
|
||||
|
||||
{6, 1},
|
||||
{7, 1},
|
||||
{8, 1},
|
||||
{10, 1},
|
||||
|
||||
{9, 2},
|
||||
{10, 2},
|
||||
{11, 2},
|
||||
{12, 2},
|
||||
{13, 2},
|
||||
};
|
||||
assert (incremental_size_is (non_consecutive, 0, 4, 0));
|
||||
assert (incremental_size_is (non_consecutive, 1, 4, 6));
|
||||
assert (incremental_size_is (non_consecutive, 2, 8, 6));
|
||||
assert (incremental_size_is (non_consecutive, 0, 4, 4));
|
||||
assert (incremental_size_is (non_consecutive, 1, 4, 4 + 2*6));
|
||||
assert (incremental_size_is (non_consecutive, 2, 8, 4 + 2*6));
|
||||
|
||||
gid_and_class_list_t multiple_ranges = {
|
||||
{4, 0},
|
||||
|
@ -108,12 +109,94 @@ static void test_class_and_coverage_size_estimates ()
|
|||
{12, 1},
|
||||
{13, 1},
|
||||
};
|
||||
assert (incremental_size_is (multiple_ranges, 0, 4, 0));
|
||||
assert (incremental_size_is (multiple_ranges, 1, 2 * 6, 3 * 6));
|
||||
assert (incremental_size_is (multiple_ranges, 0, 4, 4));
|
||||
assert (incremental_size_is (multiple_ranges, 1, 2 * 6, 4 + 3 * 6));
|
||||
}
|
||||
|
||||
static void test_running_class_and_coverage_size_estimates () {
|
||||
// #### With consecutive gids: switches formats ###
|
||||
gid_and_class_list_t consecutive_map = {
|
||||
// range 1-4 (f1: 8 bytes), (f2: 6 bytes)
|
||||
{1, 1},
|
||||
{2, 1},
|
||||
{3, 1},
|
||||
{4, 1},
|
||||
|
||||
// (f1: 2 bytes), (f2: 6 bytes)
|
||||
{5, 2},
|
||||
|
||||
// (f1: 14 bytes), (f2: 6 bytes)
|
||||
{6, 3},
|
||||
{7, 3},
|
||||
{8, 3},
|
||||
{9, 3},
|
||||
{10, 3},
|
||||
{11, 3},
|
||||
{12, 3},
|
||||
};
|
||||
|
||||
graph::class_def_size_estimator_t estimator1(consecutive_map.iter());
|
||||
assert(estimator1.class_def_size(1) == 4 + 6); // format 2, 1 range
|
||||
assert(estimator1.class_def_size(2) == 4 + 10); // format 1, 5 glyphs
|
||||
assert(estimator1.class_def_size(3) == 4 + 18); // format 2, 3 ranges
|
||||
|
||||
estimator1.reset();
|
||||
assert(estimator1.class_def_size(2) == 4 + 2); // format 1, 1 glyphs
|
||||
assert(estimator1.class_def_size(3) == 4 + 12); // format 2, 2 ranges
|
||||
|
||||
// #### With non-consecutive gids: always uses format 2 ###
|
||||
gid_and_class_list_t non_consecutive_map = {
|
||||
// range 1-4 (f1: 8 bytes), (f2: 6 bytes)
|
||||
{1, 1},
|
||||
{2, 1},
|
||||
{3, 1},
|
||||
{4, 1},
|
||||
|
||||
// (f1: 4 bytes), (f2: 12 bytes)
|
||||
{6, 2},
|
||||
{8, 2},
|
||||
|
||||
// (f1: 14 bytes), (f2: 6 bytes)
|
||||
{9, 3},
|
||||
{10, 3},
|
||||
{11, 3},
|
||||
{12, 3},
|
||||
{13, 3},
|
||||
{14, 3},
|
||||
{15, 3},
|
||||
};
|
||||
|
||||
graph::class_def_size_estimator_t estimator2(non_consecutive_map.iter());
|
||||
assert(estimator2.class_def_size(1) == 4 + 6); // format 2, 1 range
|
||||
assert(estimator2.class_def_size(2) == 4 + 18); // format 2, 3 ranges
|
||||
assert(estimator2.class_def_size(3) == 4 + 24); // format 2, 4 ranges
|
||||
|
||||
estimator2.reset();
|
||||
assert(estimator2.class_def_size(2) == 4 + 12); // format 2, 2 ranges
|
||||
assert(estimator2.class_def_size(3) == 4 + 18); // format 2, 3 ranges
|
||||
}
|
||||
|
||||
static void test_running_class_size_estimates_with_locally_consecutive_glyphs () {
|
||||
gid_and_class_list_t consecutive_map = {
|
||||
{1, 1},
|
||||
{6, 2},
|
||||
{7, 3},
|
||||
};
|
||||
|
||||
graph::class_def_size_estimator_t estimator(consecutive_map.iter());
|
||||
assert(estimator.class_def_size(1) == 4 + 2); // format 1, 1 glyph
|
||||
assert(estimator.class_def_size(2) == 4 + 12); // format 2, 2 ranges
|
||||
assert(estimator.class_def_size(3) == 4 + 18); // format 2, 3 ranges
|
||||
|
||||
estimator.reset();
|
||||
assert(estimator.class_def_size(2) == 4 + 2); // format 1, 1 glyphs
|
||||
assert(estimator.class_def_size(3) == 4 + 4); // format 1, 2 glyphs
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
test_class_and_coverage_size_estimates ();
|
||||
test_running_class_and_coverage_size_estimates ();
|
||||
test_running_class_size_estimates_with_locally_consecutive_glyphs ();
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue