[repacker] Rework how ClassDef sizes are estimated during splitting.

The old approach considered only one class at a time, which in some cases can generate the wrong answer. This change updates the estimation to consider how all classes in the current split would end up encoded in a single ClassDef table. Additionally, compute whether glyphs are consecutive only for the current split (instead of the full mapping).
This commit is contained in:
Garret Rieger 2024-03-08 19:46:48 +00:00
parent d6431dc7e6
commit 252a926fbf
3 changed files with 149 additions and 37 deletions

View file

@ -134,20 +134,17 @@ struct ClassDef : public OT::ClassDef
struct class_def_size_estimator_t
{
// TODO(garretrieger): for coverage take the same approach as class def (compute the running total for each format).
template<typename It>
class_def_size_estimator_t (It glyph_and_class)
: gids_consecutive (true), num_ranges_per_class (), glyphs_per_class ()
: num_ranges_per_class (), glyphs_per_class ()
{
unsigned last_gid = (unsigned) -1;
reset();
for (auto p : + glyph_and_class)
{
unsigned gid = p.first;
unsigned klass = p.second;
if (last_gid != (unsigned) -1 && gid != last_gid + 1)
gids_consecutive = false;
last_gid = gid;
hb_set_t* glyphs;
if (glyphs_per_class.has (klass, &glyphs) && glyphs) {
glyphs->add (gid);
@ -177,6 +174,13 @@ struct class_def_size_estimator_t
}
}
// Starts a fresh split: forgets every class/glyph accumulated so far and
// puts both running ClassDef size totals back to their initial value of 4
// (presumably the fixed per-table overhead -- confirm against the ClassDef
// format layouts).
void reset() {
  included_classes.clear();
  included_glyphs.clear();
  class_def_2_size = 4;
  class_def_1_size = 4;
}
// Incremental increase in the Coverage and ClassDef table size
// (worst case) if all glyphs associated with 'klass' were added.
unsigned incremental_coverage_size (unsigned klass) const
@ -185,20 +189,40 @@ struct class_def_size_estimator_t
return 2 * glyphs_per_class.get (klass).get_population ();
}
// Incremental increase in the Coverage and ClassDef table size
// (worst case) if all glyphs associated with 'klass' were added.
unsigned incremental_class_def_size (unsigned klass) const
// Compute the new size of the ClassDef table if all glyphs associated with 'klass' were added.
unsigned class_def_size (unsigned klass)
{
// ClassDef takes 6 bytes per range
unsigned class_def_2_size = 6 * num_ranges_per_class.get (klass);
if (gids_consecutive)
{
// ClassDef1 takes 2 bytes per glyph, but only can be used
// when gids are consecutive.
return hb_min (2 * glyphs_per_class.get (klass).get_population (), class_def_2_size);
if (!included_classes.has(klass)) {
// ClassDef 1 takes 2 bytes per glyph.
class_def_1_size += 2 * glyphs_per_class.get (klass).get_population ();
// ClassDef 2 takes 6 bytes per range.
class_def_2_size += 6 * num_ranges_per_class.get (klass);
hb_set_t* glyphs = nullptr;
if (glyphs_per_class.has(klass, &glyphs)) {
included_glyphs.union_(*glyphs);
}
included_classes.add(klass);
}
return class_def_2_size;
if (!gids_consecutive())
return class_def_2_size;
// ClassDef1 can only be used when gids are consecutive.
return hb_min (class_def_1_size, class_def_2_size);
}
bool gids_consecutive() const {
hb_codepoint_t start = HB_SET_VALUE_INVALID;
hb_codepoint_t end = HB_SET_VALUE_INVALID;
unsigned count = 0;
while (included_glyphs.next_range (&start, &end)) {
count++;
if (count > 1) return false;
}
return true;
}
bool in_error ()
@ -214,9 +238,12 @@ struct class_def_size_estimator_t
}
private:
bool gids_consecutive;
hb_hashmap_t<unsigned, unsigned> num_ranges_per_class;
hb_hashmap_t<unsigned, hb_set_t> glyphs_per_class;
hb_set_t included_classes;
hb_set_t included_glyphs;
unsigned class_def_1_size = 4;
unsigned class_def_2_size = 4;
};

View file

@ -232,7 +232,7 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
unsigned accumulated = base_size;
unsigned coverage_size = 4;
unsigned class_def_1_size = 4;
unsigned class_def_1_size = 0;
unsigned max_coverage_size = coverage_size;
unsigned max_class_def_1_size = class_def_1_size;
@ -248,7 +248,7 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
{
unsigned accumulated_delta = class1_record_size;
coverage_size += estimator.incremental_coverage_size (i);
class_def_1_size += estimator.incremental_class_def_size (i);
class_def_1_size = estimator.class_def_size (i);
max_coverage_size = hb_max (max_coverage_size, coverage_size);
max_class_def_1_size = hb_max (max_class_def_1_size, class_def_1_size);
@ -280,8 +280,10 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
split_points.push (i);
// split does not include i, so add the size for i when we reset the size counters.
accumulated = base_size + accumulated_delta;
estimator.reset();
coverage_size = 4 + estimator.incremental_coverage_size (i);
class_def_1_size = 4 + estimator.incremental_class_def_size (i);
class_def_1_size = estimator.class_def_size(i);
visited.clear (); // node sharing isn't allowed between splits.
}
}

View file

@ -43,7 +43,7 @@ static bool incremental_size_is (const gid_and_class_list_t& list, unsigned klas
return false;
}
result = estimator.incremental_class_def_size (klass);
result = estimator.class_def_size (klass);
if (result != class_def_expected)
{
printf ("FAIL: class def expected size %u but was %u\n", class_def_expected, result);
@ -55,15 +55,16 @@ static bool incremental_size_is (const gid_and_class_list_t& list, unsigned klas
static void test_class_and_coverage_size_estimates ()
{
// TODO(garretrieger): test against the actual serialized sizes of class def tables
gid_and_class_list_t empty = {
};
assert (incremental_size_is (empty, 0, 0, 0));
assert (incremental_size_is (empty, 1, 0, 0));
assert (incremental_size_is (empty, 0, 0, 4));
assert (incremental_size_is (empty, 1, 0, 4));
gid_and_class_list_t class_zero = {
{5, 0},
};
assert (incremental_size_is (class_zero, 0, 2, 0));
assert (incremental_size_is (class_zero, 0, 2, 4));
gid_and_class_list_t consecutive = {
{4, 0},
@ -75,25 +76,25 @@ static void test_class_and_coverage_size_estimates ()
{10, 2},
{11, 2},
};
assert (incremental_size_is (consecutive, 0, 4, 0));
assert (incremental_size_is (consecutive, 1, 4, 4));
assert (incremental_size_is (consecutive, 2, 8, 6));
assert (incremental_size_is (consecutive, 0, 4, 4));
assert (incremental_size_is (consecutive, 1, 4, 8));
assert (incremental_size_is (consecutive, 2, 8, 10));
gid_and_class_list_t non_consecutive = {
{4, 0},
{5, 0},
{6, 0},
{6, 1},
{7, 1},
{8, 1},
{10, 1},
{9, 2},
{10, 2},
{11, 2},
{12, 2},
{13, 2},
};
assert (incremental_size_is (non_consecutive, 0, 4, 0));
assert (incremental_size_is (non_consecutive, 1, 4, 6));
assert (incremental_size_is (non_consecutive, 2, 8, 6));
assert (incremental_size_is (non_consecutive, 0, 4, 4));
assert (incremental_size_is (non_consecutive, 1, 4, 4 + 2*6));
assert (incremental_size_is (non_consecutive, 2, 8, 4 + 2*6));
gid_and_class_list_t multiple_ranges = {
{4, 0},
@ -108,12 +109,94 @@ static void test_class_and_coverage_size_estimates ()
{12, 1},
{13, 1},
};
assert (incremental_size_is (multiple_ranges, 0, 4, 0));
assert (incremental_size_is (multiple_ranges, 1, 2 * 6, 3 * 6));
assert (incremental_size_is (multiple_ranges, 0, 4, 4));
assert (incremental_size_is (multiple_ranges, 1, 2 * 6, 4 + 3 * 6));
}
// Exercises the running ClassDef size estimate: class_def_size() is called
// for one class after another on the same estimator, so each expected value
// is the total for all classes added since construction or the last reset().
// Expected values are written as 4 (initial size) + the cheaper usable
// format, where f1 costs 2 bytes per glyph and f2 costs 6 bytes per range;
// f1 is only eligible while the included glyphs form a single range.
static void test_running_class_and_coverage_size_estimates () {
// #### With consecutive gids: switches formats ###
gid_and_class_list_t consecutive_map = {
// range 1-4 (f1: 8 bytes), (f2: 6 bytes)
{1, 1},
{2, 1},
{3, 1},
{4, 1},
// (f1: 2 bytes), (f2: 6 bytes)
{5, 2},
// (f1: 14 bytes), (f2: 6 bytes)
{6, 3},
{7, 3},
{8, 3},
{9, 3},
{10, 3},
{11, 3},
{12, 3},
};
graph::class_def_size_estimator_t estimator1(consecutive_map.iter());
assert(estimator1.class_def_size(1) == 4 + 6); // format 2, 1 range
assert(estimator1.class_def_size(2) == 4 + 10); // format 1, 5 glyphs
assert(estimator1.class_def_size(3) == 4 + 18); // format 2, 3 ranges
// After reset() class 1 is no longer part of the running total.
estimator1.reset();
assert(estimator1.class_def_size(2) == 4 + 2); // format 1, 1 glyph
assert(estimator1.class_def_size(3) == 4 + 12); // format 2, 2 ranges
// #### With non-consecutive gids: always uses format 2 ###
gid_and_class_list_t non_consecutive_map = {
// range 1-4 (f1: 8 bytes), (f2: 6 bytes)
{1, 1},
{2, 1},
{3, 1},
{4, 1},
// two single-glyph ranges (f1: 4 bytes), (f2: 12 bytes)
{6, 2},
{8, 2},
// (f1: 14 bytes), (f2: 6 bytes)
{9, 3},
{10, 3},
{11, 3},
{12, 3},
{13, 3},
{14, 3},
{15, 3},
};
graph::class_def_size_estimator_t estimator2(non_consecutive_map.iter());
assert(estimator2.class_def_size(1) == 4 + 6); // format 2, 1 range
assert(estimator2.class_def_size(2) == 4 + 18); // format 2, 3 ranges
assert(estimator2.class_def_size(3) == 4 + 24); // format 2, 4 ranges
// After reset() the glyphs {6, 8} are still non-consecutive, so format 1
// stays ineligible.
estimator2.reset();
assert(estimator2.class_def_size(2) == 4 + 12); // format 2, 2 ranges
assert(estimator2.class_def_size(3) == 4 + 18); // format 2, 3 ranges
}
// Checks that consecutiveness is judged only on the glyphs included since the
// last reset(): the full map {1, 6, 7} has a gap, but once class 1 is dropped
// the remaining glyphs {6, 7} are consecutive and format 1 becomes usable.
static void test_running_class_size_estimates_with_locally_consecutive_glyphs () {
gid_and_class_list_t consecutive_map = {
{1, 1},
{6, 2},
{7, 3},
};
graph::class_def_size_estimator_t estimator(consecutive_map.iter());
assert(estimator.class_def_size(1) == 4 + 2); // format 1, 1 glyph
assert(estimator.class_def_size(2) == 4 + 12); // format 2, 2 ranges
assert(estimator.class_def_size(3) == 4 + 18); // format 2, 3 ranges
// Start a new split that no longer contains glyph 1.
estimator.reset();
assert(estimator.class_def_size(2) == 4 + 2); // format 1, 1 glyph
assert(estimator.class_def_size(3) == 4 + 4); // format 1, 2 glyphs
}
// Test driver: runs each ClassDef size-estimator test in sequence. Failures
// are reported through the asserts inside the individual tests; argc/argv
// are not used.
int
main (int argc, char **argv)
{
test_class_and_coverage_size_estimates ();
test_running_class_and_coverage_size_estimates ();
test_running_class_size_estimates_with_locally_consecutive_glyphs ();
}