Merge pull request #4616 from googlefonts/repacker

[repacker] Rework how ClassDef sizes are estimated during splitting.
Behdad Esfahbod 2024-03-14 15:40:18 -06:00 committed by GitHub
commit e18565875a
4 changed files with 266 additions and 55 deletions
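
The rework replaces the old per-class worst-case increments (incremental_coverage_size() / incremental_class_def_size()) with a running estimator: classes are folded in one at a time, the estimator keeps running ClassDef format 1 and format 2 totals plus the set of glyphs included so far, and each query reports the cheaper of the two encodings; reset() starts a fresh estimate when a new sub-table begins. A minimal usage sketch (mine, not part of the commit), using the gid_and_class_list_t pair vector from the test file below:

gid_and_class_list_t pairs = { {1, 1}, {2, 1}, {5, 2} };
graph::class_def_size_estimator_t estimator (pairs.iter ());

unsigned class_def_size = estimator.add_class_def_size (1); // fold class 1 into the running totals
class_def_size = estimator.add_class_def_size (2);          // returns min(format 1, format 2) so far
unsigned coverage_size = estimator.coverage_size ();        // Coverage size for all glyphs added above

estimator.reset ();  // start over, e.g. for the next sub-table after a split point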


@@ -134,20 +134,23 @@ struct ClassDef : public OT::ClassDef
struct class_def_size_estimator_t
{
// TODO(garretrieger): update to support beyond64k coverage/classdef tables.
constexpr static unsigned class_def_format1_base_size = 6;
constexpr static unsigned class_def_format2_base_size = 4;
constexpr static unsigned coverage_base_size = 4;
constexpr static unsigned bytes_per_range = 6;
constexpr static unsigned bytes_per_glyph = 2;
template<typename It>
class_def_size_estimator_t (It glyph_and_class)
- : gids_consecutive (true), num_ranges_per_class (), glyphs_per_class ()
: num_ranges_per_class (), glyphs_per_class ()
{
- unsigned last_gid = (unsigned) -1;
reset();
for (auto p : + glyph_and_class)
{
unsigned gid = p.first;
unsigned klass = p.second;
- if (last_gid != (unsigned) -1 && gid != last_gid + 1)
- gids_consecutive = false;
- last_gid = gid;
hb_set_t* glyphs;
if (glyphs_per_class.has (klass, &glyphs) && glyphs) {
glyphs->add (gid);
@@ -177,28 +180,54 @@ struct class_def_size_estimator_t
}
}
- // Incremental increase in the Coverage and ClassDef table size
- // (worst case) if all glyphs associated with 'klass' were added.
- unsigned incremental_coverage_size (unsigned klass) const
- {
- // Coverage takes 2 bytes per glyph worst case,
- return 2 * glyphs_per_class.get (klass).get_population ();
void reset() {
class_def_1_size = class_def_format1_base_size;
class_def_2_size = class_def_format2_base_size;
included_glyphs.clear();
included_classes.clear();
}
- // Incremental increase in the Coverage and ClassDef table size
- // (worst case) if all glyphs associated with 'klass' were added.
- unsigned incremental_class_def_size (unsigned klass) const
// Compute the size of coverage for all glyphs added via 'add_class_def_size'.
unsigned coverage_size () const
{
- // ClassDef takes 6 bytes per range
- unsigned class_def_2_size = 6 * num_ranges_per_class.get (klass);
- if (gids_consecutive)
- {
- // ClassDef1 takes 2 bytes per glyph, but only can be used
- // when gids are consecutive.
- return hb_min (2 * glyphs_per_class.get (klass).get_population (), class_def_2_size);
unsigned format1_size = coverage_base_size + bytes_per_glyph * included_glyphs.get_population();
unsigned format2_size = coverage_base_size + bytes_per_range * num_glyph_ranges();
return hb_min(format1_size, format2_size);
}
// Compute the new size of the ClassDef table if all glyphs associated with 'klass' were added.
unsigned add_class_def_size (unsigned klass)
{
if (!included_classes.has(klass)) {
hb_set_t* glyphs = nullptr;
if (glyphs_per_class.has(klass, &glyphs)) {
included_glyphs.union_(*glyphs);
}
class_def_1_size = class_def_format1_base_size;
if (!included_glyphs.is_empty()) {
unsigned min_glyph = included_glyphs.get_min();
unsigned max_glyph = included_glyphs.get_max();
class_def_1_size += bytes_per_glyph * (max_glyph - min_glyph + 1);
}
class_def_2_size += bytes_per_range * num_ranges_per_class.get (klass);
included_classes.add(klass);
}
- return class_def_2_size;
return hb_min (class_def_1_size, class_def_2_size);
}
unsigned num_glyph_ranges() const {
hb_codepoint_t start = HB_SET_VALUE_INVALID;
hb_codepoint_t end = HB_SET_VALUE_INVALID;
unsigned count = 0;
while (included_glyphs.next_range (&start, &end)) {
count++;
}
return count;
}
bool in_error ()
@@ -214,9 +243,12 @@ struct class_def_size_estimator_t
}
private:
- bool gids_consecutive;
hb_hashmap_t<unsigned, unsigned> num_ranges_per_class;
hb_hashmap_t<unsigned, hb_set_t> glyphs_per_class;
hb_set_t included_classes;
hb_set_t included_glyphs;
unsigned class_def_1_size;
unsigned class_def_2_size;
};
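
For reference, the size model these constants encode: a ClassDef format 1 table costs class_def_format1_base_size + bytes_per_glyph * (max_gid - min_gid + 1) over all included glyphs, format 2 costs class_def_format2_base_size plus bytes_per_range per range, and Coverage is the cheaper of 2 bytes per glyph or 6 bytes per range on top of its 4-byte base. A small standalone check of that arithmetic (my own sketch, plain C++ with no HarfBuzz dependencies; the numbers are not taken from the commit):

#include <algorithm>
#include <cstdio>

int main ()
{
  // One class covering the consecutive glyphs 6..12: 7 glyphs, 1 range.
  unsigned format1 = 6 + 2 * (12 - 6 + 1);                    // 20 bytes
  unsigned format2 = 4 + 6 * 1;                                // 10 bytes
  unsigned coverage = std::min (4 + 2 * 7, 4 + 6 * 1);         // 10 bytes
  printf ("consecutive: class def %u, coverage %u\n", std::min (format1, format2), coverage);

  // The same class with glyphs 6, 8, 10 and 12: 4 glyphs, 4 ranges.
  unsigned format1_sparse = 6 + 2 * (12 - 6 + 1);              // still 20: format 1 spans min..max
  unsigned format2_sparse = 4 + 6 * 4;                         // 28 bytes
  unsigned coverage_sparse = std::min (4 + 2 * 4, 4 + 6 * 4);  // 12 bytes
  printf ("sparse: class def %u, coverage %u\n", std::min (format1_sparse, format2_sparse), coverage_sparse);
  return 0;
}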


@@ -247,8 +247,8 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
for (unsigned i = 0; i < class1_count; i++)
{
unsigned accumulated_delta = class1_record_size;
- coverage_size += estimator.incremental_coverage_size (i);
- class_def_1_size += estimator.incremental_class_def_size (i);
class_def_1_size = estimator.add_class_def_size (i);
coverage_size = estimator.coverage_size ();
max_coverage_size = hb_max (max_coverage_size, coverage_size);
max_class_def_1_size = hb_max (max_class_def_1_size, class_def_1_size);
@@ -280,8 +280,10 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
split_points.push (i);
// split does not include i, so add the size for i when we reset the size counters.
accumulated = base_size + accumulated_delta;
- coverage_size = 4 + estimator.incremental_coverage_size (i);
- class_def_1_size = 4 + estimator.incremental_class_def_size (i);
estimator.reset();
class_def_1_size = estimator.add_class_def_size(i);
coverage_size = estimator.coverage_size();
visited.clear (); // node sharing isn't allowed between splits.
}
}
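
With the running estimator, the split loop no longer re-seeds its counters with a hard-coded 4-byte base plus a per-class increment; it reads the totals straight from the estimator and, when a split point is chosen, calls reset() and re-adds class i, since the new sub-table starts with that class. A condensed paraphrase of the loop (mine; exceeds_size_limit() stands in for the overflow check that this hunk does not show):

for (unsigned i = 0; i < class1_count; i++)
{
  class_def_1_size = estimator.add_class_def_size (i); // running ClassDef estimate
  coverage_size = estimator.coverage_size ();          // running Coverage estimate

  if (exceeds_size_limit (accumulated, coverage_size, class_def_1_size)) // placeholder condition
  {
    split_points.push (i);
    accumulated = base_size + accumulated_delta;
    estimator.reset ();                                   // the next sub-table starts empty...
    class_def_1_size = estimator.add_class_def_size (i);  // ...except that it contains class i
    coverage_size = estimator.coverage_size ();
  }
}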


@@ -26,27 +26,119 @@
#include "gsubgpos-context.hh"
#include "classdef-graph.hh"
#include "hb-iter.hh"
#include "hb-serialize.hh"
typedef hb_codepoint_pair_t gid_and_class_t;
typedef hb_vector_t<gid_and_class_t> gid_and_class_list_t;
template<typename It>
static unsigned actual_class_def_size(It glyph_and_class) {
char buffer[100];
hb_serialize_context_t serializer(buffer, 100);
OT::ClassDef_serialize (&serializer, glyph_and_class);
serializer.end_serialize ();
assert(!serializer.in_error());
- static bool incremental_size_is (const gid_and_class_list_t& list, unsigned klass,
- unsigned cov_expected, unsigned class_def_expected)
hb_blob_t* blob = serializer.copy_blob();
unsigned size = hb_blob_get_length(blob);
hb_blob_destroy(blob);
return size;
}
static unsigned actual_class_def_size(gid_and_class_list_t consecutive_map, hb_vector_t<unsigned> classes) {
auto filtered_it =
+ consecutive_map.as_sorted_array().iter()
| hb_filter([&] (unsigned c) {
for (unsigned klass : classes) {
if (c == klass) {
return true;
}
}
return false;
}, hb_second);
return actual_class_def_size(+ filtered_it);
}
template<typename It>
static unsigned actual_coverage_size(It glyphs) {
char buffer[100];
hb_serialize_context_t serializer(buffer, 100);
OT::Layout::Common::Coverage_serialize (&serializer, glyphs);
serializer.end_serialize ();
assert(!serializer.in_error());
hb_blob_t* blob = serializer.copy_blob();
unsigned size = hb_blob_get_length(blob);
hb_blob_destroy(blob);
return size;
}
static unsigned actual_coverage_size(gid_and_class_list_t consecutive_map, hb_vector_t<unsigned> classes) {
auto filtered_it =
+ consecutive_map.as_sorted_array().iter()
| hb_filter([&] (unsigned c) {
for (unsigned klass : classes) {
if (c == klass) {
return true;
}
}
return false;
}, hb_second);
return actual_coverage_size(+ filtered_it | hb_map_retains_sorting(hb_first));
}
static bool check_coverage_size(graph::class_def_size_estimator_t& estimator,
const gid_and_class_list_t& map,
hb_vector_t<unsigned> klasses)
{
- graph::class_def_size_estimator_t estimator (list.iter ());
unsigned result = estimator.coverage_size();
unsigned expected = actual_coverage_size(map, klasses);
if (result != expected) {
printf ("FAIL: estimated coverage expected size %u but was %u\n", expected, result);
return false;
}
return true;
}
- unsigned result = estimator.incremental_coverage_size (klass);
- if (result != cov_expected)
- {
- printf ("FAIL: coverage expected size %u but was %u\n", cov_expected, result);
static bool check_add_class_def_size(graph::class_def_size_estimator_t& estimator,
const gid_and_class_list_t& map,
unsigned klass, hb_vector_t<unsigned> klasses)
{
unsigned result = estimator.add_class_def_size(klass);
unsigned expected = actual_class_def_size(map, klasses);
if (result != expected) {
printf ("FAIL: estimated class def expected size %u but was %u\n", expected, result);
return false;
}
- result = estimator.incremental_class_def_size (klass);
- if (result != class_def_expected)
return check_coverage_size(estimator, map, klasses);
}
static bool check_add_class_def_size (const gid_and_class_list_t& list, unsigned klass)
{
graph::class_def_size_estimator_t estimator (list.iter ());
unsigned result = estimator.add_class_def_size (klass);
auto filtered_it =
+ list.as_sorted_array().iter()
| hb_filter([&] (unsigned c) {
return c == klass;
}, hb_second);
unsigned expected = actual_class_def_size(filtered_it);
if (result != expected)
{
printf ("FAIL: class def expected size %u but was %u\n", class_def_expected, result);
printf ("FAIL: class def expected size %u but was %u\n", expected, result);
return false;
}
auto cov_it = + filtered_it | hb_map_retains_sorting(hb_first);
result = estimator.coverage_size ();
expected = actual_coverage_size(cov_it);
if (result != expected)
{
printf ("FAIL: coverage expected size %u but was %u\n", expected, result);
return false;
}
@@ -57,43 +149,45 @@ static void test_class_and_coverage_size_estimates ()
{
gid_and_class_list_t empty = {
};
- assert (incremental_size_is (empty, 0, 0, 0));
- assert (incremental_size_is (empty, 1, 0, 0));
assert (check_add_class_def_size (empty, 0));
assert (check_add_class_def_size (empty, 1));
gid_and_class_list_t class_zero = {
{5, 0},
};
- assert (incremental_size_is (class_zero, 0, 2, 0));
assert (check_add_class_def_size (class_zero, 0));
gid_and_class_list_t consecutive = {
{4, 0},
{5, 0},
{6, 1},
{7, 1},
{8, 2},
{9, 2},
{10, 2},
{11, 2},
};
- assert (incremental_size_is (consecutive, 0, 4, 0));
- assert (incremental_size_is (consecutive, 1, 4, 4));
- assert (incremental_size_is (consecutive, 2, 8, 6));
assert (check_add_class_def_size (consecutive, 0));
assert (check_add_class_def_size (consecutive, 1));
assert (check_add_class_def_size (consecutive, 2));
gid_and_class_list_t non_consecutive = {
{4, 0},
{5, 0},
{6, 0},
{6, 1},
{7, 1},
{8, 1},
{10, 1},
{9, 2},
{10, 2},
{11, 2},
{12, 2},
{13, 2},
};
- assert (incremental_size_is (non_consecutive, 0, 4, 0));
- assert (incremental_size_is (non_consecutive, 1, 4, 6));
- assert (incremental_size_is (non_consecutive, 2, 8, 6));
assert (check_add_class_def_size (non_consecutive, 0));
assert (check_add_class_def_size (non_consecutive, 1));
assert (check_add_class_def_size (non_consecutive, 2));
gid_and_class_list_t multiple_ranges = {
{4, 0},
@@ -108,12 +202,95 @@ static void test_class_and_coverage_size_estimates ()
{12, 1},
{13, 1},
};
- assert (incremental_size_is (multiple_ranges, 0, 4, 0));
- assert (incremental_size_is (multiple_ranges, 1, 2 * 6, 3 * 6));
assert (check_add_class_def_size (multiple_ranges, 0));
assert (check_add_class_def_size (multiple_ranges, 1));
}
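These tests no longer assert hard-coded byte counts; each check serializes a real ClassDef (and Coverage) through the helpers above and compares the estimator's prediction against the resulting blob length. For example, a check in the same style as the assertions below could read (my own illustrative lines, not part of the commit):

gid_and_class_list_t sample = { {1, 1}, {2, 1}, {3, 1} };
graph::class_def_size_estimator_t estimator (sample.iter ());
assert (check_add_class_def_size (estimator, sample, 1, {1})); // compared against actual_class_def_size (sample, {1})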
static void test_running_class_and_coverage_size_estimates () {
// #### With consecutive gids: switches formats ###
gid_and_class_list_t consecutive_map = {
// range 1-4 (f1: 8 bytes), (f2: 6 bytes)
{1, 1},
{2, 1},
{3, 1},
{4, 1},
// (f1: 2 bytes), (f2: 6 bytes)
{5, 2},
// (f1: 14 bytes), (f2: 6 bytes)
{6, 3},
{7, 3},
{8, 3},
{9, 3},
{10, 3},
{11, 3},
{12, 3},
};
graph::class_def_size_estimator_t estimator1(consecutive_map.iter());
assert(check_add_class_def_size(estimator1, consecutive_map, 1, {1}));
assert(check_add_class_def_size(estimator1, consecutive_map, 2, {1, 2}));
assert(check_add_class_def_size(estimator1, consecutive_map, 2, {1, 2})); // check that adding the same class again works
assert(check_add_class_def_size(estimator1, consecutive_map, 3, {1, 2, 3}));
estimator1.reset();
assert(check_add_class_def_size(estimator1, consecutive_map, 2, {2}));
assert(check_add_class_def_size(estimator1, consecutive_map, 3, {2, 3}));
// #### With non-consecutive gids: always uses format 2 ###
gid_and_class_list_t non_consecutive_map = {
// range 1-4 (f1: 8 bytes), (f2: 6 bytes)
{1, 1},
{2, 1},
{3, 1},
{4, 1},
// (f1: 2 bytes), (f2: 12 bytes)
{6, 2},
{8, 2},
// (f1: 14 bytes), (f2: 6 bytes)
{9, 3},
{10, 3},
{11, 3},
{12, 3},
{13, 3},
{14, 3},
{15, 3},
};
graph::class_def_size_estimator_t estimator2(non_consecutive_map.iter());
assert(check_add_class_def_size(estimator2, non_consecutive_map, 1, {1}));
assert(check_add_class_def_size(estimator2, non_consecutive_map, 2, {1, 2}));
assert(check_add_class_def_size(estimator2, non_consecutive_map, 3, {1, 2, 3}));
estimator2.reset();
assert(check_add_class_def_size(estimator2, non_consecutive_map, 2, {2}));
assert(check_add_class_def_size(estimator2, non_consecutive_map, 3, {2, 3}));
}
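To make the format switching concrete (my arithmetic, using the constants from class_def_size_estimator_t, not figures quoted from the commit): after estimator1.reset(), adding class 2 alone (glyph 5) gives format 1 = 6 + 2*1 = 8 bytes versus format 2 = 4 + 6*1 = 10 bytes, so add_class_def_size() reports the format 1 size; folding in class 3 (glyphs 6-12) then gives format 1 = 6 + 2*(12-5+1) = 22 bytes versus format 2 = 4 + 6*2 = 16 bytes, so the estimate switches to format 2.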
static void test_running_class_size_estimates_with_locally_consecutive_glyphs () {
gid_and_class_list_t map = {
{1, 1},
{6, 2},
{7, 3},
};
graph::class_def_size_estimator_t estimator(map.iter());
assert(check_add_class_def_size(estimator, map, 1, {1}));
assert(check_add_class_def_size(estimator, map, 2, {1, 2}));
assert(check_add_class_def_size(estimator, map, 3, {1, 2, 3}));
estimator.reset();
assert(check_add_class_def_size(estimator, map, 2, {2}));
assert(check_add_class_def_size(estimator, map, 3, {2, 3}));
}
int
main (int argc, char **argv)
{
test_class_and_coverage_size_estimates ();
test_running_class_and_coverage_size_estimates ();
test_running_class_size_estimates_with_locally_consecutive_glyphs ();
}


@@ -1986,12 +1986,12 @@ static void test_resolve_with_close_to_limit_pair_pos_2_split ()
void* buffer = malloc (buffer_size);
assert (buffer);
hb_serialize_context_t c (buffer, buffer_size);
- populate_serializer_with_large_pair_pos_2 <1, 1596, 10>(&c, true, false, false);
populate_serializer_with_large_pair_pos_2 <1, 1636, 10>(&c, true, false, false);
void* expected_buffer = malloc (buffer_size);
assert (expected_buffer);
hb_serialize_context_t e (expected_buffer, buffer_size);
- populate_serializer_with_large_pair_pos_2 <2, 798, 10>(&e, true, false, false);
populate_serializer_with_large_pair_pos_2 <2, 818, 10>(&e, true, false, false);
run_resolve_overflow_test ("test_resolve_with_close_to_limit_pair_pos_2_split",
c,