From 878329aefce12cbf3e7800a2c063797a28738d76 Mon Sep 17 00:00:00 2001 From: Maxim Pimenov Date: Thu, 24 Jan 2019 13:48:15 +0300 Subject: [PATCH] [3party] Removed the PagedArray from our bsdiff implementation. We have enough address space even when computing diffs for the largest mwms. --- 3party/bsdiff-courgette/CMakeLists.txt | 1 - 3party/bsdiff-courgette/bsdiff/bsdiff.h | 22 +- .../bsdiff-courgette/bsdiff/bsdiff_search.h | 4 +- .../bsdiff/bsdiff_search_unittest.cc | 17 +- 3party/bsdiff-courgette/bsdiff/paged_array.h | 249 ------------------ .../bsdiff/paged_array_unittest.cc | 248 ----------------- .../bsdiff-courgette/divsufsort/divsufsort.h | 7 - 7 files changed, 24 insertions(+), 524 deletions(-) delete mode 100644 3party/bsdiff-courgette/bsdiff/paged_array.h delete mode 100644 3party/bsdiff-courgette/bsdiff/paged_array_unittest.cc diff --git a/3party/bsdiff-courgette/CMakeLists.txt b/3party/bsdiff-courgette/CMakeLists.txt index 2ee83de38c..e7a87804b8 100644 --- a/3party/bsdiff-courgette/CMakeLists.txt +++ b/3party/bsdiff-courgette/CMakeLists.txt @@ -9,7 +9,6 @@ set( bsdiff/bsdiff.h bsdiff/bsdiff_common.h bsdiff/bsdiff_search.h - bsdiff/paged_array.h divsufsort/divsufsort.cc divsufsort/divsufsort.h divsufsort/divsufsort_private.h diff --git a/3party/bsdiff-courgette/bsdiff/bsdiff.h b/3party/bsdiff-courgette/bsdiff/bsdiff.h index ca74a2b63d..69b6478fed 100644 --- a/3party/bsdiff-courgette/bsdiff/bsdiff.h +++ b/3party/bsdiff-courgette/bsdiff/bsdiff.h @@ -38,6 +38,9 @@ // all routines to use MAPS.ME readers and writers instead // of Courgette streams and files. // --Maxim Pimenov +// 2019-01-24 - Got rid of the paged array. We have enough address space +// for our application of bsdiff. +// --Maxim Pimenov // Changelog for bsdiff_apply: // 2009-03-31 - Change to use Streams. Move CRC code to crc.{h,cc} @@ -89,7 +92,6 @@ #include "3party/bsdiff-courgette/bsdiff/bsdiff_common.h" #include "3party/bsdiff-courgette/bsdiff/bsdiff_search.h" -#include "3party/bsdiff-courgette/bsdiff/paged_array.h" #include "3party/bsdiff-courgette/divsufsort/divsufsort.h" #include "zlib.h" @@ -141,16 +143,20 @@ BSDiffStatus CreateBinaryPatch(OldReader & old_reader, old_source.Read(old_buf.data(), old_buf.size()); const uint8_t * old = old_buf.data(); - courgette::PagedArray I; - - if (!I.Allocate(old_size + 1)) { - LOG(LERROR, ("Could not allocate I[], ", ((old_size + 1) * sizeof(int)), "bytes")); - return MEM_ERROR; + std::vector I; + try + { + I.resize(old_size + 1); + } + catch (...) + { + LOG(LERROR, ("Could not allocate I[], ", ((old_size + 1) * sizeof(int)), "bytes")); + return MEM_ERROR; } base::Timer suf_sort_timer; divsuf::saint_t result = divsuf::divsufsort_include_empty( - old, I.begin(), old_size); + old, I.data(), old_size); LOG(LINFO, ("Done divsufsort", suf_sort_timer.ElapsedSeconds())); if (result != 0) return UNEXPECTED_ERROR; @@ -215,7 +221,7 @@ BSDiffStatus CreateBinaryPatch(OldReader & old_reader, scan += match.size; for (int scsc = scan; scan < new_size; ++scan) { - match = search&>( + match = search( I, old, old_size, newbuf + scan, new_size - scan); for (; scsc < scan + match.size; scsc++) diff --git a/3party/bsdiff-courgette/bsdiff/bsdiff_search.h b/3party/bsdiff-courgette/bsdiff/bsdiff_search.h index 78eee53073..e54687d8c1 100644 --- a/3party/bsdiff-courgette/bsdiff/bsdiff_search.h +++ b/3party/bsdiff-courgette/bsdiff/bsdiff_search.h @@ -71,14 +71,14 @@ inline int matchlen(const unsigned char* buf1, // |pos| a position of best match in |old|. If multiple such positions exist, // |pos| would take an arbitrary one. template -SearchResult search(T I, +SearchResult search(const T & I, const unsigned char* srcbuf, int srcsize, const unsigned char* keybuf, int keysize) { int lo = 0; int hi = srcsize; - while (hi - lo >= 2) { + while (hi - lo > 1) { int mid = (lo + hi) / 2; if (std::lexicographical_compare( srcbuf + I[mid], srcbuf + srcsize, keybuf, keybuf + keysize)) { diff --git a/3party/bsdiff-courgette/bsdiff/bsdiff_search_unittest.cc b/3party/bsdiff-courgette/bsdiff/bsdiff_search_unittest.cc index 46332b09df..e3f5e1210b 100644 --- a/3party/bsdiff-courgette/bsdiff/bsdiff_search_unittest.cc +++ b/3party/bsdiff-courgette/bsdiff/bsdiff_search_unittest.cc @@ -5,9 +5,9 @@ #include "courgette/third_party/bsdiff/bsdiff_search.h" #include +#include #include "base/macros.h" -#include "courgette/third_party/bsdiff/paged_array.h" #include "courgette/third_party/divsufsort/divsufsort.h" #include "testing/gtest/include/gtest/gtest.h" @@ -18,9 +18,8 @@ TEST(BSDiffSearchTest, Search) { const char* str = "the quick brown fox jumps over the lazy dog."; int size = static_cast(::strlen(str)); const unsigned char* buf = reinterpret_cast(str); - courgette::PagedArray I; - ASSERT_TRUE(I.Allocate(size + 1)); - divsuf::divsufsort_include_empty(buf, I.begin(), size); + std::vector I(size + 1); + divsuf::divsufsort_include_empty(buf, I.data(), size); // Specific queries. const struct { @@ -65,7 +64,7 @@ TEST(BSDiffSearchTest, Search) { // Perform the search. bsdiff::SearchResult match = - bsdiff::search&>( + bsdiff::search( I, buf, size, query_buf, query_size); // Check basic properties and match with expected values. @@ -101,9 +100,9 @@ TEST(BSDiffSearchTest, SearchExact) { int size = static_cast(::strlen(test_cases[idx])); const unsigned char* buf = reinterpret_cast(test_cases[idx]); - courgette::PagedArray I; - ASSERT_TRUE(I.Allocate(size + 1)); - divsuf::divsufsort_include_empty(buf, I.begin(), size); + + std::vector I(size + 1); + divsuf::divsufsort_include_empty(buf, I.data(), size); // Test exact matches for every non-empty substring. for (int lo = 0; lo < size; ++lo) { @@ -114,7 +113,7 @@ TEST(BSDiffSearchTest, SearchExact) { const unsigned char* query_buf = reinterpret_cast(query.c_str()); bsdiff::SearchResult match = - bsdiff::search&>( + bsdiff::search( I, buf, size, query_buf, query_size); EXPECT_EQ(query_size, match.size); diff --git a/3party/bsdiff-courgette/bsdiff/paged_array.h b/3party/bsdiff-courgette/bsdiff/paged_array.h deleted file mode 100644 index 5574584824..0000000000 --- a/3party/bsdiff-courgette/bsdiff/paged_array.h +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// PagedArray implements an array stored using many fixed-size pages. -// -// PagedArray is a work-around to allow large arrays to be allocated when there -// is too much address space fragmentation for allocating the large arrays as -// contiguous arrays. - -#ifndef COURGETTE_THIRD_PARTY_BSDIFF_PAGED_ARRAY_H_ -#define COURGETTE_THIRD_PARTY_BSDIFF_PAGED_ARRAY_H_ - -#include -#include -#include -#include - -#include "base/macros.hpp" - -namespace courgette { - -// Page size of 2^18 * sizeof(T) is 1MB for T = int32_t. -constexpr int kPagedArrayDefaultPageLogSize = 18; - -template -class PagedArray; - -// A random access iterator with pointer-like semantics, for PagedArray. -template -class PagedArray_iterator { - public: - using ThisType = PagedArray_iterator; - using difference_type = ptrdiff_t; - using value_type = typename std::remove_const::type; - using reference = T&; - using pointer = T*; - using iterator_category = std::random_access_iterator_tag; - - PagedArray_iterator() : array_(nullptr), index_(0U) {} - PagedArray_iterator(ContainerType* array, size_t index) - : array_(array), index_(index) {} - - template - PagedArray_iterator(const PagedArray_iterator& it) - : array_(it.array_), index_(it.index_) {} - - PagedArray_iterator(std::nullptr_t) : array_(nullptr), index_(0) {} - - ~PagedArray_iterator() = default; - - reference operator*() const { return (*array_)[index_]; } - reference operator[](size_t idx) const { return (*array_)[index_ + idx]; } - pointer operator->() const { return &(*array_)[index_]; } - - ThisType& operator=(std::nullptr_t) { - array_ = nullptr; - index_ = 0; - return *this; - } - - ThisType& operator++() { - ++index_; - return *this; - } - ThisType& operator--() { - --index_; - return *this; - } - - ThisType operator++(int) { return ThisType(array_, index_++); } - ThisType operator--(int) { return ThisType(array_, index_--); } - - ThisType& operator+=(difference_type delta) { - index_ += delta; - return *this; - } - ThisType& operator-=(difference_type delta) { - index_ -= delta; - return *this; - } - - ThisType operator+(difference_type offset) const { - return ThisType(array_, index_ + offset); - } - ThisType operator-(difference_type offset) const { - return ThisType(array_, index_ - offset); - } - - template - bool operator==(const PagedArray_iterator& it) const { - return index_ == it.index_ && array_ == it.array_; - } - bool operator==(std::nullptr_t) const { - return index_ == 0 && array_ == nullptr; - } - template - bool operator!=(const PagedArray_iterator& it) const { - return !(*this == it); - } - - template - bool operator<(const PagedArray_iterator& it) const { -#ifdef DEBUG - // For performance, skip the |array_| check in Release builds. - if (array_ != it.array_) - return false; -#endif - return index_ < it.index_; - } - template - bool operator<=(const PagedArray_iterator& it) const { -#ifdef DEBUG - // For performance, skip the |array_| check in Release builds. - if (array_ != it.array_) - return false; -#endif - return index_ <= it.index_; - } - template - bool operator>(const PagedArray_iterator& it) const { -#ifdef DEBUG - // For performance, skip the |array_| check in Release builds. - if (array_ != it.array_) - return false; -#endif - return index_ > it.index_; - } - template - bool operator>=(const PagedArray_iterator& it) const { -#ifdef DEBUG - // For performance, skip the |array_| check in Release builds. - if (array_ != it.array_) - return false; -#endif - return index_ >= it.index_; - } - - template - difference_type operator-( - const PagedArray_iterator& it) const { - return index_ - it.index_; - } - - private: - template - friend class PagedArray_iterator; - - ContainerType* array_; - size_t index_; -}; - -// PagedArray implements an array stored using many fixed-size pages. -template -class PagedArray { - enum { - // Page size in elements. - kLogPageSize = LOG_PAGE_SIZE, - kPageSize = 1 << LOG_PAGE_SIZE - }; - - public: - using ThisType = PagedArray; - using const_iterator = PagedArray_iterator; - using iterator = PagedArray_iterator; - - PagedArray() = default; - ~PagedArray() { clear(); } - - iterator begin() { return iterator(this, 0); } - iterator end() { return iterator(this, size_); } - const_iterator begin() const { return const_iterator(this, 0); } - const_iterator end() const { return const_iterator(this, size_); } - - T& operator[](size_t i) { - size_t page = i >> kLogPageSize; - size_t offset = i & (kPageSize - 1); - - // *NOTE* CHECK() would significaltly slow down bsdiff_create - // even in optimized Release build (about 1.4x). - ASSERT_LESS(page, page_count_, ()); - - return pages_[page][offset]; - } - - const T& operator[](size_t i) const { - // Duplicating code here for performance. If we use common code for this - // then bsdiff_create slows down by ~5% in optimized Release build. - size_t page = i >> kLogPageSize; - size_t offset = i & (kPageSize - 1); - - // *NOTE* CHECK() would significaltly slow down bsdiff_create - // even in optimized Release build (about 1.4x). - ASSERT_LESS(page, page_count_, ()); - - return pages_[page][offset]; - } - - // Allocates storage for |size| elements. Returns true on success and false if - // allocation fails. - bool Allocate(size_t size) { - clear(); - size_ = size; - size_t pages_needed = (size_ + kPageSize - 1) >> kLogPageSize; - if (!UncheckedMalloc(sizeof(T*) * pages_needed, - reinterpret_cast(&pages_))) { - return false; - } - - for (page_count_ = 0; page_count_ < pages_needed; ++page_count_) { - T* block = nullptr; - if (!UncheckedMalloc(sizeof(T) * kPageSize, - reinterpret_cast(&block))) { - clear(); - return false; - } - pages_[page_count_] = block; - } - return true; - } - - // Releases all storage. May be called more than once. - void clear() { - if (pages_ != nullptr) { - while (page_count_ != 0) { - --page_count_; - free(pages_[page_count_]); - } - free(pages_); - pages_ = nullptr; - } - } - - private: - T** pages_ = nullptr; - size_t size_ = 0U; - size_t page_count_ = 0U; - - bool UncheckedMalloc(size_t size, void** result) { - *result = malloc(size); - return *result != NULL; - } - - DISALLOW_COPY_AND_MOVE(PagedArray); -}; - -} // namespace courgette - -#endif // COURGETTE_THIRD_PARTY_BSDIFF_PAGED_ARRAY_H_ diff --git a/3party/bsdiff-courgette/bsdiff/paged_array_unittest.cc b/3party/bsdiff-courgette/bsdiff/paged_array_unittest.cc deleted file mode 100644 index 58ae0933a5..0000000000 --- a/3party/bsdiff-courgette/bsdiff/paged_array_unittest.cc +++ /dev/null @@ -1,248 +0,0 @@ -// Copyright (c) 2010 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "courgette/third_party/bsdiff/paged_array.h" - -#include - -#include -#include -#include -#include - -#include "testing/gtest/include/gtest/gtest.h" - -namespace { - -#if !defined(ADDRESS_SANITIZER) || !defined(OS_WIN) -// Total allocation of 4GB will fail in 32 bit programs if allocations are -// leaked. -const int kIterations = 20; -const int kSizeBig = 200 * 1024 * 1024 / sizeof(int); // 200MB -#endif - -const size_t kLogBlockSizeSmall = 10; -const size_t kBlockSizeSmall = 1 << kLogBlockSizeSmall; -const size_t kSizeList[] = {1, - 16, - 123, - kBlockSizeSmall, - kBlockSizeSmall + 1, - 123 * kBlockSizeSmall + 567}; - -const size_t kIteratorTestDataSize = 7; -const int32_t kIteratorTestData[kIteratorTestDataSize] = {2, 3, 5, 7, - 11, 13, 17}; - -} // namespace - -class PagedArrayTest : public testing::Test { - public: - template - void TestIteratorBasic(Iterator begin, Iterator end) { - EXPECT_FALSE(begin == nullptr); - EXPECT_FALSE(end == nullptr); - - // Test TestPagedArray::iterator data read. - Iterator it = begin; - EXPECT_EQ(2, *it); - EXPECT_EQ(3, *(it + 1)); - EXPECT_EQ(5, it[2]); // Pseudo-pointer. - EXPECT_EQ(7, (it + 1)[2]); - EXPECT_EQ(3, *(++it)); - EXPECT_EQ(3, *it); - EXPECT_EQ(3, *(it++)); - EXPECT_EQ(5, *it); - EXPECT_EQ(11, *(it += 2)); - EXPECT_EQ(11, *it); - EXPECT_FALSE(it == begin); - EXPECT_TRUE(it != begin); - EXPECT_TRUE(it == (begin + 4)); - EXPECT_FALSE(it != (begin + 4)); - EXPECT_EQ(static_cast(kIteratorTestDataSize), end - begin); - it = begin + 5; - EXPECT_EQ(begin, it - 5); - EXPECT_EQ(13, *it); - EXPECT_EQ(11, *(--it)); - EXPECT_EQ(11, *it); - EXPECT_EQ(11, *(it--)); - EXPECT_EQ(7, *it); - EXPECT_EQ(3, *(it -= 2)); - EXPECT_EQ(3, *it); - - // Test binary operators for every pair of iterator. - EXPECT_TRUE(begin <= end); - EXPECT_TRUE(begin < end); - for (size_t i = 0; i < kIteratorTestDataSize; ++i) { - Iterator it1 = begin + i; - EXPECT_TRUE(it1 == it1); - EXPECT_FALSE(it1 != it1); - EXPECT_TRUE(begin <= it1); - EXPECT_FALSE(begin > it1); - EXPECT_TRUE(it1 >= begin); - EXPECT_FALSE(it1 < begin); - EXPECT_EQ(begin, it1 - i); - EXPECT_EQ(end, it1 + (kIteratorTestDataSize - i)); - EXPECT_EQ(kIteratorTestData[i], *it1); - EXPECT_EQ(kIteratorTestData[i], begin[i]); - for (size_t j = 0; i + j < kIteratorTestDataSize; ++j) { - Iterator it2 = it1 + j; - EXPECT_TRUE(it1 <= it2); - EXPECT_FALSE(it1 > it2); - EXPECT_TRUE(it2 >= it1); - EXPECT_FALSE(it2 < it1); - if (j > 0) { - EXPECT_TRUE(it1 < it2); - EXPECT_FALSE(it1 >= it2); - EXPECT_TRUE(it2 > it1); - EXPECT_FALSE(it2 <= it1); - EXPECT_FALSE(it1 == it2); - EXPECT_TRUE(it1 != it2); - } - EXPECT_EQ(kIteratorTestData[i + j], it1[j]); // Pseudo-pointer. - EXPECT_EQ(kIteratorTestData[i + j], *it2); - EXPECT_EQ(static_cast(j), it2 - it1); - EXPECT_EQ(it1, it2 - j); - } - EXPECT_TRUE(it1 < end); - EXPECT_FALSE(it1 >= end); - EXPECT_TRUE(it1 <= end); - EXPECT_FALSE(it1 > end); - EXPECT_TRUE(end > it1); - EXPECT_FALSE(end <= it1); - EXPECT_TRUE(end >= it1); - EXPECT_FALSE(end < it1); - EXPECT_TRUE(it1 != end); - EXPECT_FALSE(it1 == end); - } - - // Test initialize with null. - Iterator it_dummy; - it_dummy = nullptr; - EXPECT_TRUE(it_dummy == nullptr); - - // Test copy constructor. - Iterator begin_copy(begin); - EXPECT_TRUE(begin_copy == begin); - - // Test STL read-only usage. - std::vector::value_type> v(begin, - end); - EXPECT_TRUE(std::equal(begin, end, v.begin())); - EXPECT_TRUE(std::equal(v.begin(), v.end(), begin)); - } -}; - -// AddressSanitizer on Windows adds additional memory overhead, which -// causes these tests to go OOM and fail. -#if !defined(ADDRESS_SANITIZER) || !defined(OS_WIN) -TEST_F(PagedArrayTest, TestManyAllocationsDestructorFree) { - for (int i = 0; i < kIterations; ++i) { - courgette::PagedArray a; - EXPECT_TRUE(a.Allocate(kSizeBig)); - } -} - -TEST_F(PagedArrayTest, TestManyAllocationsManualFree) { - courgette::PagedArray a; - for (int i = 0; i < kIterations; ++i) { - EXPECT_TRUE(a.Allocate(kSizeBig)); - a.clear(); - } -} -#endif - -TEST_F(PagedArrayTest, TestAccess) { - for (size_t size : kSizeList) { - courgette::PagedArray a; - EXPECT_TRUE(a.Allocate(size)); - for (size_t i = 0; i < size; ++i) - a[i] = i; - for (size_t i = 0; i < size; ++i) - EXPECT_EQ(static_cast(i), a[i]); - } -} - -TEST_F(PagedArrayTest, TestIterator) { - using TestPagedArray = courgette::PagedArray; // Page size = 2. - - TestPagedArray::const_iterator uninit_const_it; - EXPECT_TRUE(uninit_const_it == nullptr); - - TestPagedArray::iterator uninit_it; - EXPECT_TRUE(uninit_it == nullptr); - - TestPagedArray a; - EXPECT_TRUE(a.Allocate(kIteratorTestDataSize)); - std::copy(kIteratorTestData, kIteratorTestData + kIteratorTestDataSize, - a.begin()); - const TestPagedArray& a_const = a; - - // Test TestPagedArray::const_iterator. - TestIteratorBasic(a_const.begin(), a_const.end()); - - // Test TestPagedArray::iterator. - TestIteratorBasic(a.begin(), a.end()); - - // Test equality of const and non-const. - EXPECT_TRUE(a.begin() == a_const.begin()); - EXPECT_TRUE(a_const.begin() == a.begin()); - - // Test casting from non-const to const. - TestPagedArray::iterator non_const_it = a.begin(); - EXPECT_TRUE(non_const_it == a.begin()); - TestPagedArray::const_iterator const_it = non_const_it; - EXPECT_TRUE(const_it == non_const_it); - // The following should and will emit compile error: - // non_const_it = a_const.begin(); - - // Test copy constructor from non-const to const. - TestPagedArray::iterator const_it2(a.begin()); - EXPECT_TRUE(const_it2 == a.begin()); - - // Test pointer distance from non-const to const. - EXPECT_EQ(static_cast(kIteratorTestDataSize), - a.end() - a_const.begin()); - EXPECT_EQ(static_cast(kIteratorTestDataSize), - a_const.end() - a.begin()); - - // Test operator->(). - struct TestStruct { - int32_t value = 0; - }; - using TestStructPagedArray = courgette::PagedArray; - TestStructPagedArray b; - b.Allocate(3); - b[0].value = 100; - b[1].value = 200; - b[2].value = 300; - const TestStructPagedArray& b_const = b; - - EXPECT_EQ(100, b.begin()->value); - EXPECT_EQ(100, b_const.begin()->value); - EXPECT_EQ(200, (b.begin() + 1)->value); - EXPECT_EQ(200, (b_const.begin() + 1)->value); - (b.begin() + 2)->value *= -1; - EXPECT_EQ(-300, (b.begin() + 2)->value); - EXPECT_EQ(-300, (b_const.begin() + 2)->value); -} - -// Test generic read-write of itrators by sorting pseudo-random numbers. -TEST_F(PagedArrayTest, TestSort) { - courgette::PagedArray a; - std::minstd_rand pseudo_rand_gen; // Deterministic, using defaults. - for (size_t size : kSizeList) { - std::vector v(size); - courgette::PagedArray a; - EXPECT_TRUE(a.Allocate(size)); - for (size_t i = 0; i < size; ++i) { - v[i] = pseudo_rand_gen(); - a[i] = v[i]; - } - std::sort(v.begin(), v.end()); - std::sort(a.begin(), a.end()); - for (size_t i = 0; i < size; ++i) - EXPECT_EQ(v[i], a[i]); - } -} diff --git a/3party/bsdiff-courgette/divsufsort/divsufsort.h b/3party/bsdiff-courgette/divsufsort/divsufsort.h index 68a585992f..ed040b6715 100644 --- a/3party/bsdiff-courgette/divsufsort/divsufsort.h +++ b/3party/bsdiff-courgette/divsufsort/divsufsort.h @@ -30,8 +30,6 @@ #include -#include "3party/bsdiff-courgette/bsdiff/paged_array.h" - namespace divsuf { /*- Datatypes -*/ @@ -39,13 +37,8 @@ typedef int32_t saint_t; typedef int32_t saidx_t; typedef uint8_t sauchar_t; -#ifdef DIVSUFSORT_NO_PAGED_ARRAY typedef saidx_t* saidx_it; typedef const saidx_t* const_saidx_it; -#else -typedef courgette::PagedArray::iterator saidx_it; -typedef courgette::PagedArray::const_iterator const_saidx_it; -#endif /*- Prototypes -*/