forked from organicmaps/organicmaps
Replace bsdiff-courgette libs with xor based diffs
Signed-off-by: Meenbeese <meenbeese@tutanota.com>
This commit is contained in:
parent
e00f33a076
commit
351ef9698a
32 changed files with 189 additions and 3354 deletions
|
@ -55,7 +55,6 @@ if (NOT WITH_SYSTEM_PROVIDED_3PARTY)
|
|||
endif()
|
||||
|
||||
add_subdirectory(agg)
|
||||
add_subdirectory(bsdiff-courgette)
|
||||
add_subdirectory(minizip)
|
||||
add_subdirectory(open-location-code)
|
||||
add_subdirectory(opening_hours)
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
# Build script for the vendored bsdiff/divsufsort sources.
# The project name doubles as the library target name (see add_library below).
project(bsdiff)
|
||||
|
||||
# Sources of the library: the bsdiff headers plus the divsufsort
# implementation files and headers.
set(SRC
|
||||
bsdiff/bsdiff.h
|
||||
bsdiff/bsdiff_common.h
|
||||
bsdiff/bsdiff_search.h
|
||||
divsufsort/divsufsort.cc
|
||||
divsufsort/divsufsort.h
|
||||
divsufsort/divsufsort_private.h
|
||||
divsufsort/sssort.cc
|
||||
divsufsort/trsort.cc
|
||||
)
|
||||
|
||||
# Library target named after the project, built from the list above.
add_library(${PROJECT_NAME} ${SRC})
|
||||
|
||||
# Registers the bsdiff tests through the project-provided helper
# omim_add_test_subdirectory (defined outside this file).
omim_add_test_subdirectory(bsdiff/bsdiff_tests)
|
|
@ -1,121 +0,0 @@
|
|||
BSD Protection License
|
||||
February 2002
|
||||
|
||||
Preamble
|
||||
--------
|
||||
|
||||
The Berkeley Software Distribution ("BSD") license has proven very effective
|
||||
over the years at allowing for a wide spread of work throughout both
|
||||
commercial and non-commercial products. For programmers whose primary
|
||||
intention is to improve the general quality of available software, it is
|
||||
arguable that there is no better license than the BSD license, as it
|
||||
permits improvements to be used wherever they will help, without idealogical
|
||||
or metallic constraint.
|
||||
|
||||
This is of particular value to those who produce reference implementations
|
||||
of proposed standards: The case of TCP/IP clearly illustrates that freely
|
||||
and universally available implementations leads the rapid acceptance of
|
||||
standards -- often even being used instead of a de jure standard (eg, OSI
|
||||
network models).
|
||||
|
||||
With the rapid proliferation of software licensed under the GNU General
|
||||
Public License, however, the continued success of this role is called into
|
||||
question. Given that the inclusion of a few lines of "GPL-tainted" work
|
||||
into a larger body of work will result in restricted distribution -- and
|
||||
given that further work will likely build upon the "tainted" portions,
|
||||
making them difficult to remove at a future date -- there are inevitable
|
||||
circumstances where authors would, in order to protect their goal of
|
||||
providing for the widespread usage of their work, wish to guard against
|
||||
such "GPL-taint".
|
||||
|
||||
In addition, one can imagine that companies which operate by producing and
|
||||
selling (possibly closed-source) code would wish to protect themselves
|
||||
against the rise of a GPL-licensed competitor. While under existing
|
||||
licenses this would mean not releasing their code under any form of open
|
||||
license, if a license existed under which they could incorporate any
|
||||
improvements back into their own (commercial) products then they might be
|
||||
far more willing to provide for non-closed distribution.
|
||||
|
||||
For the above reasons, we put forth this "BSD Protection License": A
|
||||
license designed to retain the freedom granted by the BSD license to use
|
||||
licensed works in a wide variety of settings, both non-commercial and
|
||||
commercial, while protecting the work from having future contributors
|
||||
restrict that freedom.
|
||||
|
||||
The precise terms and conditions for copying, distribution, and
|
||||
modification follow.
|
||||
|
||||
BSD PROTECTION LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION, AND MODIFICATION
|
||||
----------------------------------------------------------------
|
||||
|
||||
0. Definitions.
|
||||
a) "Program", below, refers to any program or work distributed under
|
||||
the terms of this license.
|
||||
b) A "work based on the Program", below, refers to either the Program
|
||||
or any derivative work under copyright law.
|
||||
c) "Modification", below, refers to the act of creating derivative works.
|
||||
d) "You", below, refers to each licensee.
|
||||
|
||||
1. Scope.
|
||||
This license governs the copying, distribution, and modification of the
|
||||
Program. Other activities are outside the scope of this license; The
|
||||
act of running the Program is not restricted, and the output from the
|
||||
Program is covered only if its contents constitute a work based on the
|
||||
Program.
|
||||
|
||||
2. Verbatim copies.
|
||||
You may copy and distribute verbatim copies of the Program as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice; keep
|
||||
intact all the notices that refer to this License and to the absence of
|
||||
any warranty; and give any other recipients of the Program a copy of this
|
||||
License along with the Program.
|
||||
|
||||
3. Modification and redistribution under closed license.
|
||||
You may modify your copy or copies of the Program, and distribute
|
||||
the resulting derivative works, provided that you meet the
|
||||
following conditions:
|
||||
a) The copyright notice and disclaimer on the Program must be reproduced
|
||||
and included in the source code, documentation, and/or other materials
|
||||
provided in a manner in which such notices are normally distributed.
|
||||
b) The derivative work must be clearly identified as such, in order that
|
||||
it may not be confused with the original work.
|
||||
c) The license under which the derivative work is distributed must
|
||||
expressly prohibit the distribution of further derivative works.
|
||||
|
||||
4. Modification and redistribution under open license.
|
||||
You may modify your copy or copies of the Program, and distribute
|
||||
the resulting derivative works, provided that you meet the
|
||||
following conditions:
|
||||
a) The copyright notice and disclaimer on the Program must be reproduced
|
||||
and included in the source code, documentation, and/or other materials
|
||||
provided in a manner in which such notices are normally distributed.
|
||||
b) You must clearly indicate the nature and date of any changes made
|
||||
to the Program. The full details need not necessarily be included in
|
||||
the individual modified files, provided that each modified file is
|
||||
clearly marked as such and instructions are included on where the
|
||||
full details of the modifications may be found.
|
||||
c) You must cause any work that you distribute or publish, that in whole
|
||||
or in part contains or is derived from the Program or any part
|
||||
thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
5. Implied acceptance.
|
||||
You may not copy or distribute the Program or any derivative works except
|
||||
as expressly provided under this license. Consequently, any such action
|
||||
will be taken as implied acceptance of the terms of this license.
|
||||
|
||||
6. NO WARRANTY.
|
||||
THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
THE COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||
TORT, EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
|
@ -1,31 +0,0 @@
|
|||
Name: bsdiff
|
||||
URL: http://www.daemonology.net/bsdiff/
|
||||
License: BSD
|
||||
License File: LICENCE
|
||||
|
||||
Description:
|
||||
This directory contains an extensively modified version of Colin Percival's
|
||||
bsdiff, available in its original form from:
|
||||
|
||||
http://www.daemonology.net/bsdiff/
|
||||
|
||||
The basic principles of operation are best understood by reading Colin's
|
||||
unpublished paper:
|
||||
|
||||
Colin Percival, Naive differences of executable code, http://www.daemonology.net/bsdiff/, 2003.
|
||||
|
||||
The copy in this directory is so extensively modified that the binary format is
|
||||
incompatible with the original and it cannot be compiled outside the Chromium
|
||||
source tree or the Courgette project.
|
||||
|
||||
List of changes made to original code:
|
||||
- Wrapped functions in 'bsdiff' namespace.
|
||||
- Renamed .c files to .cc files.
|
||||
- Added bsdiff.h and bsdiff_search.h header files.
|
||||
- Changed the code to use streams.h from Courgette.
|
||||
- Changed the encoding of numbers to use the 'varint' encoding.
|
||||
- Reformatted code to be closer to Google coding standards.
|
||||
- Renamed variables.
|
||||
- Added comments.
|
||||
- Fixed search() comparison issue: http://crbug.com/620867.
|
||||
- Replaced QSufSort with modified version of libdivsufsort.
|
|
@ -1,511 +0,0 @@
|
|||
// Copyright 2003, 2004 Colin Percival
|
||||
// All rights reserved
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted providing that the following conditions
|
||||
// are met:
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
||||
// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// For the terms under which this work may be distributed, please see
|
||||
// the adjoining file "LICENSE".
|
||||
//
|
||||
// Changelog:
|
||||
// 2005-04-26 - Define the header as a C structure, add a CRC32 checksum to
|
||||
// the header, and make all the types 32-bit.
|
||||
// --Benjamin Smedberg <benjamin@smedbergs.us>
|
||||
// 2009-03-31 - Change to use Streams. Move CRC code to crc.{h,cc}
|
||||
// Changed status to an enum, removed unused status codes.
|
||||
// --Stephen Adams <sra@chromium.org>
|
||||
// 2013-04-10 - Added wrapper to apply a patch directly to files.
|
||||
// --Joshua Pawlicki <waffles@chromium.org>
|
||||
// 2017-08-14 - Moved "apply" and "create" to the header file, rewrote
|
||||
// all routines to use OMaps readers and writers instead
|
||||
// of Courgette streams and files.
|
||||
// --Maxim Pimenov <m@maps.me>
|
||||
// 2019-01-24 - Got rid of the paged array. We have enough address space
|
||||
// for our application of bsdiff.
|
||||
// --Maxim Pimenov <m@maps.me>
|
||||
|
||||
// Changelog for bsdiff_apply:
|
||||
// 2009-03-31 - Change to use Streams. Move CRC code to crc.{h,cc}
|
||||
// --Stephen Adams <sra@chromium.org>
|
||||
// 2013-04-10 - Add wrapper method to apply a patch to files directly.
|
||||
// --Joshua Pawlicki <waffles@chromium.org>
|
||||
|
||||
// Changelog for bsdiff_create:
|
||||
// 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
|
||||
// values throughout.
|
||||
// --Benjamin Smedberg <benjamin@smedbergs.us>
|
||||
// 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
|
||||
// provided by libbz2.
|
||||
// --Darin Fisher <darin@meer.net>
|
||||
// 2007-11-14 - Changed to use Crc from Lzma library instead of Bzip library
|
||||
// --Rahul Kuchhal
|
||||
// 2009-03-31 - Change to use Streams. Added lots of comments.
|
||||
// --Stephen Adams <sra@chromium.org>
|
||||
// 2010-05-26 - Use a paged array for V and I. The address space may be too
|
||||
// fragmented for these big arrays to be contiguous.
|
||||
// --Stephen Adams <sra@chromium.org>
|
||||
// 2015-08-03 - Extract qsufsort portion to a separate file.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
// 2015-08-12 - Interface change to search().
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
// 2016-07-29 - Replacing qsufsort with divsufsort.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
|
||||
// Copyright 2016 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef COURGETTE_THIRD_PARTY_BSDIFF_BSDIFF_H_
|
||||
#define COURGETTE_THIRD_PARTY_BSDIFF_BSDIFF_H_
|
||||
|
||||
#include "coding/varint.hpp"
|
||||
#include "coding/write_to_sink.hpp"
|
||||
#include "coding/writer.hpp"
|
||||
|
||||
#include "base/cancellable.hpp"
|
||||
#include "base/checked_cast.hpp"
|
||||
#include "base/logging.hpp"
|
||||
#include "base/string_utils.hpp"
|
||||
#include "base/timer.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#include "3party/bsdiff-courgette/bsdiff/bsdiff_common.h"
|
||||
#include "3party/bsdiff-courgette/bsdiff/bsdiff_search.h"
|
||||
#include "3party/bsdiff-courgette/divsufsort/divsufsort.h"
|
||||
|
||||
#include "zlib.h"
|
||||
|
||||
namespace bsdiff {
|
||||
// A MemWriter with its own buffer.
|
||||
struct MemStream {
|
||||
MemStream(): m_writer(m_buf) {}
|
||||
|
||||
MemWriter<std::vector<uint8_t>> & GetWriter() { return m_writer; }
|
||||
size_t Size() const { return m_buf.size(); }
|
||||
std::vector<uint8_t> const & GetBuf() const { return m_buf; }
|
||||
|
||||
private:
|
||||
std::vector<uint8_t> m_buf;
|
||||
MemWriter<std::vector<uint8_t>> m_writer;
|
||||
};
|
||||
|
||||
inline uint32_t CalculateCrc(const uint8_t* buffer, size_t size) {
|
||||
// Calculate Crc by calling CRC method in zlib
|
||||
const auto size32 = base::checked_cast<uint32_t>(size);
|
||||
const uint32_t crc = base::checked_cast<uint32_t>(crc32(0, buffer, size32));
|
||||
return ~crc;
|
||||
}
|
||||
|
||||
// Creates a binary patch.
|
||||
template <typename OldReader, typename NewReader, typename PatchSink>
|
||||
BSDiffStatus CreateBinaryPatch(OldReader & old_reader,
|
||||
NewReader & new_reader,
|
||||
PatchSink & patch_sink) {
|
||||
ReaderSource<OldReader> old_source(old_reader);
|
||||
ReaderSource<NewReader> new_source(new_reader);
|
||||
|
||||
auto initial_patch_sink_pos = patch_sink.Pos();
|
||||
|
||||
base::Timer bsdiff_timer;
|
||||
|
||||
CHECK_GREATER_OR_EQUAL(kNumStreams, 6, ());
|
||||
std::array<MemStream, kNumStreams> mem_streams;
|
||||
auto & control_stream_copy_counts = mem_streams[0];
|
||||
auto & control_stream_extra_counts = mem_streams[1];
|
||||
auto & control_stream_seeks = mem_streams[2];
|
||||
auto & diff_skips = mem_streams[3];
|
||||
auto & diff_bytes = mem_streams[4];
|
||||
auto & extra_bytes = mem_streams[5];
|
||||
|
||||
const int old_size = static_cast<int>(old_source.Size());
|
||||
std::vector<uint8_t> old_buf(old_size);
|
||||
old_source.Read(old_buf.data(), old_buf.size());
|
||||
const uint8_t * old = old_buf.data();
|
||||
|
||||
std::vector<divsuf::saidx_t> suffix_array(old_size + 1);
|
||||
base::Timer suf_sort_timer;
|
||||
divsuf::saint_t result = divsuf::divsufsort_include_empty(old, suffix_array.data(), old_size);
|
||||
LOG(LINFO, ("Done divsufsort", suf_sort_timer.ElapsedSeconds()));
|
||||
if (result != 0)
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
const int new_size = static_cast<int>(new_source.Size());
|
||||
std::vector<uint8_t> new_buf(new_size);
|
||||
new_source.Read(new_buf.data(), new_buf.size());
|
||||
const uint8_t * newbuf = new_buf.data();
|
||||
|
||||
int control_length = 0;
|
||||
int diff_bytes_length = 0;
|
||||
int diff_bytes_nonzero = 0;
|
||||
int extra_bytes_length = 0;
|
||||
|
||||
// The patch format is a sequence of triples <copy,extra,seek> where 'copy' is
|
||||
// the number of bytes to copy from the old file (possibly with mistakes),
|
||||
// 'extra' is the number of bytes to copy from a stream of fresh bytes, and
|
||||
// 'seek' is an offset to move to the position to copy for the next triple.
|
||||
//
|
||||
// The invariant at the top of this loop is that we are committed to emitting
|
||||
// a triple for the part of |newbuf| surrounding a 'seed' match near
|
||||
// |lastscan|. We are searching for a second match that will be the 'seed' of
|
||||
// the next triple. As we scan through |newbuf|, one of four things can
|
||||
// happen at the current position |scan|:
|
||||
//
|
||||
// 1. We find a nice match that appears to be consistent with the current
|
||||
// seed. Continue scanning. It is likely that this match will become
|
||||
// part of the 'copy'.
|
||||
//
|
||||
// 2. We find match which does much better than extending the current seed
|
||||
// old match. Emit a triple for the current seed and take this match as
|
||||
// the new seed for a new triple. By 'much better' we remove 8 mismatched
|
||||
// bytes by taking the new seed.
|
||||
//
|
||||
// 3. There is not a good match. Continue scanning. These bytes will likely
|
||||
// become part of the 'extra'.
|
||||
//
|
||||
// 4. There is no match because we reached the end of the input, |newbuf|.
|
||||
|
||||
// This is how the loop advances through the bytes of |newbuf|:
|
||||
//
|
||||
// ...012345678901234567890123456789...
|
||||
// ssssssssss Seed at |lastscan|
|
||||
// xxyyyxxyyxy |scan| forward, cases (3)(x) & (1)(y)
|
||||
// mmmmmmmm New match will start new seed case (2).
|
||||
// fffffffffffffff |lenf| = scan forward from |lastscan|
|
||||
// bbbb |lenb| = scan back from new seed |scan|.
|
||||
// ddddddddddddddd Emit diff bytes for the 'copy'.
|
||||
// xx Emit extra bytes.
|
||||
// ssssssssssss |lastscan = scan - lenb| is new seed.
|
||||
// x Cases (1) and (3) ....
|
||||
|
||||
int lastscan = 0, lastpos = 0, lastoffset = 0;
|
||||
int scan = 0;
|
||||
SearchResult match(0, 0);
|
||||
uint32_t pending_diff_zeros = 0;
|
||||
|
||||
while (scan < new_size) {
|
||||
int oldscore = 0; // Count of how many bytes of the current match at |scan|
|
||||
// extend the match at |lastscan|.
|
||||
match.pos = 0;
|
||||
|
||||
scan += match.size;
|
||||
for (int scsc = scan; scan < new_size; ++scan) {
|
||||
match = search<decltype(suffix_array)>(suffix_array, old, old_size, newbuf + scan,
|
||||
new_size - scan);
|
||||
|
||||
for (; scsc < scan + match.size; scsc++)
|
||||
if ((scsc + lastoffset < old_size) &&
|
||||
(old[scsc + lastoffset] == newbuf[scsc]))
|
||||
oldscore++;
|
||||
|
||||
if ((match.size == oldscore) && (match.size != 0))
|
||||
break; // Good continuing match, case (1)
|
||||
if (match.size > oldscore + 8)
|
||||
break; // New seed match, case (2)
|
||||
|
||||
if ((scan + lastoffset < old_size) &&
|
||||
(old[scan + lastoffset] == newbuf[scan]))
|
||||
oldscore--;
|
||||
// Case (3) continues in this loop until we fall out of the loop (4).
|
||||
}
|
||||
|
||||
if ((match.size != oldscore) || (scan == new_size)) { // Cases (2) and (4)
|
||||
// This next chunk of code finds the boundary between the bytes to be
|
||||
// copied as part of the current triple, and the bytes to be copied as
|
||||
// part of the next triple. The |lastscan| match is extended forwards as
|
||||
// far as possible provided doing to does not add too many mistakes. The
|
||||
// |scan| match is extended backwards in a similar way.
|
||||
|
||||
// Extend the current match (if any) backwards. |lenb| is the maximal
|
||||
// extension for which less than half the byte positions in the extension
|
||||
// are wrong.
|
||||
int lenb = 0;
|
||||
if (scan < new_size) { // i.e. not case (4); there is a match to extend.
|
||||
int score = 0, Sb = 0;
|
||||
for (int i = 1; (scan >= lastscan + i) && (match.pos >= i); i++) {
|
||||
if (old[match.pos - i] == newbuf[scan - i])
|
||||
score++;
|
||||
if (score * 2 - i > Sb * 2 - lenb) {
|
||||
Sb = score;
|
||||
lenb = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extend the lastscan match forward; |lenf| is the maximal extension for
|
||||
// which less than half of the byte positions in entire lastscan match are
|
||||
// wrong. There is a subtle point here: |lastscan| points to before the
|
||||
// seed match by |lenb| bytes from the previous iteration. This is why
|
||||
// the loop measures the total number of mistakes in the the match, not
|
||||
// just the from the match.
|
||||
int lenf = 0;
|
||||
{
|
||||
int score = 0, Sf = 0;
|
||||
for (int i = 0; (lastscan + i < scan) && (lastpos + i < old_size);) {
|
||||
if (old[lastpos + i] == newbuf[lastscan + i])
|
||||
score++;
|
||||
i++;
|
||||
if (score * 2 - i > Sf * 2 - lenf) {
|
||||
Sf = score;
|
||||
lenf = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the extended scans overlap, pick a position in the overlap region
|
||||
// that maximizes the exact matching bytes.
|
||||
if (lastscan + lenf > scan - lenb) {
|
||||
int overlap = (lastscan + lenf) - (scan - lenb);
|
||||
int score = 0;
|
||||
int Ss = 0, lens = 0;
|
||||
for (int i = 0; i < overlap; i++) {
|
||||
if (newbuf[lastscan + lenf - overlap + i] ==
|
||||
old[lastpos + lenf - overlap + i]) {
|
||||
score++;
|
||||
}
|
||||
if (newbuf[scan - lenb + i] == old[match.pos - lenb + i]) {
|
||||
score--;
|
||||
}
|
||||
if (score > Ss) {
|
||||
Ss = score;
|
||||
lens = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
lenf += lens - overlap;
|
||||
lenb -= lens;
|
||||
};
|
||||
|
||||
for (int i = 0; i < lenf; i++) {
|
||||
uint8_t diff_byte = newbuf[lastscan + i] - old[lastpos + i];
|
||||
if (diff_byte) {
|
||||
++diff_bytes_nonzero;
|
||||
WriteVarUint(diff_skips.GetWriter(), pending_diff_zeros);
|
||||
pending_diff_zeros = 0;
|
||||
diff_bytes.GetWriter().Write(&diff_byte, 1);
|
||||
} else {
|
||||
++pending_diff_zeros;
|
||||
}
|
||||
}
|
||||
int gap = (scan - lenb) - (lastscan + lenf);
|
||||
for (int i = 0; i < gap; i++) {
|
||||
extra_bytes.GetWriter().Write(&newbuf[lastscan + lenf + i], 1);
|
||||
}
|
||||
|
||||
diff_bytes_length += lenf;
|
||||
extra_bytes_length += gap;
|
||||
|
||||
uint32_t copy_count = lenf;
|
||||
uint32_t extra_count = gap;
|
||||
int32_t seek_adjustment = ((match.pos - lenb) - (lastpos + lenf));
|
||||
|
||||
WriteVarUint(control_stream_copy_counts.GetWriter(), copy_count);
|
||||
WriteVarUint(control_stream_extra_counts.GetWriter(), extra_count);
|
||||
WriteVarInt(control_stream_seeks.GetWriter(), seek_adjustment);
|
||||
|
||||
++control_length;
|
||||
|
||||
#ifdef DEBUG_bsmedberg
|
||||
LOG(LDEBUG, ("Writing a block: copy:", copy_count, "extra:", extra_count, "seek:", seek_adjustment));
|
||||
#endif
|
||||
|
||||
lastscan = scan - lenb; // Include the backward extension in seed.
|
||||
lastpos = match.pos - lenb; // ditto.
|
||||
lastoffset = lastpos - lastscan;
|
||||
}
|
||||
}
|
||||
|
||||
WriteVarUint(diff_skips.GetWriter(), pending_diff_zeros);
|
||||
|
||||
suffix_array.clear();
|
||||
|
||||
MBSPatchHeader header;
|
||||
// The string will have a null terminator that we don't use, hence '-1'.
|
||||
static_assert(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header.tag),
|
||||
"MBS_PATCH_HEADER_TAG must match header field size");
|
||||
memcpy(header.tag, MBS_PATCH_HEADER_TAG, sizeof(header.tag));
|
||||
header.slen = old_size;
|
||||
header.scrc32 = CalculateCrc(old, old_size);
|
||||
header.dlen = new_size;
|
||||
|
||||
WriteHeader(patch_sink, &header);
|
||||
for (auto const & s : mem_streams)
|
||||
{
|
||||
uint32_t const sz = base::checked_cast<uint32_t>(s.Size());
|
||||
WriteToSink(patch_sink, sz);
|
||||
}
|
||||
|
||||
for (auto const & s : mem_streams)
|
||||
patch_sink.Write(s.GetBuf().data(), s.GetBuf().size());
|
||||
|
||||
size_t diff_skips_length = diff_skips.Size();
|
||||
|
||||
std::ostringstream log_stream;
|
||||
log_stream << "Control tuples: " << control_length
|
||||
<< " copy bytes: " << diff_bytes_length
|
||||
<< " mistakes: " << diff_bytes_nonzero
|
||||
<< " (skips: " << diff_skips_length << ")"
|
||||
<< " extra bytes: " << extra_bytes_length
|
||||
<< "\nUncompressed bsdiff patch size "
|
||||
<< patch_sink.Pos() - initial_patch_sink_pos
|
||||
<< "\nEnd bsdiff "
|
||||
<< bsdiff_timer.ElapsedSeconds();
|
||||
|
||||
LOG(LINFO, (log_stream.str()));
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
// Applies the given patch file to a given source file. This method validates
|
||||
// the CRC of the original file stored in the patch file, before applying the
|
||||
// patch to it.
|
||||
template <typename OldReader, typename NewSink, typename PatchReader>
|
||||
BSDiffStatus ApplyBinaryPatch(OldReader & old_reader, NewSink & new_sink,
|
||||
PatchReader & patch_reader, const base::Cancellable & cancellable)
|
||||
{
|
||||
ReaderSource<OldReader> old_source(old_reader);
|
||||
ReaderSource<PatchReader> patch_source(patch_reader);
|
||||
|
||||
MBSPatchHeader header;
|
||||
BSDiffStatus ret = MBS_ReadHeader(patch_source, &header);
|
||||
if (ret != OK)
|
||||
return ret;
|
||||
|
||||
const auto old_size = static_cast<size_t>(old_source.Size());
|
||||
std::vector<uint8_t> old_buf(old_size);
|
||||
old_source.Read(old_buf.data(), old_buf.size());
|
||||
|
||||
const uint8_t* old_start = old_buf.data();
|
||||
const uint8_t* old_end = old_buf.data() + old_buf.size();
|
||||
const uint8_t* old_position = old_start;
|
||||
|
||||
if (old_size != header.slen)
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
if (CalculateCrc(old_start, old_size) != header.scrc32)
|
||||
return CRC_ERROR;
|
||||
|
||||
CHECK_GREATER_OR_EQUAL(kNumStreams, 6, ());
|
||||
std::vector<uint32_t> stream_sizes(kNumStreams);
|
||||
for (auto & s : stream_sizes)
|
||||
s = ReadPrimitiveFromSource<uint32_t>(patch_source);
|
||||
|
||||
std::vector<ReaderSource<PatchReader>> patch_streams;
|
||||
patch_streams.reserve(kNumStreams);
|
||||
for (size_t i = 0; i < kNumStreams; ++i) {
|
||||
uint64_t size = static_cast<uint64_t>(stream_sizes[i]);
|
||||
patch_streams.emplace_back(ReaderSource<PatchReader>(patch_source.SubReader(size)));
|
||||
}
|
||||
|
||||
auto & control_stream_copy_counts = patch_streams[0];
|
||||
auto & control_stream_extra_counts = patch_streams[1];
|
||||
auto & control_stream_seeks = patch_streams[2];
|
||||
auto & diff_skips = patch_streams[3];
|
||||
auto & diff_bytes = patch_streams[4];
|
||||
auto & extra_bytes = patch_streams[5];
|
||||
|
||||
std::vector<uint8_t> extra_bytes_buf(static_cast<size_t>(extra_bytes.Size()));
|
||||
extra_bytes.Read(extra_bytes_buf.data(), extra_bytes_buf.size());
|
||||
|
||||
const uint8_t* extra_start = extra_bytes_buf.data();
|
||||
const uint8_t* extra_end = extra_bytes_buf.data() + extra_bytes_buf.size();
|
||||
const uint8_t* extra_position = extra_start;
|
||||
|
||||
// if (header->dlen && !new_sink->Reserve(header->dlen))
|
||||
// return MEM_ERROR;
|
||||
|
||||
auto pending_diff_zeros = ReadVarUint<uint32_t>(diff_skips);
|
||||
|
||||
// We will check whether the application process has been cancelled
|
||||
// upon copying every |kCheckCancelledPeriod| bytes from the old file.
|
||||
constexpr size_t kCheckCancelledPeriod = 100 * 1024;
|
||||
|
||||
while (control_stream_copy_counts.Size() > 0) {
|
||||
if (cancellable.IsCancelled())
|
||||
return CANCELLED;
|
||||
|
||||
auto copy_count = ReadVarUint<uint32_t>(control_stream_copy_counts);
|
||||
auto extra_count = ReadVarUint<uint32_t>(control_stream_extra_counts);
|
||||
auto seek_adjustment = ReadVarInt<int32_t>(control_stream_seeks);
|
||||
|
||||
#ifdef DEBUG_bsmedberg
|
||||
LOG(LDEBUG, ("Applying block: copy:", copy_count, "extra:", extra_count, "seek:", seek_adjustment));
|
||||
#endif
|
||||
|
||||
// Byte-wise arithmetically add bytes from old file to bytes from the diff
|
||||
// block.
|
||||
if (copy_count > static_cast<size_t>(old_end - old_position))
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
// Add together bytes from the 'old' file and the 'diff' stream.
|
||||
for (size_t i = 0; i < copy_count; ++i) {
|
||||
if (i > 0 && i % kCheckCancelledPeriod == 0 && cancellable.IsCancelled())
|
||||
return CANCELLED;
|
||||
|
||||
uint8_t diff_byte = 0;
|
||||
if (pending_diff_zeros) {
|
||||
--pending_diff_zeros;
|
||||
} else {
|
||||
pending_diff_zeros = ReadVarUint<uint32_t>(diff_skips);
|
||||
diff_byte = ReadPrimitiveFromSource<uint8_t>(diff_bytes);
|
||||
}
|
||||
uint8_t byte = old_position[i] + diff_byte;
|
||||
WriteToSink(new_sink, byte);
|
||||
}
|
||||
old_position += copy_count;
|
||||
|
||||
// Copy bytes from the extra block.
|
||||
if (extra_count > static_cast<size_t>(extra_end - extra_position))
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
new_sink.Write(extra_position, extra_count);
|
||||
|
||||
extra_position += extra_count;
|
||||
|
||||
// "seek" forwards (or backwards) in oldfile.
|
||||
if (old_position + seek_adjustment < old_start ||
|
||||
old_position + seek_adjustment > old_end)
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
old_position += seek_adjustment;
|
||||
}
|
||||
|
||||
if (control_stream_copy_counts.Size() > 0 ||
|
||||
control_stream_extra_counts.Size() > 0 ||
|
||||
control_stream_seeks.Size() > 0 ||
|
||||
diff_skips.Size() > 0 ||
|
||||
diff_bytes.Size() > 0 ||
|
||||
extra_bytes.Size() > 0)
|
||||
{
|
||||
return UNEXPECTED_ERROR;
|
||||
}
|
||||
|
||||
if (cancellable.IsCancelled())
|
||||
return CANCELLED;
|
||||
|
||||
return OK;
|
||||
}
|
||||
} // namespace bsdiff
|
||||
|
||||
#endif // COURGETTE_THIRD_PARTY_BSDIFF_BSDIFF_H_
|
|
@ -1,75 +0,0 @@
|
|||
#ifndef COURGETTE_THIRD_PARTY_BSDIFF_BSDIFF_HEADER_H_
|
||||
#define COURGETTE_THIRD_PARTY_BSDIFF_BSDIFF_HEADER_H_
|
||||
|
||||
#include "coding/reader.hpp"
|
||||
#include "coding/varint.hpp"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace bsdiff {
|
||||
// The following declarations are common to the patch-creation and
|
||||
// patch-application code.
|
||||
|
||||
// Number of streams a patch body consists of. The create/apply code in
// bsdiff.h addresses them by index: 0 - copy counts, 1 - extra counts,
// 2 - seeks, 3 - diff skips, 4 - diff bytes, 5 - extra bytes.
int constexpr kNumStreams = 6;
|
||||
|
||||
// Result codes of the patch creation and application routines.
// DebugPrint() below maps every value to its name.
enum BSDiffStatus {
|
||||
OK = 0,  // The operation completed successfully.
|
||||
MEM_ERROR = 1,  // NOTE(review): not returned by the code visible here.
|
||||
CRC_ERROR = 2,  // CRC of the file to be patched differs from the header.
|
||||
READ_ERROR = 3,  // NOTE(review): not returned by the code visible here.
|
||||
UNEXPECTED_ERROR = 4,  // Bad header tag, size mismatch or malformed patch.
|
||||
WRITE_ERROR = 5,  // NOTE(review): not returned by the code visible here.
|
||||
CANCELLED = 6,  // Patch application was cancelled by the caller.
|
||||
};
|
||||
|
||||
// The patch stream starts with a MBSPatchHeader.
|
||||
// In-memory form of the patch header. It is not written as a raw struct:
// WriteHeader() emits the tag bytes followed by the three numeric fields as
// varuints, and MBS_ReadHeader() reads them back the same way.
typedef struct MBSPatchHeader_ {
|
||||
char tag[8]; // Contains MBS_PATCH_HEADER_TAG.
|
||||
uint32_t slen; // Length of the file to be patched.
|
||||
uint32_t scrc32; // CRC32 of the file to be patched.
|
||||
uint32_t dlen; // Length of the result file.
|
||||
} MBSPatchHeader;
|
||||
|
||||
// Value stored in MBSPatchHeader::tag. The literal must be exactly as long as
|
||||
// the tag field, not counting the terminating NUL (enforced by static_assert).
|
||||
#define MBS_PATCH_HEADER_TAG "GBSDIF42"
|
||||
|
||||
template <typename Sink>
|
||||
void WriteHeader(Sink & sink, MBSPatchHeader* header) {
|
||||
sink.Write(header->tag, sizeof(header->tag));
|
||||
WriteVarUint(sink, header->slen);
|
||||
WriteVarUint(sink, header->scrc32);
|
||||
WriteVarUint(sink, header->dlen);
|
||||
}
|
||||
|
||||
template <typename Source>
|
||||
BSDiffStatus MBS_ReadHeader(Source & src, MBSPatchHeader* header) {
|
||||
src.Read(header->tag, sizeof(header->tag));
|
||||
header->slen = ReadVarUint<uint32_t>(src);
|
||||
header->scrc32 = ReadVarUint<uint32_t>(src);
|
||||
header->dlen = ReadVarUint<uint32_t>(src);
|
||||
|
||||
// The string will have a NUL terminator that we don't use, hence '-1'.
|
||||
static_assert(sizeof(MBS_PATCH_HEADER_TAG) - 1 == sizeof(header->tag),
|
||||
"MBS_PATCH_HEADER_TAG must match header field size");
|
||||
if (memcmp(header->tag, MBS_PATCH_HEADER_TAG, 8) != 0)
|
||||
return UNEXPECTED_ERROR;
|
||||
|
||||
return OK;
|
||||
}
|
||||
|
||||
// Returns the enumerator name of |status|, or "Unknown status" for a value
// that matches no BSDiffStatus enumerator.
inline std::string DebugPrint(BSDiffStatus status) {
  char const * name = "Unknown status";
  switch (status) {
    case OK:
      name = "OK";
      break;
    case MEM_ERROR:
      name = "MEM_ERROR";
      break;
    case CRC_ERROR:
      name = "CRC_ERROR";
      break;
    case READ_ERROR:
      name = "READ_ERROR";
      break;
    case UNEXPECTED_ERROR:
      name = "UNEXPECTED_ERROR";
      break;
    case WRITE_ERROR:
      name = "WRITE_ERROR";
      break;
    case CANCELLED:
      name = "CANCELLED";
      break;
  }
  return name;
}
|
||||
} // namespace bsdiff
|
||||
|
||||
#endif // COURGETTE_THIRD_PARTY_BSDIFF_BSDIFF_HEADER_H_
|
|
@ -1,97 +0,0 @@
|
|||
// Copyright 2003, 2004 Colin Percival
|
||||
// All rights reserved
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted providing that the following conditions
|
||||
// are met:
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
||||
// IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// For the terms under which this work may be distributed, please see
|
||||
// the adjoining file "LICENSE".
|
||||
//
|
||||
// ChangeLog:
|
||||
// 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
|
||||
// values throughout.
|
||||
// --Benjamin Smedberg <benjamin@smedbergs.us>
|
||||
// 2015-08-03 - Change search() to template to allow PagedArray usage.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
// 2015-08-19 - Optimized search() to be non-recursive.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
// 2016-06-28 - Moved matchlen() and search() to a new file; format; changed
|
||||
// search() use std::lexicographical_compare().
|
||||
// 2016-06-30 - Changed matchlen() input; changed search() to return struct.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
|
||||
// Copyright 2016 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef COURGETTE_THIRD_PARTY_BSDIFF_BSDIFF_SEARCH_H_
|
||||
#define COURGETTE_THIRD_PARTY_BSDIFF_BSDIFF_SEARCH_H_
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace bsdiff {
|
||||
|
||||
// Return values of search().
|
||||
// Result pair filled in by search(): |pos| is an offset taken from the suffix
// array and |size| is the number of bytes that matched at that offset.
struct SearchResult {
|
||||
SearchResult(int pos_in, int size_in) : pos(pos_in), size(size_in) {}
|
||||
int pos;
|
||||
int size;
|
||||
};
|
||||
|
||||
// Similar to ::memcmp(), but assumes equal |size| and returns match length.
|
||||
inline int matchlen(const unsigned char* buf1,
                    const unsigned char* buf2,
                    int size) {
  // Walk both buffers in lockstep and report the index of the first mismatch.
  int matched = 0;
  while (matched < size) {
    if (buf1[matched] != buf2[matched])
      return matched;
    ++matched;
  }
  // No mismatch within |size| bytes (also reached when |size| <= 0).
  return size;
}
|
||||
|
||||
// Finds a suffix in |old| that has the longest common prefix with |keybuf|,
|
||||
// aided by suffix array |sa| of |old|. Returns the match length, and writes to
|
||||
// |pos| a position of best match in |old|. If multiple such positions exist,
|
||||
// |pos| would take an arbitrary one.
|
||||
template <class T>
|
||||
SearchResult search(const T & sa,
|
||||
const unsigned char* srcbuf,
|
||||
int srcsize,
|
||||
const unsigned char* keybuf,
|
||||
int keysize) {
|
||||
int lo = 0;
|
||||
int hi = srcsize;
|
||||
while (hi - lo > 1) {
|
||||
int mid = (lo + hi) / 2;
|
||||
if (std::lexicographical_compare(
|
||||
srcbuf + sa[mid], srcbuf + srcsize, keybuf, keybuf + keysize)) {
|
||||
lo = mid;
|
||||
} else {
|
||||
hi = mid;
|
||||
}
|
||||
}
|
||||
int x = matchlen(srcbuf + sa[lo], keybuf, std::min(srcsize - sa[lo], keysize));
|
||||
int y = matchlen(srcbuf + sa[hi], keybuf, std::min(srcsize - sa[hi], keysize));
|
||||
return (x > y) ? SearchResult(sa[lo], x) : SearchResult(sa[hi], y);
|
||||
}
|
||||
|
||||
} // namespace bsdiff
|
||||
|
||||
#endif // COURGETTE_THIRD_PARTY_BSDIFF_BSDIFF_SEARCH_H_
|
|
@ -1,7 +0,0 @@
|
|||
project(bsdiff_tests)
|
||||
|
||||
set(SRC bsdiff_search_tests.cpp)
|
||||
|
||||
omim_add_test(${PROJECT_NAME} ${SRC})
|
||||
|
||||
target_link_libraries(${PROJECT_NAME} bsdiff)
|
|
@ -1,135 +0,0 @@
|
|||
#include "testing/testing.hpp"
|
||||
|
||||
#include "base/macros.hpp"
|
||||
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "3party/bsdiff-courgette/bsdiff/bsdiff_search.h"
|
||||
#include "3party/bsdiff-courgette/divsufsort/divsufsort.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Adapted from 3party/bsdiff-courgette.
|
||||
UNIT_TEST(BSDiffSearchTest_Search)
{
  // Text under test. It is 44 characters long, so '.' sits at position 43.
  string const str = "the quick brown fox jumps over the lazy dog.";
  int const size = static_cast<int>(str.size());
  auto buf = reinterpret_cast<unsigned char const *>(str.data());

  // Suffix array with the empty suffix included, hence size + 1 entries.
  vector<divsuf::saidx_t> suffix_array(size + 1);
  divsuf::divsufsort_include_empty(buf, suffix_array.data(), size);

  // Specific queries.
  struct TestCase
  {
    int m_expMatchPos;  // -1 means "don't care".
    int m_expMatchSize;
    string m_query_str;
  };
  TestCase const testCases[] = {
      // Entire string: exact and unique.
      {0, 44, "the quick brown fox jumps over the lazy dog."},
      // Empty string: exact and non-unique.
      {-1, 0, ""},
      // Exact and unique suffix matches.
      {43, 1, "."},
      {31, 13, "the lazy dog."},
      // Exact and unique non-suffix matches.
      {4, 5, "quick"},
      {0, 9, "the quick"},  // Unique prefix.
      // Partial and unique matches.
      {16, 10, "fox jumps with the hosps"},  // Unique prefix.
      {18, 1, "xyz"},
      // Exact and non-unique match: take lexicographical first.
      {-1, 3, "the"},  // Non-unique prefix.
      {-1, 1, " "},
      // Partial and non-unique match: no guarantees on |match.pos|!
      {-1, 4, "the apple"},  // query < "the l"... < "the q"...
      {-1, 4, "the opera"},  // "the l"... < query < "the q"...
      {-1, 4, "the zebra"},  // "the l"... < "the q"... < query
      // Prefix match dominates suffix match (unique).
      {26, 5, "over quick brown fox"},
      // Empty matches.
      {-1, 0, ","},
      {-1, 0, "1234"},
      {-1, 0, "THE QUICK BROWN FOX"},
      {-1, 0, "(the"},
  };

  for (auto const & testCase : testCases)
  {
    string const & queryStr = testCase.m_query_str;
    int const querySize = static_cast<int>(queryStr.size());
    auto query_buf = reinterpret_cast<unsigned char const *>(queryStr.data());

    // Perform the search.
    bsdiff::SearchResult const match =
        bsdiff::search(suffix_array, buf, size, query_buf, querySize);

    // The match length must stay within [0, querySize].
    TEST_GREATER_OR_EQUAL(match.size, 0, ());
    TEST_LESS_OR_EQUAL(match.size, querySize, ());

    // A non-empty match must lie inside the text and really equal the query prefix.
    if (match.size > 0)
    {
      TEST_GREATER_OR_EQUAL(match.pos, 0, ());
      TEST_LESS_OR_EQUAL(match.pos, size - match.size, ());
      TEST_EQUAL(0, memcmp(buf + match.pos, query_buf, match.size), ());
    }

    // Compare against the expectations; the position only when it is pinned.
    if (testCase.m_expMatchPos >= 0)
    {
      TEST_EQUAL(testCase.m_expMatchPos, match.pos, ());
    }
    TEST_EQUAL(testCase.m_expMatchSize, match.size, ());
  }
}
|
||||
|
||||
// Adapted from 3party/bsdiff-courgette.
|
||||
UNIT_TEST(BSDiffSearchTest_SearchExact)
{
  string const testCases[] = {
      "a",
      "aa",
      "az",
      "za",
      "aaaaa",
      "CACAO",
      "banana",
      "tobeornottobe",
      "the quick brown fox jumps over the lazy dog.",
      "elephantelephantelephantelephantelephant",
      "011010011001011010010110011010010",
  };

  for (string const & text : testCases)
  {
    int const size = static_cast<int>(text.size());
    auto const buf = reinterpret_cast<unsigned char const *>(text.data());

    // Suffix array with the empty suffix included, hence size + 1 entries.
    vector<divsuf::saidx_t> suffix_array(size + 1);
    divsuf::divsufsort_include_empty(buf, suffix_array.data(), size);

    // Every non-empty substring [begin, end) of the text must be found in full.
    for (int begin = 0; begin < size; ++begin)
    {
      for (int end = begin + 1; end <= size; ++end)
      {
        string const query(buf + begin, buf + end);
        int const querySize = static_cast<int>(query.size());
        CHECK_EQUAL(querySize, end - begin, ());

        auto const query_buf = reinterpret_cast<unsigned char const *>(query.c_str());
        bsdiff::SearchResult const match =
            bsdiff::search(suffix_array, buf, size, query_buf, querySize);

        TEST_EQUAL(querySize, match.size, ());
        TEST_GREATER_OR_EQUAL(match.pos, 0, ());
        TEST_LESS_OR_EQUAL(match.pos, size - match.size, ());

        // The text starting at the reported position must begin with the query.
        string const suffix(buf + match.pos, buf + size);
        TEST_EQUAL(suffix.substr(0, querySize), query, ());
      }
    }
  }
}
|
|
@ -1,21 +0,0 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2003 Yuta Mori All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -1,35 +0,0 @@
|
|||
Name: divsufsort
|
||||
URL: https://github.com/y-256/libdivsufsort
|
||||
Date: 2016-06-01
|
||||
Security Critical: no
|
||||
License: MIT
|
||||
License File: LICENSE
|
||||
|
||||
Description:
|
||||
This directory contains a modified version of Yuta Mori's libdivsufsort,
|
||||
available in its original form from:
|
||||
|
||||
https://github.com/y-256/libdivsufsort
|
||||
|
||||
The copy in this directory is so extensively modified that the binary format is
|
||||
incompatible with the original and it cannot be compiled outside the Chromium
|
||||
source tree or the Courgette project.
|
||||
|
||||
List of changes made to original code:
|
||||
- Flattened directory and renamed .c files to .cc files.
|
||||
- Extracted top-of-file license to common file LICENSE.
|
||||
- Removed unused features, e.g., Burrows-Wheeler transformation.
|
||||
- Removed OpenMP usage.
|
||||
- Changed static functions to anonymous namespace functions.
|
||||
- Moved various #define's from divsufsort_private.h to files that use them.
|
||||
- Adapted code to enable PagedArray usage, to reduce effect of memory
|
||||
fragmentation:
|
||||
- Classified saidx_t* to {saidx_t*, saidx_it, const_saidx_it}.
|
||||
- Extracted ss_compare_internal() from ss_compare(), to break awkward usage
|
||||
of local array at end of sssort().
|
||||
- Defined saidx_it and const_saidx_it to use PagedArray iterators. To
|
||||
restore old behavior, we can define DIVSUFSORT_NO_PAGED_ARRAY.
|
||||
- Added namespace divsuf.
|
||||
- Added divsufsort_include_empty().
|
||||
- Added unit tests.
|
||||
- Patch to avoid int/uint comparison warnings.
|
|
@ -1,276 +0,0 @@
|
|||
// Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
// ChangeLog:
|
||||
// 2016-07-22 - Initial commit and adaption to use PagedArray.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
|
||||
#include "3party/bsdiff-courgette/divsufsort/divsufsort_private.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define BUCKET_A_SIZE (ALPHABET_SIZE)
|
||||
#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
|
||||
|
||||
#define BUCKET_A(_c0) bucket_A[(_c0)]
|
||||
#if ALPHABET_SIZE == 256
|
||||
#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
|
||||
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
|
||||
#else
|
||||
#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
|
||||
#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
|
||||
#endif
|
||||
|
||||
namespace divsuf {
|
||||
|
||||
/*- Private Functions -*/
|
||||
|
||||
namespace {
|
||||
|
||||
/* Sorts suffixes of type B*.
 *
 * Zeroes bucket_A (BUCKET_A_SIZE entries) and bucket_B (BUCKET_B_SIZE
 * entries), counts the type A, B and B* suffixes of T[0..n-1] into them,
 * records the start positions of the type B* suffixes in SA and orders them
 * with sssort() and trsort().
 *
 * Returns m, the number of type B* suffixes that were recorded.
 *
 * T[n - 1] is read without a range check; divsufsort() only calls this
 * function for n >= 3.
 */
|
||||
saidx_t
|
||||
sort_typeBstar(const sauchar_t *T, saidx_it SA,
|
||||
saidx_t *bucket_A, saidx_t *bucket_B,
|
||||
saidx_t n) {
|
||||
saidx_it PAb, ISAb, buf;
|
||||
saidx_t i, j, k, t, m, bufsize;
|
||||
saint_t c0, c1;
|
||||
|
||||
/* Initialize bucket arrays. */
|
||||
for(i = 0; i < static_cast<saidx_t>(BUCKET_A_SIZE); ++i) { bucket_A[i] = 0; }
|
||||
for(i = 0; i < static_cast<saidx_t>(BUCKET_B_SIZE); ++i) { bucket_B[i] = 0; }
|
||||
|
||||
/* Count the number of occurrences of the first one or two characters of each
|
||||
type A, B and B* suffix. Moreover, store the beginning position of all
|
||||
type B* suffixes into the array SA. */
|
||||
for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
|
||||
/* type A suffix. */
|
||||
do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
|
||||
if(0 <= i) {
|
||||
/* type B* suffix. */
|
||||
++BUCKET_BSTAR(c0, c1);
|
||||
SA[--m] = i;
|
||||
/* type B suffix. */
|
||||
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
|
||||
++BUCKET_B(c0, c1);
|
||||
}
|
||||
}
|
||||
}
|
||||
m = n - m;
|
||||
/*
|
||||
note:
|
||||
A type B* suffix is lexicographically smaller than a type B suffix that
|
||||
begins with the same first two characters.
|
||||
*/
|
||||
|
||||
/* Calculate the index of start/end point of each bucket. */
|
||||
for(c0 = 0, i = 0, j = 0; c0 < static_cast<saint_t>(ALPHABET_SIZE); ++c0) {
|
||||
t = i + BUCKET_A(c0);
|
||||
BUCKET_A(c0) = i + j; /* start point */
|
||||
i = t + BUCKET_B(c0, c0);
|
||||
for(c1 = c0 + 1; c1 < static_cast<saint_t>(ALPHABET_SIZE); ++c1) {
|
||||
j += BUCKET_BSTAR(c0, c1);
|
||||
BUCKET_BSTAR(c0, c1) = j; /* end point */
|
||||
i += BUCKET_B(c0, c1);
|
||||
}
|
||||
}
|
||||
|
||||
if(0 < m) {
|
||||
/* Sort the type B* suffixes by their first two characters. */
|
||||
PAb = SA + n - m; ISAb = SA + m;
|
||||
for(i = m - 2; 0 <= i; --i) {
|
||||
t = PAb[i], c0 = T[t], c1 = T[t + 1];
|
||||
SA[--BUCKET_BSTAR(c0, c1)] = i;
|
||||
}
|
||||
t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
|
||||
SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
|
||||
|
||||
/* Sort the type B* substrings using sssort. */
|
||||
buf = SA + m, bufsize = n - (2 * m);
|
||||
for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
|
||||
for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
|
||||
i = BUCKET_BSTAR(c0, c1);
|
||||
if(1 < (j - i)) {
|
||||
sssort(T, PAb, SA + i, SA + j,
|
||||
buf, bufsize, 2, n, *(SA + i) == (m - 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute ranks of type B* substrings. */
|
||||
for(i = m - 1; 0 <= i; --i) {
|
||||
if(0 <= SA[i]) {
|
||||
j = i;
|
||||
do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
|
||||
SA[i + 1] = i - j;
|
||||
if(i <= 0) { break; }
|
||||
}
|
||||
j = i;
|
||||
do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
|
||||
ISAb[SA[i]] = j;
|
||||
}
|
||||
|
||||
/* Construct the inverse suffix array of type B* suffixes using trsort. */
|
||||
trsort(ISAb, SA, m, 1);
|
||||
|
||||
/* Set the sorted order of type B* suffixes. */
|
||||
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
|
||||
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
|
||||
if(0 <= i) {
|
||||
t = i;
|
||||
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
|
||||
SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
|
||||
}
|
||||
}
|
||||
|
||||
/* Calculate the index of start/end point of each bucket. */
|
||||
BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
|
||||
for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
|
||||
i = BUCKET_A(c0 + 1) - 1;
|
||||
for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
|
||||
t = i - BUCKET_B(c0, c1);
|
||||
BUCKET_B(c0, c1) = i; /* end point */
|
||||
|
||||
/* Move all type B* suffixes to the correct position. */
|
||||
for(i = t, j = BUCKET_BSTAR(c0, c1);
|
||||
j <= k;
|
||||
--i, --k) { SA[i] = SA[k]; }
|
||||
}
|
||||
BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
|
||||
BUCKET_B(c0, c0) = i; /* end point */
|
||||
}
|
||||
}
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
/* Constructs the suffix array by using the sorted order of type B* suffixes.
 *
 * T is the input text of length n and SA the array being completed in place.
 * m is the type B* suffix count; when it is 0 the first (type B) pass is
 * skipped. bucket_A and bucket_B are read and rewritten while scanning.
 * divsufsort() passes the SA, buckets and m produced by sort_typeBstar().
 *
 * T[n - 2] is read without a range check; divsufsort() only calls this
 * function for n >= 3.
 */
|
||||
void
|
||||
construct_SA(const sauchar_t *T, saidx_it SA,
|
||||
saidx_t *bucket_A, saidx_t *bucket_B,
|
||||
saidx_t n, saidx_t m) {
|
||||
saidx_it i, j, k;
|
||||
saidx_t s;
|
||||
saint_t c0, c1, c2;
|
||||
|
||||
if(0 < m) {
|
||||
/* Construct the sorted order of type B suffixes by using
|
||||
the sorted order of type B* suffixes. */
|
||||
for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
|
||||
/* Scan the suffix array from right to left. */
|
||||
for (i = SA + BUCKET_BSTAR(c1, c1 + 1), j = SA + BUCKET_A(c1 + 1) - 1,
|
||||
k = nullptr, c2 = -1;
|
||||
i <= j; --j) {
|
||||
if(0 < (s = *j)) {
|
||||
assert(T[s] == c1);
|
||||
assert(((s + 1) < n) && (T[s] <= T[s + 1]));
|
||||
assert(T[s - 1] <= T[s]);
|
||||
*j = ~s;
|
||||
c0 = T[--s];
|
||||
if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
|
||||
if(c0 != c2) {
|
||||
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
|
||||
k = SA + BUCKET_B(c2 = c0, c1);
|
||||
}
|
||||
assert(k < j);
|
||||
*k-- = s;
|
||||
} else {
|
||||
assert(((s == 0) && (T[s] == c1)) || (s < 0));
|
||||
*j = ~s;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Construct the suffix array by using
|
||||
the sorted order of type B suffixes. */
|
||||
k = SA + BUCKET_A(c2 = T[n - 1]);
|
||||
*k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
|
||||
/* Scan the suffix array from left to right. */
|
||||
for(i = SA, j = SA + n; i < j; ++i) {
|
||||
if(0 < (s = *i)) {
|
||||
assert(T[s - 1] >= T[s]);
|
||||
c0 = T[--s];
|
||||
if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
|
||||
if(c0 != c2) {
|
||||
BUCKET_A(c2) = k - SA;
|
||||
k = SA + BUCKET_A(c2 = c0);
|
||||
}
|
||||
assert(i < k);
|
||||
*k++ = s;
|
||||
} else {
|
||||
assert(s < 0);
|
||||
*i = ~s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/*- Function -*/
|
||||
|
||||
/* Builds the suffix array of T[0..n-1] in SA[0..n-1].
 * Returns 0 on success, -1 for a null T or SA or a negative n, and -2 when
 * the bucket arrays cannot be allocated. */
saint_t
divsufsort(const sauchar_t *T, saidx_it SA, saidx_t n) {
  /* Check arguments. */
  if ((T == nullptr) || (SA == nullptr) || (n < 0)) { return -1; }

  /* Inputs of at most two characters are ordered directly. */
  switch (n) {
    case 0:
      return 0;
    case 1:
      SA[0] = 0;
      return 0;
    case 2: {
      /* 1 when suffix 0 sorts before suffix 1, 0 otherwise. */
      const saidx_t ascending = (T[0] < T[1]);
      SA[ascending ^ 1] = 0;
      SA[ascending] = 1;
      return 0;
    }
    default:
      break;
  }

  saidx_t *bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
  saidx_t *bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));

  /* Suffixsort. */
  saint_t err = 0;
  if ((bucket_A == nullptr) || (bucket_B == nullptr)) {
    err = -2;
  } else {
    const saidx_t m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
    construct_SA(T, SA, bucket_A, bucket_B, n, m);
  }

  free(bucket_B);
  free(bucket_A);

  return err;
}
|
||||
|
||||
/* Builds the suffix array of T[0..n-1] in SA[0..n], with the empty suffix
 * (position n) stored first in SA[0] and the result of divsufsort() in
 * SA[1..n].
 * Returns -1 when SA is null, otherwise whatever divsufsort() returns. */
saint_t divsufsort_include_empty(const sauchar_t *T, saidx_it SA, saidx_t n) {
  /* divsufsort() only ever sees SA + 1, so its own null check cannot catch a
     null SA; reject it here before the first write. */
  if (SA == nullptr) {
    return -1;
  }
  SA[0] = n;  // Manually add the empty string suffix.
  return divsufsort(T, SA + 1, n);
}
|
||||
|
||||
} // namespace divsuf
|
|
@ -1,65 +0,0 @@
|
|||
// Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
// ChangeLog:
|
||||
// 2016-07-22 - Initial commit and adaption to use PagedArray.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
|
||||
#ifndef COURGETTE_BSDIFF_THIRD_PARTY_DIVSUFSORT_H_
|
||||
#define COURGETTE_BSDIFF_THIRD_PARTY_DIVSUFSORT_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace divsuf {
|
||||
|
||||
/*- Datatypes -*/
|
||||
typedef int32_t saint_t;
|
||||
typedef int32_t saidx_t;
|
||||
typedef uint8_t sauchar_t;
|
||||
|
||||
typedef saidx_t* saidx_it;
|
||||
typedef const saidx_t* const_saidx_it;
|
||||
|
||||
/*- Prototypes -*/
|
||||
|
||||
/**
|
||||
* Constructs the suffix array of a given string, excluding the empty string.
|
||||
* @param T[0..n-1] The input string.
|
||||
* @param SA[0..n-1] The output array of suffixes.
|
||||
* @param n The length of the given string.
|
||||
* @return 0 if no error occurred, -1 or -2 otherwise.
|
||||
*/
|
||||
saint_t divsufsort(const sauchar_t *T, saidx_it SA, saidx_t n);
|
||||
|
||||
/**
|
||||
* Constructs the suffix array of a given string, including the empty string.
|
||||
* @param T[0..n-1] The input string.
|
||||
* @param SA[0..n] The output array of suffixes (includes empty string).
|
||||
* @param n The length of the given string.
|
||||
* @return 0 if no error occurred, -1 or -2 otherwise.
|
||||
*/
|
||||
saint_t divsufsort_include_empty(const sauchar_t *T, saidx_it SA, saidx_t n);
|
||||
|
||||
} // namespace divsuf
|
||||
|
||||
#endif // COURGETTE_BSDIFF_THIRD_PARTY_DIVSUFSORT_H_
|
|
@ -1,75 +0,0 @@
|
|||
// Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
// ChangeLog:
|
||||
// 2016-07-22 - Initial commit and adaption to use PagedArray.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
|
||||
#ifndef COURGETTE_BSDIFF_THIRD_PARTY_DIVSUFSORT_PRIVATE_H_
|
||||
#define COURGETTE_BSDIFF_THIRD_PARTY_DIVSUFSORT_PRIVATE_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "3party/bsdiff-courgette/divsufsort/divsufsort.h"
|
||||
|
||||
namespace divsuf {
|
||||
|
||||
/*- Constants -*/
|
||||
#if !defined(UINT8_MAX)
|
||||
# define UINT8_MAX (255)
|
||||
#endif /* UINT8_MAX */
|
||||
#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
|
||||
# undef ALPHABET_SIZE
|
||||
#endif
|
||||
#if !defined(ALPHABET_SIZE)
|
||||
# define ALPHABET_SIZE (UINT8_MAX + 1)
|
||||
#endif
|
||||
|
||||
/*- Macros -*/
|
||||
#ifndef SWAP
|
||||
# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
|
||||
#endif /* SWAP */
|
||||
#ifndef MIN
|
||||
# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
|
||||
#endif /* MIN */
|
||||
#ifndef MAX
|
||||
# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
|
||||
#endif /* MAX */
|
||||
|
||||
/*- Private Prototypes -*/
|
||||
/* sssort.c */
|
||||
void
|
||||
sssort(const sauchar_t *T, const_saidx_it PA,
|
||||
saidx_it first, saidx_it last,
|
||||
saidx_it buf, saidx_t bufsize,
|
||||
saidx_t depth, saidx_t n, saint_t lastsuffix);
|
||||
|
||||
/* trsort.c */
|
||||
void
|
||||
trsort(saidx_it ISA, saidx_it SA, saidx_t n, saidx_t depth);
|
||||
|
||||
} // namespace divsuf
|
||||
|
||||
#endif // COURGETTE_BSDIFF_THIRD_PARTY_DIVSUFSORT_PRIVATE_H_
|
|
@ -1,87 +0,0 @@
|
|||
// Copyright 2016 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "courgette/third_party/divsufsort/divsufsort.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "courgette/third_party/bsdiff/bsdiff_search.h"
|
||||
#include "courgette/third_party/bsdiff/paged_array.h"
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
|
||||
TEST(DivSufSortTest, Sort) {
  const char* test_strs[] = {
      "",
      "a",
      "za",
      "CACAO",
      "banana",
      "tobeornottobe",
      "The quick brown fox jumps over the lazy dog.",
      "elephantelephantelephantelephantelephant",
      "-------------------------",
      "011010011001011010010110011010010",
      "3141592653589793238462643383279502884197169399375105",
      "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
  };

  for (const std::string& text : test_strs) {
    const int len = static_cast<int>(text.length());
    const unsigned char* const first =
        reinterpret_cast<const unsigned char*>(text.data());
    const unsigned char* const last = first + len;

    // Build the suffix array, empty suffix included.
    courgette::PagedArray<divsuf::saidx_t> sa;
    ASSERT_TRUE(sa.Allocate(len + 1));
    divsuf::divsufsort_include_empty(first, sa.begin(), len);

    // Once sorted, the entries must read exactly 0, 1, ..., len.
    std::vector<divsuf::saidx_t> ordered(sa.begin(), sa.end());
    std::sort(ordered.begin(), ordered.end());
    for (divsuf::saidx_t i = 0; i <= len; ++i)
      EXPECT_EQ(i, ordered[i]);

    // The empty suffix comes first.
    EXPECT_EQ(len, sa[0]);

    // Each suffix must compare strictly below its successor.
    for (divsuf::saidx_t i = 1; i <= len; ++i) {
      const bool is_less = std::lexicographical_compare(
          first + sa[i - 1], last, first + sa[i], last);
      EXPECT_TRUE(is_less);
    }
  }
}
|
||||
|
||||
// Test with sequence that has every character.
|
||||
TEST(DivSufSortTest, AllChar) {
  const int kNumChar = 256;

  // Bytes 0, 1, ..., 255 and the same bytes in reverse order.
  std::vector<unsigned char> ascending(kNumChar);
  std::iota(ascending.begin(), ascending.end(), 0);
  std::vector<unsigned char> descending(ascending.rbegin(), ascending.rend());

  // Ascending input: the empty suffix first, then positions 0, 1, ..., 255.
  {
    courgette::PagedArray<divsuf::saidx_t> sa;
    ASSERT_TRUE(sa.Allocate(kNumChar + 1));
    divsuf::divsufsort_include_empty(ascending.data(), sa.begin(), kNumChar);
    EXPECT_EQ(kNumChar, sa[0]);  // Empty character.
    for (int rank = 1; rank <= kNumChar; ++rank)
      EXPECT_EQ(rank - 1, sa[rank]);
  }

  // Descending input: positions 256, 255, ..., 0.
  {
    courgette::PagedArray<divsuf::saidx_t> sa;
    ASSERT_TRUE(sa.Allocate(kNumChar + 1));
    divsuf::divsufsort_include_empty(descending.data(), sa.begin(), kNumChar);
    for (int rank = 0; rank <= kNumChar; ++rank)
      EXPECT_EQ(kNumChar - rank, sa[rank]);
  }
}
|
|
@ -1,855 +0,0 @@
|
|||
// Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
// ChangeLog:
|
||||
// 2016-07-22 - Initial commit and adaption to use PagedArray.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
|
||||
#include "3party/bsdiff-courgette/divsufsort/divsufsort_private.h"
|
||||
|
||||
/* Insertion-sort cutoff: defaults to 8; a caller-supplied value below 1 is
 * raised to 1. */
#if !defined(SS_INSERTIONSORT_THRESHOLD)
# define SS_INSERTIONSORT_THRESHOLD (8)
#elif SS_INSERTIONSORT_THRESHOLD < 1
# undef SS_INSERTIONSORT_THRESHOLD
# define SS_INSERTIONSORT_THRESHOLD (1)
#endif

/* Block size: defaults to 1024; a caller-supplied value is clamped to the
 * range [0, 32767]. */
#if !defined(SS_BLOCKSIZE)
# define SS_BLOCKSIZE (1024)
#elif SS_BLOCKSIZE < 0
# undef SS_BLOCKSIZE
# define SS_BLOCKSIZE (0)
#elif 32768 <= SS_BLOCKSIZE
# undef SS_BLOCKSIZE
# define SS_BLOCKSIZE (32767)
#endif

/* Explicit-stack depth for ss_mintrosort. */
/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
#if SS_BLOCKSIZE == 0
# define SS_MISORT_STACKSIZE (64)
#elif SS_BLOCKSIZE <= 4096
# define SS_MISORT_STACKSIZE (16)
#else
# define SS_MISORT_STACKSIZE (24)
#endif

/* Explicit-stack depth for ss_swapmerge. */
#define SS_SMERGE_STACKSIZE (32)
|
||||
|
||||
/* Pushes a four-field frame onto the caller's local `stack` array. The caller
 * must have `stack`, `ssize` and STACK_SIZE in scope. */
#define STACK_PUSH(_a, _b, _c, _d)\
  do {\
    assert(ssize < STACK_SIZE);\
    stack[ssize].a = (_a);\
    stack[ssize].b = (_b);\
    stack[ssize].c = (_c);\
    stack[ssize].d = (_d);\
    ++ssize;\
  } while(0)

/* Pops the top frame into the four given lvalues. When the stack is empty the
 * macro RETURNS from the enclosing (void) function instead. */
#define STACK_POP(_a, _b, _c, _d)\
  do {\
    assert(0 <= ssize);\
    if(ssize == 0) { return; }\
    --ssize;\
    (_a) = stack[ssize].a;\
    (_b) = stack[ssize].b;\
    (_c) = stack[ssize].c;\
    (_d) = stack[ssize].d;\
  } while(0)
|
||||
|
||||
namespace divsuf {
|
||||
|
||||
namespace {
|
||||
|
||||
/*- Private Functions -*/
|
||||
|
||||
/* lg_table[v] is the index of the highest set bit of the byte v (that is,
 * floor(log2(v))) for v in [1, 255]; lg_table[0] is -1. */
const saint_t lg_table[256] = {
  /*   0.. 31 */ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  /*  32.. 63 */ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  /*  64.. 95 */ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  /*  96..127 */ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  /* 128..159 */ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  /* 160..191 */ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  /* 192..223 */ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  /* 224..255 */ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
|
||||
|
||||
#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
|
||||
|
||||
inline
|
||||
saint_t
|
||||
ss_ilg(saidx_t n) {
|
||||
#if SS_BLOCKSIZE == 0
|
||||
return (n & 0xffff0000) ?
|
||||
((n & 0xff000000) ?
|
||||
24 + lg_table[(n >> 24) & 0xff] :
|
||||
16 + lg_table[(n >> 16) & 0xff]) :
|
||||
((n & 0x0000ff00) ?
|
||||
8 + lg_table[(n >> 8) & 0xff] :
|
||||
0 + lg_table[(n >> 0) & 0xff]);
|
||||
#elif SS_BLOCKSIZE < 256
|
||||
return lg_table[n];
|
||||
#else
|
||||
return (n & 0xff00) ?
|
||||
8 + lg_table[(n >> 8) & 0xff] :
|
||||
0 + lg_table[(n >> 0) & 0xff];
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
|
||||
|
||||
#if SS_BLOCKSIZE != 0
|
||||
|
||||
/* Lookup table used by ss_isqrt. The entries appear to be floor(16 * sqrt(i))
 * for i in [0, 255] (e.g. [1] = 16, [4] = 32, [64] = 128). */
const saint_t sqq_table[256] = {
  /*   0 */   0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
  /*  16 */  64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
  /*  32 */  90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
  /*  48 */ 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
  /*  64 */ 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
  /*  80 */ 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
  /*  96 */ 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
  /* 112 */ 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
  /* 128 */ 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
  /* 144 */ 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
  /* 160 */ 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
  /* 176 */ 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
  /* 192 */ 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
  /* 208 */ 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
  /* 224 */ 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
  /* 240 */ 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
};
|
||||
|
||||
inline
|
||||
saidx_t
|
||||
ss_isqrt(saidx_t x) {
|
||||
saidx_t y, e;
|
||||
|
||||
if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
|
||||
e = (x & 0xffff0000) ?
|
||||
((x & 0xff000000) ?
|
||||
24 + lg_table[(x >> 24) & 0xff] :
|
||||
16 + lg_table[(x >> 16) & 0xff]) :
|
||||
((x & 0x0000ff00) ?
|
||||
8 + lg_table[(x >> 8) & 0xff] :
|
||||
0 + lg_table[(x >> 0) & 0xff]);
|
||||
|
||||
if(e >= 16) {
|
||||
y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
|
||||
if(e >= 24) { y = (y + 1 + x / y) >> 1; }
|
||||
y = (y + 1 + x / y) >> 1;
|
||||
} else if(e >= 8) {
|
||||
y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
|
||||
} else {
|
||||
return sqq_table[x] >> 4;
|
||||
}
|
||||
|
||||
return (x < (y * y)) ? y - 1 : y;
|
||||
}
|
||||
|
||||
#endif /* SS_BLOCKSIZE != 0 */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/* Compares two suffixes. */
|
||||
inline
|
||||
saint_t
|
||||
ss_compare_internal(const sauchar_t *T,
|
||||
saidx_t p1_lo,
|
||||
saidx_t p1_hi,
|
||||
saidx_t p2_lo,
|
||||
saidx_t p2_hi,
|
||||
saidx_t depth) {
|
||||
const sauchar_t *U1, *U2, *U1n, *U2n;
|
||||
|
||||
for(U1 = T + depth + p1_lo,
|
||||
U2 = T + depth + p2_lo,
|
||||
U1n = T + p1_hi + 2,
|
||||
U2n = T + p2_hi + 2;
|
||||
(U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
|
||||
++U1, ++U2) {
|
||||
}
|
||||
|
||||
return U1 < U1n ?
|
||||
(U2 < U2n ? *U1 - *U2 : 1) :
|
||||
(U2 < U2n ? -1 : 0);
|
||||
}
|
||||
|
||||
/* Compares two suffixes. */
|
||||
inline
|
||||
saint_t
|
||||
ss_compare(const sauchar_t *T,
|
||||
const_saidx_it p1, const_saidx_it p2,
|
||||
saidx_t depth) {
|
||||
return ss_compare_internal(T, *p1, *(p1 + 1), *p2, *(p2 + 1), depth);
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
|
||||
|
||||
/* Insertionsort for small size groups */
|
||||
void
|
||||
ss_insertionsort(const sauchar_t *T, const_saidx_it PA,
|
||||
saidx_it first, saidx_it last, saidx_t depth) {
|
||||
saidx_it i, j;
|
||||
saidx_t t;
|
||||
saint_t r;
|
||||
|
||||
for(i = last - 2; first <= i; --i) {
|
||||
for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
|
||||
do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
|
||||
if(last <= j) { break; }
|
||||
}
|
||||
if(r == 0) { *j = ~*j; }
|
||||
*(j - 1) = t;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
|
||||
|
||||
inline
|
||||
void
|
||||
ss_fixdown(const sauchar_t *Td, const_saidx_it PA,
|
||||
saidx_it SA, saidx_t i, saidx_t size) {
|
||||
saidx_t j, k;
|
||||
saidx_t v;
|
||||
saint_t c, d, e;
|
||||
|
||||
for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
|
||||
d = Td[PA[SA[k = j++]]];
|
||||
if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
|
||||
if(d <= c) { break; }
|
||||
}
|
||||
SA[i] = v;
|
||||
}
|
||||
|
||||
/* Simple top-down heapsort. */
|
||||
void
|
||||
ss_heapsort(const sauchar_t *Td, const_saidx_it PA, saidx_it SA, saidx_t size) {
|
||||
saidx_t i, m;
|
||||
saidx_t t;
|
||||
|
||||
m = size;
|
||||
if((size % 2) == 0) {
|
||||
m--;
|
||||
if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
|
||||
}
|
||||
|
||||
for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
|
||||
if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
|
||||
for(i = m - 1; 0 < i; --i) {
|
||||
t = SA[0], SA[0] = SA[i];
|
||||
ss_fixdown(Td, PA, SA, 0, i);
|
||||
SA[i] = t;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/* Returns the median of three elements. */
|
||||
inline
|
||||
saidx_it
|
||||
ss_median3(const sauchar_t *Td, const_saidx_it PA,
|
||||
saidx_it v1, saidx_it v2, saidx_it v3) {
|
||||
saidx_it t;
|
||||
if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
|
||||
if(Td[PA[*v2]] > Td[PA[*v3]]) {
|
||||
if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
|
||||
else { return v3; }
|
||||
}
|
||||
return v2;
|
||||
}
|
||||
|
||||
/* Returns the median of five elements. */
|
||||
inline
|
||||
saidx_it
|
||||
ss_median5(const sauchar_t *Td, const_saidx_it PA,
|
||||
saidx_it v1, saidx_it v2, saidx_it v3, saidx_it v4, saidx_it v5) {
|
||||
saidx_it t;
|
||||
if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
|
||||
if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
|
||||
if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
|
||||
if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
|
||||
if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
|
||||
if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
|
||||
return v3;
|
||||
}
|
||||
|
||||
/* Returns the pivot element. */
|
||||
inline
|
||||
saidx_it
|
||||
ss_pivot(const sauchar_t *Td, const_saidx_it PA, saidx_it first, saidx_it last) {
|
||||
saidx_it middle;
|
||||
saidx_t t;
|
||||
|
||||
t = last - first;
|
||||
middle = first + t / 2;
|
||||
|
||||
if(t <= 512) {
|
||||
if(t <= 32) {
|
||||
return ss_median3(Td, PA, first, middle, last - 1);
|
||||
} else {
|
||||
t >>= 2;
|
||||
return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
|
||||
}
|
||||
}
|
||||
t >>= 3;
|
||||
first = ss_median3(Td, PA, first, first + t, first + (t << 1));
|
||||
middle = ss_median3(Td, PA, middle - t, middle, middle + t);
|
||||
last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
|
||||
return ss_median3(Td, PA, first, middle, last);
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/* Binary partition for substrings. */
|
||||
inline
|
||||
saidx_it
|
||||
ss_partition(const_saidx_it PA,
|
||||
saidx_it first, saidx_it last, saidx_t depth) {
|
||||
saidx_it a, b;
|
||||
saidx_t t;
|
||||
for(a = first - 1, b = last;;) {
|
||||
for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
|
||||
for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
|
||||
if(b <= a) { break; }
|
||||
t = ~*b;
|
||||
*b = *a;
|
||||
*a = t;
|
||||
}
|
||||
if(first < a) { *first = ~*first; }
|
||||
return a;
|
||||
}
|
||||
|
||||
/* Multikey introsort for medium size groups. */
|
||||
void
|
||||
ss_mintrosort(const sauchar_t *T, const_saidx_it PA,
|
||||
saidx_it first, saidx_it last,
|
||||
saidx_t depth) {
|
||||
#define STACK_SIZE SS_MISORT_STACKSIZE
|
||||
struct { saidx_it a, b; saidx_t c; saint_t d; } stack[STACK_SIZE];
|
||||
const sauchar_t *Td;
|
||||
saidx_it a, b, c, d, e, f;
|
||||
saidx_t s, t;
|
||||
saint_t ssize;
|
||||
saint_t limit;
|
||||
saint_t v, x = 0;
|
||||
|
||||
for(ssize = 0, limit = ss_ilg(last - first);;) {
|
||||
|
||||
if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
|
||||
#if 1 < SS_INSERTIONSORT_THRESHOLD
|
||||
if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
|
||||
#endif
|
||||
STACK_POP(first, last, depth, limit);
|
||||
continue;
|
||||
}
|
||||
|
||||
Td = T + depth;
|
||||
if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
|
||||
if(limit < 0) {
|
||||
for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
|
||||
if((x = Td[PA[*a]]) != v) {
|
||||
if(1 < (a - first)) { break; }
|
||||
v = x;
|
||||
first = a;
|
||||
}
|
||||
}
|
||||
if(Td[PA[*first] - 1] < v) {
|
||||
first = ss_partition(PA, first, a, depth);
|
||||
}
|
||||
if((a - first) <= (last - a)) {
|
||||
if(1 < (a - first)) {
|
||||
STACK_PUSH(a, last, depth, -1);
|
||||
last = a, depth += 1, limit = ss_ilg(a - first);
|
||||
} else {
|
||||
first = a, limit = -1;
|
||||
}
|
||||
} else {
|
||||
if(1 < (last - a)) {
|
||||
STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
|
||||
first = a, limit = -1;
|
||||
} else {
|
||||
last = a, depth += 1, limit = ss_ilg(a - first);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* choose pivot */
|
||||
a = ss_pivot(Td, PA, first, last);
|
||||
v = Td[PA[*a]];
|
||||
SWAP(*first, *a);
|
||||
|
||||
/* partition */
|
||||
for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
|
||||
if(((a = b) < last) && (x < v)) {
|
||||
for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
|
||||
if(x == v) { SWAP(*b, *a); ++a; }
|
||||
}
|
||||
}
|
||||
for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
|
||||
if((b < (d = c)) && (x > v)) {
|
||||
for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
|
||||
if(x == v) { SWAP(*c, *d); --d; }
|
||||
}
|
||||
}
|
||||
for(; b < c;) {
|
||||
SWAP(*b, *c);
|
||||
for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
|
||||
if(x == v) { SWAP(*b, *a); ++a; }
|
||||
}
|
||||
for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
|
||||
if(x == v) { SWAP(*c, *d); --d; }
|
||||
}
|
||||
}
|
||||
|
||||
if(a <= d) {
|
||||
c = b - 1;
|
||||
|
||||
if((s = a - first) > (t = b - a)) { s = t; }
|
||||
for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
|
||||
if((s = d - c) > (t = last - d - 1)) { s = t; }
|
||||
for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
|
||||
|
||||
a = first + (b - a), c = last - (d - c);
|
||||
b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
|
||||
|
||||
if((a - first) <= (last - c)) {
|
||||
if((last - c) <= (c - b)) {
|
||||
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
|
||||
STACK_PUSH(c, last, depth, limit);
|
||||
last = a;
|
||||
} else if((a - first) <= (c - b)) {
|
||||
STACK_PUSH(c, last, depth, limit);
|
||||
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
|
||||
last = a;
|
||||
} else {
|
||||
STACK_PUSH(c, last, depth, limit);
|
||||
STACK_PUSH(first, a, depth, limit);
|
||||
first = b, last = c, depth += 1, limit = ss_ilg(c - b);
|
||||
}
|
||||
} else {
|
||||
if((a - first) <= (c - b)) {
|
||||
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
|
||||
STACK_PUSH(first, a, depth, limit);
|
||||
first = c;
|
||||
} else if((last - c) <= (c - b)) {
|
||||
STACK_PUSH(first, a, depth, limit);
|
||||
STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
|
||||
first = c;
|
||||
} else {
|
||||
STACK_PUSH(first, a, depth, limit);
|
||||
STACK_PUSH(c, last, depth, limit);
|
||||
first = b, last = c, depth += 1, limit = ss_ilg(c - b);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
limit += 1;
|
||||
if(Td[PA[*first] - 1] < v) {
|
||||
first = ss_partition(PA, first, last, depth);
|
||||
limit = ss_ilg(last - first);
|
||||
}
|
||||
depth += 1;
|
||||
}
|
||||
}
|
||||
#undef STACK_SIZE
|
||||
}
|
||||
|
||||
#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
#if SS_BLOCKSIZE != 0
|
||||
|
||||
inline
|
||||
void
|
||||
ss_blockswap(saidx_it a, saidx_it b, saidx_t n) {
|
||||
saidx_t t;
|
||||
for(; 0 < n; --n, ++a, ++b) {
|
||||
t = *a, *a = *b, *b = t;
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
void
|
||||
ss_rotate(saidx_it first, saidx_it middle, saidx_it last) {
|
||||
saidx_it a, b;
|
||||
saidx_t t;
|
||||
saidx_t l, r;
|
||||
l = middle - first, r = last - middle;
|
||||
for(; (0 < l) && (0 < r);) {
|
||||
if(l == r) { ss_blockswap(first, middle, l); break; }
|
||||
if(l < r) {
|
||||
a = last - 1, b = middle - 1;
|
||||
t = *a;
|
||||
do {
|
||||
*a-- = *b, *b-- = *a;
|
||||
if(b < first) {
|
||||
*a = t;
|
||||
last = a;
|
||||
if((r -= l + 1) <= l) { break; }
|
||||
a -= 1, b = middle - 1;
|
||||
t = *a;
|
||||
}
|
||||
} while(1);
|
||||
} else {
|
||||
a = first, b = middle;
|
||||
t = *a;
|
||||
do {
|
||||
*a++ = *b, *b++ = *a;
|
||||
if(last <= b) {
|
||||
*a = t;
|
||||
first = a + 1;
|
||||
if((l -= r + 1) <= r) { break; }
|
||||
a += 1, b = middle;
|
||||
t = *a;
|
||||
}
|
||||
} while(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
void
|
||||
ss_inplacemerge(const sauchar_t *T, const_saidx_it PA,
|
||||
saidx_it first, saidx_it middle, saidx_it last,
|
||||
saidx_t depth) {
|
||||
const_saidx_it p;
|
||||
saidx_it a, b;
|
||||
saidx_t len, half;
|
||||
saint_t q, r;
|
||||
saint_t x;
|
||||
|
||||
for(;;) {
|
||||
if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
|
||||
else { x = 0; p = PA + *(last - 1); }
|
||||
for(a = first, len = middle - first, half = len >> 1, r = -1;
|
||||
0 < len;
|
||||
len = half, half >>= 1) {
|
||||
b = a + half;
|
||||
q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
|
||||
if(q < 0) {
|
||||
a = b + 1;
|
||||
half -= (len & 1) ^ 1;
|
||||
} else {
|
||||
r = q;
|
||||
}
|
||||
}
|
||||
if(a < middle) {
|
||||
if(r == 0) { *a = ~*a; }
|
||||
ss_rotate(a, middle, last);
|
||||
last -= middle - a;
|
||||
middle = a;
|
||||
if(first == middle) { break; }
|
||||
}
|
||||
--last;
|
||||
if(x != 0) { while(*--last < 0) { } }
|
||||
if(middle == last) { break; }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/* Merge-forward with internal buffer. */
|
||||
static
|
||||
void
|
||||
ss_mergeforward(const sauchar_t *T, const_saidx_it PA,
|
||||
saidx_it first, saidx_it middle, saidx_it last,
|
||||
saidx_it buf, saidx_t depth) {
|
||||
saidx_it a, b, c, bufend;
|
||||
saidx_t t;
|
||||
saint_t r;
|
||||
|
||||
bufend = buf + (middle - first) - 1;
|
||||
ss_blockswap(buf, first, middle - first);
|
||||
|
||||
for(t = *(a = first), b = buf, c = middle;;) {
|
||||
r = ss_compare(T, PA + *b, PA + *c, depth);
|
||||
if(r < 0) {
|
||||
do {
|
||||
*a++ = *b;
|
||||
if(bufend <= b) { *bufend = t; return; }
|
||||
*b++ = *a;
|
||||
} while(*b < 0);
|
||||
} else if(r > 0) {
|
||||
do {
|
||||
*a++ = *c, *c++ = *a;
|
||||
if(last <= c) {
|
||||
while(b < bufend) { *a++ = *b, *b++ = *a; }
|
||||
*a = *b, *b = t;
|
||||
return;
|
||||
}
|
||||
} while(*c < 0);
|
||||
} else {
|
||||
*c = ~*c;
|
||||
do {
|
||||
*a++ = *b;
|
||||
if(bufend <= b) { *bufend = t; return; }
|
||||
*b++ = *a;
|
||||
} while(*b < 0);
|
||||
|
||||
do {
|
||||
*a++ = *c, *c++ = *a;
|
||||
if(last <= c) {
|
||||
while(b < bufend) { *a++ = *b, *b++ = *a; }
|
||||
*a = *b, *b = t;
|
||||
return;
|
||||
}
|
||||
} while(*c < 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Merge-backward with internal buffer. */
|
||||
void
|
||||
ss_mergebackward(const sauchar_t *T, const_saidx_it PA,
|
||||
saidx_it first, saidx_it middle, saidx_it last,
|
||||
saidx_it buf, saidx_t depth) {
|
||||
const_saidx_it p1, p2;
|
||||
saidx_it a, b, c, bufend;
|
||||
saidx_t t;
|
||||
saint_t r;
|
||||
saint_t x;
|
||||
|
||||
bufend = buf + (last - middle) - 1;
|
||||
ss_blockswap(buf, middle, last - middle);
|
||||
|
||||
x = 0;
|
||||
if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
|
||||
else { p1 = PA + *bufend; }
|
||||
if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
|
||||
else { p2 = PA + *(middle - 1); }
|
||||
for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
|
||||
r = ss_compare(T, p1, p2, depth);
|
||||
if(0 < r) {
|
||||
if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
|
||||
*a-- = *b;
|
||||
if(b <= buf) { *buf = t; break; }
|
||||
*b-- = *a;
|
||||
if(*b < 0) { p1 = PA + ~*b; x |= 1; }
|
||||
else { p1 = PA + *b; }
|
||||
} else if(r < 0) {
|
||||
if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
|
||||
*a-- = *c, *c-- = *a;
|
||||
if(c < first) {
|
||||
while(buf < b) { *a-- = *b, *b-- = *a; }
|
||||
*a = *b, *b = t;
|
||||
break;
|
||||
}
|
||||
if(*c < 0) { p2 = PA + ~*c; x |= 2; }
|
||||
else { p2 = PA + *c; }
|
||||
} else {
|
||||
if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
|
||||
*a-- = ~*b;
|
||||
if(b <= buf) { *buf = t; break; }
|
||||
*b-- = *a;
|
||||
if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
|
||||
*a-- = *c, *c-- = *a;
|
||||
if(c < first) {
|
||||
while(buf < b) { *a-- = *b, *b-- = *a; }
|
||||
*a = *b, *b = t;
|
||||
break;
|
||||
}
|
||||
if(*b < 0) { p1 = PA + ~*b; x |= 1; }
|
||||
else { p1 = PA + *b; }
|
||||
if(*c < 0) { p2 = PA + ~*c; x |= 2; }
|
||||
else { p2 = PA + *c; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* D&C based merge. */
|
||||
void
|
||||
ss_swapmerge(const sauchar_t *T, const_saidx_it PA,
|
||||
saidx_it first, saidx_it middle, saidx_it last,
|
||||
saidx_it buf, saidx_t bufsize, saidx_t depth) {
|
||||
#define STACK_SIZE SS_SMERGE_STACKSIZE
|
||||
#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
|
||||
#define MERGE_CHECK(a, b, c)\
|
||||
do {\
|
||||
if(((c) & 1) ||\
|
||||
(((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
|
||||
*(a) = ~*(a);\
|
||||
}\
|
||||
if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
|
||||
*(b) = ~*(b);\
|
||||
}\
|
||||
} while(0)
|
||||
struct { saidx_it a, b, c; saint_t d; } stack[STACK_SIZE];
|
||||
saidx_it l, r, lm, rm;
|
||||
saidx_t m, len, half;
|
||||
saint_t ssize;
|
||||
saint_t check, next;
|
||||
|
||||
for(check = 0, ssize = 0;;) {
|
||||
if((last - middle) <= bufsize) {
|
||||
if((first < middle) && (middle < last)) {
|
||||
ss_mergebackward(T, PA, first, middle, last, buf, depth);
|
||||
}
|
||||
MERGE_CHECK(first, last, check);
|
||||
STACK_POP(first, middle, last, check);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((middle - first) <= bufsize) {
|
||||
if(first < middle) {
|
||||
ss_mergeforward(T, PA, first, middle, last, buf, depth);
|
||||
}
|
||||
MERGE_CHECK(first, last, check);
|
||||
STACK_POP(first, middle, last, check);
|
||||
continue;
|
||||
}
|
||||
|
||||
for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
|
||||
0 < len;
|
||||
len = half, half >>= 1) {
|
||||
if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
|
||||
PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
|
||||
m += half + 1;
|
||||
half -= (len & 1) ^ 1;
|
||||
}
|
||||
}
|
||||
|
||||
if(0 < m) {
|
||||
lm = middle - m, rm = middle + m;
|
||||
ss_blockswap(lm, middle, m);
|
||||
l = r = middle, next = 0;
|
||||
if(rm < last) {
|
||||
if(*rm < 0) {
|
||||
*rm = ~*rm;
|
||||
if(first < lm) { for(; *--l < 0;) { } next |= 4; }
|
||||
next |= 1;
|
||||
} else if(first < lm) {
|
||||
for(; *r < 0; ++r) { }
|
||||
next |= 2;
|
||||
}
|
||||
}
|
||||
|
||||
if((l - first) <= (last - r)) {
|
||||
STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
|
||||
middle = lm, last = l, check = (check & 3) | (next & 4);
|
||||
} else {
|
||||
if((next & 2) && (r == middle)) { next ^= 6; }
|
||||
STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
|
||||
first = r, middle = rm, check = (next & 3) | (check & 4);
|
||||
}
|
||||
} else {
|
||||
if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
|
||||
*middle = ~*middle;
|
||||
}
|
||||
MERGE_CHECK(first, last, check);
|
||||
STACK_POP(first, middle, last, check);
|
||||
}
|
||||
}
|
||||
#undef STACK_SIZE
|
||||
}
|
||||
|
||||
#endif /* SS_BLOCKSIZE != 0 */
|
||||
|
||||
} // namespace
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/*- Function -*/
|
||||
|
||||
/* Substring sort */
|
||||
void
|
||||
sssort(const sauchar_t *T, const_saidx_it PA,
|
||||
saidx_it first, saidx_it last,
|
||||
saidx_it buf, saidx_t bufsize,
|
||||
saidx_t depth, saidx_t n, saint_t lastsuffix) {
|
||||
saidx_it a;
|
||||
#if SS_BLOCKSIZE != 0
|
||||
saidx_it b, middle, curbuf;
|
||||
saidx_t j, k, curbufsize, limit;
|
||||
#endif
|
||||
saidx_t i;
|
||||
|
||||
if(lastsuffix != 0) { ++first; }
|
||||
|
||||
#if SS_BLOCKSIZE == 0
|
||||
ss_mintrosort(T, PA, first, last, depth);
|
||||
#else
|
||||
if((bufsize < SS_BLOCKSIZE) &&
|
||||
(bufsize < (last - first)) &&
|
||||
(bufsize < (limit = ss_isqrt(last - first)))) {
|
||||
if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
|
||||
buf = middle = last - limit, bufsize = limit;
|
||||
} else {
|
||||
middle = last, limit = 0;
|
||||
}
|
||||
for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
|
||||
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
|
||||
ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
|
||||
#elif 1 < SS_BLOCKSIZE
|
||||
ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
|
||||
#endif
|
||||
curbufsize = last - (a + SS_BLOCKSIZE);
|
||||
curbuf = a + SS_BLOCKSIZE;
|
||||
if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
|
||||
for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
|
||||
ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
|
||||
}
|
||||
}
|
||||
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
|
||||
ss_mintrosort(T, PA, a, middle, depth);
|
||||
#elif 1 < SS_BLOCKSIZE
|
||||
ss_insertionsort(T, PA, a, middle, depth);
|
||||
#endif
|
||||
for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
|
||||
if(i & 1) {
|
||||
ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
|
||||
a -= k;
|
||||
}
|
||||
}
|
||||
if(limit != 0) {
|
||||
#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
|
||||
ss_mintrosort(T, PA, middle, last, depth);
|
||||
#elif 1 < SS_BLOCKSIZE
|
||||
ss_insertionsort(T, PA, middle, last, depth);
|
||||
#endif
|
||||
ss_inplacemerge(T, PA, first, middle, last, depth);
|
||||
}
|
||||
#endif
|
||||
|
||||
if(lastsuffix != 0) {
|
||||
/* Insert last type B* suffix. */
|
||||
for(a = first, i = *(first - 1);
|
||||
(a < last) && ((*a < 0) ||
|
||||
(0 < ss_compare_internal(T, PA[*(first - 1)], n - 2,
|
||||
PA[*a], PA[*a + 1], depth)));
|
||||
++a) {
|
||||
*(a - 1) = *a;
|
||||
}
|
||||
*(a - 1) = i;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace divsuf
|
|
@ -1,587 +0,0 @@
|
|||
// Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
//
|
||||
// ChangeLog:
|
||||
// 2016-07-22 - Initial commit and adaption to use PagedArray.
|
||||
// --Samuel Huang <huangs@chromium.org>
|
||||
|
||||
#include "3party/bsdiff-courgette/divsufsort/divsufsort_private.h"
|
||||
|
||||
/* Tuning constants for this file: insertion-sort cutoff and explicit-stack
 * depth. */
#define TR_INSERTIONSORT_THRESHOLD (8)
#define TR_STACKSIZE (64)

/* Pushes a five-field frame onto the caller's local `stack` array. The caller
 * must have `stack`, `ssize` and STACK_SIZE in scope. */
#define STACK_PUSH5(_a, _b, _c, _d, _e)\
  do {\
    assert(ssize < STACK_SIZE);\
    stack[ssize].a = (_a);\
    stack[ssize].b = (_b);\
    stack[ssize].c = (_c);\
    stack[ssize].d = (_d);\
    stack[ssize].e = (_e);\
    ++ssize;\
  } while(0)

/* Pops the top frame into the five given lvalues. When the stack is empty the
 * macro RETURNS from the enclosing (void) function instead. */
#define STACK_POP5(_a, _b, _c, _d, _e)\
  do {\
    assert(0 <= ssize);\
    if(ssize == 0) { return; }\
    --ssize;\
    (_a) = stack[ssize].a;\
    (_b) = stack[ssize].b;\
    (_c) = stack[ssize].c;\
    (_d) = stack[ssize].d;\
    (_e) = stack[ssize].e;\
  } while(0)
|
||||
|
||||
|
||||
namespace divsuf {
|
||||
|
||||
namespace {
|
||||
|
||||
/*- Private Functions -*/
|
||||
|
||||
/* lg_table_[b] is floor(log2(b)) for 1 <= b <= 255; lg_table_[0] is -1. */
const saint_t lg_table_[256]= {
 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};

/* Index of the highest set bit among the low 32 bits of |n|, found by locating
   the most significant non-zero byte and looking it up in lg_table_.
   Returns -1 when none of those bits is set. */
inline
saint_t
tr_ilg(saidx_t n) {
  if(n & 0xff000000) { return 24 + lg_table_[(n >> 24) & 0xff]; }
  if(n & 0x00ff0000) { return 16 + lg_table_[(n >> 16) & 0xff]; }
  if(n & 0x0000ff00) { return  8 + lg_table_[(n >>  8) & 0xff]; }
  return 0 + lg_table_[(n >> 0) & 0xff];
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/* Simple insertionsort for small size groups. */
|
||||
void
|
||||
tr_insertionsort(const_saidx_it ISAd, saidx_it first, saidx_it last) {
|
||||
saidx_it a, b;
|
||||
saidx_t t, r;
|
||||
|
||||
for(a = first + 1; a < last; ++a) {
|
||||
for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
|
||||
do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
|
||||
if(b < first) { break; }
|
||||
}
|
||||
if(r == 0) { *b = ~*b; }
|
||||
*(b + 1) = t;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
inline
|
||||
void
|
||||
tr_fixdown(const_saidx_it ISAd, saidx_it SA, saidx_t i, saidx_t size) {
|
||||
saidx_t j, k;
|
||||
saidx_t v;
|
||||
saidx_t c, d, e;
|
||||
|
||||
for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
|
||||
d = ISAd[SA[k = j++]];
|
||||
if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
|
||||
if(d <= c) { break; }
|
||||
}
|
||||
SA[i] = v;
|
||||
}
|
||||
|
||||
/* Simple top-down heapsort. */
|
||||
void
|
||||
tr_heapsort(const_saidx_it ISAd, saidx_it SA, saidx_t size) {
|
||||
saidx_t i, m;
|
||||
saidx_t t;
|
||||
|
||||
m = size;
|
||||
if((size % 2) == 0) {
|
||||
m--;
|
||||
if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
|
||||
}
|
||||
|
||||
for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
|
||||
if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
|
||||
for(i = m - 1; 0 < i; --i) {
|
||||
t = SA[0], SA[0] = SA[i];
|
||||
tr_fixdown(ISAd, SA, 0, i);
|
||||
SA[i] = t;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/* Returns the median of three elements. */
|
||||
inline
|
||||
saidx_it
|
||||
tr_median3(const_saidx_it ISAd, saidx_it v1, saidx_it v2, saidx_it v3) {
|
||||
saidx_it t;
|
||||
if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
|
||||
if(ISAd[*v2] > ISAd[*v3]) {
|
||||
if(ISAd[*v1] > ISAd[*v3]) { return v1; }
|
||||
else { return v3; }
|
||||
}
|
||||
return v2;
|
||||
}
|
||||
|
||||
/* Returns the median of five elements. */
|
||||
inline
|
||||
saidx_it
|
||||
tr_median5(const_saidx_it ISAd,
|
||||
saidx_it v1, saidx_it v2, saidx_it v3, saidx_it v4, saidx_it v5) {
|
||||
saidx_it t;
|
||||
if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
|
||||
if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
|
||||
if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
|
||||
if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
|
||||
if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
|
||||
if(ISAd[*v3] > ISAd[*v4]) { return v4; }
|
||||
return v3;
|
||||
}
|
||||
|
||||
/* Returns the pivot element. */
|
||||
inline
|
||||
saidx_it
|
||||
tr_pivot(const_saidx_it ISAd, saidx_it first, saidx_it last) {
|
||||
saidx_it middle;
|
||||
saidx_t t;
|
||||
|
||||
t = last - first;
|
||||
middle = first + t / 2;
|
||||
|
||||
if(t <= 512) {
|
||||
if(t <= 32) {
|
||||
return tr_median3(ISAd, first, middle, last - 1);
|
||||
} else {
|
||||
t >>= 2;
|
||||
return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
|
||||
}
|
||||
}
|
||||
t >>= 3;
|
||||
first = tr_median3(ISAd, first, first + t, first + (t << 1));
|
||||
middle = tr_median3(ISAd, middle - t, middle, middle + t);
|
||||
last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
|
||||
return tr_median3(ISAd, first, middle, last);
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
typedef struct _trbudget_t trbudget_t;
|
||||
struct _trbudget_t {
|
||||
saidx_t chance;
|
||||
saidx_t remain;
|
||||
saidx_t incval;
|
||||
saidx_t count;
|
||||
};
|
||||
|
||||
inline
|
||||
void
|
||||
trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
|
||||
budget->chance = chance;
|
||||
budget->remain = budget->incval = incval;
|
||||
}
|
||||
|
||||
inline
|
||||
saint_t
|
||||
trbudget_check(trbudget_t *budget, saidx_t size) {
|
||||
if(size <= budget->remain) { budget->remain -= size; return 1; }
|
||||
if(budget->chance == 0) { budget->count += size; return 0; }
|
||||
budget->remain += budget->incval - size;
|
||||
budget->chance -= 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/* Three-way partition of [first, last) around the key value |v|, comparing
   ISAd[*it] with |v|. Scanning starts at |middle|; elements in
   [first, middle) are not examined here (the callers in this file pass either
   first or first + 1, the latter after moving the pivot to *first).
   Elements equal to |v| are first collected at the two ends of the range and
   then swapped into the centre. On return [*pa, *pb) is the block that ends
   up between the smaller and the larger keys. */
inline
|
||||
void
|
||||
tr_partition(const_saidx_it ISAd,
|
||||
saidx_it first, saidx_it middle, saidx_it last,
|
||||
saidx_it* pa, saidx_it* pb, saidx_t v) {
|
||||
saidx_it a, b, c, d, e, f;
|
||||
/* t doubles as the scratch slot of the SWAP macro. */
saidx_t t, s;
|
||||
saidx_t x = 0;
|
||||
|
||||
/* Skip the leading run of keys equal to v. */
for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
|
||||
if(((a = b) < last) && (x < v)) {
|
||||
for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
|
||||
if(x == v) { SWAP(*b, *a); ++a; }
|
||||
}
|
||||
}
|
||||
/* Same from the right-hand end. */
for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
|
||||
if((b < (d = c)) && (x > v)) {
|
||||
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
|
||||
if(x == v) { SWAP(*c, *d); --d; }
|
||||
}
|
||||
}
|
||||
/* Main loop: exchange a too-large left element with a too-small right one,
   moving equal keys towards a (left end) and d (right end) as they are met. */
for(; b < c;) {
|
||||
SWAP(*b, *c);
|
||||
for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
|
||||
if(x == v) { SWAP(*b, *a); ++a; }
|
||||
}
|
||||
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
|
||||
if(x == v) { SWAP(*c, *d); --d; }
|
||||
}
|
||||
}
|
||||
|
||||
/* Swap the equal keys parked at both ends into the centre and shrink
   first/last to the boundaries of that centre block. */
if(a <= d) {
|
||||
c = b - 1;
|
||||
if((s = a - first) > (t = b - a)) { s = t; }
|
||||
for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
|
||||
if((s = d - c) > (t = last - d - 1)) { s = t; }
|
||||
for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
|
||||
first += (b - a), last -= (d - c);
|
||||
}
|
||||
*pa = first, *pb = last;
|
||||
}
|
||||
|
||||
void
|
||||
tr_copy(saidx_it ISA, const_saidx_it SA,
|
||||
saidx_it first, saidx_it a, saidx_it b, saidx_it last,
|
||||
saidx_t depth) {
|
||||
/* sort suffixes of middle partition
|
||||
by using sorted order of suffixes of left and right partition. */
|
||||
saidx_it c, d, e;
|
||||
saidx_t s, v;
|
||||
|
||||
v = b - SA - 1;
|
||||
for(c = first, d = a - 1; c <= d; ++c) {
|
||||
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
||||
*++d = s;
|
||||
ISA[s] = d - SA;
|
||||
}
|
||||
}
|
||||
for(c = last - 1, e = d + 1, d = b; e < d; --c) {
|
||||
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
||||
*--d = s;
|
||||
ISA[s] = d - SA;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tr_partialcopy(saidx_it ISA, const_saidx_it SA,
|
||||
saidx_it first, saidx_it a, saidx_it b, saidx_it last,
|
||||
saidx_t depth) {
|
||||
saidx_it c, d, e;
|
||||
saidx_t s, v;
|
||||
saidx_t rank, lastrank, newrank = -1;
|
||||
|
||||
v = b - SA - 1;
|
||||
lastrank = -1;
|
||||
for(c = first, d = a - 1; c <= d; ++c) {
|
||||
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
||||
*++d = s;
|
||||
rank = ISA[s + depth];
|
||||
if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
|
||||
ISA[s] = newrank;
|
||||
}
|
||||
}
|
||||
|
||||
lastrank = -1;
|
||||
for(e = d; first <= e; --e) {
|
||||
rank = ISA[*e];
|
||||
if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
|
||||
if(newrank != rank) { ISA[*e] = newrank; }
|
||||
}
|
||||
|
||||
lastrank = -1;
|
||||
for(c = last - 1, e = d + 1, d = b; e < d; --c) {
|
||||
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
||||
*--d = s;
|
||||
rank = ISA[s + depth];
|
||||
if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
|
||||
ISA[s] = newrank;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Sorts the group [first, last) of SA by the keys ISAd[*it], updating the
   ranks stored in ISA as sub-groups are split off.
   Recursion is replaced by an explicit stack of TR_STACKSIZE frames; each
   frame holds (a = ISAd, b = first, c = last, d = limit, e = trlink), and the
   function returns from inside STACK_POP5 once the stack is empty.
   |limit| selects what to do with the current range:
     >= 0  quicksort-style partitioning; the value is decremented per round
           and tr_heapsort() is used once it reaches 0,
     -1    "tandem repeat partition",
     -2    "tandem repeat copy",
     -3    "sorted partition" (fallback branch for the remaining negative value).
   |budget| is consulted through trbudget_check() before descending one level
   deeper (ISAd + incr); when it refuses, the frame referenced by trlink is
   flagged with d = -1. */
void
|
||||
tr_introsort(saidx_it ISA, const_saidx_it ISAd,
|
||||
saidx_it SA, saidx_it first, saidx_it last,
|
||||
trbudget_t *budget) {
|
||||
#define STACK_SIZE TR_STACKSIZE
|
||||
struct { const_saidx_it a; saidx_it b, c; saint_t d, e; }stack[STACK_SIZE];
|
||||
saidx_it a, b, c;
|
||||
/* t is the scratch slot of the SWAP macro. */
saidx_t t;
|
||||
saidx_t v, x = 0;
|
||||
/* Distance between the rank array and the key array; added to ISAd to go
   one level deeper. */
saidx_t incr = ISAd - ISA;
|
||||
saint_t limit, next;
|
||||
saint_t ssize, trlink = -1;
|
||||
|
||||
for(ssize = 0, limit = tr_ilg(last - first);;) {
|
||||
|
||||
/* Negative limits are control codes rather than depth limits. */
if(limit < 0) {
|
||||
if(limit == -1) {
|
||||
/* tandem repeat partition */
|
||||
tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
|
||||
|
||||
/* update ranks */
|
||||
if(a < last) {
|
||||
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
|
||||
}
|
||||
if(b < last) {
|
||||
for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
|
||||
}
|
||||
|
||||
/* push */
|
||||
if(1 < (b - a)) {
|
||||
STACK_PUSH5(nullptr, a, b, 0, 0);
|
||||
STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
|
||||
trlink = ssize - 2;
|
||||
}
|
||||
if((a - first) <= (last - b)) {
|
||||
if(1 < (a - first)) {
|
||||
STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
|
||||
last = a, limit = tr_ilg(a - first);
|
||||
} else if(1 < (last - b)) {
|
||||
first = b, limit = tr_ilg(last - b);
|
||||
} else {
|
||||
STACK_POP5(ISAd, first, last, limit, trlink);
|
||||
}
|
||||
} else {
|
||||
if(1 < (last - b)) {
|
||||
STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
|
||||
first = b, limit = tr_ilg(last - b);
|
||||
} else if(1 < (a - first)) {
|
||||
last = a, limit = tr_ilg(a - first);
|
||||
} else {
|
||||
STACK_POP5(ISAd, first, last, limit, trlink);
|
||||
}
|
||||
}
|
||||
} else if(limit == -2) {
|
||||
/* tandem repeat copy */
|
||||
/* The frame below the current one holds the middle partition [a, b);
   its d field is 0 unless it was flagged with -1 in the meantime. */
a = stack[--ssize].b, b = stack[ssize].c;
|
||||
if(stack[ssize].d == 0) {
|
||||
tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
|
||||
} else {
|
||||
if(0 <= trlink) { stack[trlink].d = -1; }
|
||||
tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
|
||||
}
|
||||
STACK_POP5(ISAd, first, last, limit, trlink);
|
||||
} else {
|
||||
/* sorted partition */
|
||||
if(0 <= *first) {
|
||||
a = first;
|
||||
do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
|
||||
first = a;
|
||||
}
|
||||
if(first < last) {
|
||||
/* Undo the bit-complement marking on the run of negative entries. */
a = first; do { *a = ~*a; } while(*++a < 0);
|
||||
next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
|
||||
if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
|
||||
|
||||
/* push */
|
||||
if(trbudget_check(budget, a - first)) {
|
||||
if((a - first) <= (last - a)) {
|
||||
STACK_PUSH5(ISAd, a, last, -3, trlink);
|
||||
ISAd += incr, last = a, limit = next;
|
||||
} else {
|
||||
if(1 < (last - a)) {
|
||||
STACK_PUSH5(ISAd + incr, first, a, next, trlink);
|
||||
first = a, limit = -3;
|
||||
} else {
|
||||
ISAd += incr, last = a, limit = next;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(0 <= trlink) { stack[trlink].d = -1; }
|
||||
if(1 < (last - a)) {
|
||||
first = a, limit = -3;
|
||||
} else {
|
||||
STACK_POP5(ISAd, first, last, limit, trlink);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
STACK_POP5(ISAd, first, last, limit, trlink);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Small ranges: insertion sort, then process the result as a sorted
   partition (limit = -3). */
if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
|
||||
tr_insertionsort(ISAd, first, last);
|
||||
limit = -3;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Depth limit exhausted: heapsort, then bit-complement every element that
   has the same key as its successor, as tr_insertionsort() does. */
if(limit-- == 0) {
|
||||
tr_heapsort(ISAd, first, last - first);
|
||||
for(a = last - 1; first < a; a = b) {
|
||||
for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
|
||||
}
|
||||
limit = -3;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* choose pivot */
|
||||
a = tr_pivot(ISAd, first, last);
|
||||
SWAP(*first, *a);
|
||||
v = ISAd[*first];
|
||||
|
||||
/* partition */
|
||||
tr_partition(ISAd, first, first + 1, last, &a, &b, v);
|
||||
if((last - first) != (b - a)) {
|
||||
next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
|
||||
|
||||
/* update ranks */
|
||||
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
|
||||
if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
|
||||
|
||||
/* push */
|
||||
/* The branches below only differ in which of the three sub-ranges is
   continued with directly and in which order the others are pushed,
   depending on their relative sizes. */
if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
|
||||
if((a - first) <= (last - b)) {
|
||||
if((last - b) <= (b - a)) {
|
||||
if(1 < (a - first)) {
|
||||
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
||||
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
||||
last = a;
|
||||
} else if(1 < (last - b)) {
|
||||
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
||||
first = b;
|
||||
} else {
|
||||
ISAd += incr, first = a, last = b, limit = next;
|
||||
}
|
||||
} else if((a - first) <= (b - a)) {
|
||||
if(1 < (a - first)) {
|
||||
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
||||
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
||||
last = a;
|
||||
} else {
|
||||
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
||||
ISAd += incr, first = a, last = b, limit = next;
|
||||
}
|
||||
} else {
|
||||
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
||||
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
||||
ISAd += incr, first = a, last = b, limit = next;
|
||||
}
|
||||
} else {
|
||||
if((a - first) <= (b - a)) {
|
||||
if(1 < (last - b)) {
|
||||
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
||||
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
||||
first = b;
|
||||
} else if(1 < (a - first)) {
|
||||
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
||||
last = a;
|
||||
} else {
|
||||
ISAd += incr, first = a, last = b, limit = next;
|
||||
}
|
||||
} else if((last - b) <= (b - a)) {
|
||||
if(1 < (last - b)) {
|
||||
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
||||
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
||||
first = b;
|
||||
} else {
|
||||
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
||||
ISAd += incr, first = a, last = b, limit = next;
|
||||
}
|
||||
} else {
|
||||
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
||||
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
||||
ISAd += incr, first = a, last = b, limit = next;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* The middle block is not descended into (single element or budget
   refused); only the outer sub-ranges remain. */
if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
|
||||
if((a - first) <= (last - b)) {
|
||||
if(1 < (a - first)) {
|
||||
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
||||
last = a;
|
||||
} else if(1 < (last - b)) {
|
||||
first = b;
|
||||
} else {
|
||||
STACK_POP5(ISAd, first, last, limit, trlink);
|
||||
}
|
||||
} else {
|
||||
if(1 < (last - b)) {
|
||||
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
||||
first = b;
|
||||
} else if(1 < (a - first)) {
|
||||
last = a;
|
||||
} else {
|
||||
STACK_POP5(ISAd, first, last, limit, trlink);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Every key in the range equals the pivot key: retry one level deeper
   if the budget allows it. */
if(trbudget_check(budget, last - first)) {
|
||||
limit = tr_ilg(last - first), ISAd += incr;
|
||||
} else {
|
||||
if(0 <= trlink) { stack[trlink].d = -1; }
|
||||
STACK_POP5(ISAd, first, last, limit, trlink);
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef STACK_SIZE
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/*- Function -*/
|
||||
|
||||
/* Tandem repeat sort.
   Makes repeated passes over SA[0, n). In each pass every group that is not
   yet marked as sorted is handed to tr_introsort(), which compares suffixes
   through ISAd = ISA + offset; the offset starts at |depth| and is doubled
   after each pass (ISAd += ISAd - ISA).
   A negative entry in SA is treated as the negated length of an
   already-sorted run and is skipped; adjacent sorted runs are merged by
   writing their combined negated length at the start of the run.
   The loop ends when a pass leaves nothing unsorted or when *SA <= -n. */
|
||||
void
|
||||
trsort(saidx_it ISA, saidx_it SA, saidx_t n, saidx_t depth) {
|
||||
saidx_it ISAd;
|
||||
saidx_it first, last;
|
||||
trbudget_t budget;
|
||||
saidx_t t, skip, unsorted;
|
||||
|
||||
trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
|
||||
/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
|
||||
for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
|
||||
first = SA;
|
||||
/* skip: negated length of the sorted run accumulated so far. */
skip = 0;
|
||||
unsorted = 0;
|
||||
do {
|
||||
/* Negative entry: jump over an already-sorted run of -t elements. */
if((t = *first) < 0) { first -= t; skip += t; }
|
||||
else {
|
||||
/* Record the accumulated sorted run at its first element. */
if(skip != 0) { *(first + skip) = skip; skip = 0; }
|
||||
/* ISA[t] is the index of the last element of t's group. */
last = SA + ISA[t] + 1;
|
||||
if(1 < (last - first)) {
|
||||
budget.count = 0;
|
||||
tr_introsort(ISA, ISAd, SA, first, last, &budget);
|
||||
/* A non-zero count means the budget ran out for part of the group. */
if(budget.count != 0) { unsorted += budget.count; }
|
||||
else { skip = first - last; }
|
||||
} else if((last - first) == 1) {
|
||||
skip = -1;
|
||||
}
|
||||
first = last;
|
||||
}
|
||||
} while(first < (SA + n));
|
||||
if(skip != 0) { *(first + skip) = skip; }
|
||||
if(unsorted == 0) { break; }
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace divsuf
|
|
@ -1 +1 @@
|
|||
Subproject commit a0dc7d5efacbe2b744211289c276e2b9168bd4ae
|
||||
Subproject commit 6b3f93c6caa0308455beeced0268cfae04df3584
|
|
@ -93,7 +93,6 @@ target_link_libraries(${PROJECT_NAME}
|
|||
# indexer
|
||||
# platform
|
||||
# mwm_diff
|
||||
# bsdiff
|
||||
# geometry
|
||||
# coding
|
||||
# base
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
#include "coding/zlib.hpp"
|
||||
|
||||
#include "std/target_os.hpp"
|
||||
#include <optional>
|
||||
#include <span>
|
||||
|
||||
namespace coding
|
||||
{
|
||||
namespace
|
||||
{
|
||||
int constexpr kGzipBits = 16;
|
||||
int constexpr kBothBits = 32;
|
||||
constexpr int kGzipBits = 16;
|
||||
constexpr int kBothBits = 32;
|
||||
|
||||
int ToInt(ZLib::Deflate::Level level)
|
||||
constexpr int ToInt(ZLib::Deflate::Level level)
|
||||
{
|
||||
using Level = ZLib::Deflate::Level;
|
||||
switch (level)
|
||||
|
@ -21,18 +21,27 @@ int ToInt(ZLib::Deflate::Level level)
|
|||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// Creates a zero-initialised z_stream with the default allocator hooks
// (zalloc/zfree/opaque set to Z_NULL), passes it to |func| and returns it by
// value when |func| reports Z_OK; returns std::nullopt otherwise.
//
// NOTE(review): the stream is copied out of this function after |func| has
// initialised it. zlib provides deflateCopy()/inflateCopy() for duplicating a
// live stream, which suggests a plain struct copy may not be supported -
// TODO confirm against the zlib manual before relying on the returned copy.
template <typename InitFunc>
|
||||
std::optional<z_stream> InitStream(InitFunc func)
|
||||
{
|
||||
z_stream stream{};
|
||||
stream.zalloc = Z_NULL;
|
||||
stream.zfree = Z_NULL;
|
||||
stream.opaque = Z_NULL;
|
||||
|
||||
if (func(stream) == Z_OK)
|
||||
return stream;
|
||||
return std::nullopt;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// ZLib::Processor ---------------------------------------------------------------------------------
|
||||
ZLib::Processor::Processor(void const * data, size_t size) noexcept : m_init(false)
|
||||
ZLib::Processor::Processor(std::span<const std::byte> data) noexcept : m_init(false)
|
||||
{
|
||||
// next_in is defined as z_const (see
|
||||
// http://www.zlib.net/manual.html). Sometimes it's a const (when
|
||||
// ZLIB_CONST is defined), sometimes not, it depends on the local
|
||||
// zconf.h. So, for portability, const_cast<...> is used here, but
|
||||
// in any case, zlib does not modify |data|.
|
||||
m_stream.next_in = static_cast<unsigned char *>(const_cast<void *>(data));
|
||||
m_stream.avail_in = static_cast<unsigned int>(size);
|
||||
ASSERT(!data.empty(), ());
|
||||
m_stream.next_in = reinterpret_cast<unsigned char const*>(data.data());
|
||||
m_stream.avail_in = static_cast<unsigned int>(data.size());
|
||||
|
||||
m_stream.next_out = m_buffer;
|
||||
m_stream.avail_out = kBufferSize;
|
||||
|
@ -56,20 +65,25 @@ bool ZLib::Processor::BufferIsFull() const
|
|||
|
||||
// ZLib::Deflate -----------------------------------------------------------------------------------
|
||||
// Sets up a deflate stream over |data| producing |format| output at |level|.
// IsInit() reports whether zlib accepted the parameters.
ZLib::DeflateProcessor::DeflateProcessor(Deflate::Format format, Deflate::Level level,
                                         std::span<const std::byte> data) noexcept
  : Processor(data)
{
  auto bits = MAX_WBITS;
  switch (format)
  {
  case Deflate::Format::ZLib: break;
  case Deflate::Format::GZip: bits |= kGzipBits; break;
  }

  // The stream must be initialised in place. The base-class constructor has
  // already stored the input/output pointers in |m_stream|, so assigning a
  // separately initialised z_stream over it would discard them; in addition
  // zlib's internal state refers back to the z_stream it was initialised
  // with, so an initialised stream must not be relocated by a struct copy.
  int const ret =
      deflateInit2(&m_stream, ToInt(level) /* level */, Z_DEFLATED /* method */,
                   bits /* windowBits */, 8 /* memLevel */, Z_DEFAULT_STRATEGY /* strategy */);
  m_init = (ret == Z_OK);
}
|
||||
|
||||
ZLib::DeflateProcessor::~DeflateProcessor() noexcept
|
||||
|
@ -85,19 +99,26 @@ int ZLib::DeflateProcessor::Process(int flush)
|
|||
}
|
||||
|
||||
// ZLib::Inflate -----------------------------------------------------------------------------------
|
||||
// Sets up an inflate stream over |data| that accepts |format| input.
// IsInit() reports whether zlib accepted the parameters.
ZLib::InflateProcessor::InflateProcessor(Inflate::Format format, std::span<const std::byte> data) noexcept
  : Processor(data)
{
  auto bits = MAX_WBITS;
  switch (format)
  {
  case Inflate::Format::ZLib: break;
  case Inflate::Format::GZip: bits |= kGzipBits; break;
  case Inflate::Format::Both: bits |= kBothBits; break;
  }

  // The stream must be initialised in place. The base-class constructor has
  // already stored the input/output pointers in |m_stream|, so assigning a
  // separately initialised z_stream over it would discard them; in addition
  // zlib's internal state refers back to the z_stream it was initialised
  // with, so an initialised stream must not be relocated by a struct copy.
  int const ret = inflateInit2(&m_stream, bits);
  m_init = (ret == Z_OK);
}
|
||||
|
||||
ZLib::InflateProcessor::~InflateProcessor() noexcept
|
||||
|
|
|
@ -5,18 +5,15 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <span>
|
||||
|
||||
#include "zlib.h"
|
||||
|
||||
namespace coding
|
||||
{
|
||||
// Following classes are wrappers around ZLib routines.
|
||||
//
|
||||
// *NOTE* All Inflate() and Deflate() methods may return false in case
|
||||
// of errors. In this case the output sequence may be already
|
||||
// partially formed, so the user needs to implement their own
|
||||
// roll-back strategy.
|
||||
// ZLib wrapper for compression and decompression.
|
||||
class ZLib
|
||||
{
|
||||
public:
|
||||
|
@ -33,18 +30,17 @@ public:
|
|||
explicit Inflate(Format format) noexcept : m_format(format) {}
|
||||
|
||||
template <typename OutIt>
|
||||
bool operator()(void const * data, size_t size, OutIt out) const
|
||||
std::optional<void> operator()(std::span<const std::byte> data, OutIt out) const
|
||||
{
|
||||
if (data == nullptr)
|
||||
return false;
|
||||
InflateProcessor processor(m_format, data, size);
|
||||
ASSERT(!data.empty(), ());
|
||||
InflateProcessor processor(m_format, data);
|
||||
return Process(processor, out);
|
||||
}
|
||||
|
||||
template <typename OutIt>
|
||||
bool operator()(std::string const & s, OutIt out) const
|
||||
std::optional<void> operator()(std::string const & s, OutIt out) const
|
||||
{
|
||||
return (*this)(s.c_str(), s.size(), out);
|
||||
return (*this)(std::as_bytes(std::span{s}), out);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -62,27 +58,26 @@ public:
|
|||
|
||||
// Compression level requested from zlib. The enumerators that carry an
// explicit value are set to the corresponding zlib Z_* level constant.
enum class Level
|
||||
{
|
||||
NoCompression,
|
||||
BestSpeed,
|
||||
BestCompression,
|
||||
DefaultCompression
|
||||
NoCompression = Z_NO_COMPRESSION,
|
||||
BestSpeed = Z_BEST_SPEED,
|
||||
BestCompression = Z_BEST_COMPRESSION,
|
||||
DefaultCompression = Z_DEFAULT_COMPRESSION
|
||||
};
|
||||
|
||||
Deflate(Format format, Level level) noexcept : m_format(format), m_level(level) {}
|
||||
|
||||
template <typename OutIt>
|
||||
bool operator()(void const * data, size_t size, OutIt out) const
|
||||
std::optional<void> operator()(std::span<const std::byte> data, OutIt out) const
|
||||
{
|
||||
if (data == nullptr)
|
||||
return false;
|
||||
DeflateProcessor processor(m_format, m_level, data, size);
|
||||
ASSERT(!data.empty(), ());
|
||||
DeflateProcessor processor(m_format, m_level, data);
|
||||
return Process(processor, out);
|
||||
}
|
||||
|
||||
template <typename OutIt>
|
||||
bool operator()(std::string const & s, OutIt out) const
|
||||
std::optional<void> operator()(std::string const & s, OutIt out) const
|
||||
{
|
||||
return (*this)(s.c_str(), s.size(), out);
|
||||
return (*this)(std::as_bytes(std::span{s}), out);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -94,9 +89,9 @@ private:
|
|||
class Processor
|
||||
{
|
||||
public:
|
||||
static size_t constexpr kBufferSize = 1024;
|
||||
static size_t constexpr kBufferSize = 8192;
|
||||
|
||||
Processor(void const * data, size_t size) noexcept;
|
||||
Processor(std::span<const std::byte> data) noexcept;
|
||||
virtual ~Processor() noexcept = default;
|
||||
|
||||
inline bool IsInit() const noexcept { return m_init; }
|
||||
|
@ -113,18 +108,19 @@ private:
|
|||
}
|
||||
|
||||
protected:
|
||||
z_stream m_stream;
|
||||
z_stream m_stream{};
|
||||
bool m_init = false;
|
||||
unsigned char m_buffer[kBufferSize] = {};
|
||||
|
||||
void InitStream();
|
||||
|
||||
DISALLOW_COPY_AND_MOVE(Processor);
|
||||
};
|
||||
|
||||
class DeflateProcessor final : public Processor
|
||||
{
|
||||
public:
|
||||
DeflateProcessor(Deflate::Format format, Deflate::Level level, void const * data,
|
||||
size_t size) noexcept;
|
||||
DeflateProcessor(Deflate::Format format, Deflate::Level level, std::span<const std::byte> data) noexcept;
|
||||
virtual ~DeflateProcessor() noexcept override;
|
||||
|
||||
int Process(int flush);
|
||||
|
@ -135,7 +131,7 @@ private:
|
|||
class InflateProcessor final : public Processor
|
||||
{
|
||||
public:
|
||||
InflateProcessor(Inflate::Format format, void const * data, size_t size) noexcept;
|
||||
InflateProcessor(Inflate::Format format, std::span<const std::byte> data) noexcept;
|
||||
virtual ~InflateProcessor() noexcept override;
|
||||
|
||||
int Process(int flush);
|
||||
|
@ -144,10 +140,10 @@ private:
|
|||
};
|
||||
|
||||
template <typename Processor, typename OutIt>
|
||||
static bool Process(Processor & processor, OutIt out)
|
||||
static std::optional<void> Process(Processor & processor, OutIt out)
|
||||
{
|
||||
if (!processor.IsInit())
|
||||
return false;
|
||||
return std::nullopt;
|
||||
|
||||
int ret = Z_OK;
|
||||
while (true)
|
||||
|
@ -158,7 +154,7 @@ private:
|
|||
{
|
||||
ret = processor.Process(flush);
|
||||
if (ret != Z_OK && ret != Z_STREAM_END)
|
||||
return false;
|
||||
return std::nullopt;
|
||||
|
||||
if (!processor.BufferIsFull())
|
||||
break;
|
||||
|
@ -171,7 +167,8 @@ private:
|
|||
}
|
||||
|
||||
processor.MoveOut(out);
|
||||
return processor.ConsumedAll();
|
||||
return processor.ConsumedAll() ? std::optional<void>{} : std::nullopt;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace coding
|
||||
|
|
|
@ -150,11 +150,6 @@
|
|||
|
||||
<li><a href="http://www.boost.org/">Boost</a>; <a href="#boost-license" class="license">Boost License</a></li>
|
||||
|
||||
<li><a href="http://www.daemonology.net/bsdiff/">bsdiff</a>; <a href="#bsd3-license" class="license">BSD License</a></li>
|
||||
|
||||
<li><a href="https://chromium.googlesource.com/chromium/src/courgette/">Chromium's Courgette</a>;
|
||||
<a href="#bsd3-license" class="license">BSD License</a></li>
|
||||
|
||||
<li><a href="https://libexpat.github.io">Expat</a><br>© 1998–2000 Thai Open Source Software Center Ltd and Clark Cooper,
|
||||
© 2001–2019 Expat maintainers; <a href="#mit-license" class="license">MIT License</a></li>
|
||||
|
||||
|
|
|
@ -8,7 +8,6 @@ set(SRC
|
|||
omim_add_library(${PROJECT_NAME} ${SRC})
|
||||
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
bsdiff
|
||||
coding
|
||||
)
|
||||
|
||||
|
|
|
@ -16,42 +16,56 @@
|
|||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
|
||||
#include "3party/bsdiff-courgette/bsdiff/bsdiff.h"
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace
|
||||
{
|
||||
// Version tag stored as a uint32 at the start of every diff file.
enum Version
|
||||
{
|
||||
// Format Version 0: bsdiff+gzip.
|
||||
// Format Version 0: byte-wise XOR of the old and new files, compressed with
// Deflate::Format::ZLib (zlib framing, not gzip).
|
||||
VERSION_V0 = 0,
|
||||
VERSION_LATEST = VERSION_V0
|
||||
};
|
||||
|
||||
bool MakeDiffVersion0(FileReader & oldReader, FileReader & newReader, FileWriter & diffFileWriter)
|
||||
bool MakeDiffVersion0(FileReader & oldReader, FileReader & newReader, FileWriter & diffFileWriter, base::Cancellable const & cancellable)
|
||||
{
|
||||
std::vector<uint8_t> diffBuf;
|
||||
MemWriter<std::vector<uint8_t>> diffMemWriter(diffBuf);
|
||||
|
||||
auto const status = bsdiff::CreateBinaryPatch(oldReader, newReader, diffMemWriter);
|
||||
|
||||
if (status != bsdiff::BSDiffStatus::OK)
|
||||
try
|
||||
{
|
||||
LOG(LERROR, ("Could not create patch with bsdiff:", status));
|
||||
std::vector<uint8_t> oldData(oldReader.Size());
|
||||
oldReader.Read(0, oldData.data(), oldData.size());
|
||||
|
||||
std::vector<uint8_t> newData(newReader.Size());
|
||||
newReader.Read(0, newData.data(), newData.size());
|
||||
|
||||
std::vector<uint8_t> diffBuf(std::max(oldData.size(), newData.size()));
|
||||
for (size_t i = 0; i < diffBuf.size(); ++i)
|
||||
{
|
||||
if (cancellable.IsCancelled())
|
||||
{
|
||||
LOG(LINFO, ("MakeDiffVersion0 cancelled at index", i));
|
||||
return false;
|
||||
}
|
||||
diffBuf[i] = (i < oldData.size() ? oldData[i] : 0) ^ (i < newData.size() ? newData[i] : 0);
|
||||
}
|
||||
|
||||
using Deflate = coding::ZLib::Deflate;
|
||||
Deflate deflate(Deflate::Format::ZLib, Deflate::Level::BestCompression);
|
||||
|
||||
std::vector<uint8_t> deflatedDiffBuf;
|
||||
deflate(diffBuf.data(), diffBuf.size(), back_inserter(deflatedDiffBuf));
|
||||
|
||||
// A basic header that holds only version.
|
||||
WriteToSink(diffFileWriter, static_cast<uint32_t>(VERSION_V0));
|
||||
diffFileWriter.Write(deflatedDiffBuf.data(), deflatedDiffBuf.size());
|
||||
|
||||
return true;
|
||||
}
|
||||
catch (std::exception const & e)
|
||||
{
|
||||
LOG(LERROR, ("Error during MakeDiffVersion0:", e.what()));
|
||||
return false;
|
||||
}
|
||||
|
||||
using Deflate = coding::ZLib::Deflate;
|
||||
Deflate deflate(Deflate::Format::ZLib, Deflate::Level::BestCompression);
|
||||
|
||||
std::vector<uint8_t> deflatedDiffBuf;
|
||||
deflate(diffBuf.data(), diffBuf.size(), back_inserter(deflatedDiffBuf));
|
||||
|
||||
// A basic header that holds only version.
|
||||
WriteToSink(diffFileWriter, static_cast<uint32_t>(VERSION_V0));
|
||||
diffFileWriter.Write(deflatedDiffBuf.data(), deflatedDiffBuf.size());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
generator::mwm_diff::DiffApplicationResult ApplyDiffVersion0(
|
||||
|
@ -60,36 +74,55 @@ generator::mwm_diff::DiffApplicationResult ApplyDiffVersion0(
|
|||
{
|
||||
using generator::mwm_diff::DiffApplicationResult;
|
||||
|
||||
std::vector<uint8_t> deflatedDiff(base::checked_cast<size_t>(diffFileSource.Size()));
|
||||
diffFileSource.Read(deflatedDiff.data(), deflatedDiff.size());
|
||||
|
||||
using Inflate = coding::ZLib::Inflate;
|
||||
Inflate inflate(Inflate::Format::ZLib);
|
||||
std::vector<uint8_t> diffBuf;
|
||||
inflate(deflatedDiff.data(), deflatedDiff.size(), back_inserter(diffBuf));
|
||||
|
||||
// Our bsdiff assumes that both the old mwm and the diff files are correct and
|
||||
// does no checks when using its readers.
|
||||
// Yet sometimes we observe corrupted files in the logs, and to avoid
|
||||
// crashes from such files the exception-throwing version of MemReader is used here.
|
||||
// |oldReader| is a FileReader so it throws exceptions too but we
|
||||
// are more confident in the uncorrupted status of the old file because
|
||||
// its checksum is compared to the one stored in the diff file.
|
||||
MemReaderWithExceptions diffMemReader(diffBuf.data(), diffBuf.size());
|
||||
|
||||
auto const status = bsdiff::ApplyBinaryPatch(oldReader, newWriter, diffMemReader, cancellable);
|
||||
|
||||
if (status == bsdiff::BSDiffStatus::CANCELLED)
|
||||
try
|
||||
{
|
||||
LOG(LDEBUG, ("Diff application has been cancelled"));
|
||||
return DiffApplicationResult::Cancelled;
|
||||
}
|
||||
std::vector<uint8_t> deflatedDiff(base::checked_cast<size_t>(diffFileSource.Size()));
|
||||
diffFileSource.Read(deflatedDiff.data(), deflatedDiff.size());
|
||||
|
||||
if (cancellable.IsCancelled())
|
||||
{
|
||||
LOG(LINFO, ("ApplyDiffVersion0 cancelled before inflation"));
|
||||
return DiffApplicationResult::Cancelled;
|
||||
}
|
||||
|
||||
using Inflate = coding::ZLib::Inflate;
|
||||
Inflate inflate(Inflate::Format::ZLib);
|
||||
std::vector<uint8_t> diffBuf;
|
||||
if (!inflate(deflatedDiff.data(), deflatedDiff.size(), back_inserter(diffBuf)))
|
||||
{
|
||||
LOG(LERROR, ("Inflation failed"));
|
||||
return DiffApplicationResult::Failed;
|
||||
}
|
||||
|
||||
if (cancellable.IsCancelled())
|
||||
{
|
||||
LOG(LINFO, ("ApplyDiffVersion0 cancelled during inflation"));
|
||||
return DiffApplicationResult::Cancelled;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> oldData(oldReader.Size());
|
||||
oldReader.Read(0, oldData.data(), oldData.size());
|
||||
|
||||
std::vector<uint8_t> newData(diffBuf.size());
|
||||
for (size_t i = 0; i < newData.size(); ++i)
|
||||
{
|
||||
if (cancellable.IsCancelled())
|
||||
{
|
||||
LOG(LINFO, ("ApplyDiffVersion0 cancelled at index", i));
|
||||
return DiffApplicationResult::Cancelled;
|
||||
}
|
||||
newData[i] = (i < oldData.size() ? oldData[i] : 0) ^ diffBuf[i];
|
||||
}
|
||||
|
||||
newWriter.Write(newData.data(), newData.size());
|
||||
|
||||
if (status == bsdiff::BSDiffStatus::OK)
|
||||
return DiffApplicationResult::Ok;
|
||||
|
||||
LOG(LERROR, ("Could not apply patch with bsdiff:", status));
|
||||
return DiffApplicationResult::Failed;
|
||||
}
|
||||
catch (std::exception const & e)
|
||||
{
|
||||
LOG(LERROR, ("Error during ApplyDiffVersion0:", e.what()));
|
||||
return DiffApplicationResult::Failed;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
|
@ -97,7 +130,8 @@ namespace generator
|
|||
{
|
||||
namespace mwm_diff
|
||||
{
|
||||
bool MakeDiff(std::string const & oldMwmPath, std::string const & newMwmPath, std::string const & diffPath)
|
||||
bool MakeDiff(std::string const & oldMwmPath, std::string const & newMwmPath,
|
||||
std::string const & diffPath, base::Cancellable const & cancellable)
|
||||
{
|
||||
try
|
||||
{
|
||||
|
@ -107,7 +141,7 @@ bool MakeDiff(std::string const & oldMwmPath, std::string const & newMwmPath, st
|
|||
|
||||
switch (VERSION_LATEST)
|
||||
{
|
||||
case VERSION_V0: return MakeDiffVersion0(oldReader, newReader, diffFileWriter);
|
||||
case VERSION_V0: return MakeDiffVersion0(oldReader, newReader, diffFileWriter, cancellable);
|
||||
default:
|
||||
LOG(LERROR,
|
||||
("Making mwm diffs with diff format version", VERSION_LATEST, "is not implemented"));
|
||||
|
|
|
@ -23,7 +23,7 @@ enum class DiffApplicationResult
|
|||
// It is assumed that the files at |oldMwmPath| and |newMwmPath| are valid mwms.
|
||||
// Returns true on success and false on failure.
|
||||
bool MakeDiff(std::string const & oldMwmPath, std::string const & newMwmPath,
|
||||
std::string const & diffPath);
|
||||
std::string const & diffPath, base::Cancellable const & cancellable);
|
||||
|
||||
// Applies the diff at |diffPath| to the mwm at |oldMwmPath|. The resulting
|
||||
// mwm is stored at |newMwmPath|.
|
||||
|
@ -32,8 +32,7 @@ bool MakeDiff(std::string const & oldMwmPath, std::string const & newMwmPath,
|
|||
// The application process can be stopped via |cancellable| in which case
|
||||
// it is up to the caller to clean the partially written file at |diffPath|.
|
||||
DiffApplicationResult ApplyDiff(std::string const & oldMwmPath, std::string const & newMwmPath,
|
||||
std::string const & diffPath,
|
||||
base::Cancellable const & cancellable);
|
||||
std::string const & diffPath, base::Cancellable const & cancellable);
|
||||
|
||||
std::string DebugPrint(DiffApplicationResult const & result);
|
||||
} // namespace mwm_diff
|
||||
|
|
|
@ -39,9 +39,12 @@ UNIT_TEST(IncrementalUpdates_Smoke)
|
|||
}
|
||||
|
||||
base::Cancellable cancellable;
|
||||
TEST(MakeDiff(oldMwmPath, newMwmPath1, diffPath), ());
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable), DiffApplicationResult::Ok,
|
||||
());
|
||||
|
||||
// Reset cancellable before test.
|
||||
cancellable.Reset();
|
||||
LOG(LINFO, ("Starting first diff application"));
|
||||
TEST(MakeDiff(oldMwmPath, newMwmPath1, diffPath, cancellable), ());
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable), DiffApplicationResult::Ok, ());
|
||||
|
||||
{
|
||||
// Alter the old mwm slightly.
|
||||
|
@ -54,15 +57,17 @@ UNIT_TEST(IncrementalUpdates_Smoke)
|
|||
writer.Write(oldMwmContents.data(), oldMwmContents.size());
|
||||
}
|
||||
|
||||
TEST(MakeDiff(oldMwmPath, newMwmPath1, diffPath), ());
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable), DiffApplicationResult::Ok,
|
||||
());
|
||||
// Reset cancellable before test.
|
||||
cancellable.Reset();
|
||||
LOG(LINFO, ("Starting second diff application"));
|
||||
TEST(MakeDiff(oldMwmPath, newMwmPath1, diffPath, cancellable), ());
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable), DiffApplicationResult::Ok, ());
|
||||
|
||||
TEST(base::IsEqualFiles(newMwmPath1, newMwmPath2), ());
|
||||
|
||||
cancellable.Cancel();
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable),
|
||||
DiffApplicationResult::Cancelled, ());
|
||||
LOG(LINFO, ("Cancelling ApplyDiff"));
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable), DiffApplicationResult::Cancelled, ());
|
||||
cancellable.Reset();
|
||||
|
||||
{
|
||||
|
@ -77,15 +82,19 @@ UNIT_TEST(IncrementalUpdates_Smoke)
|
|||
writer.Write(diffContents.data(), diffContents.size());
|
||||
}
|
||||
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable),
|
||||
DiffApplicationResult::Failed, ());
|
||||
// Reset cancellable before test.
|
||||
cancellable.Reset();
|
||||
LOG(LINFO, ("Starting corrupted diff application"));
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable), DiffApplicationResult::Failed, ());
|
||||
|
||||
{
|
||||
// Reset the diff file contents.
|
||||
FileWriter writer(diffPath);
|
||||
}
|
||||
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable),
|
||||
DiffApplicationResult::Failed, ());
|
||||
// Reset cancellable before test.
|
||||
cancellable.Reset();
|
||||
LOG(LINFO, ("Starting empty diff application"));
|
||||
TEST_EQUAL(ApplyDiff(oldMwmPath, newMwmPath2, diffPath, cancellable), DiffApplicationResult::Failed, ());
|
||||
}
|
||||
} // namespace generator::diff_tests
|
||||
|
|
|
@ -32,7 +32,6 @@ omim_link_libraries(
|
|||
ICU::i18n
|
||||
cppjansson
|
||||
protobuf
|
||||
bsdiff
|
||||
minizip
|
||||
succinct
|
||||
pugixml
|
||||
|
|
|
@ -23,7 +23,6 @@ omim_link_libraries(
|
|||
mwm_diff
|
||||
coding
|
||||
base
|
||||
bsdiff
|
||||
ICU::i18n
|
||||
cppjansson
|
||||
opening_hours
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 6e1746357549ca4b09ee1560e9bf25e3fcce20ea
|
||||
Subproject commit 2796db7ae3c3f3c00ae07880dc2d8dfc8edd776b
|
|
@ -1 +1 @@
|
|||
Subproject commit 89dd0a6b351f32c91d6aeaaf322e861bf7f0dc10
|
||||
Subproject commit 4cfda06f87d44d452a471c30435e48f690de7a1a
|
|
@ -1,218 +0,0 @@
|
|||
// !$*UTF8*$!
|
||||
{
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 54;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
4586D0B51F4811BB00DF9CE5 /* bsdiff_common.h in Headers */ = {isa = PBXBuildFile; fileRef = 4586D0B11F4811BB00DF9CE5 /* bsdiff_common.h */; };
|
||||
4586D0B61F4811BB00DF9CE5 /* bsdiff_search.h in Headers */ = {isa = PBXBuildFile; fileRef = 4586D0B21F4811BB00DF9CE5 /* bsdiff_search.h */; };
|
||||
4586D0B71F4811BB00DF9CE5 /* bsdiff.h in Headers */ = {isa = PBXBuildFile; fileRef = 4586D0B31F4811BB00DF9CE5 /* bsdiff.h */; };
|
||||
4586D0BE1F4811E600DF9CE5 /* divsufsort_private.h in Headers */ = {isa = PBXBuildFile; fileRef = 4586D0B91F4811E600DF9CE5 /* divsufsort_private.h */; };
|
||||
4586D0BF1F4811E600DF9CE5 /* divsufsort.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4586D0BA1F4811E600DF9CE5 /* divsufsort.cc */; };
|
||||
4586D0C01F4811E600DF9CE5 /* divsufsort.h in Headers */ = {isa = PBXBuildFile; fileRef = 4586D0BB1F4811E600DF9CE5 /* divsufsort.h */; };
|
||||
4586D0C11F4811E600DF9CE5 /* sssort.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4586D0BC1F4811E600DF9CE5 /* sssort.cc */; };
|
||||
4586D0C21F4811E600DF9CE5 /* trsort.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4586D0BD1F4811E600DF9CE5 /* trsort.cc */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
4586D0A11F480FE600DF9CE5 /* libbsdiff.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libbsdiff.a; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
4586D0AF1F4810A700DF9CE5 /* common-debug.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; name = "common-debug.xcconfig"; path = "../common-debug.xcconfig"; sourceTree = "<group>"; };
|
||||
4586D0B01F4810A700DF9CE5 /* common-release.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; name = "common-release.xcconfig"; path = "../common-release.xcconfig"; sourceTree = "<group>"; };
|
||||
4586D0B11F4811BB00DF9CE5 /* bsdiff_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = bsdiff_common.h; path = bsdiff/bsdiff_common.h; sourceTree = "<group>"; };
|
||||
4586D0B21F4811BB00DF9CE5 /* bsdiff_search.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = bsdiff_search.h; path = bsdiff/bsdiff_search.h; sourceTree = "<group>"; };
|
||||
4586D0B31F4811BB00DF9CE5 /* bsdiff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = bsdiff.h; path = bsdiff/bsdiff.h; sourceTree = "<group>"; };
|
||||
4586D0B91F4811E600DF9CE5 /* divsufsort_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = divsufsort_private.h; path = divsufsort/divsufsort_private.h; sourceTree = "<group>"; };
|
||||
4586D0BA1F4811E600DF9CE5 /* divsufsort.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = divsufsort.cc; path = divsufsort/divsufsort.cc; sourceTree = "<group>"; };
|
||||
4586D0BB1F4811E600DF9CE5 /* divsufsort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = divsufsort.h; path = divsufsort/divsufsort.h; sourceTree = "<group>"; };
|
||||
4586D0BC1F4811E600DF9CE5 /* sssort.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sssort.cc; path = divsufsort/sssort.cc; sourceTree = "<group>"; };
|
||||
4586D0BD1F4811E600DF9CE5 /* trsort.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = trsort.cc; path = divsufsort/trsort.cc; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
4586D09E1F480FE600DF9CE5 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXFrameworksBuildPhase section */
|
||||
|
||||
/* Begin PBXGroup section */
|
||||
4586D0981F480FE600DF9CE5 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
4586D0AF1F4810A700DF9CE5 /* common-debug.xcconfig */,
|
||||
4586D0B01F4810A700DF9CE5 /* common-release.xcconfig */,
|
||||
4586D0A31F480FE600DF9CE5 /* bsdiff */,
|
||||
4586D0A21F480FE600DF9CE5 /* Products */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
4586D0A21F480FE600DF9CE5 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
4586D0A11F480FE600DF9CE5 /* libbsdiff.a */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
4586D0A31F480FE600DF9CE5 /* bsdiff */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
4586D0B11F4811BB00DF9CE5 /* bsdiff_common.h */,
|
||||
4586D0B21F4811BB00DF9CE5 /* bsdiff_search.h */,
|
||||
4586D0B31F4811BB00DF9CE5 /* bsdiff.h */,
|
||||
4586D0B91F4811E600DF9CE5 /* divsufsort_private.h */,
|
||||
4586D0BA1F4811E600DF9CE5 /* divsufsort.cc */,
|
||||
4586D0BB1F4811E600DF9CE5 /* divsufsort.h */,
|
||||
4586D0BC1F4811E600DF9CE5 /* sssort.cc */,
|
||||
4586D0BD1F4811E600DF9CE5 /* trsort.cc */,
|
||||
);
|
||||
name = bsdiff;
|
||||
path = "../../3party/bsdiff-courgette";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXHeadersBuildPhase section */
|
||||
4586D09F1F480FE600DF9CE5 /* Headers */ = {
|
||||
isa = PBXHeadersBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
4586D0BE1F4811E600DF9CE5 /* divsufsort_private.h in Headers */,
|
||||
4586D0B71F4811BB00DF9CE5 /* bsdiff.h in Headers */,
|
||||
4586D0B61F4811BB00DF9CE5 /* bsdiff_search.h in Headers */,
|
||||
4586D0B51F4811BB00DF9CE5 /* bsdiff_common.h in Headers */,
|
||||
4586D0C01F4811E600DF9CE5 /* divsufsort.h in Headers */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXHeadersBuildPhase section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
4586D0A01F480FE600DF9CE5 /* bsdiff */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 4586D0AC1F480FE600DF9CE5 /* Build configuration list for PBXNativeTarget "bsdiff" */;
|
||||
buildPhases = (
|
||||
4586D09D1F480FE600DF9CE5 /* Sources */,
|
||||
4586D09E1F480FE600DF9CE5 /* Frameworks */,
|
||||
4586D09F1F480FE600DF9CE5 /* Headers */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = bsdiff;
|
||||
productName = bsdiff;
|
||||
productReference = 4586D0A11F480FE600DF9CE5 /* libbsdiff.a */;
|
||||
productType = "com.apple.product-type.library.static";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
||||
/* Begin PBXProject section */
|
||||
4586D0991F480FE600DF9CE5 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
attributes = {
|
||||
BuildIndependentTargetsInParallel = YES;
|
||||
DefaultBuildSystemTypeForWorkspace = Latest;
|
||||
LastUpgradeCheck = 1510;
|
||||
TargetAttributes = {
|
||||
4586D0A01F480FE600DF9CE5 = {
|
||||
CreatedOnToolsVersion = 8.3.3;
|
||||
ProvisioningStyle = Automatic;
|
||||
};
|
||||
};
|
||||
};
|
||||
buildConfigurationList = 4586D09C1F480FE600DF9CE5 /* Build configuration list for PBXProject "bsdiff" */;
|
||||
compatibilityVersion = "Xcode 12.0";
|
||||
developmentRegion = en;
|
||||
hasScannedForEncodings = 0;
|
||||
knownRegions = (
|
||||
en,
|
||||
Base,
|
||||
);
|
||||
mainGroup = 4586D0981F480FE600DF9CE5;
|
||||
productRefGroup = 4586D0A21F480FE600DF9CE5 /* Products */;
|
||||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
4586D0A01F480FE600DF9CE5 /* bsdiff */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
4586D09D1F480FE600DF9CE5 /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
4586D0BF1F4811E600DF9CE5 /* divsufsort.cc in Sources */,
|
||||
4586D0C11F4811E600DF9CE5 /* sssort.cc in Sources */,
|
||||
4586D0C21F4811E600DF9CE5 /* trsort.cc in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
4586D0AA1F480FE600DF9CE5 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
baseConfigurationReference = 4586D0AF1F4810A700DF9CE5 /* common-debug.xcconfig */;
|
||||
buildSettings = {
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
4586D0AB1F480FE600DF9CE5 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
baseConfigurationReference = 4586D0B01F4810A700DF9CE5 /* common-release.xcconfig */;
|
||||
buildSettings = {
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
4586D0AD1F480FE600DF9CE5 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
EXECUTABLE_PREFIX = lib;
|
||||
GCC_WARN_INHIBIT_ALL_WARNINGS = YES;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
4586D0AE1F480FE600DF9CE5 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
EXECUTABLE_PREFIX = lib;
|
||||
GCC_WARN_INHIBIT_ALL_WARNINGS = YES;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
4586D09C1F480FE600DF9CE5 /* Build configuration list for PBXProject "bsdiff" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
4586D0AA1F480FE600DF9CE5 /* Debug */,
|
||||
4586D0AB1F480FE600DF9CE5 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
4586D0AC1F480FE600DF9CE5 /* Build configuration list for PBXNativeTarget "bsdiff" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
4586D0AD1F480FE600DF9CE5 /* Debug */,
|
||||
4586D0AE1F480FE600DF9CE5 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
rootObject = 4586D0991F480FE600DF9CE5 /* Project object */;
|
||||
}
|
3
xcode/omim.xcworkspace/contents.xcworkspacedata
generated
3
xcode/omim.xcworkspace/contents.xcworkspacedata
generated
|
@ -36,9 +36,6 @@
|
|||
<FileRef
|
||||
location = "container:agg/agg.xcodeproj">
|
||||
</FileRef>
|
||||
<FileRef
|
||||
location = "container:bsdiff/bsdiff.xcodeproj">
|
||||
</FileRef>
|
||||
<FileRef
|
||||
location = "container:expat/expat.xcodeproj">
|
||||
</FileRef>
|
||||
|
|
Loading…
Add table
Reference in a new issue