ICU-21237 Improving how file deletion and cleanup works by adding a build label.

See #1246
This commit is contained in:
David Beaumont 2020-08-19 21:16:28 +00:00 committed by David Beaumont
parent 9a6a64ce05
commit dcdb30d708
3 changed files with 165 additions and 21 deletions

View file

@ -50,6 +50,21 @@ public final class CleanOutputDirectoryTask extends Task {
private static final CharMatcher NOT_WHITESPACE = CharMatcher.whitespace().negate();
private static final String HEADER_FILE = "ldml2icu_header.txt";
// If present in the header of a file, this line is used to determine that the file was
// auto-generated. This allows us to change the rest of the header freely without issue.
// However if it's not present in the file, we fallback to comparing the rest of the
// header without it (since that's the old behaviour).
// Once there's been an ICU release with this line included in the headers of all data
// files, we can remove the fallback and just test for this line and nothing else.
private static final String WAS_GENERATED_LABEL =
"Generated using tools/cldr/cldr-to-icu/build-icu-data.xml";
// The number of header lines to check before giving up if we don't find the generated
// label.
private static final int MAX_HEADER_CHECK_LINES = 20;
private Path root = null;
private boolean forceDelete = false;
private final List<Dir> outputDirs = new ArrayList<>();
@ -57,7 +72,13 @@ public final class CleanOutputDirectoryTask extends Task {
public CleanOutputDirectoryTask() {
// TODO: Consider passing in header lines via Ant?
this.headerLines = readLinesFromResource("/ldml2icu_header.txt");
this.headerLines = readLinesFromResource("/" + HEADER_FILE);
// For now assume that the generated label is the last line of the header.
checkState(Iterables.getLast(headerLines).equals(WAS_GENERATED_LABEL),
"Expected last line of %s header file to be:\n\t%s", HEADER_FILE, WAS_GENERATED_LABEL);
// Make sure we check at least a few more lines than is in the current header.
checkState(MAX_HEADER_CHECK_LINES >= headerLines.size() + 5,
"Unexpectedly large header file; please increase MAX_HEADER_CHECK_LINES constant");
}
public static final class Retain extends Task {
@ -214,31 +235,69 @@ public final class CleanOutputDirectoryTask extends Task {
return false;
}
try (BufferedReader r = Files.newBufferedReader(path, UTF_8)) {
// A byte-order-mark (BOM) is added to ICU data files, but not JSON deps files, so just
// treat it as optional everywhere (it's not the important thing we check here).
r.mark(1);
int maybeByteOrderMark = r.read();
if (maybeByteOrderMark != '\uFEFF') {
// Also reset if the file was empty, but that should be harmless.
r.reset();
}
for (String headerLine : headerLines) {
String line = r.readLine();
if (line == null) {
return false;
}
int headerStart = skipComment(line);
if (headerStart < 0
|| !line.regionMatches(headerStart, headerLine, 0, headerLine.length())) {
return false;
}
}
return true;
return wasFileAutoGenerated(r, headerLines);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
// TODO: Once the WAS_GENERATED_LABEL is in all auto-generated ICU data files, simplify this.
// Static and non-private for testing.
static boolean wasFileAutoGenerated(BufferedReader fileReader, ImmutableList<String> headerLines)
throws IOException {
// A byte-order-mark (BOM) is added to ICU data files, but not JSON deps files, so just
// treat it as optional everywhere (it's not the important thing we check here).
fileReader.mark(1);
int maybeByteOrderMark = fileReader.read();
if (maybeByteOrderMark != '\uFEFF') {
// Also reset if the file was empty, but that should be harmless.
fileReader.reset();
}
boolean isLenientHeaderMatchSoFar = true;
for (int n = 0; n < MAX_HEADER_CHECK_LINES ; n++) {
String line = fileReader.readLine();
// True if we have processed the header, not including the trailing generated label.
boolean headerIsProcessed = n >= headerLines.size() - 1;
boolean isCompleteLenientMatch = isLenientHeaderMatchSoFar && headerIsProcessed;
if (line == null) {
// We ran off the end of the file, so we're done.
return isCompleteLenientMatch;
}
int headerStart = skipComment(line);
if (headerStart < 0) {
// We ran off the end of the expected comment section, so we're done.
return isCompleteLenientMatch;
}
if (matchesToEndOfLine(line, headerStart, WAS_GENERATED_LABEL)) {
// Finding the generated label trumps any lenient matching.
return true;
}
if (!isLenientHeaderMatchSoFar) {
// We already failed at lenient matching, so keep going in the hope of finding
// the generated label (we don't need to check the header any more).
continue;
}
if (headerIsProcessed) {
// We finishing processing the header and it matched (not including the
// generated label) but for older data files, that's fine.
return true;
}
// Check the next header line (not including the trailing generated label).
isLenientHeaderMatchSoFar = matchesToEndOfLine(line, headerStart, headerLines.get(n));
}
// This is actually an unusual case. It corresponds to a file which:
// * has a leading comment section at least as long as MAX_HEADER_CHECK_LINES
// * does not contain WAS_GENERATED_LABEL anywhere in the first MAX_HEADER_CHECK_LINES
// Most files checked are expected to match, and so will not get here, but instead
// return via one of the return statements above.
return false;
}
private static boolean matchesToEndOfLine(String line, int start, String expected) {
return line.length() - start == expected.length()
&& line.regionMatches(start, expected, 0, expected.length());
}
private static int skipComment(String line) {
if (line.startsWith("#")) {
return toCommentStart(line, 1);

View file

@ -1,2 +1,3 @@
© 2016 and later: Unicode, Inc. and others.
License & terms of use: http://www.unicode.org/copyright.html#License
Generated using tools/cldr/cldr-to-icu/build-icu-data.xml

View file

@ -0,0 +1,84 @@
package org.unicode.icu.tool.cldrtoicu.ant;
import com.google.common.collect.ImmutableList;
import com.google.common.truth.BooleanSubject;
import com.google.common.truth.Truth;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
@RunWith(JUnit4.class)
public class CleanOutputDirectoryTaskTest {
// Not using the original field since we want this test to fail if this changes unexpectedly.
private static final String WAS_GENERATED_LABEL =
"Generated using tools/cldr/cldr-to-icu/build-icu-data.xml";
// Commented version of the label for test data.
private static final String WAS_GENERATED_LINE = "// " + WAS_GENERATED_LABEL;
private final static ImmutableList<String> HEADER = ImmutableList.of(
"Header#1",
"Header#2",
WAS_GENERATED_LABEL);
@Test
public void testWasFileAutoGenerated_lenientMatching() throws IOException {
// Testing comment prefixes (// or # supported equally)
assertWasAutogenerated("// Header#1", "// Header#2").isTrue();
assertWasAutogenerated("# Header#1", "# Header#2").isTrue();
assertWasAutogenerated("# Header#1", "// Header#2").isTrue();
assertWasAutogenerated("// Header#1", "//Header#2").isTrue();
// Extra lines
assertWasAutogenerated("// Header#1", "// Header#2", "// More comment", "Not a comment").isTrue();
// BOM is ignored on first line
assertWasAutogenerated("\uFEFF// Header#1", "// Header#2").isTrue();
}
@Test
public void testWasFileAutoGenerated_lenientMatching_fail() throws IOException {
// New blank line.
assertWasAutogenerated("", "// Header#1", "// Header#2").isFalse();
// Duplicated line.
assertWasAutogenerated("// Header#1", "// Header#1").isFalse();
// Reversed lines.
assertWasAutogenerated("// Header#2", "// Header#1").isFalse();
// Not commented.
assertWasAutogenerated("Header#1", "Header#2").isFalse();
// Extra blank line.
assertWasAutogenerated("// Header#1", "", "// Header#2").isFalse();
// Misplaced BOM.
assertWasAutogenerated("// Header#1", "\uFEFF// Header#2").isFalse();
}
@Test
public void testWasFileAutoGenerated_withLabel() throws IOException {
// With the label in the header comment section everything passes.
assertWasAutogenerated("// Header#1", "// Header#2", WAS_GENERATED_LINE).isTrue();
assertWasAutogenerated("// Hello", "// World", WAS_GENERATED_LINE).isTrue();
assertWasAutogenerated("// Shorter Header", WAS_GENERATED_LINE).isTrue();
assertWasAutogenerated("// This", "// Is", "// A", "// Longer", "// Header", WAS_GENERATED_LINE).isTrue();
assertWasAutogenerated("// Where the label is", WAS_GENERATED_LINE, "// Does not matter").isTrue();
assertWasAutogenerated(WAS_GENERATED_LINE).isTrue();
}
@Test
public void testWasFileAutoGenerated_matchingEndsAfterComments() throws IOException {
assertWasAutogenerated("// Header#1", "// Header#Changed", WAS_GENERATED_LINE).isTrue();
// Label outside the header comment section does not count.
assertWasAutogenerated("// Header#1", "// Header#Changed", "Not a comment", WAS_GENERATED_LINE).isFalse();
}
private static BooleanSubject assertWasAutogenerated(String... fileLines) throws IOException {
return Truth.assertWithMessage("wasAutogenerated: %s", Arrays.asList(fileLines))
.that(CleanOutputDirectoryTask.wasFileAutoGenerated(testFile(fileLines), HEADER));
}
private static BufferedReader testFile(String... lines) {
return new BufferedReader(new StringReader(String.join("\n", lines) + "\n"));
}
}