mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-21755 commit checker: add support for COMMIT_METADATA.md file
- new option, --commit-metadata= with path to metadata file - new option, --fix-version=41 (used for SKIP sections) - scaffolding for 'bad commits' list - new module CommitMetadata with unit tests - sample file format TEST_COMMIT_METADATA.md - such commits are skipped - refactored the commit skipping part (formerly used for cherry pick skips) - add a report section for skipped commits - add a cache for JIRA queries (for dev use): --cache-for-dev "/tmp/cldr-commit-cache" - optional - add an 'excluded commits' section at the bottom - make sure commit metadata is used to update ticket IDs and messages.
This commit is contained in:
parent
123e5c1cd6
commit
8ca2e06b72
5 changed files with 287 additions and 24 deletions
|
@ -1,5 +1,5 @@
|
|||
# Copyright (C) 2018 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
# Author: shane@unicode.org
|
||||
|
||||
[[source]]
|
||||
|
@ -15,3 +15,4 @@ gitpython = "*"
|
|||
jira = "*"
|
||||
|
||||
[dev-packages]
|
||||
pytest = "*"
|
||||
|
|
25
tools/commit-checker/TEST_COMMIT_METADATA.md
Normal file
25
tools/commit-checker/TEST_COMMIT_METADATA.md
Normal file
|
@ -0,0 +1,25 @@
|
|||
# - Commit Metadata
|
||||
|
||||
### Copyright (C) 2022 and later: Unicode, Inc. and others.
|
||||
### License & terms of use: http://www.unicode.org/copyright.html
|
||||
### All lines starting with one hash (#) are significant
|
||||
### All lines starting with a dash (-) are significant.
|
||||
###
|
||||
### The following list of commits is parsed by the ICU/CLDR commit checker.
|
||||
### It has the following structure:
|
||||
### `- <commit> <bug id or hyphen> <message>`
|
||||
|
||||
- 00decafbad CLDR-0000 Bad commit message
|
||||
- 50257a7a3eba7bbed634961a2aa74e77b12810bf - Early commit, no bug id
|
||||
- 283dc84d81cfc47fb15cd664fce4b0151d070ea6 - Early commit, no bug id
|
||||
- 02198373a591a15b804127acddd32582ec985b7e CLDR-15852 v42 merge commit
|
||||
|
||||
|
||||
### The following are items to skip for a certain CLDR version
|
||||
### Format: `# SKIP v00` followed by a list of commits to skip for that version
|
||||
|
||||
# SKIP v41
|
||||
|
||||
- 56ca5d5 CLDR-14877 split ticket
|
||||
|
||||
|
|
@ -9,11 +9,14 @@ import os
|
|||
import re
|
||||
import sys
|
||||
import datetime
|
||||
from hashlib import sha256
|
||||
import pickle
|
||||
|
||||
from enum import Enum
|
||||
from collections import namedtuple
|
||||
from git import Repo
|
||||
from jira import JIRA
|
||||
from commit_metadata import CommitMetadata
|
||||
|
||||
# singleCount = 0
|
||||
|
||||
|
@ -110,6 +113,11 @@ flag_parser.add_argument(
|
|||
help = "Hostname of the Jira instance",
|
||||
default = "unicode-org.atlassian.net"
|
||||
)
|
||||
flag_parser.add_argument(
|
||||
"--commit-metadata",
|
||||
help = "path to COMMIT_METADATA.md",
|
||||
default = "COMMIT_METADATA.md"
|
||||
)
|
||||
flag_parser.add_argument(
|
||||
"--jira-username",
|
||||
help = "Username to use for authenticating to Jira",
|
||||
|
@ -125,6 +133,16 @@ flag_parser.add_argument(
|
|||
help = "JQL query load tickets; this should match tickets expected to correspond to the commits being checked. Example: 'project=ICU and fixVersion=63.1'; set fixVersion to the upcoming version.",
|
||||
required = True
|
||||
)
|
||||
flag_parser.add_argument(
|
||||
"--fix-version",
|
||||
help = "Assumed Fix version. Used for commit metadata",
|
||||
required = False
|
||||
)
|
||||
flag_parser.add_argument(
|
||||
"--cache-for-dev",
|
||||
help = "DEV USE ONLY: prefix of path to a jira cache",
|
||||
required = False
|
||||
)
|
||||
flag_parser.add_argument(
|
||||
"--github-url",
|
||||
help = "Base URL of the GitHub repo",
|
||||
|
@ -171,19 +189,26 @@ def pretty_print_issue(issue, type=None, **kwargs):
|
|||
if issue.issue.fields.components and len(issue.issue.fields.components) > 0:
|
||||
print("\t- Component(s): " + (' '.join(sorted([str(component.name) for component in issue.issue.fields.components]))))
|
||||
|
||||
def get_commits(repo_root, rev_range, **kwargs):
|
||||
def get_commits(commit_metadata_obj, repo_root, rev_range, **kwargs):
|
||||
"""
|
||||
Yields an ICUCommit for each commit in the user-specified rev-range.
|
||||
"""
|
||||
repo = Repo(repo_root)
|
||||
for commit in repo.iter_commits(rev_range):
|
||||
match = re.search(r"^(\w+-\d+) ", commit.message)
|
||||
if match:
|
||||
issue_id = match.group(1)
|
||||
# print("@@@ %s = %s / %s" % (issue_id, commit, commit.summary), file=sys.stderr)
|
||||
yield ICUCommit(issue_id, commit)
|
||||
else:
|
||||
yield ICUCommit(None, commit)
|
||||
issue_id = None
|
||||
if commit_metadata_obj:
|
||||
# read from the non-skip list
|
||||
from_commit_metadata = commit_metadata_obj.get_commit_info(commit.hexsha, skip=None)
|
||||
if from_commit_metadata:
|
||||
(sha, issue_id, message) = from_commit_metadata
|
||||
if message:
|
||||
# Update message if found
|
||||
commit.message = commit.message + "\nCOMMIT_METADATA: " + message
|
||||
if not issue_id:
|
||||
match = re.search(r"^(\w+-\d+) ", commit.message)
|
||||
if match:
|
||||
issue_id = match.group(1)
|
||||
yield ICUCommit(issue_id, commit)
|
||||
|
||||
def get_cherrypicked_commits(repo_root, rev_range, **kwargs):
|
||||
"""
|
||||
|
@ -255,6 +280,40 @@ def get_jira_issues(jira_query, **kwargs):
|
|||
start += batch_size
|
||||
|
||||
jira_issue_map = dict() # loaded in main()
|
||||
commit_metadata = None
|
||||
fixversion_to_skip = None
|
||||
|
||||
def get_issue_cache_path(args):
|
||||
return '%s-%s.pickle' % (args.cache_for_dev, sha256(args.jira_query.encode()).hexdigest())
|
||||
|
||||
def load_issues(args):
|
||||
global jira_issue_ids, jira_issue_map, closed_jira_issue_ids
|
||||
|
||||
if args.cache_for_dev:
|
||||
issue_cache_path = get_issue_cache_path(args)
|
||||
if os.path.exists(issue_cache_path):
|
||||
with open(issue_cache_path, 'rb') as f:
|
||||
(issues, jira_issue_ids, closed_jira_issue_ids, jira_issue_map) = pickle.load(f)
|
||||
print("Loaded jira cache to " + issue_cache_path, file=sys.stderr)
|
||||
return issues
|
||||
# proceed to load
|
||||
issues = list(get_jira_issues(**vars(args)))
|
||||
# add all queried issues to the cache
|
||||
for issue in issues:
|
||||
jira_issue_map[issue.issue_id] = issue
|
||||
# only the issue ids in-query
|
||||
jira_issue_ids = set(issue.issue_id for issue in issues)
|
||||
# only the closed issue ids in-query
|
||||
closed_jira_issue_ids = set(issue.issue_id for issue in issues if issue.is_closed)
|
||||
|
||||
return issues
|
||||
|
||||
def save_issues_to_cache(args, issues):
|
||||
if args.cache_for_dev:
|
||||
issue_cache_path = get_issue_cache_path(args)
|
||||
with open(issue_cache_path, 'wb') as f:
|
||||
pickle.dump((issues, jira_issue_ids, closed_jira_issue_ids, jira_issue_map), f)
|
||||
print("Wrote cache to " + issue_cache_path, file=sys.stderr)
|
||||
|
||||
def get_single_jira_issue(issue_id, **kwargs):
|
||||
"""
|
||||
|
@ -293,6 +352,7 @@ def print_sectionheader(section):
|
|||
#print("### %s%s" % (aname(section), section))
|
||||
|
||||
def main():
|
||||
global fixversion_to_skip
|
||||
args = flag_parser.parse_args()
|
||||
print("TIP: Have you pulled the latest main? This script only looks at local commits.", file=sys.stderr)
|
||||
if not args.jira_username or not args.jira_password:
|
||||
|
@ -301,30 +361,40 @@ def main():
|
|||
else:
|
||||
authenticated = True
|
||||
|
||||
if args.fix_version:
|
||||
fixversion_to_skip = 'v%s' % args.fix_version
|
||||
args.fixversion_to_skip = fixversion_to_skip
|
||||
|
||||
if args.commit_metadata and os.path.exists(args.commit_metadata):
|
||||
commit_metadata = CommitMetadata(args.commit_metadata)
|
||||
else:
|
||||
commit_metadata = CommitMetadata() # null
|
||||
args.commit_metadata_obj = commit_metadata
|
||||
|
||||
# exclude these, already merged to old maint
|
||||
excludeAlreadyMergedToOldMaint = get_cherrypicked_commits(**vars(args))
|
||||
exclude_already_merged_to_old_maint = get_cherrypicked_commits(**vars(args))
|
||||
|
||||
commits = list(get_commits(**vars(args)))
|
||||
issues = list(get_jira_issues(**vars(args)))
|
||||
|
||||
def exclude_commit_sha(sha) -> bool:
|
||||
"""Return true if this sha is excluded"""
|
||||
exclude = (sha in exclude_already_merged_to_old_maint) or (commit_metadata.get_commit_info(sha, skip=fixversion_to_skip) is not None)
|
||||
return exclude
|
||||
|
||||
# commit_issue_ids is all commits in the git query. Excluding cherry exclusions.
|
||||
commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and commit.commit.hexsha not in excludeAlreadyMergedToOldMaint)
|
||||
commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and not exclude_commit_sha(commit.commit.hexsha))
|
||||
# which issues have commits that were excluded
|
||||
excluded_commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and commit.commit.hexsha in excludeAlreadyMergedToOldMaint)
|
||||
excluded_commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and exclude_commit_sha(commit.commit.hexsha))
|
||||
excluded_commits = set(commit for commit in commits if exclude_commit_sha(commit.commit.hexsha))
|
||||
|
||||
# grouped_commits is all commits and issue_ids in the git query, regardless of issue status
|
||||
# but NOT including cherry exclusions
|
||||
grouped_commits = [
|
||||
(issue_id, [commit for commit in commits if commit.issue_id == issue_id and commit.commit.hexsha not in excludeAlreadyMergedToOldMaint])
|
||||
(issue_id, [commit for commit in commits if commit.issue_id == issue_id and not exclude_commit_sha(commit.commit.hexsha)])
|
||||
for issue_id in sorted(commit_issue_ids)
|
||||
]
|
||||
# add all queried issues to the cache
|
||||
for issue in issues:
|
||||
jira_issue_map[issue.issue_id] = issue
|
||||
# only the issue ids in-query
|
||||
jira_issue_ids = set(issue.issue_id for issue in issues)
|
||||
# only the closed issue ids in-query
|
||||
closed_jira_issue_ids = set(issue.issue_id for issue in issues if issue.is_closed)
|
||||
|
||||
issues = load_issues(args)
|
||||
|
||||
# keep track of issues that we already said have no commit.
|
||||
no_commit_ids = set()
|
||||
|
@ -376,7 +446,7 @@ def main():
|
|||
no_commit_ids.add(issue.issue_id)
|
||||
pretty_print_issue(issue, type=CLOSED_NO_COMMIT, **vars(args))
|
||||
if issue.issue_id in excluded_commit_issue_ids:
|
||||
print("\t - **Note: Has cherry-picked commits. Fix Version may be wrong.**")
|
||||
print("\t - **Note: Has excluded/cherry-picked commits. Fix Version may be wrong.**")
|
||||
print()
|
||||
if not found:
|
||||
print("*Success: No problems in this category!*")
|
||||
|
@ -451,8 +521,6 @@ def main():
|
|||
print("##### Commits with Issue %s" % issue_id)
|
||||
print()
|
||||
for commit in commits:
|
||||
if(commit.commit.hexsha in excludeAlreadyMergedToOldMaint):
|
||||
print("@@@ ALREADY MERGED")
|
||||
pretty_print_commit(commit, **vars(args))
|
||||
print()
|
||||
if not found:
|
||||
|
@ -542,5 +610,26 @@ def main():
|
|||
print("## Total Problems: %s" % total_problems)
|
||||
print("## Issues under review: %s" % len(issues_in_review)) # not counted as a problem.
|
||||
|
||||
if fixversion_to_skip:
|
||||
print("## fix version (for COMMIT_METADATA SKIP) %s" % fixversion_to_skip)
|
||||
|
||||
if len(excluded_commits) > 0:
|
||||
print()
|
||||
print("## Excluded commits: %d" % len(excluded_commits))
|
||||
for commit in excluded_commits:
|
||||
pretty_print_commit(commit, **vars(args))
|
||||
from_commit_metadata = commit_metadata.get_commit_info(commit.commit.hexsha, skip=fixversion_to_skip)
|
||||
if from_commit_metadata:
|
||||
(sha, id, message) = from_commit_metadata
|
||||
# Print any notes in the skip list here.
|
||||
if id:
|
||||
print("\t- COMMIT_METADATA.md: %s" % id)
|
||||
if message:
|
||||
print("\t- COMMIT_METADATA.md: %s" % message)
|
||||
if commit.commit.hexsha in exclude_already_merged_to_old_maint:
|
||||
print("\t- NOTE: excluded due to already being merged to old maint")
|
||||
# save out the dev cache here, so that any 1-off issues are also included in the cache
|
||||
save_issues_to_cache(args, issues)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
83
tools/commit-checker/commit_metadata.py
Normal file
83
tools/commit-checker/commit_metadata.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
# Copyright (C) 2022 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
# Author: srloomis@unicode.org
|
||||
|
||||
from re import compile
|
||||
from typing import Tuple
|
||||
import typing
|
||||
|
||||
commit_line = compile('^\s*-\s+([0-9a-f]+)\s+([^\s]+)\s+(.*)$') # '- <sha> <hypen-or-ticketid> <message>'
|
||||
verb_line = compile('^\s*#\s+([A-Z]+)\s+(.*)$') # '# ACTION any other content…'
|
||||
|
||||
class CommitMetadata():
|
||||
def __init__(self, metadata_file=None) -> None:
|
||||
skipset = {}
|
||||
fixset = [] # sha, id, message
|
||||
current_skip = None
|
||||
if metadata_file:
|
||||
with open(metadata_file, 'r') as f:
|
||||
for line in f.readlines():
|
||||
line = line.strip()
|
||||
m = commit_line.match(line)
|
||||
if m:
|
||||
sha = m.group(1)
|
||||
id = m.group(2)
|
||||
message = m.group(3)
|
||||
fixset.append((sha, id, message))
|
||||
if current_skip:
|
||||
skipset[current_skip].append((sha, id, message))
|
||||
continue
|
||||
m = verb_line.match(line)
|
||||
if m:
|
||||
action = m.group(1)
|
||||
content = m.group(2)
|
||||
# for now we only support SKIP
|
||||
assert action == "SKIP", "Unknown action %s in %s (expected 'SKIP')" % (action, line)
|
||||
current_skip = content
|
||||
assert content not in skipset, "Error: two sections like %s" % line
|
||||
skipset[content] = [] # initialize this
|
||||
continue
|
||||
self.fixset = fixset
|
||||
self.skipset = skipset
|
||||
pass
|
||||
|
||||
def get_commit_info(self, commit, skip=None) -> typing.Tuple[str, str, str]:
|
||||
"""Return an override line given a commit
|
||||
|
||||
Args:
|
||||
commit (str): hash or short hash of a commit
|
||||
skip (str): If set, a version to skip such as 'v41' (this will match a section - SKIP v41)
|
||||
|
||||
Returns:
|
||||
sha (str): original sha from line
|
||||
id (str): ticket id or hash
|
||||
message (str): override message
|
||||
"""
|
||||
if skip:
|
||||
if not skip in self.skipset:
|
||||
return None
|
||||
return CommitMetadata.match_list(commit, self.skipset[skip])
|
||||
else:
|
||||
return CommitMetadata.match_list(commit, self.fixset)
|
||||
|
||||
@staticmethod
|
||||
def match_list(commit, l) -> any:
|
||||
"""Find the first match in a list of commits
|
||||
|
||||
Args:
|
||||
commit (str): short or long commit string
|
||||
l (list): list of tuples, where item 0 is the commit
|
||||
|
||||
Returns:
|
||||
any: matching list member, or None
|
||||
"""
|
||||
for i in l:
|
||||
if CommitMetadata.match_commit(commit, i[0]):
|
||||
return i
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def match_commit(h1, h2) -> bool:
|
||||
"""return true if the prefix of the hashes are the same"""
|
||||
comm = min(len(h1), len(h2))
|
||||
return h1[0:comm] == h2[0:comm]
|
65
tools/commit-checker/test_commit_metadata.py
Normal file
65
tools/commit-checker/test_commit_metadata.py
Normal file
|
@ -0,0 +1,65 @@
|
|||
# Copyright (C) 2022 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
# Author: srloomis@unicode.org
|
||||
|
||||
from commit_metadata import CommitMetadata
|
||||
|
||||
def test_hash():
|
||||
assert CommitMetadata.match_commit("512a679aba", "512a679aba22e144a4443df5f8f304e4f8b39054")
|
||||
assert CommitMetadata.match_commit("512a679aba", "512a679aba")
|
||||
assert CommitMetadata.match_commit("512a679aba22e144a4443df5f8f304e4f8b39054", "512a679aba22e144a4443df5f8f304e4f8b39054")
|
||||
assert not CommitMetadata.match_commit("512a679aba", "16aae80199")
|
||||
assert not CommitMetadata.match_commit("512a679aba22e144a4443df5f8f304e4f8b39054", "16aae80199")
|
||||
|
||||
def test_matchcommit():
|
||||
assert CommitMetadata.match_list("512a679aba", [("00decafbad", 44), ("512a679aba", 99)]) == ("512a679aba", 99)
|
||||
assert CommitMetadata.match_list("512a679aba22e144a4443df5f8f304e4f8b39054", [("00decafbad", 44), ("512a679aba", 99)]) == ("512a679aba", 99)
|
||||
assert CommitMetadata.match_list("512a679aba", [("00decafbad", 44), ("512a679aba22e144a4443df5f8f304e4f8b39054", 99)]) == ("512a679aba22e144a4443df5f8f304e4f8b39054", 99)
|
||||
assert not CommitMetadata.match_list("16aae80199", [("00decafbad", 44), ("512a679aba", 99)])
|
||||
assert not CommitMetadata.match_list("16aae80199", [("00decafbad", 44), ("512a679aba22e144a4443df5f8f304e4f8b39054", 99)])
|
||||
|
||||
def test_read():
|
||||
m = CommitMetadata(metadata_file="./TEST_COMMIT_METADATA.md")
|
||||
assert m
|
||||
|
||||
assert not m.get_commit_info('00000000') # not in list
|
||||
assert m.get_commit_info('00decafbad')
|
||||
assert m.get_commit_info('00decafbad')[1].startswith('CLDR-0000')
|
||||
assert m.get_commit_info('56ca5d5')
|
||||
assert m.get_commit_info('56ca5d5')[1].startswith('CLDR-14877')
|
||||
# short or long, same
|
||||
assert m.get_commit_info('56ca5d5') == m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de')
|
||||
|
||||
# skip list
|
||||
assert m.get_commit_info('56ca5d5', skip='v41')
|
||||
assert not m.get_commit_info('56ca5d5', skip='v42')
|
||||
assert m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de', skip='v41')
|
||||
assert not m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de', skip='v42')
|
||||
assert not m.get_commit_info('00decafbad', 'v41')
|
||||
|
||||
def test_null_read():
|
||||
m = CommitMetadata(metadata_file=None)
|
||||
assert m
|
||||
|
||||
# function with no info
|
||||
assert not m.get_commit_info('00000000') # not in list
|
||||
assert not m.get_commit_info('00decafbad')
|
||||
assert not m.get_commit_info('56ca5d5')
|
||||
assert not m.get_commit_info('56ca5d5', skip='v41')
|
||||
assert not m.get_commit_info('56ca5d5', skip='v42')
|
||||
|
||||
def test_parse_42():
|
||||
m = CommitMetadata(metadata_file="./TEST_COMMIT_METADATA.md")
|
||||
assert m
|
||||
|
||||
# no skip
|
||||
info = m.get_commit_info('02198373a591a15b804127acddd32582ec985b7e')
|
||||
assert info
|
||||
assert info[0] == '02198373a591a15b804127acddd32582ec985b7e'
|
||||
assert info[1] == 'CLDR-15852'
|
||||
assert info[2] == 'v42 merge commit'
|
||||
|
||||
# skip
|
||||
info = m.get_commit_info('02198373a591a15b804127acddd32582ec985b7e', skip='v42')
|
||||
# not found because it isn't in SKIP v42
|
||||
assert not info
|
Loading…
Add table
Reference in a new issue