ICU-21755 commit checker: add support for COMMIT_METADATA.md file

- new option, --commit-metadata= with path to metadata file
- new option, --fix-version=41 (used for SKIP sections)
- scaffolding for 'bad commits' list
- new module CommitMetadata with unit tests
- sample file format TEST_COMMIT_METADATA.md
- such commits are skipped
- refactored the commit skipping part (formerly used for cherry pick skips)
- add a report section for skipped commits
- add a cache for JIRA queries (for dev use): --cache-for-dev "/tmp/cldr-commit-cache" - optional
- add an 'excluded commits' section at the bottom
- make sure commit metadata is used to update ticket IDs and messages.
This commit is contained in:
Steven R. Loomis 2022-07-26 12:19:55 -05:00
parent 123e5c1cd6
commit 8ca2e06b72
5 changed files with 287 additions and 24 deletions

View file

@ -1,5 +1,5 @@
# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# License & terms of use: http://www.unicode.org/copyright.html
# Author: shane@unicode.org
[[source]]
@ -15,3 +15,4 @@ gitpython = "*"
jira = "*"
[dev-packages]
pytest = "*"

View file

@ -0,0 +1,25 @@
# - Commit Metadata
### Copyright (C) 2022 and later: Unicode, Inc. and others.
### License & terms of use: http://www.unicode.org/copyright.html
### All lines starting with one hash (#) are significant
### All lines starting with a dash (-) are significant.
###
### The following list of commits is parsed by the ICU/CLDR commit checker.
### It has the following structure:
### `- <commit> <bug id or hyphen> <message>`
- 00decafbad CLDR-0000 Bad commit message
- 50257a7a3eba7bbed634961a2aa74e77b12810bf - Early commit, no bug id
- 283dc84d81cfc47fb15cd664fce4b0151d070ea6 - Early commit, no bug id
- 02198373a591a15b804127acddd32582ec985b7e CLDR-15852 v42 merge commit
### The following are items to skip for a certain CLDR version
### Format: `# SKIP v00` followed by a list of commits to skip for that version
# SKIP v41
- 56ca5d5 CLDR-14877 split ticket

View file

@ -9,11 +9,14 @@ import os
import re
import sys
import datetime
from hashlib import sha256
import pickle
from enum import Enum
from collections import namedtuple
from git import Repo
from jira import JIRA
from commit_metadata import CommitMetadata
# singleCount = 0
@ -110,6 +113,11 @@ flag_parser.add_argument(
help = "Hostname of the Jira instance",
default = "unicode-org.atlassian.net"
)
flag_parser.add_argument(
"--commit-metadata",
help = "path to COMMIT_METADATA.md",
default = "COMMIT_METADATA.md"
)
flag_parser.add_argument(
"--jira-username",
help = "Username to use for authenticating to Jira",
@ -125,6 +133,16 @@ flag_parser.add_argument(
help = "JQL query load tickets; this should match tickets expected to correspond to the commits being checked. Example: 'project=ICU and fixVersion=63.1'; set fixVersion to the upcoming version.",
required = True
)
flag_parser.add_argument(
"--fix-version",
help = "Assumed Fix version. Used for commit metadata",
required = False
)
flag_parser.add_argument(
"--cache-for-dev",
help = "DEV USE ONLY: prefix of path to a jira cache",
required = False
)
flag_parser.add_argument(
"--github-url",
help = "Base URL of the GitHub repo",
@ -171,19 +189,26 @@ def pretty_print_issue(issue, type=None, **kwargs):
if issue.issue.fields.components and len(issue.issue.fields.components) > 0:
print("\t- Component(s): " + (' '.join(sorted([str(component.name) for component in issue.issue.fields.components]))))
def get_commits(repo_root, rev_range, **kwargs):
def get_commits(commit_metadata_obj, repo_root, rev_range, **kwargs):
"""
Yields an ICUCommit for each commit in the user-specified rev-range.
"""
repo = Repo(repo_root)
for commit in repo.iter_commits(rev_range):
match = re.search(r"^(\w+-\d+) ", commit.message)
if match:
issue_id = match.group(1)
# print("@@@ %s = %s / %s" % (issue_id, commit, commit.summary), file=sys.stderr)
yield ICUCommit(issue_id, commit)
else:
yield ICUCommit(None, commit)
issue_id = None
if commit_metadata_obj:
# read from the non-skip list
from_commit_metadata = commit_metadata_obj.get_commit_info(commit.hexsha, skip=None)
if from_commit_metadata:
(sha, issue_id, message) = from_commit_metadata
if message:
# Update message if found
commit.message = commit.message + "\nCOMMIT_METADATA: " + message
if not issue_id:
match = re.search(r"^(\w+-\d+) ", commit.message)
if match:
issue_id = match.group(1)
yield ICUCommit(issue_id, commit)
def get_cherrypicked_commits(repo_root, rev_range, **kwargs):
"""
@ -255,6 +280,40 @@ def get_jira_issues(jira_query, **kwargs):
start += batch_size
jira_issue_map = dict() # loaded in main()
commit_metadata = None
fixversion_to_skip = None
def get_issue_cache_path(args):
return '%s-%s.pickle' % (args.cache_for_dev, sha256(args.jira_query.encode()).hexdigest())
def load_issues(args):
global jira_issue_ids, jira_issue_map, closed_jira_issue_ids
if args.cache_for_dev:
issue_cache_path = get_issue_cache_path(args)
if os.path.exists(issue_cache_path):
with open(issue_cache_path, 'rb') as f:
(issues, jira_issue_ids, closed_jira_issue_ids, jira_issue_map) = pickle.load(f)
print("Loaded jira cache to " + issue_cache_path, file=sys.stderr)
return issues
# proceed to load
issues = list(get_jira_issues(**vars(args)))
# add all queried issues to the cache
for issue in issues:
jira_issue_map[issue.issue_id] = issue
# only the issue ids in-query
jira_issue_ids = set(issue.issue_id for issue in issues)
# only the closed issue ids in-query
closed_jira_issue_ids = set(issue.issue_id for issue in issues if issue.is_closed)
return issues
def save_issues_to_cache(args, issues):
if args.cache_for_dev:
issue_cache_path = get_issue_cache_path(args)
with open(issue_cache_path, 'wb') as f:
pickle.dump((issues, jira_issue_ids, closed_jira_issue_ids, jira_issue_map), f)
print("Wrote cache to " + issue_cache_path, file=sys.stderr)
def get_single_jira_issue(issue_id, **kwargs):
"""
@ -293,6 +352,7 @@ def print_sectionheader(section):
#print("### %s%s" % (aname(section), section))
def main():
global fixversion_to_skip
args = flag_parser.parse_args()
print("TIP: Have you pulled the latest main? This script only looks at local commits.", file=sys.stderr)
if not args.jira_username or not args.jira_password:
@ -301,30 +361,40 @@ def main():
else:
authenticated = True
if args.fix_version:
fixversion_to_skip = 'v%s' % args.fix_version
args.fixversion_to_skip = fixversion_to_skip
if args.commit_metadata and os.path.exists(args.commit_metadata):
commit_metadata = CommitMetadata(args.commit_metadata)
else:
commit_metadata = CommitMetadata() # null
args.commit_metadata_obj = commit_metadata
# exclude these, already merged to old maint
excludeAlreadyMergedToOldMaint = get_cherrypicked_commits(**vars(args))
exclude_already_merged_to_old_maint = get_cherrypicked_commits(**vars(args))
commits = list(get_commits(**vars(args)))
issues = list(get_jira_issues(**vars(args)))
def exclude_commit_sha(sha) -> bool:
"""Return true if this sha is excluded"""
exclude = (sha in exclude_already_merged_to_old_maint) or (commit_metadata.get_commit_info(sha, skip=fixversion_to_skip) is not None)
return exclude
# commit_issue_ids is all commits in the git query. Excluding cherry exclusions.
commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and commit.commit.hexsha not in excludeAlreadyMergedToOldMaint)
commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and not exclude_commit_sha(commit.commit.hexsha))
# which issues have commits that were excluded
excluded_commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and commit.commit.hexsha in excludeAlreadyMergedToOldMaint)
excluded_commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and exclude_commit_sha(commit.commit.hexsha))
excluded_commits = set(commit for commit in commits if exclude_commit_sha(commit.commit.hexsha))
# grouped_commits is all commits and issue_ids in the git query, regardless of issue status
# but NOT including cherry exclusions
grouped_commits = [
(issue_id, [commit for commit in commits if commit.issue_id == issue_id and commit.commit.hexsha not in excludeAlreadyMergedToOldMaint])
(issue_id, [commit for commit in commits if commit.issue_id == issue_id and not exclude_commit_sha(commit.commit.hexsha)])
for issue_id in sorted(commit_issue_ids)
]
# add all queried issues to the cache
for issue in issues:
jira_issue_map[issue.issue_id] = issue
# only the issue ids in-query
jira_issue_ids = set(issue.issue_id for issue in issues)
# only the closed issue ids in-query
closed_jira_issue_ids = set(issue.issue_id for issue in issues if issue.is_closed)
issues = load_issues(args)
# keep track of issues that we already said have no commit.
no_commit_ids = set()
@ -376,7 +446,7 @@ def main():
no_commit_ids.add(issue.issue_id)
pretty_print_issue(issue, type=CLOSED_NO_COMMIT, **vars(args))
if issue.issue_id in excluded_commit_issue_ids:
print("\t - **Note: Has cherry-picked commits. Fix Version may be wrong.**")
print("\t - **Note: Has excluded/cherry-picked commits. Fix Version may be wrong.**")
print()
if not found:
print("*Success: No problems in this category!*")
@ -451,8 +521,6 @@ def main():
print("##### Commits with Issue %s" % issue_id)
print()
for commit in commits:
if(commit.commit.hexsha in excludeAlreadyMergedToOldMaint):
print("@@@ ALREADY MERGED")
pretty_print_commit(commit, **vars(args))
print()
if not found:
@ -542,5 +610,26 @@ def main():
print("## Total Problems: %s" % total_problems)
print("## Issues under review: %s" % len(issues_in_review)) # not counted as a problem.
if fixversion_to_skip:
print("## fix version (for COMMIT_METADATA SKIP) %s" % fixversion_to_skip)
if len(excluded_commits) > 0:
print()
print("## Excluded commits: %d" % len(excluded_commits))
for commit in excluded_commits:
pretty_print_commit(commit, **vars(args))
from_commit_metadata = commit_metadata.get_commit_info(commit.commit.hexsha, skip=fixversion_to_skip)
if from_commit_metadata:
(sha, id, message) = from_commit_metadata
# Print any notes in the skip list here.
if id:
print("\t- COMMIT_METADATA.md: %s" % id)
if message:
print("\t- COMMIT_METADATA.md: %s" % message)
if commit.commit.hexsha in exclude_already_merged_to_old_maint:
print("\t- NOTE: excluded due to already being merged to old maint")
# save out the dev cache here, so that any 1-off issues are also included in the cache
save_issues_to_cache(args, issues)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,83 @@
# Copyright (C) 2022 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Author: srloomis@unicode.org
from re import compile
from typing import Tuple
import typing
commit_line = compile('^\s*-\s+([0-9a-f]+)\s+([^\s]+)\s+(.*)$') # '- <sha> <hypen-or-ticketid> <message>'
verb_line = compile('^\s*#\s+([A-Z]+)\s+(.*)$') # '# ACTION any other content…'
class CommitMetadata():
def __init__(self, metadata_file=None) -> None:
skipset = {}
fixset = [] # sha, id, message
current_skip = None
if metadata_file:
with open(metadata_file, 'r') as f:
for line in f.readlines():
line = line.strip()
m = commit_line.match(line)
if m:
sha = m.group(1)
id = m.group(2)
message = m.group(3)
fixset.append((sha, id, message))
if current_skip:
skipset[current_skip].append((sha, id, message))
continue
m = verb_line.match(line)
if m:
action = m.group(1)
content = m.group(2)
# for now we only support SKIP
assert action == "SKIP", "Unknown action %s in %s (expected 'SKIP')" % (action, line)
current_skip = content
assert content not in skipset, "Error: two sections like %s" % line
skipset[content] = [] # initialize this
continue
self.fixset = fixset
self.skipset = skipset
pass
def get_commit_info(self, commit, skip=None) -> typing.Tuple[str, str, str]:
"""Return an override line given a commit
Args:
commit (str): hash or short hash of a commit
skip (str): If set, a version to skip such as 'v41' (this will match a section - SKIP v41)
Returns:
sha (str): original sha from line
id (str): ticket id or hash
message (str): override message
"""
if skip:
if not skip in self.skipset:
return None
return CommitMetadata.match_list(commit, self.skipset[skip])
else:
return CommitMetadata.match_list(commit, self.fixset)
@staticmethod
def match_list(commit, l) -> any:
"""Find the first match in a list of commits
Args:
commit (str): short or long commit string
l (list): list of tuples, where item 0 is the commit
Returns:
any: matching list member, or None
"""
for i in l:
if CommitMetadata.match_commit(commit, i[0]):
return i
return None
@staticmethod
def match_commit(h1, h2) -> bool:
"""return true if the prefix of the hashes are the same"""
comm = min(len(h1), len(h2))
return h1[0:comm] == h2[0:comm]

View file

@ -0,0 +1,65 @@
# Copyright (C) 2022 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Author: srloomis@unicode.org
from commit_metadata import CommitMetadata
def test_hash():
assert CommitMetadata.match_commit("512a679aba", "512a679aba22e144a4443df5f8f304e4f8b39054")
assert CommitMetadata.match_commit("512a679aba", "512a679aba")
assert CommitMetadata.match_commit("512a679aba22e144a4443df5f8f304e4f8b39054", "512a679aba22e144a4443df5f8f304e4f8b39054")
assert not CommitMetadata.match_commit("512a679aba", "16aae80199")
assert not CommitMetadata.match_commit("512a679aba22e144a4443df5f8f304e4f8b39054", "16aae80199")
def test_matchcommit():
assert CommitMetadata.match_list("512a679aba", [("00decafbad", 44), ("512a679aba", 99)]) == ("512a679aba", 99)
assert CommitMetadata.match_list("512a679aba22e144a4443df5f8f304e4f8b39054", [("00decafbad", 44), ("512a679aba", 99)]) == ("512a679aba", 99)
assert CommitMetadata.match_list("512a679aba", [("00decafbad", 44), ("512a679aba22e144a4443df5f8f304e4f8b39054", 99)]) == ("512a679aba22e144a4443df5f8f304e4f8b39054", 99)
assert not CommitMetadata.match_list("16aae80199", [("00decafbad", 44), ("512a679aba", 99)])
assert not CommitMetadata.match_list("16aae80199", [("00decafbad", 44), ("512a679aba22e144a4443df5f8f304e4f8b39054", 99)])
def test_read():
m = CommitMetadata(metadata_file="./TEST_COMMIT_METADATA.md")
assert m
assert not m.get_commit_info('00000000') # not in list
assert m.get_commit_info('00decafbad')
assert m.get_commit_info('00decafbad')[1].startswith('CLDR-0000')
assert m.get_commit_info('56ca5d5')
assert m.get_commit_info('56ca5d5')[1].startswith('CLDR-14877')
# short or long, same
assert m.get_commit_info('56ca5d5') == m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de')
# skip list
assert m.get_commit_info('56ca5d5', skip='v41')
assert not m.get_commit_info('56ca5d5', skip='v42')
assert m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de', skip='v41')
assert not m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de', skip='v42')
assert not m.get_commit_info('00decafbad', 'v41')
def test_null_read():
m = CommitMetadata(metadata_file=None)
assert m
# function with no info
assert not m.get_commit_info('00000000') # not in list
assert not m.get_commit_info('00decafbad')
assert not m.get_commit_info('56ca5d5')
assert not m.get_commit_info('56ca5d5', skip='v41')
assert not m.get_commit_info('56ca5d5', skip='v42')
def test_parse_42():
m = CommitMetadata(metadata_file="./TEST_COMMIT_METADATA.md")
assert m
# no skip
info = m.get_commit_info('02198373a591a15b804127acddd32582ec985b7e')
assert info
assert info[0] == '02198373a591a15b804127acddd32582ec985b7e'
assert info[1] == 'CLDR-15852'
assert info[2] == 'v42 merge commit'
# skip
info = m.get_commit_info('02198373a591a15b804127acddd32582ec985b7e', skip='v42')
# not found because it isn't in SKIP v42
assert not info