1
0
Fork 0

Refactor the update script

This is mainly done to allow better notifications and simplify a few
things.
This commit is contained in:
Luca Beltrame 2022-01-15 10:52:30 +01:00
parent fd91c01607
commit c47ce0dc06
Signed by: einar
GPG key ID: 4707F46E9EC72DEC

View file

@ -5,10 +5,11 @@
import argparse
from datetime import date
from collections import defaultdict
from functools import lru_cache
import logging
import json
from pathlib import Path
from typing import Union, Dict, Any, List
from typing import Union, Dict, Any, List, Tuple
from urllib.parse import quote
import git
@ -23,9 +24,9 @@ MATRIX_COMMANDER = "/home/mocker/local-venv/bin/matrix-commander.py"
REPO_TEMPLATE = "https://invent.kde.org/{}"
MESSAGE_TEMPLATE = f"""
### OBS package update complete
## OBS package update report
Stats for {date.today().strftime('%Y-%m-%d')}:
Updated at {date.today().strftime('%Y-%m-%d')}
"""
@ -49,101 +50,6 @@ def get_remote_hash(url: str, branch: str = "master") -> str:
return git_hash
class RepoUpdater:
    """Track per-repository git hashes and the OBS package mapping.

    Wraps a JSON on-disk cache (repository -> {kde_repo_name: git_hash})
    plus the repository/package configuration used to decide which OBS
    packages need to be rebuilt.
    """

    def __init__(self, config: str, cache_file: str) -> None:
        """Load the hash cache (if present) and the JSON config mapping.

        config: path to a JSON file mapping each OBS repository to a list
            of package entries (dicts with "kde", "branch" and "obs" keys,
            as consumed by update_packages()).
        cache_file: path to the JSON hash cache; a missing file simply
            means an empty cache.
        """
        # BUG FIX: the original opened `self.cache` before any attribute
        # of that name was assigned, raising AttributeError whenever the
        # cache file existed. Store the path first, then use it.
        self.cache = cache_file
        if not Path(cache_file).exists():
            logging.debug("File cache not found, not loading")
            self._data: Dict[str, Dict[str, str]] = dict()
        else:
            with open(cache_file) as handle:
                self._data = json.load(handle)
        with open(config, "r") as mapping:
            self.config = json.load(mapping)

    def __getitem__(self, key: str) -> Dict[str, str]:
        """Return the cached hash mapping for *key*; raise KeyError if absent."""
        if key not in self._data:
            raise KeyError(f"{key}")
        return self._data[key]

    def __setitem__(self, key: str, value: Dict[str, str]) -> None:
        self._data[key] = value

    def get(self, key: str,
            *args: Any, **kwargs: Any) -> Union[None, str, Dict[str, str]]:
        """dict.get-style access to the cached hash data."""
        return self._data.get(key, *args, **kwargs)

    def package_lists(self, repo: str) -> List[str]:
        # NOTE(review): despite the name, this returns the cached hash
        # mapping for *repo*, not a list of packages — verify callers.
        return self[repo]

    def get_updated_remotes(self, repository: str) -> List[str]:
        """Return OBS package names whose upstream KDE hash changed.

        Upstream projects that no longer exist are skipped with a warning.
        Raises KeyError if *repository* has no cached data.
        """
        to_update = list()
        # BUG FIX: the config entry is a list of package dicts; the
        # original iterated one level too deep (`for item in repo` walked
        # each dict's keys), so item["kde"] would fail on a plain string.
        for item in self.config[repository]:
            kde_name = item["kde"]
            branch = item["branch"]
            url = REPO_TEMPLATE.format(kde_name)
            if not project_exists(kde_name):
                logging.warning("Repository %s not found, skipping",
                                kde_name)
                continue
            local_hash = self[repository].get(kde_name, "")
            remote_hash = get_remote_hash(url, branch)
            if local_hash != remote_hash:
                to_update.append(item["obs"])
        return to_update
class GitHashCache:
    """JSON-backed mapping of repository -> {remote_name: git_hash}."""

    def __init__(self, cache_file: str) -> None:
        # Path of the JSON file backing the cache; the in-memory data
        # stays empty until load() is called.
        self.cache = cache_file
        self._data: Dict[str, Dict[str, str]] = {}

    def __getitem__(self, key: str) -> Dict[str, str]:
        """Return the hash mapping for *key*; raise KeyError if absent."""
        if key in self._data:
            return self._data[key]
        raise KeyError

    def __setitem__(self, key: str, value: Dict[str, str]) -> None:
        self._data[key] = value

    def get(self, key: str,
            *args: Any, **kwargs: Any) -> Union[None, str, Dict[str, str]]:
        """Mirror dict.get on the underlying cache data."""
        return self._data.get(key, *args, **kwargs)

    def save(self) -> None:
        """Serialize the cache to disk as indented JSON."""
        logging.debug("Saving pickled data")
        with open(self.cache, "w") as handle:
            json.dump(self._data, handle, indent=4)

    def load(self) -> None:
        """Populate the cache from disk; a missing file is a no-op."""
        if Path(self.cache).exists():
            with open(self.cache) as handle:
                self._data = json.load(handle)
        else:
            logging.debug("File cache not found, not loading")
revision = gitcmd.ls_remote(url, branch, refs=True)
def trigger_update(repository: str, package_name: str,
token: str) -> Union[requests.Response, bool]:
@ -163,80 +69,113 @@ def trigger_update(repository: str, package_name: str,
return result
def update_package(hash_data: GitHashCache, package_name: str,
remote_name: str, obs_repository: str,
branch: str,
token: str,
stats: Dict[str, int]) -> None:
class RepoUpdater:
repo_name = "https://invent.kde.org/{}".format(remote_name)
def __init__(self, config: str, cache_file: str, token_file: str) -> None:
if not project_exists(remote_name):
logging.warning("Repository %s not found, skipping", remote_name)
return
self.cache = cache_file
self.token = token_file
remote_hash = get_remote_hash(repo_name, branch)
if hash_data.get(obs_repository) is None:
logging.debug("No prior data - initializing empty")
hash_data[obs_repository] = dict()
current_hash = hash_data[obs_repository].get(remote_name, "")
logging.debug("Package %s, theirs %s, ours %s",
remote_name, remote_hash, current_hash)
if remote_hash != current_hash:
logging.debug("Hash doesn't match, updating")
if trigger_update(obs_repository, package_name, token):
hash_data[obs_repository][remote_name] = remote_hash
stats["updated"] += 1
hash_data.save()
if not Path(cache_file).exists():
logging.debug("File cache not found, not loading")
self._data: Dict[str, Dict[str, str]] = dict()
else:
stats["errors"] += 1
with open(cache_file) as handle:
self._data = json.load(handle)
with open(config, "r") as mapping:
repo_data = json.load(mapping)
self.config = repo_data
@property
def repositories(self) -> List[str]:
return self._data.keys()
def update_repository(self, repository) -> List[Tuple[str, str, str]]:
if self._data.get(repository) is None:
logging.debug("No prior data - initializing empty")
self._data[repository] = dict()
to_update = self.get_updated_remotes(repository)
if not to_update:
logging.debug(f"Nothing to update for {repository}")
return
logging.info(f"Found {len(to_update)} updated repositories")
updated = list()
logging.info("Updating packages for %s", repository)
for package in to_update:
if trigger_update(repository, package, self.token):
remote_hash = to_update[package]
remote_name = self.config[repository][package]["kde"]
self._data[repository][remote_name] = remote_hash
self.save_cache()
updated.append((package, remote_name, remote_hash))
else:
updated.append((package, remote_name, "error"))
return updated
@lru_cache(maxsize=200)
def get_updated_remotes(self, repository: str) -> Dict[str, str]:
to_update = dict()
repodata = self.config[repository]
for repo, data in repodata.items():
kde_name = data["kde"]
branch = data["branch"]
url = REPO_TEMPLATE.format(kde_name)
if not project_exists(kde_name):
logging.warning("Repository %s not found, skipping",
kde_name)
continue
local_hash = self._data[repository].get(kde_name, "")
remote_hash = get_remote_hash(url, branch)
if local_hash != remote_hash:
logging.debug("Hash doesn't match, marking as changed")
to_update[repo] = remote_hash
return to_update
def save_cache(self) -> None:
logging.debug("Saving JSON cache")
with open(self.cache, "w") as handle:
json.dump(self._data, handle, indent=4)
def update_packages(cache_file: str,
                    repo_mapping_file: str, token: str) -> None:
    """Walk the repo mapping and trigger OBS updates for changed packages.

    Loads the hash cache from *cache_file*, iterates every OBS repository
    listed in *repo_mapping_file*, updates each of its packages via
    update_package(), then saves the cache and posts per-repository stats
    to Matrix.
    """
    hash_data = GitHashCache(cache_file)
    hash_data.load()
    with open(repo_mapping_file, "r") as mapping:
        repo_data = json.load(mapping)
    stats = dict()
    for obs_repository, branch_data in repo_data.items():
        logging.info("Updating packages for %s", obs_repository)
        # defaultdict lets update_package bump counters without
        # initializing them first.
        repo_stats: Dict[str, int] = defaultdict(int)
        for package in branch_data:
            kde_name = package["kde"]
            obs_name = package["obs"]
            branch = package["branch"]
            logging.debug("Updating package %s (%s)",
                          Path(kde_name).name, obs_name)
            logging.debug("Using branch %s", branch)
            update_package(hash_data, obs_name, kde_name, obs_repository,
                           branch, token, repo_stats)
        stats[obs_repository] = repo_stats
    logging.debug("Saving data")
    hash_data.save()
    notify_matrix(stats)
def notify_matrix(stats: Dict[str, Dict[str, int]]) -> None:
def notify_matrix(update_data: Dict[str, List[Tuple[str, str]]]) -> None:
structure = [MESSAGE_TEMPLATE]
for key, value in stats.items():
row = (f"* {key}: {value['updated']} updated packages,"
f" {value['errors']} errors")
structure.append(row)
structure.append("Updated packages:\n")
errors = list()
for repo, update in update_data.items():
heading = f"### {repo}\n"
structure.append(heading)
structure.append(f"Updated {len(update)} packages.")
for package, remote, state in update:
if state != "error":
row = (f"* {package} - [{state}]"
f"(https://commits.kde.org/{remote}/{state}")
structure.append(row)
else:
errors.append(package)
structure.append("#### Packages with errors")
for errored in errors:
structure.append(f"* {errored}")
message = "\n".join(structure)
@ -279,12 +218,19 @@ def main() -> None:
with open(options.token) as handle:
token = handle.read().strip()
update_packages(cache_file, options.mapping_file, token)
updater = RepoUpdater(options.mapping_file, cache_file, token)
updated_data = dict()
for repo in updater.repositories:
updated = updater.update_repository(repo)
updated_data[repo] = updated
if options.repo_root is not None:
logging.info("Committing changes")
commit_changes(cache_file, options.repo_root)
notify_matrix(updated_data)
logging.info("Complete")