redesign to use force pushes only if necessary, to avoid race conditions, and to add email reports

This commit is contained in:
Ralf Jung 2015-02-22 19:52:18 +01:00
parent 7373610f7e
commit 503462ba44
5 changed files with 276 additions and 196 deletions

156
git_mirror.py Normal file
View file

@ -0,0 +1,156 @@
import sys, os, subprocess
import configparser, itertools, json, re
import email.mime.text, email.utils, smtplib
class GitCommand:
    '''Proxy that turns attribute access into git subcommands: git.merge_base(...) runs "git merge-base ...".'''
    def __getattr__(self, name):
        '''Return a function running the git subcommand <name>, with underscores replaced by dashes.'''
        # __getattr__ is consulted for every attribute that does not exist. Special (dunder) names are probed by
        # hasattr(), copy, pickle, inspect etc.; answering those with a git command would confuse such callers.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        def call(*args, capture_stderr = False, check = True):
            '''If <capture_stderr>, return stderr merged with stdout. Otherwise, return stdout and forward stderr to our own.
            If <check> is true, throw an exception if the process fails with non-zero exit code. Otherwise, do not.
            In any case, return a pair of the captured output and the exit code.'''
            cmd = ["git", name.replace('_', '-')] + list(args)
            with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT if capture_stderr else None) as p:
                (stdout, stderr) = p.communicate()
                # stderr is never a pipe of its own: it is either merged into stdout or inherited from us
                assert stderr is None
                code = p.returncode
            if check and code:
                raise Exception("Error running {0}: Non-zero exit code".format(cmd))
            # only strip newlines, other whitespace in the output is preserved
            return (stdout.decode('utf-8').strip('\n'), code)
        return call
# Shared command proxy: git.<subcommand>(...) runs "git <subcommand> ..." (see GitCommand).
git = GitCommand()
# The all-zero SHA is used throughout this file to denote a ref that does not exist.
git_nullsha = "0" * 40
def git_is_forced_update(oldsha, newsha):
    '''Return True if updating a ref from <oldsha> to <newsha> is a forced update, i.e. if <oldsha> is not
    an ancestor of <newsha>. Return False for a "good" (non-forced) update.
    Raises an exception if git cannot answer the question (for example because a commit is unknown).'''
    # "Check if the first <commit> is an ancestor of the second <commit>"
    out, code = git.merge_base("--is-ancestor", oldsha, newsha, check = False)
    # Exit code 0 means "is an ancestor" and 1 means "is not"; anything else is an error. This must not be a
    # plain assert: with assertions disabled, an error would be mistaken for a forced update and cause a force push.
    if out or code not in (0, 1):
        raise Exception("Could not determine whether {0} is an ancestor of {1} (exit code {2}).".format(oldsha, newsha, code))
    return code != 0
def read_config(fname, defSection = 'DEFAULT'):
    '''Parse the config file <fname>. Options that appear before the first section header are put into
    the section <defSection>. Returns the configparser.ConfigParser holding the result.'''
    # configparser insists on a section header, so we pretend that the file starts with one
    header = "[" + defSection + "]\n"
    parser = configparser.ConfigParser()
    with open(fname) as handle:
        parser.read_file(itertools.chain((header,), handle))
    return parser
def send_mail(subject, text, receivers, sender='post+webhook@ralfj.de', replyTo=None):
    '''Send the plain-text mail <text> with the given <subject> to all <receivers> (a list of addresses),
    using the SMTP server on localhost. <replyTo>, if given, is put into the Reply-To header.
    Does nothing if <receivers> is empty.'''
    assert isinstance(receivers, list)
    if not receivers: return # nothing to do
    # construct content
    msg = email.mime.text.MIMEText(text.encode('UTF-8'), 'plain', 'UTF-8')
    msg['Subject'] = subject
    msg['Date'] = email.utils.formatdate(localtime=True)
    msg['From'] = sender
    msg['To'] = ', '.join(receivers)
    if replyTo is not None:
        msg['Reply-To'] = replyTo
    # put into envelope and send; the context manager closes the connection even if sending fails
    with smtplib.SMTP('localhost') as s:
        s.sendmail(sender, receivers, msg.as_string())
def get_github_payload():
    '''Return the github-style JSON encoded payload (as if we were called as a github webhook).
    The payload is read from stdin and decoded as UTF-8. If it cannot be read or parsed, {} is returned.'''
    try:
        data = sys.stdin.buffer.read()
        return json.loads(data.decode('utf-8'))
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt or SystemExit like a bare "except:" would.
        return {} # nothing read
class Repo:
    '''A local repository together with the mirrors it is kept in sync with.'''

    def __init__(self, name, conf):
        '''Creates a repository from a section of the git-mirror configuration file.
        <name> is the name of the section. <conf> has to provide the keys "local" and "owner"; every key
        "mirror-<id>" declares a mirror called <id> whose URL is the value of that key.'''
        self.name = name
        self.local = conf['local'] # directory of the local repository, we chdir there before running git
        self.owner = conf['owner'] # email address to notify in case of problems
        self.mirrors = {} # maps mirrors to their URLs
        mirror_prefix = 'mirror-'
        for key in conf.keys():
            if key.startswith(mirror_prefix):
                self.mirrors[key[len(mirror_prefix):]] = conf[key]

    def mail_owner(self, msg):
        '''Send <msg> by mail to the owner of this repository.'''
        send_mail("git-mirror {0}".format(self.name), msg, [self.owner])

    def find_mirror_by_url(self, match_urls):
        '''Return the name of a mirror whose URL is contained in <match_urls>, or None if there is no such mirror.'''
        for mirror, url in self.mirrors.items():
            if url in match_urls:
                return mirror
        return None

    def update_mirrors(self, ref, oldsha, newsha, except_mirrors = (), suppress_stderr = False):
        '''Update the <ref> from <oldsha> to <newsha> on all mirrors. The update must already have happened locally.
        Mirrors listed in <except_mirrors> are skipped. If <suppress_stderr>, the stderr of git is captured
        instead of being forwarded to our own. Raises an exception if the SHAs are malformed or a push fails.'''
        # explicit check rather than an assert, so that it is still performed when assertions are disabled
        if len(oldsha) != 40 or len(newsha) != 40:
            raise Exception("These are not valid SHAs.")
        os.chdir(self.local)
        # check for a forced update (creating or deleting a ref never counts as forced)
        is_forced = newsha != git_nullsha and oldsha != git_nullsha and git_is_forced_update(oldsha, newsha)
        # tell all the mirrors
        refspec = newsha+":"+ref
        for mirror, url in self.mirrors.items():
            if mirror in except_mirrors:
                continue
            # update this mirror
            if is_forced:
                # forcibly update ref remotely (someone already did a force push and hence accepted data loss)
                git.push('--force', url, refspec, capture_stderr = suppress_stderr)
            else:
                # nicely update ref remotely (this avoids data loss due to race conditions)
                git.push(url, refspec, capture_stderr = suppress_stderr)

    def update_ref_from_mirror(self, ref, oldsha, newsha, mirror, suppress_stderr = False):
        '''Update the local version of this <ref> to what's currently on the given <mirror>. <oldsha> and <newsha> are checked. Then update all the other mirrors.
        <oldsha> and <newsha> are untrusted: an exception is raised if they do not match the state of the mirror
        and of the local repository.'''
        os.chdir(self.local)
        url = self.mirrors[mirror]
        # first check whether the remote really is at newsha (no output means the ref does not exist there)
        remote_state, code = git.ls_remote(url, ref)
        remote_sha = remote_state.split()[0] if remote_state else git_nullsha
        if newsha != remote_sha:
            raise Exception("Someone lied about the new SHA, which should be {0}.".format(newsha))
        # locally, we have to be at oldsha or newsha (the latter can happen if we already got this update, e.g. if it originated from us)
        local_state, code = git.show_ref(ref, check=False)
        if code == 0:
            local_sha = local_state.split()[0]
        else:
            # a failure without any output means that the ref does not exist locally
            if len(local_state):
                raise Exception("Something went wrong getting the local state of {0}.".format(ref))
            local_sha = git_nullsha
        if local_sha not in (oldsha, newsha):
            raise Exception("Someone lied about the old SHA.")
        # if we are already at newsha locally, we also ran the local hooks, so we do not have to do anything
        if local_sha == newsha:
            return
        # update local state from local_sha to newsha.
        if newsha != git_nullsha:
            # We *could* now fetch the remote ref and immediately update the local one. However, then we would have to
            # decide whether we want to allow a force-update or not. Also, the ref could already have changed remotely,
            # so that may update to some other commit.
            # Instead, we just fetch without updating any local ref. If the remote side changed in such a way that
            # <newsha> is not actually fetched, that's a race and will be noticed when updating the local ref.
            git.fetch(url, ref, capture_stderr = suppress_stderr)
            # now update the ref, checking the old value is still local_sha (which is the null SHA if the
            # ref does not exist yet, so no special case is needed for that)
            git.update_ref(ref, newsha, local_sha)
        else:
            # ref does not exist anymore. delete it.
            assert local_sha != git_nullsha, "Why didn't we bail out earlier if there is nothing to do...?"
            git.update_ref("-d", ref, local_sha) # this checks that the old value is still local_sha
        # update all the mirrors
        self.update_mirrors(ref, oldsha, newsha, [mirror], suppress_stderr)
def find_repo_by_directory(repos, dir):
    '''Return the name of the first repository in <repos> (a dict mapping names to repositories) whose
    local directory is exactly <dir>, or None if there is no such repository.'''
    matching_names = (name for name, repo in repos.items() if dir == repo.local)
    return next(matching_names, None)
def load_repos():
    '''Read "git-mirror.conf" from the directory containing this file and return a dict mapping the name
    of every section (except for DEFAULT) to the Repo described by that section.'''
    here = os.path.dirname(__file__)
    conf = read_config(os.path.join(here, 'git-mirror.conf'))
    return {name: Repo(name, section) for name, section in conf.items() if name != 'DEFAULT'}

51
githook.py Executable file
View file

@ -0,0 +1,51 @@
#!/usr/bin/python3
# Copyright (c) 2014, Ralf Jung <post@ralfj.de>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#==============================================================================
# This is the hook called by git post-commit. It updates all mirrors to the status of the local repository.
import traceback
from git_mirror import *
# Entry point of the git hook: find the repository belonging to the working directory and push every ref
# update listed on stdin (one "<oldsha> <newsha> <ref>" per line) to all mirrors.
if __name__ == "__main__":
    repo = None # we will try to use this during exception handling
    try:
        repos = load_repos()
        # find the repository we are dealing with
        reponame = find_repo_by_directory(repos, os.getcwd())
        if reponame is None or reponame not in repos:
            raise Exception("Unknown repository.")
        # now sync this repository
        repo = repos[reponame]
        # parse the information we get from stdin. we trust this information.
        for line in sys.stdin:
            (oldsha, newsha, ref) = line.split()
            repo.update_mirrors(ref, oldsha, newsha)
    except Exception as e:
        if repo is not None:
            report = "There was a problem running the git-mirror git hook:\n\n{0}".format(traceback.format_exc())
            try:
                repo.mail_owner(report)
            except Exception as mail_error:
                # Failing to send the report must not hide the original problem, nor dump a full traceback.
                sys.stderr.write("Could not send the problem report:\n{0}".format('\n'.join(traceback.format_exception_only(type(mail_error), mail_error))))
        # do not print all the details
        sys.stderr.write("We have a problem:\n{0}".format('\n'.join(traceback.format_exception_only(type(e), e))))

194
update.py
View file

@ -1,194 +0,0 @@
#!/usr/bin/python3
import sys, os, subprocess, argparse
import configparser, itertools, json, re
import traceback
import email.mime.text, email.utils, smtplib
class GitCommand:
    '''Proxy that turns attribute access into git subcommands: git.show_ref(...) runs "git show-ref ...".'''
    def __getattr__(self, name):
        '''Return a function running the git subcommand <name>, with underscores replaced by dashes.'''
        # __getattr__ is consulted for every attribute that does not exist. Special (dunder) names are probed by
        # hasattr(), copy, pickle, inspect etc.; answering those with a git command would confuse such callers.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        def call(*args, capture_stderr = False, check = True):
            '''If <capture_stderr>, return stderr merged with stdout. Otherwise, return stdout and forward stderr to our own.
            If <check> is true, throw an exception if the process fails with non-zero exit code. Otherwise, do not.
            In any case, return a pair of the captured output and the exit code.'''
            cmd = ["git", name.replace('_', '-')] + list(args)
            with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT if capture_stderr else None) as p:
                (stdout, stderr) = p.communicate()
                # stderr is never a pipe of its own: it is either merged into stdout or inherited from us
                assert stderr is None
                code = p.returncode
            if check and code:
                raise Exception("Error running {0}: Non-zero exit code".format(cmd))
            # only strip newlines, other whitespace in the output is preserved
            return (stdout.decode('utf-8').strip('\n'), code)
        return call
# Shared command proxy: git.<subcommand>(...) runs "git <subcommand> ..." (see GitCommand).
git = GitCommand()
def read_config(fname, defSection = 'DEFAULT'):
    '''Parse the config file <fname>. Options that appear before the first section header are put into
    the section <defSection>. Returns the configparser.ConfigParser holding the result.'''
    # configparser insists on a section header, so we pretend that the file starts with one
    header = "[" + defSection + "]\n"
    parser = configparser.ConfigParser()
    with open(fname) as handle:
        parser.read_file(itertools.chain((header,), handle))
    return parser
def send_mail(subject, text, receivers, sender='post+webhook@ralfj.de', replyTo=None):
    '''Send the plain-text mail <text> with the given <subject> to all <receivers> (a list of addresses),
    using the SMTP server on localhost. <replyTo>, if given, is put into the Reply-To header.
    Does nothing if <receivers> is empty.'''
    assert isinstance(receivers, list)
    if not receivers: return # nothing to do
    # construct content
    msg = email.mime.text.MIMEText(text.encode('UTF-8'), 'plain', 'UTF-8')
    msg['Subject'] = subject
    msg['Date'] = email.utils.formatdate(localtime=True)
    msg['From'] = sender
    msg['To'] = ', '.join(receivers)
    if replyTo is not None:
        msg['Reply-To'] = replyTo
    # put into envelope and send; the context manager closes the connection even if sending fails
    with smtplib.SMTP('localhost') as s:
        s.sendmail(sender, receivers, msg.as_string())
def get_github_payload():
    '''Return the github-style JSON encoded payload (as if we were called as a github webhook).
    The payload is read from stdin and decoded as UTF-8. If it cannot be read or parsed, {} is returned.'''
    try:
        data = sys.stdin.buffer.read()
        return json.loads(data.decode('utf-8'))
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt or SystemExit like a bare "except:" would.
        return {} # nothing read
class Repo:
    '''A local repository together with the mirrors it is kept in sync with.'''

    def __init__(self, conf):
        '''Set up the repository from one section <conf> of the git-mirror configuration file: "local" is the
        directory of the local repository, and every key "mirror-<id>" declares a mirror <id> with its URL.'''
        self.local = conf['local']
        mirror_prefix = 'mirror-'
        # maps mirrors to their URLs
        self.mirrors = {key[len(mirror_prefix):]: conf[key] for key in conf.keys() if key.startswith(mirror_prefix)}

    def find_mirror_by_url(self, match_urls):
        '''Return the name of a mirror whose URL is contained in <match_urls>, or None if there is no such mirror.'''
        candidates = (mirror for mirror, url in self.mirrors.items() if url in match_urls)
        return next(candidates, None)

    def have_ref(self, ref, url=None):
        '''Check whether <ref> exists: in the repository at <url> if one is given, locally otherwise.'''
        if url is not None:
            out, code = git.ls_remote(url, ref)
        else:
            out, code = git.show_ref(ref, check = False)
            # a failure that produced output is a real error, not just a missing ref
            if code and len(out):
                raise Exception("Checking for a local ref failed")
        # the ref exists iff we have output
        return len(out) > 0

    def update_mirrors(self, ref, delete, exception = None, suppress_stderr = False):
        '''Bring <ref> on every mirror other than <exception> to the local state: push it forcibly if it
        exists locally, delete it remotely otherwise. <delete> is accepted but not consulted.'''
        targets = [mirror for mirror in self.mirrors if mirror != exception]
        for mirror in targets:
            url = self.mirrors[mirror]
            if self.have_ref(ref):
                # update ref remotely
                git.push('--force', url, ref, capture_stderr = suppress_stderr)
            else:
                # delete ref remotely
                git.push(url, ':'+ref, capture_stderr = suppress_stderr)

    def update_ref(self, ref, source, suppress_stderr = False):
        '''Propagate the state <ref> has in <source> to everywhere else. <source> is the name of a mirror,
        or None if the local repository already has the latest state.'''
        os.chdir(self.local)
        if source is None:
            # The local state is authoritative, so only the mirrors need to be updated.
            self.update_mirrors(ref, delete = not self.have_ref(ref), suppress_stderr = suppress_stderr)
            return
        url = self.mirrors[source]
        exists_on_source = self.have_ref(ref, url)
        if exists_on_source:
            # update local ref to remote state (yes, there's a race condition here - the ref could no longer exist by now)
            git.fetch(url, ref+":"+ref)
        else:
            # the ref is gone on the source, so delete it locally as well
            git.update_ref("-d", ref)
        # and now everywhere else (except for the source)
        self.update_mirrors(ref, delete = not exists_on_source, exception = source, suppress_stderr = suppress_stderr)
def find_repo_by_directory(repos, dir):
    '''Return the name of the first repository in <repos> (a dict mapping names to repositories) whose
    local directory is exactly <dir>, or None if there is no such repository.'''
    matching_names = (name for name, repo in repos.items() if dir == repo.local)
    return next(matching_names, None)
# Entry point: act either as a git hook or as a github-style web hook, depending on the command line.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Keep git repositories in sync')
    parser.add_argument("--git-hook", dest="git_hook", action="store_true",
                        help="Act as git hook: Auto-detect the repository based on the working directoy, and fetch information from stdin the way git encodes it")
    parser.add_argument("--web-hook", dest="web_hook", action="store_true",
                        help="Act as github-style web hook: Repository has to be given explicitly, all the rest is read from stdin JSON form")
    parser.add_argument("-r", "--repository", dest="repository",
                        help="The name of the repository to act on")
    args = parser.parse_args()
    if args.git_hook and args.web_hook:
        raise Exception("I cannot be two hooks at once.")
    try:
        # All arguments are *untrusted* input, as we may be called via sudo from the webserver. So we fix the configuration file location.
        conf = read_config(os.path.join(os.path.dirname(__file__), 'git-mirror.conf'))
        repos = {name: Repo(section) for name, section in conf.items() if name != 'DEFAULT'}
        # find the repository we are dealing with; a git hook may detect it from the working directory
        reponame = args.repository
        if args.git_hook and reponame is None:
            reponame = find_repo_by_directory(repos, os.getcwd())
        if reponame is None or reponame not in repos:
            raise Exception("Unknown or missing repository name.")
        # now sync this repository
        repo = repos[reponame]
        if args.git_hook:
            # git sends one "<oldsha> <newsha> <ref>" line per updated ref; only the ref is used
            for line in sys.stdin:
                _, _, ref = line.split()
                repo.update_ref(ref, source = None)
        elif args.web_hook:
            data = get_github_payload()
            ref = data["ref"]
            # validate the ref name
            if re.match('refs/[a-z/]+', ref) is None:
                raise Exception("Invalid ref name {0}".format(ref))
            # collect URLs of this repository to find out which mirror is calling us
            urls = [data["repository"][key] for key in ("git_url", "ssh_url", "clone_url")]
            source = repo.find_mirror_by_url(urls)
            if source is None:
                raise Exception("Could not find the source.")
            repo.update_ref(ref, source = source, suppress_stderr = True)
            # print an answer
            print("Content-Type: text/plain")
            print()
            print("Updated {0}:{1} from source {2}".format(reponame, ref, source))
        else:
            raise Exception("No manual mode is implemented so far.")
    except Exception as e:
        if args.web_hook:
            # don't leak filenames etc. when we are running as a web hook
            print("Status: 500 Internal Server Error")
            print("Content-Type: text/plain")
            print()
            print(str(e))
        else:
            # git hook and manual mode both get the full traceback
            traceback.print_exc()

66
webhook-core.py Executable file
View file

@ -0,0 +1,66 @@
#!/usr/bin/python3
# Copyright (c) 2014, Ralf Jung <post@ralfj.de>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#==============================================================================
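# This is the core of the github-style webhook. It takes the repository name as its first argument and the JSON payload on stdin, updates the local repository from the mirror that sent the payload, and then updates all the other mirrors.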
import sys, traceback
from git_mirror import *
# Entry point of the webhook core: the repository name is the first argument, the github-style payload comes
# from stdin. The answer is printed to stdout as a CGI response.
if __name__ == "__main__":
    repo = None # we will try to use this during exception handling
    try:
        repos = load_repos()
        reponame = sys.argv[1] if len(sys.argv) > 1 else None
        if reponame not in repos:
            raise Exception("Repository missing or not found.")
        repo = repos[reponame]
        # now sync this repository
        data = get_github_payload()
        ref = data["ref"]
        oldsha = data["before"]
        newsha = data["after"]
        # validate the ref name
        # NOTE(review): re.match only anchors at the start, so this merely checks the prefix of the ref name - confirm that this is intended.
        if re.match('refs/[a-z/]+', ref) is None:
            raise Exception("Invalid ref name {0}".format(ref))
        # collect URLs of this repository, to find the mirror name
        urls = [data["repository"][key] for key in ("git_url", "ssh_url", "clone_url")]
        mirror = repo.find_mirror_by_url(urls)
        if mirror is None:
            raise Exception("Could not find the mirror.")
        repo.update_ref_from_mirror(ref, oldsha, newsha, mirror, suppress_stderr = True)
        # print an answer
        print("Content-Type: text/plain")
        print()
        print("Updated {0}:{1} from mirror {2} from {3} to {4}".format(reponame, ref, mirror, oldsha, newsha))
    except Exception as e:
        if repo is not None:
            report = "There was a problem running the git-mirror webhook:\n\n{0}".format(traceback.format_exc())
            try:
                repo.mail_owner(report)
            except Exception as mail_error:
                # Failing to send the report must not prevent us from answering the request below.
                sys.stderr.write("Could not send the problem report:\n{0}".format('\n'.join(traceback.format_exception_only(type(mail_error), mail_error))))
        # do not print all the details
        print("Status: 500 Internal Server Error")
        print("Content-Type: text/plain")
        print()
        print("We have a problem:\n{0}".format('\n'.join(traceback.format_exception_only(type(e), e))))

View file

@ -10,6 +10,7 @@ def is_github(remote_addr):
for net in github['hooks']:
if remote_addr in ip_network(net):
return True
return False
# get repository from query string
query = os.getenv("QUERY_STRING")
@ -18,5 +19,5 @@ repository = query.get('repository', [])
repository = repository[0] if len(repository) else ''
# execute the actual script
git_mirror = "/home/ralf/git-mirror/update.py"
os.execlp("sudo", "sudo", "-n", "-u", "git", git_mirror, "--web-hook", "--repository", repository)
webhook_core = "/home/ralf/git-mirror/webhook-core.py"
os.execlp("sudo", "sudo", "-n", "-u", "git", webhook_core, repository)