implement a way to limit the statistics to commits after a start date

This is really useful when computing statistics over a set of
repositories, where some repositories are much older than others.

Signed-off-by: Heikki Hokkanen <hoxu@users.sf.net>
This commit is contained in:
Sylvain Joyeux
2014-05-29 22:01:23 +02:00
committed by Heikki Hokkanen
parent e56e7b6f91
commit 780c0fd57e

View File

@@ -48,6 +48,7 @@ conf = {
'project_name': '',
'merge_authors': {},
'processes': 8,
'start_date': ''
}
def getpipeoutput(cmds, quiet = False):
@@ -72,6 +73,12 @@ def getpipeoutput(cmds, quiet = False):
exectime_external += (end - start)
return output.rstrip('\n')
def getlogrange(defaultrange = 'HEAD', end_only = True):
    """Return the revision arguments used by the git log/rev-list/shortlog calls.

    The commit range is resolved through getcommitrange(). When
    conf['start_date'] is non-empty, a --since option is placed in front of
    the range so that only commits after that date are reported.

    defaultrange -- range passed through to getcommitrange()
    end_only     -- passed through to getcommitrange()
    """
    commit_range = getcommitrange(defaultrange, end_only)
    if len(conf['start_date']) > 0:
        # Quote the date: callers splice this string into a command line
        # (presumably run through a shell -- confirm in getpipeoutput), so an
        # unquoted value containing whitespace such as "2 weeks ago" or
        # "2014-01-01 12:00" would be split into separate arguments.
        return '--since="%s" %s' % (conf['start_date'], commit_range)
    return commit_range
def getcommitrange(defaultrange = 'HEAD', end_only = False):
if len(conf['commit_end']) > 0:
if end_only or len(conf['commit_begin']) == 0:
@@ -280,7 +287,7 @@ class GitDataCollector(DataCollector):
def collect(self, dir):
DataCollector.collect(self, dir)
self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l']))
self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(), 'wc -l']))
#self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
# tags
@@ -323,7 +330,7 @@ class GitDataCollector(DataCollector):
# Collect revision statistics
# Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).split('\n')
lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
for line in lines:
parts = line.split(' ', 4)
author = ''
@@ -432,7 +439,7 @@ class GitDataCollector(DataCollector):
self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1
# outputs "<stamp> <files>" for each revision
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
lines = []
revs_to_read = []
time_rev_count = []
@@ -534,7 +541,7 @@ class GitDataCollector(DataCollector):
extra = ''
if conf['linear_linestats']:
extra = '--first-parent -m'
lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getcommitrange('HEAD'))]).split('\n')
lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
lines.reverse()
files = 0; inserted = 0; deleted = 0; total_lines = 0
author = None
@@ -590,7 +597,7 @@ class GitDataCollector(DataCollector):
# Similar to the above, but never use --first-parent
# (we need to walk through every commit to know who
# committed what, not just through mainline)
lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getcommitrange('HEAD'))]).split('\n')
lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
lines.reverse()
files = 0; inserted = 0; deleted = 0
author = None