mirror of
https://github.com/hoxu/gitstats.git
synced 2026-03-01 18:23:26 +01:00
377 lines
10 KiB
Python
Executable File
377 lines
10 KiB
Python
Executable File
#!/usr/bin/python
|
|
# Copyright (c) 2007 Heikki Hokkanen <hoxu@users.sf.net>
|
|
# GPLv2
|
|
import commands
|
|
import datetime
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
def getoutput(cmd):
    """Run *cmd* through the shell and return what it printed.

    The command line is echoed to stdout (prefixed with '>> ') before it is
    executed. stdout and stderr of the command are merged, and exactly one
    trailing newline is removed from the result, which is the contract of
    the old ``commands.getoutput()`` this helper used to wrap.

    cmd -- shell command line; pipelines are allowed because it is handed
           to the shell as-is
    """
    # Local import: the ``commands`` module this function used to rely on is
    # deprecated since Python 2.6 and gone in Python 3, while subprocess
    # exists on both.
    import subprocess

    print('>> %s' % cmd)
    process = subprocess.Popen(cmd, shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               universal_newlines=True)
    output = process.communicate()[0]
    # Drop a single trailing newline only, never all trailing whitespace.
    if output.endswith('\n'):
        output = output[:-1]
    return output
|
|
|
|
def getkeyssortedbyvalues(dict):
    """Return the keys of *dict* as a list, ordered by ascending value.

    Keys that share the same value are ordered among themselves by key,
    because the sort runs on (value, key) pairs.

    A real list is returned on every Python version, so callers can
    reverse() or index the result.
    """
    ordered_pairs = sorted((value, key) for key, value in dict.items())
    return [key for value, key in ordered_pairs]
|
|
|
|
class DataCollector:
    """Interface for repository statistics collectors.

    Every getter here returns a placeholder value; concrete collectors
    override them with real data.
    """

    def __init__(self):
        """Nothing to initialise."""

    def collect(self, dir):
        """Main entry point for extracting data from the repository.

        This implementation only remembers *dir* on the instance.
        """
        self.dir = dir

    def getAuthorInfo(self, author):
        """Return a dictionary describing *author*; placeholder: None."""
        return None

    def getActivityByDayOfWeek(self):
        """Return commit activity per day of week; placeholder: {}."""
        return {}

    def getActivityByHourOfDay(self):
        """Return commit activity per hour of day; placeholder: {}."""
        return {}

    def getAuthors(self):
        """Return a list of authors; placeholder: []."""
        return []

    def getFirstCommitDate(self):
        """Return the first commit date; placeholder: the current time."""
        return datetime.datetime.now()

    def getLastCommitDate(self):
        """Return the last commit date; placeholder: the current time."""
        return datetime.datetime.now()

    def getTags(self):
        """Return a list of tags; placeholder: []."""
        return []

    def getTotalAuthors(self):
        """Return the number of authors; placeholder: -1."""
        return -1

    def getTotalCommits(self):
        """Return the number of commits; placeholder: -1."""
        return -1

    def getTotalFiles(self):
        """Return the number of files; placeholder: -1."""
        return -1

    def getTotalLOC(self):
        """Return the number of lines of code; placeholder: -1."""
        return -1
|
|
|
|
class GitDataCollector(DataCollector):
    """Collect statistics by running git commands via getoutput().

    The git commands carry no repository argument, so they operate on
    whatever repository the current working directory belongs to.
    """

    def collect(self, dir):
        """Gather all statistics and store them on the instance.

        dir -- only recorded as self.dir; the git commands themselves run in
               the current working directory.

        Raises ValueError when one of the counting commands does not print
        an integer.
        """
        DataCollector.collect(self, dir)

        # Use the "git <command>" spelling: the dashed "git-<command>"
        # programs are not installed on PATH any more since git 1.6.
        self.total_authors = int(getoutput('git log |git shortlog -s |wc -l'))
        self.total_commits = int(getoutput('git rev-list HEAD |wc -l'))
        self.total_files = int(getoutput('git ls-files |wc -l'))
        # NUL-separated names keep paths with whitespace or quotes intact.
        self.total_lines = int(getoutput('git ls-files -z |xargs -0 cat |wc -l'))

        self.activity_by_hour_of_day = {}  # hour -> commits
        self.activity_by_day_of_week = {}  # day -> commits

        self.authors = {}  # name -> {commits, first_commit_stamp, last_commit_stamp}

        # author of the month
        self.author_of_month = {}  # month -> author -> commits
        self.author_of_year = {}  # year -> author -> commits
        self.commits_by_month = {}  # month -> commits
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        # TODO also collect statistics for "last 30 days"/"last 12 months"
        lines = getoutput('git rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
        for line in lines:
            # Empty output splits into [''], which is not a commit.
            if line:
                self._recordCommit(line)

    def _recordCommit(self, line):
        """Fold one '<unix timestamp> <author name>' line into the statistics."""
        # linux-2.6 says "<unknown>" for one line O_o
        parts = line.split(' ')
        try:
            stamp = int(parts[0])
        except ValueError:
            stamp = 0
        # Joining an empty tail yields '', the author used for bare lines.
        author = ' '.join(parts[1:])
        # Convert once and reuse for hour, weekday, month and year.
        date = datetime.datetime.fromtimestamp(stamp)

        # First and last commit stamp: take min/max instead of trusting the
        # output order, and ignore the 0 placeholder of unparsable lines.
        if stamp > 0:
            if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
                self.first_commit_stamp = stamp
            if stamp > self.last_commit_stamp:
                self.last_commit_stamp = stamp

        # activity
        hour = date.hour
        self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
        day = date.weekday()
        self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

        # author stats
        if author not in self.authors:
            self.authors[author] = {
                'commits': 0,
                'first_commit_stamp': stamp,
                'last_commit_stamp': stamp,
            }
        info = self.authors[author]
        info['commits'] += 1
        if stamp > 0:
            if info['first_commit_stamp'] == 0 or stamp < info['first_commit_stamp']:
                info['first_commit_stamp'] = stamp
            if stamp > info['last_commit_stamp']:
                info['last_commit_stamp'] = stamp

        # author of the month/year
        yymm = date.strftime('%Y-%m')
        month_authors = self.author_of_month.setdefault(yymm, {})
        month_authors[author] = month_authors.get(author, 0) + 1
        self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

        year_authors = self.author_of_year.setdefault(date.year, {})
        year_authors[author] = year_authors.get(author, 0) + 1

    def getActivityByDayOfWeek(self):
        """Return the dict weekday (0-6, from date.weekday()) -> commits."""
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        """Return the dict hour (0-23) -> commits."""
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return the statistics of one author as a dict.

        Keys: 'commits' (count), 'commits_frac' (percentage of all commits),
        'date_first' and 'date_last' (formatted as YYYY-MM-DD).

        Raises KeyError when *author* was not seen by collect().
        """
        a = self.authors[author]

        commits = a['commits']
        commits_frac = (100 * float(commits)) / self.getTotalCommits()
        date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp']).strftime('%Y-%m-%d')
        date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp']).strftime('%Y-%m-%d')

        res = {'commits': commits, 'commits_frac': commits_frac, 'date_first': date_first, 'date_last': date_last}
        return res

    def getAuthors(self):
        """Return the sorted names of the authors recorded by collect().

        Answering from the collected data guarantees that getAuthorInfo()
        works for every returned name; a separate history walk over all refs
        could list authors that collect() (which walks HEAD) never saw.
        """
        return sorted(self.authors)

    def getTags(self):
        """Return the list of tag names; empty when there are no tags."""
        prefix = 'refs/tags/'
        tags = []
        for line in getoutput('git show-ref --tags').split('\n'):
            # Lines look like "<sha> refs/tags/<name>"; anything else
            # (blank output, error text) is skipped.
            ref = line.split(' ', 1)[-1]
            if ref.startswith(prefix):
                # Keep everything after the prefix so names containing '/'
                # are not truncated.
                tags.append(ref[len(prefix):])
        return tags

    def getTagDate(self, tag):
        """Return the date (YYYY-MM-DD) of the commit that *tag* points to."""
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        """Return the author count computed by collect()."""
        return self.total_authors

    def getTotalCommits(self):
        """Return the commit count computed by collect()."""
        return self.total_commits

    def getTotalFiles(self):
        """Return the tracked-file count computed by collect()."""
        return self.total_files

    def getTotalLOC(self):
        """Return the total line count computed by collect()."""
        return self.total_lines

    def revToDate(self, rev):
        """Return the date (YYYY-MM-DD) of revision *rev*.

        Raises ValueError when git does not print a timestamp, for example
        because the revision does not exist.
        """
        # Single-quote the revision for the shell; ref names may legally
        # contain characters such as '$' or '`' that are live inside
        # double quotes.
        quoted = "'" + rev.replace("'", "'\\''") + "'"
        stamp = int(getoutput('git log --pretty=format:%%at %s -n 1' % quoted))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
|
|
|
|
class ReportCreator:
    """Base class for report writers."""

    def __init__(self):
        """Nothing to initialise."""

    def create(self, data, path):
        """Remember the collected *data* and the output *path* on the instance."""
        self.data, self.path = data, path
|
|
|
|
class HTMLReportCreator(ReportCreator):
    """Write the collected statistics as a set of static HTML pages."""

    def create(self, data, path):
        """Write index.html, activity.html and authors.html into *path*.

        data -- collector object; besides the getter methods, its
                author_of_month, author_of_year and commits_by_month
                dictionaries are read directly
        path -- directory that receives the pages
        """
        ReportCreator.create(self, data, path)
        self._writeIndex(data, path)
        self._writeActivity(data, path)
        self._writeAuthors(data, path)

    def _escape(self, text):
        """Return *text* with the HTML-special characters &, <, > and " escaped."""
        # Author and tag names come from the repository and must not be able
        # to inject markup into the report.
        return (text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;')
                    .replace('"', '&quot;'))

    def _writeIndex(self, data, path):
        """Write index.html: general numbers, page links and the tag table."""
        # %M is minutes; '%H:%m:%S' would print the month number instead.
        stamp_format = '%Y-%m-%d %H:%M:%S'
        with open(os.path.join(path, 'index.html'), 'w') as f:
            self.printHeader(f)

            f.write('<h1>StatGit</h1>')

            f.write('<dl>')
            f.write('<dt>Generated</dt><dd>%s</dd>' % datetime.datetime.now().strftime(stamp_format))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(stamp_format), data.getLastCommitDate().strftime(stamp_format)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')

            f.write("""<ul>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
</ul>
""")

            f.write('<h2>Tags</h2>')
            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th><th>Developers</th></tr>')
            for tag in data.getTags():
                f.write('<tr><td>%s</td><td></td></tr>' % self._escape(tag))
            f.write('</table>')

            f.write('</body>\n</html>')

    def _writeActivity(self, data, path):
        """Write activity.html: commits per hour of day and per day of week."""
        with open(os.path.join(path, 'activity.html'), 'w') as f:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')

            f.write('<h2>Last 30 days</h2>')

            f.write('<h2>Last 12 months</h2>')

            f.write('\n<h2>Hour of Day</h2>\n\n')
            hour_of_day = data.getActivityByHourOfDay()
            totalcommits = data.getTotalCommits()
            hours = range(0, 24)
            f.write('<table><tr><th>Hour</th>')
            # Label the columns with the same 0-23 hours the cells below
            # are looked up by.
            for i in hours:
                f.write('<th>%d</th>' % i)
            f.write('</tr>\n<tr><th>Commits</th>')
            for i in hours:
                f.write('<td>%d</td>' % hour_of_day.get(i, 0))
            f.write('</tr>\n<tr><th>%</th>')
            for i in hours:
                if i in hour_of_day:
                    f.write('<td>%.2f</td>' % ((100.0 * hour_of_day[i]) / totalcommits))
                else:
                    f.write('<td>0.00</td>')
            f.write('</tr></table>')

            ### Day of Week
            # TODO show also by hour of weekday?
            f.write('\n<h2>Day of Week</h2>\n\n')
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            for d in range(0, 7):
                f.write('<tr>')
                f.write('<th>%d</th>' % (d + 1))
                if d in day_of_week:
                    f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
                else:
                    f.write('<td>0</td>')
                f.write('</tr>')
            f.write('</table>')

    def _writeAuthors(self, data, path):
        """Write authors.html: author list, author of month, author of year."""
        with open(os.path.join(path, 'authors.html'), 'w') as f:
            self.printHeader(f)

            f.write('<h1>Authors</h1>')

            f.write('\n<h2>List of authors</h2>\n\n')

            f.write('<table class="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
            for author in data.getAuthors():
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (self._escape(author), info['commits'], info['commits_frac'], info['date_first'], info['date_last']))
            f.write('</table>')

            f.write('\n<h2>Author of Month</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
            for yymm in sorted(data.author_of_month.keys(), reverse=True):
                authordict = data.author_of_month[yymm]
                # The last key in ascending order is the busiest author;
                # list() keeps this working if the helper returns an iterator.
                top_author = list(getkeyssortedbyvalues(authordict))[-1]
                commits = authordict[top_author]
                month_total = data.commits_by_month[yymm]
                # 100.0 forces float division so the fraction is not
                # truncated under Python 2.
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (yymm, self._escape(top_author), commits, (100.0 * commits) / month_total, month_total))

            f.write('</table>')

            f.write('\n<h2>Author of Year</h2>\n\n')
            f.write('<table><tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
            for yy in sorted(data.author_of_year.keys(), reverse=True):
                authordict = data.author_of_year[yy]
                top_author = list(getkeyssortedbyvalues(authordict))[-1]
                commits = authordict[top_author]
                f.write('<tr><td>%s</td><td>%s</td><td>%d</td></tr>' % (yy, self._escape(top_author), commits))
            f.write('</table>')

            f.write('</body></html>')

    def printHeader(self, f):
        """Write the common HTML head and the opening <body> tag to *f*."""
        f.write("""<html>
<head>
<title>StatGit</title>
<link rel="stylesheet" href="statgit.css" type="text/css" />
</head>
<body>
""")
|
|
|
|
|
|
# Command-line entry: statgit <gitpath> <outputpath>
usage = """
Usage: statgit [options] <gitpath> <outputpath>

Options:
-o html
"""

if len(sys.argv) < 3:
    print(usage)
    sys.exit(0)

gitpath = sys.argv[1]
# Make the output directory absolute before the chdir() below; otherwise a
# relative <outputpath> would be resolved inside the repository instead of
# the directory the user started from.
outputpath = os.path.abspath(sys.argv[2])

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The collector's git commands operate on the current working directory.
os.chdir(gitpath)

print('Collecting data...')
data = GitDataCollector()
data.collect(gitpath)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)
|
|
|
|
|