Files
gitstats-mirror/statgit
2007-08-02 16:30:24 +03:00

279 lines
7.0 KiB
Python
Executable File

#!/usr/bin/python
# Copyright (c) 2007 Heikki Hokkanen <hoxu@users.sf.net>
# GPLv2
import commands
import datetime
import os
import re
import sys
def getoutput(cmd):
    """Run cmd through the shell and return its combined stdout/stderr text.

    The command line is echoed to stdout first as a progress trace. One
    trailing newline is removed from the captured output, which is what
    commands.getoutput() (the call this replaces) returned.

    cmd -- shell command line; pipes and redirections are allowed.
    """
    # Local import keeps the function self-contained. subprocess replaces the
    # 'commands' module, which is not available on Python 3.
    import subprocess

    print('>> %s' % cmd)
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT, universal_newlines=True)
    output = proc.communicate()[0]
    # Strip exactly one trailing newline, not all trailing whitespace.
    if output[-1:] == '\n':
        output = output[:-1]
    return output
class DataCollector:
    """Interface for repository statistics collectors.

    Every getter in this base class returns a fixed placeholder value.
    """

    def __init__(self):
        """Nothing is initialised in the base collector."""

    def collect(self, dir):
        """Main entry point for extracting data from the repository.

        The base implementation only records dir on the instance.
        """
        self.dir = dir

    # -- authors ----------------------------------------------------------

    def getAuthors(self):
        """Get a list of authors (placeholder: empty list)."""
        return []

    def getAuthorInfo(self, author):
        """Get a dictionary describing author (placeholder: None)."""
        return None

    def getTotalAuthors(self):
        """Number of authors (placeholder: -1)."""
        return -1

    # -- activity ---------------------------------------------------------

    def getActivityByHourOfDay(self):
        """Activity by hour of day (placeholder: empty dict)."""
        return {}

    def getActivityByDayOfWeek(self):
        """Activity by day of week (placeholder: empty dict)."""
        return {}

    # -- history ----------------------------------------------------------

    def getFirstCommitDate(self):
        """Date of the first commit (placeholder: the current time)."""
        return datetime.datetime.now()

    def getLastCommitDate(self):
        """Date of the last commit (placeholder: the current time)."""
        return datetime.datetime.now()

    def getTags(self):
        """List of tags (placeholder: empty list)."""
        return []

    # -- totals -----------------------------------------------------------

    def getTotalCommits(self):
        """Number of commits (placeholder: -1)."""
        return -1

    def getTotalFiles(self):
        """Number of files (placeholder: -1)."""
        return -1

    def getTotalLOC(self):
        """Number of lines of code (placeholder: -1)."""
        return -1
class GitDataCollector(DataCollector):
    """DataCollector that gathers its numbers by running git commands.

    Every command goes through getoutput() and runs in the current working
    directory, so the process must already be inside the repository when
    collect() and the getters are called.
    """

    def collect(self, dir):
        """Compute the totals and the per-hour / per-weekday commit counts.

        dir -- repository path; recorded via DataCollector.collect().
        Raises ValueError if a command prints something that is not a number.
        """
        DataCollector.collect(self, dir)
        # 'git <cmd>' is used instead of the dashed 'git-<cmd>' programs,
        # which git releases since 1.6.0 no longer install on PATH.
        self.total_authors = int(getoutput('git log |git shortlog -s |wc -l'))
        self.total_commits = int(getoutput('git rev-list --all |wc -l'))
        self.total_files = int(getoutput('git ls-files |wc -l'))
        self.total_lines = int(getoutput('git ls-files |xargs cat |wc -l'))
        self.activity_by_hour_of_day = {}  # hour -> commits
        self.activity_by_day_of_week = {}  # day -> commits

        # One author timestamp (seconds since the epoch) per commit.
        stamps = getoutput('git rev-list --all --pretty=format:%at |grep -v ^commit').split('\n')
        for stamp in stamps:
            if not stamp.strip():
                # No commits: the pipeline prints nothing and split() gives [''].
                continue
            date = datetime.datetime.fromtimestamp(float(stamp))
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

    def _shellQuote(self, text):
        """Return text wrapped in single quotes so the shell takes it literally."""
        # A single quote cannot appear inside '...': close, escape it, reopen.
        return "'" + text.replace("'", "'\\''") + "'"

    def _nonEmptyLines(self, text):
        """Split text on newlines and drop empty entries."""
        return [line for line in text.split('\n') if line]

    def getActivityByDayOfWeek(self):
        """Return the {weekday: commits} dictionary built by collect()."""
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        """Return the {hour: commits} dictionary built by collect()."""
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return commit statistics for one author.

        The result is a dictionary with the keys 'commits' (int),
        'commits_frac' (percentage of all commits, float), 'date_first' and
        'date_last' ('YYYY-MM-DD' strings; '0000-00-00' when no commit was
        found for the author).
        """
        # Author names come from repository content, so quote them before
        # they reach the shell.
        author_arg = '--author=' + self._shellQuote(author)
        commits = int(getoutput('git rev-list --all %s |wc -l' % author_arg))
        total = self.getTotalCommits()
        if total:
            commits_frac = (100 * float(commits)) / total
        else:
            commits_frac = 0.0
        rev_last = getoutput('git rev-list --all %s -n 1' % author_arg)
        rev_first = getoutput('git rev-list --all %s |tail -n 1' % author_arg)
        date_first = '0000-00-00'
        date_last = '0000-00-00'
        if rev_first:
            date_first = self.revToDate(rev_first)
        if rev_last:
            date_last = self.revToDate(rev_last)
        return {'commits': commits, 'commits_frac': commits_frac, 'date_first': date_first, 'date_last': date_last}

    def getAuthors(self):
        """Return the sorted, de-duplicated list of author names."""
        lines = getoutput('git rev-list --all --pretty=format:%an |grep -v ^commit |sort |uniq')
        return self._nonEmptyLines(lines)

    def getTags(self):
        """Return the list of tag names; empty when the repository has no tags."""
        # -f3- keeps everything after 'refs/tags/', so tag names that contain
        # a slash are not truncated.
        lines = getoutput('git show-ref --tags |cut -d/ -f3-')
        return self._nonEmptyLines(lines)

    def getTagDate(self, tag):
        """Return the date of the given tag as 'YYYY-MM-DD'."""
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        """Return the author count computed by collect()."""
        return self.total_authors

    def getTotalCommits(self):
        """Return the commit count computed by collect()."""
        return self.total_commits

    def getTotalFiles(self):
        """Return the tracked-file count computed by collect()."""
        return self.total_files

    def getTotalLOC(self):
        """Return the total line count computed by collect()."""
        return self.total_lines

    def revToDate(self, rev):
        """Return the author date of rev as 'YYYY-MM-DD' (local time).

        Raises ValueError if git does not print a timestamp for rev.
        """
        stamp = int(getoutput('git log --pretty=format:%%at %s -n 1' % self._shellQuote(rev)))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
class ReportCreator:
    """Base class for report writers; it only remembers its inputs."""

    def __init__(self):
        """No state is set up at construction time."""

    def create(self, data, path):
        """Store the data source and the output path on the instance."""
        self.data, self.path = data, path
class HTMLReportCreator(ReportCreator):
    """Write the statistics report as static HTML pages.

    create() produces index.html (summary, authors, tags) and activity.html
    (commits by hour of day and by day of week) in the output directory.
    """

    def create(self, data, path):
        """Generate index.html and activity.html under path.

        data -- collector object providing the DataCollector getters.
        path -- directory the pages are written into.
        """
        ReportCreator.create(self, data, path)
        self._writeIndex(data, path)
        self._writeActivity(data, path)

    def _escape(self, text):
        """Return text with HTML special characters replaced by entities."""
        # '&' must be handled first so the entities added below stay intact.
        for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ('"', '&quot;')):
            text = text.replace(char, entity)
        return text

    def _writeIndex(self, data, path):
        """Write index.html: summary figures, author table and tag table."""
        # %M is minutes; '%H:%m:%S' would print the month in that position.
        stamp_format = '%Y-%m-%d %H:%M:%S'
        f = open(os.path.join(path, 'index.html'), 'w')
        try:
            self.printHeader(f)
            f.write('<h1>StatGit</h1>')
            f.write('<dl>')
            f.write('<dt>Generated</dt><dd>%s</dd>' % datetime.datetime.now().strftime(stamp_format))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(stamp_format), data.getLastCommitDate().strftime(stamp_format)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')
            f.write("""<ul>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
</ul>
""")
            f.write('<h2>Authors</h2>')
            f.write('<table class="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
            for author in data.getAuthors():
                info = data.getAuthorInfo(author)
                # Names come from repository content; escape them for HTML.
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (self._escape(author), info['commits'], info['commits_frac'], info['date_first'], info['date_last']))
            f.write('</table>')
            f.write('<h2>Tags</h2>')
            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th><th>Developers</th></tr>')
            for tag in data.getTags():
                f.write('<tr><td>%s</td><td></td></tr>' % self._escape(tag))
            f.write('</table>')
            f.write('</body>\n</html>')
        finally:
            # Close the file even when a getter or a write raises.
            f.close()

    def _writeActivity(self, data, path):
        """Write activity.html: commits per hour of day and per day of week."""
        f = open(os.path.join(path, 'activity.html'), 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            f.write('<h2>Last 30 days</h2>')
            f.write('<h2>Last 12 months</h2>')

            f.write('\n<h2>Hour of Day</h2>\n\n')
            hour_of_day = data.getActivityByHourOfDay()
            totalcommits = data.getTotalCommits()
            # Header labels use the same 0..23 range as the data columns, so
            # each count sits under the hour it belongs to.
            hours = range(0, 24)
            f.write('<table><tr><th>Hour</th>')
            for hour in hours:
                f.write('<th>%d</th>' % hour)
            f.write('</tr>\n<tr><th>Commits</th>')
            for hour in hours:
                f.write('<td>%d</td>' % hour_of_day.get(hour, 0))
            f.write('</tr>\n<tr><th>%</th>')
            for hour in hours:
                if hour in hour_of_day:
                    f.write('<td>%.2f</td>' % ((100.0 * hour_of_day[hour]) / totalcommits))
                else:
                    f.write('<td>0.00</td>')
            f.write('</tr></table>')

            ### Day of Week
            # TODO show also by hour of weekday?
            f.write('\n<h2>Day of Week</h2>\n\n')
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            for d in range(0, 7):
                f.write('<tr>')
                f.write('<th>%d</th>' % (d + 1))
                if d in day_of_week:
                    f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
                else:
                    f.write('<td>0</td>')
                f.write('</tr>')
            f.write('</table>')
            # Close the elements opened by printHeader(), as index.html does.
            f.write('</body>\n</html>')
        finally:
            f.close()

    def printHeader(self, f):
        """Write the shared page prologue (up to the opening <body>) to f."""
        f.write("""<html>
<head>
<title>StatGit</title>
<link rel="stylesheet" href="statgit.css" type="text/css" />
</head>
<body>
""")
# Command-line entry point: statgit <gitpath> <outputpath>
usage = """
Usage: statgit [options] <gitpath> <outputpath>
Options:
-o html
"""

if len(sys.argv) < 3:
    print(usage)
    sys.exit(0)

gitpath = sys.argv[1]
# Resolve the output directory before the chdir below; a relative
# <outputpath> would otherwise be interpreted relative to the repository.
outputpath = os.path.abspath(sys.argv[2])

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The collector runs its git commands in the current working directory.
os.chdir(gitpath)

print('Collecting data...')
data = GitDataCollector()
data.collect(gitpath)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)