Files
gitstats-mirror/gitstats
Heikki Hokkanen 0212bf1ce9 Tags: show commits & authors for each tag.
A simple assumption is made about the tags: it's assumed that they come in
chronological order, and share the same history (eg. v0.0.1, v0.0.2). That way,
to get commits made for v0.0.2, we simply look for history from v0.0.1 to
v0.0.2.
2009-06-18 21:07:35 +03:00

970 lines
29 KiB
Python
Executable File

#!/usr/bin/env python
# Copyright (c) 2007-2009 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
# GPLv2 / GPLv3
import datetime
import glob
import os
import pickle
import re
import shutil
import subprocess
import sys
import time
import zlib
# Preamble written at the top of every generated gnuplot script.
GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
# Maximum file extension length.
MAX_EXT_LENGTH = 10

# Timing bookkeeping: overall run time and the share spent in external commands.
exectime_internal = 0.0
exectime_external = 0.0
time_start = time.time()

# gnuplot is looked up on the path by default; the environment variable
# "GNUPLOT" overrides the command to run.
gnuplot_cmd = os.environ.get('GNUPLOT', 'gnuplot')
def getpipeoutput(cmds, quiet = False):
    """Run shell commands chained as a pipeline and return the final output.

    cmds  -- list of shell command strings; the stdout of each command is
             connected to the stdin of the next, like "a | b | c".
    quiet -- when true, neither the command line nor its timing is printed.

    Returns the stdout of the last command with trailing newlines removed.
    The elapsed wall-clock time is added to the global exectime_external.
    Raises IndexError when cmds is empty.
    """
    global exectime_external
    start = time.time()
    pipeline = ' | '.join(cmds)
    if not quiet:
        sys.stdout.write('>> ' + pipeline)
        sys.stdout.flush()
    procs = []
    for cmd in cmds:
        source = None
        if procs:
            source = procs[-1].stdout
        procs.append(subprocess.Popen(cmd, stdin = source, stdout = subprocess.PIPE, shell = True))
        if source is not None:
            # The child owns its copy of the pipe now; closing ours lets the
            # upstream command get SIGPIPE if the downstream one exits early.
            source.close()
    output = procs[-1].communicate()[0]
    # Reap the upstream commands so they do not linger as zombies.
    for proc in procs[:-1]:
        proc.wait()
    end = time.time()
    if not quiet:
        print('\r[%.5f] >> %s' % (end - start, pipeline))
    exectime_external += (end - start)
    if not isinstance(output, str):
        # Pipes may deliver bytes instead of str; callers expect text.
        output = output.decode('utf-8', 'replace')
    return output.rstrip('\n')
def getkeyssortedbyvalues(dict):
    """Return the keys of `dict` ordered by ascending value.

    Keys that share a value are ordered by the keys themselves.
    """
    pairs = [(value, key) for (key, value) in dict.items()]
    pairs.sort()
    return [key for (value, key) in pairs]
# For a dict of dicts such as d['author'] = { 'commits': 512 }, order the
# outer keys by one field of the inner dicts: getkeyssortedbyvaluekey(d, 'commits')
def getkeyssortedbyvaluekey(d, key):
    """Return the keys of `d` ordered by ascending d[k][key].

    Keys whose field values are equal are ordered by the keys themselves.
    """
    decorated = [(d[name][key], name) for name in d.keys()]
    decorated.sort()
    return [name for (field, name) in decorated]
# Cached result of getversion(); 0 means "not looked up yet".
VERSION = 0
def getversion():
    """Return the abbreviated hash of HEAD as reported by git.

    "git rev-parse --short HEAD" is run on the first call only; the first
    line of its output is stored in the module-level VERSION and returned
    from there afterwards.
    """
    global VERSION
    if VERSION != 0:
        return VERSION
    VERSION = getpipeoutput(["git rev-parse --short HEAD"]).split('\n')[0]
    return VERSION
class DataCollector:
    """Manages data collection from a revision control repository.

    This base class only provides the cache handling and placeholder
    getters; subclasses override collect(), refine() and the getters.
    """
    def __init__(self):
        self.stamp_created = time.time()
        self.cache = {}

    ##
    # This should be the main function to extract data from the repository.
    # The base implementation only remembers the directory and derives the
    # project name from its last path component.
    def collect(self, dir):
        self.dir = dir
        self.projectname = os.path.basename(os.path.abspath(dir))

    ##
    # Load cacheable data from cachefile; a missing file leaves the cache
    # untouched.
    def loadCache(self, cachefile):
        if not os.path.exists(cachefile):
            return
        print('Loading cache...')
        # The cache is compressed binary data, so it is read in binary mode.
        f = open(cachefile, 'rb')
        try:
            raw = f.read()
        finally:
            f.close()
        # NOTE(review): unpickling executes whatever the file describes; the
        # cache file must come from a trusted location.
        try:
            self.cache = pickle.loads(zlib.decompress(raw))
        except zlib.error:
            # temporary hack to upgrade non-compressed caches
            self.cache = pickle.loads(raw)

    ##
    # Produce any additional statistics from the extracted data.
    def refine(self):
        pass

    ##
    # : get a dictionary of author
    def getAuthorInfo(self, author):
        return None

    def getActivityByDayOfWeek(self):
        return {}

    def getActivityByHourOfDay(self):
        return {}

    ##
    # Get a list of authors
    def getAuthors(self):
        return []

    def getFirstCommitDate(self):
        return datetime.datetime.now()

    def getLastCommitDate(self):
        return datetime.datetime.now()

    def getStampCreated(self):
        return self.stamp_created

    def getTags(self):
        return []

    def getTotalAuthors(self):
        return -1

    def getTotalCommits(self):
        return -1

    def getTotalFiles(self):
        return -1

    def getTotalLOC(self):
        return -1

    ##
    # Save cacheable data to filename as a zlib-compressed pickle.
    def saveCache(self, filename):
        print('Saving cache...')
        data = zlib.compress(pickle.dumps(self.cache))
        # Write to the file the caller asked for, in binary mode because the
        # payload is compressed.
        f = open(filename, 'wb')
        try:
            f.write(data)
        finally:
            f.close()
class GitDataCollector(DataCollector):
    """Collects statistics by running git commands in the current directory.

    collect() fills the attributes used by the report; refine() then adds
    ranking and date fields to the per-author records.
    """

    def collect(self, dir):
        """Gather all statistics. `dir` only determines the project name;
        the git commands themselves run in the current working directory."""
        DataCollector.collect(self, dir)
        try:
            self.total_authors = int(getpipeoutput(['git log', 'git shortlog -s', 'wc -l']))
        except ValueError:
            self.total_authors = 0

        self.activity_by_hour_of_day = {} # hour -> commits
        self.activity_by_day_of_week = {} # day -> commits
        self.activity_by_month_of_year = {} # month [1-12] -> commits
        self.activity_by_hour_of_week = {} # weekday -> hour -> commits
        self.activity_by_hour_of_day_busiest = 0
        self.activity_by_hour_of_week_busiest = 0

        self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}

        self.author_of_month = {} # month -> author -> commits
        self.author_of_year = {} # year -> author -> commits
        self.commits_by_month = {} # month -> commits
        self.commits_by_year = {} # year -> commits

        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        self._collectTags()
        self._collectRevisions()
        self._collectFilesByStamp()
        self._collectExtensions()
        self._collectLineStats()

    # Add one to counter[key], starting from zero, and return the new value.
    def _bump(self, counter, key):
        counter[key] = counter.get(key, 0) + 1
        return counter[key]

    # Fill self.tags: tag -> {stamp, hash, date, commits, authors}.
    def _collectTags(self):
        self.tags = {}
        for line in getpipeoutput(['git show-ref --tags']).split('\n'):
            if len(line) == 0:
                continue
            (sha, tag) = line.split(' ', 1)
            tag = tag.replace('refs/tags/', '')
            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%an" -n 1' % sha])
            if len(output) == 0:
                # nothing known about this tag; do not invent a date for it
                continue
            try:
                stamp = int(output.split(' ')[0])
            except ValueError:
                stamp = 0
            self.tags[tag] = { 'stamp': stamp, 'hash' : sha, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }

        # Commits and authors per tag. Tags are assumed to come in
        # chronological order and to share history, so the commits of a tag
        # are those reachable from it but not from the previous tag.
        dated = [(info['date'], tag) for (tag, info) in self.tags.items()]
        dated.sort()
        prev = None
        for (date, tag) in dated:
            cmd = 'git shortlog -s "%s"' % tag
            if prev is not None:
                cmd += ' "^%s"' % prev
            output = getpipeoutput([cmd])
            prev = tag
            for line in output.split('\n'):
                # "<commits>\t<author>", the count possibly padded on the left;
                # the output is empty when the range contains no commits
                parts = line.strip().split(None, 1)
                if len(parts) != 2:
                    continue
                try:
                    commits = int(parts[0])
                except ValueError:
                    print('Warning: unexpected line "%s"' % line)
                    continue
                author = parts[1]
                self.tags[tag]['commits'] += commits
                self.tags[tag]['authors'][author] = commits

    # Collect revision statistics: activity tables, per-author records and
    # author-of-month/year counters.
    def _collectRevisions(self):
        # Outputs "<stamp> <author>"
        lines = getpipeoutput(['git rev-list --pretty=format:"%at %an" HEAD', 'grep -v ^commit']).split('\n')
        for line in lines:
            if len(line) == 0:
                continue
            # linux-2.6 says "<unknown>" for one line O_o
            parts = line.split(' ')
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            author = ' '.join(parts[1:])
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp: rev-list emits the newest commit
            # first, so the first stamp seen is the last commit and every
            # later line replaces the candidate for the first commit.
            if self.last_commit_stamp == 0:
                self.last_commit_stamp = stamp
            self.first_commit_stamp = stamp

            # activity
            hour = date.hour
            count = self._bump(self.activity_by_hour_of_day, hour)
            if count > self.activity_by_hour_of_day_busiest:
                self.activity_by_hour_of_day_busiest = count

            day = date.weekday()
            self._bump(self.activity_by_day_of_week, day)

            count = self._bump(self.activity_by_hour_of_week.setdefault(day, {}), hour)
            if count > self.activity_by_hour_of_week_busiest:
                self.activity_by_hour_of_week_busiest = count

            self._bump(self.activity_by_month_of_year, date.month)

            # author stats (same newest-first reasoning as above)
            info = self.authors.setdefault(author, {})
            if 'last_commit_stamp' not in info:
                info['last_commit_stamp'] = stamp
            info['first_commit_stamp'] = stamp
            self._bump(info, 'commits')

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            self._bump(self.author_of_month.setdefault(yymm, {}), author)
            self._bump(self.commits_by_month, yymm)

            yy = date.year
            self._bump(self.author_of_year.setdefault(yy, {}), author)
            self._bump(self.commits_by_year, yy)

    # Fill self.files_by_stamp (stamp -> files in tree) and self.total_commits.
    def _collectFilesByStamp(self):
        # TODO Optimize this, it's the worst bottleneck
        # outputs "<stamp> <tree>" for each revision
        self.files_by_stamp = {} # stamp -> files
        self.total_commits = 0
        revlines = getpipeoutput(['git rev-list --pretty=format:"%at %T" HEAD', 'grep -v ^commit']).strip().split('\n')
        for revline in revlines:
            parts = revline.split(' ')
            if len(parts) != 2:
                # e.g. the single empty line of a repository without commits
                continue
            self.total_commits += 1
            (stamp, tree) = parts
            try:
                stamp = int(stamp)
            except ValueError:
                print('Warning: failed to parse line "%s"' % revline)
                continue
            self.files_by_stamp[stamp] = self.getFilesInCommit(tree)

    # Fill self.extensions (extension -> {files, lines}) and self.total_files.
    def _collectExtensions(self):
        self.extensions = {} # extension -> files, lines
        lines = getpipeoutput(['git ls-files']).split('\n')
        self.total_files = len(lines)
        for line in lines:
            base = os.path.basename(line)
            dot = base.rfind('.')
            # Ignore extensionless (including .hidden files)
            if dot <= 0:
                ext = ''
            else:
                ext = base[(dot + 1):]
            if len(ext) > MAX_EXT_LENGTH:
                ext = ''

            if ext not in self.extensions:
                self.extensions[ext] = {'files': 0, 'lines': 0}
            self.extensions[ext]['files'] += 1
            try:
                # Escaping could probably be improved here
                self.extensions[ext]['lines'] += int(getpipeoutput(['wc -l "%s"' % line]).split()[0])
            except (ValueError, IndexError):
                print('Warning: Could not count lines for file "%s"' % line)

    # Parse a "git log --shortstat" summary line. Returns (files, inserted,
    # deleted), or None when the line is not a summary. Handles the
    # singular forms and a missing insertions or deletions part.
    def _parseShortstat(self, line):
        match = re.search(r'(\d+) files? changed', line)
        if match is None:
            return None
        files = int(match.group(1))
        inserted = 0
        deleted = 0
        match = re.search(r'(\d+) insertions?\s*\(\+\)', line)
        if match is not None:
            inserted = int(match.group(1))
        match = re.search(r'(\d+) deletions?\s*\(-\)', line)
        if match is not None:
            deleted = int(match.group(1))
        return (files, inserted, deleted)

    # Fill self.changes_by_date (stamp -> {files, ins, del, lines}) and
    # self.total_lines.
    def _collectLineStats(self):
        # outputs:
        #  <stamp> <author>
        #  N files changed, N insertions (+), N deletions(-)
        self.changes_by_date = {} # stamp -> { files, ins, del }
        lines = getpipeoutput(['git log --shortstat --pretty=format:"%at %an"']).split('\n')
        # Oldest first; after reversing, the summary of a commit comes
        # right before its "<stamp> <author>" line.
        lines.reverse()
        files = 0; inserted = 0; deleted = 0; total_lines = 0
        for line in lines:
            if len(line) == 0:
                continue
            stat = self._parseShortstat(line)
            if stat is not None:
                (files, inserted, deleted) = stat
                total_lines += inserted - deleted
                continue
            # <stamp> <author>
            pos = line.find(' ')
            if pos == -1:
                print('Warning: unexpected line "%s"' % line)
                continue
            try:
                stamp = int(line[:pos])
            except ValueError:
                print('Warning: unexpected line "%s"' % line)
                continue
            self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
            # a commit without a summary line must not inherit these numbers
            (files, inserted, deleted) = (0, 0, 0)
        self.total_lines = total_lines

    def refine(self):
        """Add place_by_commits, commits_frac, date_first, date_last and
        timedelta to every author record."""
        authors_by_commits = list(getkeyssortedbyvaluekey(self.authors, 'commits'))
        authors_by_commits.reverse() # most first
        for i, name in enumerate(authors_by_commits):
            self.authors[name]['place_by_commits'] = i + 1

        for name in self.authors.keys():
            a = self.authors[name]
            a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
            a['date_first'] = date_first.strftime('%Y-%m-%d')
            a['date_last'] = date_last.strftime('%Y-%m-%d')
            a['timedelta'] = date_last - date_first

    def getActivityByDayOfWeek(self):
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        return self.authors[author]

    def getAuthors(self):
        return self.authors.keys()

    # Whole days between the first and the last commit.
    def getCommitDeltaDays(self):
        return (self.last_commit_stamp - self.first_commit_stamp) // 86400

    # Number of files in the tree `rev`, remembered in the cache so that
    # "git ls-tree" runs only once per tree.
    def getFilesInCommit(self, rev):
        try:
            return self.cache['files_in_tree'][rev]
        except KeyError:
            pass
        res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
        self.cache.setdefault('files_in_tree', {})[rev] = res
        return res

    def getFirstCommitDate(self):
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    # Names of all tags; an empty list when there are none.
    def getTags(self):
        lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
        return [tag for tag in lines.split('\n') if len(tag) > 0]

    def getTagDate(self, tag):
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        return self.total_authors

    def getTotalCommits(self):
        return self.total_commits

    def getTotalFiles(self):
        return self.total_files

    def getTotalLOC(self):
        return self.total_lines

    # Author date of `rev` formatted as YYYY-MM-DD.
    def revToDate(self, rev):
        stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
class ReportCreator:
    """Creates the actual report based on given data."""
    def __init__(self):
        pass

    # Remember the collected data and the output directory; subclasses
    # extend this to write the actual report.
    def create(self, data, path):
        self.data, self.path = data, path
def html_linkify(text):
    """Turn a heading into an anchor name: lower case, spaces become underscores."""
    lowered = text.lower()
    return '_'.join(lowered.split(' '))
def html_header(level, text):
    """Return an <hN> heading whose text links to its own named anchor.

    level -- heading level N
    text  -- heading text; the anchor name is html_linkify(text)
    """
    anchor = html_linkify(text)
    opening = '<h%d><a href="#%s" name="%s">' % (level, anchor, anchor)
    closing = '</a></h%d>' % level
    return '\n%s%s%s\n\n' % (opening, text, closing)
class HTMLReportCreator(ReportCreator):
    """Writes the report as HTML pages plus gnuplot data files and scripts."""

    # Files copied from the directory of this script into every report.
    STATIC_FILES = ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif')

    # (basename, gnuplot commands) for each graph. Every script is written
    # as GNUPLOT_COMMON, an empty line, then these commands one per line.
    PLOT_SCRIPTS = (
        ('hour_of_day', (
            "set output 'hour_of_day.png'",
            'unset key',
            'set xrange [0.5:24.5]',
            'set xtics 4',
            'set ylabel "Commits"',
            "plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid",
        )),
        ('day_of_week', (
            "set output 'day_of_week.png'",
            'unset key',
            'set xrange [0.5:7.5]',
            'set xtics 1',
            'set ylabel "Commits"',
            "plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid",
        )),
        ('month_of_year', (
            "set output 'month_of_year.png'",
            'unset key',
            'set xrange [0.5:12.5]',
            'set xtics 1',
            'set ylabel "Commits"',
            "plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid",
        )),
        ('commits_by_year_month', (
            "set output 'commits_by_year_month.png'",
            'unset key',
            'set xdata time',
            'set timefmt "%Y-%m"',
            'set format x "%Y-%m"',
            'set xtics rotate by 90 15768000',
            'set bmargin 5',
            'set ylabel "Commits"',
            "plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid",
        )),
        ('commits_by_year', (
            "set output 'commits_by_year.png'",
            'unset key',
            'set xtics 1',
            'set ylabel "Commits"',
            'set yrange [0:]',
            "plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid",
        )),
        ('files_by_date', (
            "set output 'files_by_date.png'",
            'unset key',
            'set xdata time',
            'set timefmt "%Y-%m-%d"',
            'set format x "%Y-%m-%d"',
            'set ylabel "Files"',
            'set xtics rotate by 90',
            'set bmargin 6',
            "plot 'files_by_date.dat' using 1:2 w histeps",
        )),
        ('lines_of_code', (
            "set output 'lines_of_code.png'",
            'unset key',
            'set xdata time',
            'set timefmt "%s"',
            'set format x "%Y-%m-%d"',
            'set ylabel "Lines"',
            'set xtics rotate by 90',
            'set bmargin 6',
            "plot 'lines_of_code.dat' using 1:2 w lines",
        )),
    )

    def create(self, data, path):
        """Write the complete report for `data` into the directory `path`:
        static files, one HTML page per topic, the .dat files and graphs."""
        ReportCreator.create(self, data, path)
        self.title = data.projectname

        # copy static files (existing copies are overwritten)
        basedir = os.path.dirname(os.path.abspath(__file__))
        for name in self.STATIC_FILES:
            shutil.copyfile(basedir + '/' + name, path + '/' + name)

        self._writeIndex(data, path)
        self._writeActivity(data, path)
        self._writeAuthors(data, path)
        self._writeFiles(data, path)
        self._writeLines(data, path)
        self._writeTags(data, path)

        self.createGraphs(path)

    # Escape the characters that are special in (X)HTML text and attributes.
    # Author, tag and file names come from the repository and may contain them.
    def _esc(self, text):
        for (char, entity) in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ('"', '&quot;')):
            text = text.replace(char, entity)
        return text

    # part as a percentage of whole; 0.0 when whole is zero.
    def _percent(self, part, whole):
        if whole == 0:
            return 0.0
        return (100.0 * part) / whole

    # Red component (127..255) of a table cell with `commits` commits,
    # relative to the busiest cell of the same table.
    def _heat(self, commits, busiest):
        return 127 + int((float(commits) / busiest) * 128)

    # Write the given, already newline-terminated, lines to a data file.
    def _writeDataFile(self, filename, lines):
        fg = open(filename, 'w')
        try:
            fg.write(''.join(lines))
        finally:
            fg.close()

    # index.html: general project information
    def _writeIndex(self, data, path):
        stampformat = '%Y-%m-%d %H:%M:%S'
        f = open(path + "/index.html", 'w')
        try:
            self.printHeader(f)
            f.write('<h1>GitStats - %s</h1>' % self._esc(data.projectname))
            self.printNav(f)
            f.write('<dl>')
            f.write('<dt>Project name</dt><dd>%s</dd>' % self._esc(data.projectname))
            f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(stampformat), time.time() - data.getStampCreated()))
            f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s)</dd>' % getversion())
            f.write('<dt>Report Period</dt><dd>%s to %s (%d days)</dd>' % (data.getFirstCommitDate().strftime(stampformat), data.getLastCommitDate().strftime(stampformat), data.getCommitDeltaDays()))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')
            f.write('</body>\n</html>')
        finally:
            f.close()

    # activity.html and the .dat files of its graphs
    def _writeActivity(self, data, path):
        totalcommits = data.getTotalCommits()
        f = open(path + '/activity.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            self.printNav(f)

            # Hour of Day
            f.write(html_header(2, 'Hour of Day'))
            hour_of_day = data.getActivityByHourOfDay()
            busiest = data.activity_by_hour_of_day_busiest
            f.write('<table><tr><th>Hour</th>')
            for i in range(0, 24):
                f.write('<th>%d</th>' % i)
            f.write('</tr>\n<tr><th>Commits</th>')
            for i in range(0, 24):
                if i in hour_of_day:
                    f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (self._heat(hour_of_day[i], busiest), hour_of_day[i]))
                else:
                    f.write('<td>0</td>')
            f.write('</tr>\n<tr><th>%</th>')
            for i in range(0, 24):
                if i in hour_of_day:
                    f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (self._heat(hour_of_day[i], busiest), self._percent(hour_of_day[i], totalcommits)))
                else:
                    f.write('<td>0.00</td>')
            f.write('</tr></table>')
            f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
            # The plot uses xrange [0.5:24.5], so hours are written 1-based.
            self._writeDataFile(path + '/hour_of_day.dat', ['%d %d\n' % (i + 1, hour_of_day.get(i, 0)) for i in range(0, 24)])

            # Day of Week
            f.write(html_header(2, 'Day of Week'))
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<div class="vtable"><table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            rows = []
            for d in range(0, 7):
                rows.append('%d %d\n' % (d + 1, day_of_week.get(d, 0)))
                f.write('<tr>')
                f.write('<th>%d</th>' % (d + 1))
                if d in day_of_week:
                    f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], self._percent(day_of_week[d], totalcommits)))
                else:
                    f.write('<td>0</td>')
                f.write('</tr>')
            f.write('</table></div>')
            f.write('<img src="day_of_week.png" alt="Day of Week" />')
            self._writeDataFile(path + '/day_of_week.dat', rows)

            # Hour of Week
            f.write(html_header(2, 'Hour of Week'))
            f.write('<table>')
            f.write('<tr><th>Weekday</th>')
            for hour in range(0, 24):
                f.write('<th>%d</th>' % (hour))
            f.write('</tr>')
            for weekday in range(0, 7):
                f.write('<tr><th>%d</th>' % (weekday + 1))
                hours = data.activity_by_hour_of_week.get(weekday, {})
                for hour in range(0, 24):
                    commits = hours.get(hour, 0)
                    if commits != 0:
                        f.write('<td')
                        f.write(' style="background-color: rgb(%d, 0, 0)"' % self._heat(commits, data.activity_by_hour_of_week_busiest))
                        f.write('>%d</td>' % commits)
                    else:
                        f.write('<td></td>')
                f.write('</tr>')
            f.write('</table>')

            # Month of Year
            f.write(html_header(2, 'Month of Year'))
            f.write('<div class="vtable"><table>')
            f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
            rows = []
            for mm in range(1, 13):
                commits = data.activity_by_month_of_year.get(mm, 0)
                f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, self._percent(commits, totalcommits)))
                rows.append('%d %d\n' % (mm, commits))
            self._writeDataFile(path + '/month_of_year.dat', rows)
            f.write('</table></div>')
            f.write('<img src="month_of_year.png" alt="Month of Year" />')

            # Commits by year/month
            f.write(html_header(2, 'Commits by year/month'))
            f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
            months = sorted(data.commits_by_month.keys())
            for yymm in reversed(months):
                f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
            f.write('</table></div>')
            f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
            self._writeDataFile(path + '/commits_by_year_month.dat', ['%s %s\n' % (yymm, data.commits_by_month[yymm]) for yymm in months])

            # Commits by year
            f.write(html_header(2, 'Commits by Year'))
            f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
            years = sorted(data.commits_by_year.keys())
            for yy in reversed(years):
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], self._percent(data.commits_by_year[yy], totalcommits)))
            f.write('</table></div>')
            f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
            self._writeDataFile(path + '/commits_by_year.dat', ['%d %d\n' % (yy, data.commits_by_year[yy]) for yy in years])

            f.write('</body></html>')
        finally:
            f.close()

    # One "author of <period>" table row per period, newest period first.
    # winners: period -> author -> commits; totals: period -> commits.
    def _writeTopAuthors(self, f, winners, totals):
        for period in reversed(sorted(winners.keys())):
            authors = list(getkeyssortedbyvalues(winners[period]))
            authors.reverse()
            commits = winners[period][authors[0]]
            # the five authors following the winner
            runners_up = ', '.join([self._esc(name) for name in authors[1:6]])
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (period, self._esc(authors[0]), commits, self._percent(commits, totals[period]), totals[period], runners_up))

    # authors.html
    def _writeAuthors(self, data, path):
        f = open(path + '/authors.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Authors</h1>')
            self.printNav(f)

            # Authors :: List of authors
            f.write(html_header(2, 'List of Authors'))
            f.write('<table class="authors sortable" id="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th># by commits</th></tr>')
            for author in sorted(data.getAuthors()):
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>' % (self._esc(author), info['commits'], info['commits_frac'], info['date_first'], info['date_last'], info['timedelta'], info['place_by_commits']))
            f.write('</table>')

            # Authors :: Author of Month
            f.write(html_header(2, 'Author of Month'))
            f.write('<table class="sortable" id="aom">')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
            self._writeTopAuthors(f, data.author_of_month, data.commits_by_month)
            f.write('</table>')

            # Authors :: Author of Year
            f.write(html_header(2, 'Author of Year'))
            f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
            self._writeTopAuthors(f, data.author_of_year, data.commits_by_year)
            f.write('</table>')

            f.write('</body></html>')
        finally:
            f.close()

    # files.html and files_by_date.dat
    def _writeFiles(self, data, path):
        total_files = data.getTotalFiles()
        total_loc = data.getTotalLOC()
        f = open(path + '/files.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Files</h1>')
            self.printNav(f)

            f.write('<dl>\n')
            f.write('<dt>Total files</dt><dd>%d</dd>' % total_files)
            f.write('<dt>Total lines</dt><dd>%d</dd>' % total_loc)
            # Only line counts are collected, so the average size is
            # reported in lines per file rather than bytes.
            average = 0.0
            if total_files > 0:
                average = float(total_loc) / total_files
            f.write('<dt>Average file size</dt><dd>%.2f lines</dd>' % average)
            f.write('</dl>\n')

            # Files :: File count by date
            f.write(html_header(2, 'File count by date'))
            self._writeDataFile(path + '/files_by_date.dat', ['%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]) for stamp in sorted(data.files_by_stamp.keys())])
            f.write('<img src="files_by_date.png" alt="Files by Date" />')

            # Files :: Extensions
            f.write(html_header(2, 'Extensions'))
            f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
            for ext in sorted(data.extensions.keys()):
                files = data.extensions[ext]['files']
                lines = data.extensions[ext]['lines']
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (self._esc(ext), files, self._percent(files, total_files), lines, self._percent(lines, total_loc), lines // files))
            f.write('</table>')

            f.write('</body></html>')
        finally:
            f.close()

    # lines.html and lines_of_code.dat
    def _writeLines(self, data, path):
        f = open(path + '/lines.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Lines</h1>')
            self.printNav(f)

            f.write('<dl>\n')
            f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
            f.write('</dl>\n')

            f.write(html_header(2, 'Lines of Code'))
            f.write('<img src="lines_of_code.png" />')
            self._writeDataFile(path + '/lines_of_code.dat', ['%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']) for stamp in sorted(data.changes_by_date.keys())])

            f.write('</body></html>')
        finally:
            f.close()

    # tags.html
    def _writeTags(self, data, path):
        f = open(path + '/tags.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Tags</h1>')
            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
            if len(data.tags) > 0:
                f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (float(data.getTotalCommits()) / len(data.tags)))
            f.write('</dl>')

            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
            # sort the tags by date desc
            dated = [(info['date'], tag) for (tag, info) in data.tags.items()]
            dated.sort()
            dated.reverse()
            for (date, tag) in dated:
                tag_authors = data.tags[tag]['authors']
                authors_by_commits = list(getkeyssortedbyvalues(tag_authors))
                authors_by_commits.reverse()
                authorinfo = ['%s (%d)' % (self._esc(name), tag_authors[name]) for name in authors_by_commits]
                f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (self._esc(tag), date, data.tags[tag]['commits'], ', '.join(authorinfo)))
            f.write('</table>')

            f.write('</body></html>')
        finally:
            f.close()

    def createGraphs(self, path):
        """Write one .plot script per graph into `path`, then change into
        `path` and run gnuplot on every .plot file found there."""
        print('Generating graphs...')
        for (name, commands) in self.PLOT_SCRIPTS:
            f = open(path + '/' + name + '.plot', 'w')
            try:
                f.write(GNUPLOT_COMMON)
                f.write('\n' + '\n'.join(commands) + '\n')
            finally:
                f.close()

        # The scripts name their input and output files relative to the
        # report directory, so gnuplot has to run from there.
        os.chdir(path)
        files = glob.glob(path + '/*.plot')
        for f in files:
            out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
            if len(out) > 0:
                print(out)

    # Write the XHTML prologue and <head> up to the opening <body> tag.
    # The `title` argument is not used; the page title is self.title.
    def printHeader(self, f, title = ''):
        f.write(
"""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>GitStats - %s</title>
<link rel="stylesheet" href="gitstats.css" type="text/css" />
<meta name="generator" content="GitStats %s" />
<script type="text/javascript" src="sortable.js"></script>
</head>
<body>
""" % (self._esc(self.title), getversion()))

    # Write the navigation bar linking the pages of the report.
    def printNav(self, f):
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
usage = """
Usage: gitstats [options] <gitpath> <outputpath>
Options:
"""

# Both the repository path and the output path are required.
if len(sys.argv) < 3:
    print(usage)
    sys.exit(0)

gitpath = sys.argv[1]
outputpath = os.path.abspath(sys.argv[2])
# Remembered so that the report is generated from the directory the
# script was started in.
rundir = os.getcwd()

# Create the output directory if needed; whatever went wrong is caught
# by the directory check that follows.
try:
    os.makedirs(outputpath)
except OSError:
    pass
if not os.path.isdir(outputpath):
    print('FATAL: Output path is not a directory or does not exist')
    sys.exit(1)

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The git commands run in the current directory, so move into the repository.
os.chdir(gitpath)

cachefile = os.path.join(outputpath, 'gitstats.cache')

print('Collecting data...')
data = GitDataCollector()
data.loadCache(cachefile)
data.collect(gitpath)
print('Refining data...')
# The cache is stored before refining.
data.saveCache(cachefile)
data.refine()

os.chdir(rundir)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)

time_end = time.time()
exectime_internal = time_end - time_start
print('Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal))