mirror of
https://github.com/hoxu/gitstats.git
synced 2026-03-01 18:23:26 +01:00
A simple assumption is made about the tags: they are assumed to come in chronological order and to share the same history (e.g. v0.0.1, v0.0.2). That way, to get the commits made for v0.0.2, we simply look at the history from v0.0.1 to v0.0.2.
970 lines
29 KiB
Python
Executable File
970 lines
29 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright (c) 2007-2009 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
|
|
# GPLv2 / GPLv3
|
|
import datetime
|
|
import glob
|
|
import os
|
|
import pickle
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import zlib
|
|
|
|
# Preamble written at the top of every generated gnuplot script.
GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
MAX_EXT_LENGTH = 10 # maximum file extension length

# Timing bookkeeping: exectime_external accumulates the time spent inside
# getpipeoutput(), exectime_internal is filled in at the end of the run.
exectime_internal = 0.0
exectime_external = 0.0
time_start = time.time()

# By default, gnuplot is searched from path, but can be overridden with the
# environment variable "GNUPLOT"
gnuplot_cmd = os.environ.get('GNUPLOT', 'gnuplot')
|
|
|
|
def getpipeoutput(cmds, quiet = False):
    """Run shell commands chained as a pipeline and return the final stdout.

    cmds  -- non-empty list of shell command strings; the stdout of each
             command is connected to the stdin of the next one.
    quiet -- when true, the pipeline and its run time are not echoed.

    Returns the output of the last command as text with trailing newlines
    removed. The elapsed wall-clock time is added to the module-level
    exectime_external counter.

    Every command is run with shell = True, so callers are responsible for
    quoting anything they interpolate into a command string.
    """
    global exectime_external
    start = time.time()
    pipeline = ' | '.join(cmds)
    if not quiet:
        sys.stdout.write('>> ' + pipeline)
        sys.stdout.flush()

    procs = []
    upstream = None
    for cmd in cmds:
        proc = subprocess.Popen(cmd, stdin = upstream, stdout = subprocess.PIPE, shell = True)
        if upstream is not None:
            # The child holds its own copy of the pipe; closing ours lets the
            # upstream command receive SIGPIPE if the downstream one exits early.
            upstream.close()
        upstream = proc.stdout
        procs.append(proc)

    output = procs[-1].communicate()[0]
    # Reap the intermediate commands so they do not linger as zombies.
    for proc in procs[:-1]:
        proc.wait()

    end = time.time()
    if not quiet:
        print('\r[%.5f] >> %s' % (end - start, pipeline))
    exectime_external += (end - start)

    # Python 3 hands back bytes; Python 2 already returns str.
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    return output.rstrip('\n')
|
|
|
|
def getkeyssortedbyvalues(dict):
    """Return the keys of a mapping as a list, ordered by ascending value.

    Keys with equal values are ordered by the keys themselves. A real list is
    returned (not a lazy iterator) because callers reverse it in place.
    The parameter name shadows the builtin but is kept for compatibility.
    """
    return [key for (value, key) in sorted([(value, key) for (key, value) in dict.items()])]
|
|
|
|
# dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
def getkeyssortedbyvaluekey(d, key):
    """Return the keys of *d* as a list, ordered by ascending d[k][key].

    d   -- mapping whose values are themselves mappings
    key -- the inner key to sort by; must be present in every value

    Ties are broken by the outer keys. A real list is returned (not a lazy
    iterator) because callers reverse it in place.
    """
    return [name for (value, name) in sorted([(d[name][key], name) for name in d])]
|
|
|
|
# Cached result of getversion(); 0 means "not looked up yet".
VERSION = 0
def getversion():
    """Return the abbreviated hash that `git rev-parse --short HEAD` prints.

    The command runs in the current working directory the first time this is
    called; the result is stored in VERSION and reused afterwards.
    """
    global VERSION
    if VERSION != 0:
        return VERSION
    head = getpipeoutput(["git rev-parse --short HEAD"])
    VERSION = head.split('\n')[0]
    return VERSION
|
|
|
|
class DataCollector:
    """Manages data collection from a revision control repository.

    This base class only stores the project name and a cache dictionary; the
    statistics getters return placeholder values and are meant to be
    overridden by a concrete collector.
    """
    def __init__(self):
        self.stamp_created = time.time()
        self.cache = {}

    ##
    # This should be the main function to extract data from the repository.
    def collect(self, dir):
        """Remember *dir* and derive the project name from its last path component."""
        self.dir = dir
        self.projectname = os.path.basename(os.path.abspath(dir))

    ##
    # Load cacheable data
    def loadCache(self, cachefile):
        """Replace self.cache with the contents of *cachefile*, if it exists.

        The file is expected to hold a zlib-compressed pickle; a plain pickle
        (the older, uncompressed format) is accepted as well.
        """
        if not os.path.exists(cachefile):
            return
        print('Loading cache...')
        f = open(cachefile, 'rb')
        try:
            raw = f.read()
        finally:
            f.close()
        try:
            payload = zlib.decompress(raw)
        except zlib.error:
            # temporary hack to upgrade non-compressed caches
            payload = raw
        # NOTE(review): unpickling executes whatever the file describes, so
        # the cache file must come from a trusted location.
        self.cache = pickle.loads(payload)

    ##
    # Produce any additional statistics from the extracted data.
    def refine(self):
        pass

    ##
    # Get a dictionary of information about the given author
    def getAuthorInfo(self, author):
        return None

    def getActivityByDayOfWeek(self):
        return {}

    def getActivityByHourOfDay(self):
        return {}

    ##
    # Get a list of authors
    def getAuthors(self):
        return []

    def getFirstCommitDate(self):
        return datetime.datetime.now()

    def getLastCommitDate(self):
        return datetime.datetime.now()

    def getStampCreated(self):
        """Return the time.time() value recorded when this object was constructed."""
        return self.stamp_created

    def getTags(self):
        return []

    def getTotalAuthors(self):
        return -1

    def getTotalCommits(self):
        return -1

    def getTotalFiles(self):
        return -1

    def getTotalLOC(self):
        return -1

    ##
    # Save cacheable data
    def saveCache(self, filename):
        """Write self.cache to *filename* as a zlib-compressed pickle."""
        print('Saving cache...')
        data = zlib.compress(pickle.dumps(self.cache))
        # Binary mode: the compressed stream must not go through newline
        # translation or text encoding.
        f = open(filename, 'wb')
        try:
            f.write(data)
        finally:
            f.close()
|
|
|
|
class GitDataCollector(DataCollector):
    """Collects statistics by running git commands through getpipeoutput().

    The commands are run without an explicit repository argument, i.e. in the
    current working directory of the process; the *dir* given to collect() is
    only used for the project name.
    """

    # Summary line of `git log --shortstat`. Accepts the singular/plural
    # wording and the forms where the insertions or deletions part is left out.
    _SHORTSTAT_RE = re.compile(r'^\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?')

    @staticmethod
    def _shellquote(text):
        """Quote *text* so a POSIX shell passes it on as one literal argument."""
        return "'" + text.replace("'", "'\\''") + "'"

    def collect(self, dir):
        """Gather all statistics and store them as attributes on self."""
        DataCollector.collect(self, dir)

        try:
            self.total_authors = int(getpipeoutput(['git log', 'git shortlog -s', 'wc -l']))
        except (ValueError, OSError):
            self.total_authors = 0
        #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

        self.activity_by_hour_of_day = {} # hour -> commits
        self.activity_by_day_of_week = {} # day -> commits
        self.activity_by_month_of_year = {} # month [1-12] -> commits
        self.activity_by_hour_of_week = {} # weekday -> hour -> commits
        self.activity_by_hour_of_day_busiest = 0
        self.activity_by_hour_of_week_busiest = 0

        self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}

        # author of the month
        self.author_of_month = {} # month -> author -> commits
        self.author_of_year = {} # year -> author -> commits
        self.commits_by_month = {} # month -> commits
        self.commits_by_year = {} # year -> commits
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        self._collect_tags()
        self._collect_revisions()
        self._collect_files_by_stamp()
        self._collect_extensions()
        self._collect_line_stats()

    def _collect_tags(self):
        """Fill self.tags: tag name -> {stamp, hash, date, commits, authors}."""
        self.tags = {}
        for line in getpipeoutput(['git show-ref --tags']).split('\n'):
            if len(line) == 0:
                continue
            (sha, tag) = line.split(' ', 1)

            tag = tag.replace('refs/tags/', '')
            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%an" -n 1' % sha])
            if len(output) > 0:
                try:
                    stamp = int(output.split(' ')[0])
                except ValueError:
                    stamp = 0
                self.tags[tag] = { 'stamp': stamp, 'hash' : sha, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }

        # Walk the tags from oldest to newest. Each tag is credited with the
        # commits reachable from it but not from the previous tag, which
        # assumes the tags are chronological and share one line of history.
        prev = None
        for tag in sorted(self.tags, key = lambda name: (self.tags[name]['date'], name)):
            cmd = 'git shortlog -s %s' % self._shellquote(tag)
            if prev is not None:
                cmd += ' %s' % self._shellquote('^' + prev)
            output = getpipeoutput([cmd])
            prev = tag
            for line in output.split('\n'):
                # a line looks like "   12\tAuthor Name"
                parts = re.split(r'\s+', line, 2)
                if len(parts) < 3:
                    continue # empty output: no commits between the two tags
                try:
                    commits = int(parts[1])
                except ValueError:
                    continue
                author = parts[2]
                self.tags[tag]['commits'] += commits
                self.tags[tag]['authors'][author] = commits

    def _collect_revisions(self):
        """Fill the activity, author and per-month/per-year commit counters."""
        # Outputs "<stamp> <author>", newest commit first
        lines = getpipeoutput(['git rev-list --pretty=format:"%at %an" HEAD', 'grep -v ^commit']).split('\n')
        for line in lines:
            if len(line) == 0:
                continue # no output at all, e.g. a repository without commits
            # linux-2.6 says "<unknown>" for one line O_o
            parts = line.split(' ')
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            author = ' '.join(parts[1:])
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp: the first line seen is the newest
            # commit, every later line overwrites the oldest one.
            if self.last_commit_stamp == 0:
                self.last_commit_stamp = stamp
            self.first_commit_stamp = stamp

            # activity
            # hour
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
                self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]

            # day of week
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

            # hour of week
            day_hours = self.activity_by_hour_of_week.setdefault(day, {})
            day_hours[hour] = day_hours.get(hour, 0) + 1
            # most active hour?
            if day_hours[hour] > self.activity_by_hour_of_week_busiest:
                self.activity_by_hour_of_week_busiest = day_hours[hour]

            # month of year
            month = date.month
            self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1

            # author stats
            if author not in self.authors:
                self.authors[author] = { 'last_commit_stamp': stamp, 'commits': 0 }
            info = self.authors[author]
            info['first_commit_stamp'] = stamp
            info['commits'] += 1

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            month_authors = self.author_of_month.setdefault(yymm, {})
            month_authors[author] = month_authors.get(author, 0) + 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            year_authors = self.author_of_year.setdefault(yy, {})
            year_authors[author] = year_authors.get(author, 0) + 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

    def _collect_files_by_stamp(self):
        """Fill self.files_by_stamp (stamp -> file count) and self.total_commits."""
        # TODO Optimize this, it's the worst bottleneck
        # outputs "<stamp> <tree hash>" for each revision
        self.files_by_stamp = {} # stamp -> files
        self.total_commits = 0
        revlines = getpipeoutput(['git rev-list --pretty=format:"%at %T" HEAD', 'grep -v ^commit']).strip().split('\n')
        for revline in revlines:
            fields = revline.split(' ')
            if len(fields) != 2:
                continue # e.g. the single empty line of a repository without commits
            (stamp, tree) = fields
            try:
                stamp = int(stamp)
            except ValueError:
                print('Warning: failed to parse line "%s"' % revline)
                continue
            self.total_commits += 1
            self.files_by_stamp[stamp] = self.getFilesInCommit(tree)

    def _collect_extensions(self):
        """Fill self.extensions (extension -> files, lines) and self.total_files."""
        self.extensions = {} # extension -> files, lines
        lines = [line for line in getpipeoutput(['git ls-files']).split('\n') if len(line) > 0]
        self.total_files = len(lines)
        for line in lines:
            base = os.path.basename(line)
            # Ignore extensionless (including .hidden files)
            dot = base.rfind('.')
            if dot <= 0:
                ext = ''
            else:
                ext = base[(dot + 1):]
            if len(ext) > MAX_EXT_LENGTH:
                ext = ''

            if ext not in self.extensions:
                self.extensions[ext] = {'files': 0, 'lines': 0}

            self.extensions[ext]['files'] += 1
            try:
                self.extensions[ext]['lines'] += int(getpipeoutput(['wc -l %s' % self._shellquote(line)]).split()[0])
            except (ValueError, IndexError, OSError):
                print('Warning: Could not count lines for file "%s"' % line)

    def _collect_line_stats(self):
        """Fill self.changes_by_date (stamp -> files, ins, del, lines) and self.total_lines."""
        # line statistics
        # outputs:
        #  N files changed, N insertions (+), N deletions(-)
        # <stamp> <author>
        self.changes_by_date = {} # stamp -> { files, ins, del }
        lines = getpipeoutput(['git log --shortstat --pretty=format:"%at %an"']).split('\n')
        # After reversing, a commit's stat line comes before its "<stamp> <author>" line.
        lines.reverse()
        files = 0; inserted = 0; deleted = 0; total_lines = 0
        for line in lines:
            if len(line) == 0:
                continue

            match = self._SHORTSTAT_RE.match(line)
            if match is not None:
                files = int(match.group(1))
                inserted = int(match.group(2) or 0)
                deleted = int(match.group(3) or 0)
                total_lines += inserted - deleted
                continue

            # <stamp> <author>
            pos = line.find(' ')
            if pos == -1:
                print('Warning: unexpected line "%s"' % line)
                continue
            try:
                stamp = int(line[:pos])
            except ValueError:
                print('Warning: unexpected line "%s"' % line)
                continue
            self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
            # A commit without a stat line must not inherit these numbers.
            files = 0; inserted = 0; deleted = 0
        self.total_lines = total_lines

    def refine(self):
        """Add place_by_commits, commits_frac, date_first, date_last and timedelta to every author."""
        # authors
        # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
        authors_by_commits = list(getkeyssortedbyvaluekey(self.authors, 'commits'))
        authors_by_commits.reverse() # most first
        for i, name in enumerate(authors_by_commits):
            self.authors[name]['place_by_commits'] = i + 1

        total_commits = self.getTotalCommits()
        for name in self.authors:
            a = self.authors[name]
            if total_commits:
                a['commits_frac'] = (100 * float(a['commits'])) / total_commits
            else:
                a['commits_frac'] = 0.0
            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
            delta = date_last - date_first
            a['date_first'] = date_first.strftime('%Y-%m-%d')
            a['date_last'] = date_last.strftime('%Y-%m-%d')
            a['timedelta'] = delta

    def getActivityByDayOfWeek(self):
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        return self.authors[author]

    def getAuthors(self):
        return list(self.authors.keys())

    def getCommitDeltaDays(self):
        """Return the whole number of days between the first and the last commit."""
        return (self.last_commit_stamp - self.first_commit_stamp) // 86400

    def getFilesInCommit(self, rev):
        """Return the number of files in tree *rev*, remembering it in self.cache."""
        files_in_tree = self.cache.setdefault('files_in_tree', {})
        if rev not in files_in_tree:
            files_in_tree[rev] = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
        return files_in_tree[rev]

    def getFirstCommitDate(self):
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the tag names reported by `git show-ref --tags` (empty list if none)."""
        # "-f3-" keeps everything after "refs/tags/", so names containing "/" stay whole
        lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3-'])
        return [line for line in lines.split('\n') if len(line) > 0]

    def getTagDate(self, tag):
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        return self.total_authors

    def getTotalCommits(self):
        return self.total_commits

    def getTotalFiles(self):
        return self.total_files

    def getTotalLOC(self):
        return self.total_lines

    def revToDate(self, rev):
        """Return the author date of *rev* formatted as YYYY-MM-DD (local time)."""
        stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
|
|
|
|
class ReportCreator:
    """Creates the actual report based on given data."""
    def __init__(self):
        pass

    def create(self, data, path):
        """Store the collected data and the output path on the instance."""
        self.data, self.path = data, path
|
|
|
|
def html_linkify(text):
    """Turn a heading into an anchor name: lower-cased, spaces become underscores."""
    lowered = text.lower()
    return '_'.join(lowered.split(' '))
|
|
|
|
def html_header(level, text):
    """Return an <hN> heading whose content is a link to its own anchor.

    level -- heading level, used for both the opening and closing tag
    text  -- heading text; its html_linkify() form is used as href and name
    """
    anchor = html_linkify(text)
    fields = (level, anchor, anchor, text, level)
    return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % fields
|
|
|
|
class HTMLReportCreator(ReportCreator):
    """Writes the report as a set of HTML pages, gnuplot data files and plot scripts."""

    @staticmethod
    def _esc(text):
        """Escape the characters that are special in HTML text and attribute values."""
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')

    @staticmethod
    def _percent(part, whole):
        """Return part as a percentage of whole, or 0.0 when whole is zero."""
        if whole == 0:
            return 0.0
        return (100.0 * part) / whole

    @staticmethod
    def _shade(commits, busiest):
        """Return the red channel (128..255) for a cell, scaled by commits / busiest."""
        return 127 + int((float(commits) / busiest) * 128)

    @staticmethod
    def _write_file(filename, text):
        """Write *text* to *filename*, closing the file even if the write fails."""
        f = open(filename, 'w')
        try:
            f.write(text)
        finally:
            f.close()

    def _write_page(self, filename, heading, body):
        """Write one HTML page: header, heading, navigation, then *body*."""
        f = open(filename, 'w')
        try:
            self.printHeader(f)
            f.write(heading)
            self.printNav(f)
            f.write(body)
        finally:
            f.close()

    def create(self, data, path):
        """Generate every page and graph for *data* inside the directory *path*."""
        ReportCreator.create(self, data, path)
        self.title = data.projectname

        # copy static files (always overwritten, so they stay in sync with this script)
        basedir = os.path.dirname(os.path.abspath(__file__))
        for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
            shutil.copyfile(basedir + '/' + file, path + '/' + file)

        self._create_index(data, path)
        self._create_activity(data, path)
        self._create_authors(data, path)
        self._create_files(data, path)
        self._create_lines(data, path)
        self._create_tags(data, path)

        self.createGraphs(path)

    def _create_index(self, data, path):
        """Write index.html, the general summary page."""
        format = '%Y-%m-%d %H:%M:%S'
        out = []
        out.append('<dl>')
        out.append('<dt>Project name</dt><dd>%s</dd>' % self._esc(data.projectname))
        out.append('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
        out.append('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s)</dd>' % getversion())
        out.append('<dt>Report Period</dt><dd>%s to %s (%d days)</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format), data.getCommitDeltaDays()))
        out.append('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
        out.append('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
        out.append('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
        out.append('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
        out.append('</dl>')

        out.append('</body>\n</html>')
        self._write_page(path + "/index.html", '<h1>GitStats - %s</h1>' % self._esc(data.projectname), ''.join(out))

    def _create_activity(self, data, path):
        """Write activity.html and the .dat files behind its graphs."""
        out = []
        totalcommits = data.getTotalCommits()

        #f.write('<h2>Last 30 days</h2>')

        #f.write('<h2>Last 12 months</h2>')

        # Hour of Day
        out.append(html_header(2, 'Hour of Day'))
        hour_of_day = data.getActivityByHourOfDay()
        out.append('<table><tr><th>Hour</th>')
        for i in range(0, 24):
            out.append('<th>%d</th>' % i)
        out.append('</tr>\n<tr><th>Commits</th>')
        for i in range(0, 24):
            if i in hour_of_day:
                r = self._shade(hour_of_day[i], data.activity_by_hour_of_day_busiest)
                out.append('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
            else:
                out.append('<td>0</td>')
        out.append('</tr>\n<tr><th>%</th>')
        for i in range(0, 24):
            if i in hour_of_day:
                r = self._shade(hour_of_day[i], data.activity_by_hour_of_day_busiest)
                out.append('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, self._percent(hour_of_day[i], totalcommits)))
            else:
                out.append('<td>0.00</td>')
        out.append('</tr></table>')
        out.append('<img src="hour_of_day.png" alt="Hour of Day" />')
        # The plot script uses xrange [0.5:24.5], so the hours are written 1-based.
        rows = []
        for i in range(0, 24):
            rows.append('%d %d\n' % (i + 1, hour_of_day.get(i, 0)))
        self._write_file(path + '/hour_of_day.dat', ''.join(rows))

        # Day of Week
        out.append(html_header(2, 'Day of Week'))
        day_of_week = data.getActivityByDayOfWeek()
        out.append('<div class="vtable"><table>')
        out.append('<tr><th>Day</th><th>Total (%)</th></tr>')
        rows = []
        for d in range(0, 7):
            rows.append('%d %d\n' % (d + 1, day_of_week.get(d, 0)))
            out.append('<tr>')
            out.append('<th>%d</th>' % (d + 1))
            if d in day_of_week:
                out.append('<td>%d (%.2f%%)</td>' % (day_of_week[d], self._percent(day_of_week[d], totalcommits)))
            else:
                out.append('<td>0</td>')
            out.append('</tr>')
        out.append('</table></div>')
        out.append('<img src="day_of_week.png" alt="Day of Week" />')
        self._write_file(path + '/day_of_week.dat', ''.join(rows))

        # Hour of Week
        out.append(html_header(2, 'Hour of Week'))
        out.append('<table>')

        out.append('<tr><th>Weekday</th>')
        for hour in range(0, 24):
            out.append('<th>%d</th>' % (hour))
        out.append('</tr>')

        for weekday in range(0, 7):
            out.append('<tr><th>%d</th>' % (weekday + 1))
            for hour in range(0, 24):
                commits = data.activity_by_hour_of_week.get(weekday, {}).get(hour, 0)
                if commits != 0:
                    r = self._shade(commits, data.activity_by_hour_of_week_busiest)
                    out.append('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, commits))
                else:
                    out.append('<td></td>')
            out.append('</tr>')

        out.append('</table>')

        # Month of Year
        out.append(html_header(2, 'Month of Year'))
        out.append('<div class="vtable"><table>')
        out.append('<tr><th>Month</th><th>Commits (%)</th></tr>')
        rows = []
        for mm in range(1, 13):
            commits = data.activity_by_month_of_year.get(mm, 0)
            out.append('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, self._percent(commits, totalcommits)))
            rows.append('%d %d\n' % (mm, commits))
        self._write_file(path + '/month_of_year.dat', ''.join(rows))
        out.append('</table></div>')
        out.append('<img src="month_of_year.png" alt="Month of Year" />')

        # Commits by year/month
        out.append(html_header(2, 'Commits by year/month'))
        out.append('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
        for yymm in sorted(data.commits_by_month.keys(), reverse = True):
            out.append('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
        out.append('</table></div>')
        out.append('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
        rows = []
        for yymm in sorted(data.commits_by_month.keys()):
            rows.append('%s %s\n' % (yymm, data.commits_by_month[yymm]))
        self._write_file(path + '/commits_by_year_month.dat', ''.join(rows))

        # Commits by year
        out.append(html_header(2, 'Commits by Year'))
        out.append('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
        for yy in sorted(data.commits_by_year.keys(), reverse = True):
            out.append('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], self._percent(data.commits_by_year[yy], totalcommits)))
        out.append('</table></div>')
        out.append('<img src="commits_by_year.png" alt="Commits by Year" />')
        rows = []
        for yy in sorted(data.commits_by_year.keys()):
            rows.append('%d %d\n' % (yy, data.commits_by_year[yy]))
        self._write_file(path + '/commits_by_year.dat', ''.join(rows))

        out.append('</body></html>')
        self._write_page(path + '/activity.html', '<h1>Activity</h1>', ''.join(out))

    def _top_author_rows(self, authors_by_period, commits_by_period):
        """Return table rows naming the top author and the next five for each period, newest first."""
        rows = []
        for period in sorted(authors_by_period.keys(), reverse = True):
            authordict = authors_by_period[period]
            authors = list(getkeyssortedbyvalues(authordict))
            authors.reverse()
            commits = authordict[authors[0]]
            # positions 2..6, i.e. the five authors after the winner
            runners_up = ', '.join([self._esc(name) for name in authors[1:6]])
            rows.append('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (period, self._esc(authors[0]), commits, self._percent(commits, commits_by_period[period]), commits_by_period[period], runners_up))
        return rows

    def _create_authors(self, data, path):
        """Write authors.html: the author list and the author of the month/year tables."""
        out = []

        # Authors :: List of authors
        out.append(html_header(2, 'List of Authors'))

        out.append('<table class="authors sortable" id="authors">')
        out.append('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th># by commits</th></tr>')
        for author in sorted(data.getAuthors()):
            info = data.getAuthorInfo(author)
            out.append('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>' % (self._esc(author), info['commits'], info['commits_frac'], info['date_first'], info['date_last'], info['timedelta'], info['place_by_commits']))
        out.append('</table>')

        # Authors :: Author of Month
        out.append(html_header(2, 'Author of Month'))
        out.append('<table class="sortable" id="aom">')
        out.append('<tr><th>Month</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
        out.extend(self._top_author_rows(data.author_of_month, data.commits_by_month))

        out.append('</table>')

        out.append(html_header(2, 'Author of Year'))
        out.append('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
        out.extend(self._top_author_rows(data.author_of_year, data.commits_by_year))
        out.append('</table>')

        out.append('</body></html>')
        self._write_page(path + '/authors.html', '<h1>Authors</h1>', ''.join(out))

    def _create_files(self, data, path):
        """Write files.html and files_by_date.dat."""
        out = []
        total_files = data.getTotalFiles()
        total_loc = data.getTotalLOC()

        out.append('<dl>\n')
        out.append('<dt>Total files</dt><dd>%d</dd>' % total_files)
        out.append('<dt>Total lines</dt><dd>%d</dd>' % total_loc)
        # NOTE(review): this value is 100 * lines / files, which is neither a
        # size nor a byte count despite the label - confirm what was intended.
        out.append('<dt>Average file size</dt><dd>%.2f bytes</dd>' % self._percent(total_loc, total_files))
        out.append('</dl>\n')

        # Files :: File count by date
        out.append(html_header(2, 'File count by date'))

        rows = []
        for stamp in sorted(data.files_by_stamp.keys()):
            rows.append('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
        self._write_file(path + '/files_by_date.dat', ''.join(rows))

        out.append('<img src="files_by_date.png" alt="Files by Date" />')

        #f.write('<h2>Average file size by date</h2>')

        # Files :: Extensions
        out.append(html_header(2, 'Extensions'))
        out.append('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
        for ext in sorted(data.extensions.keys()):
            files = data.extensions[ext]['files']
            lines = data.extensions[ext]['lines']
            out.append('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (self._esc(ext), files, self._percent(files, total_files), lines, self._percent(lines, total_loc), lines // files))
        out.append('</table>')

        out.append('</body></html>')
        self._write_page(path + '/files.html', '<h1>Files</h1>', ''.join(out))

    def _create_lines(self, data, path):
        """Write lines.html and lines_of_code.dat."""
        out = []

        out.append('<dl>\n')
        out.append('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
        out.append('</dl>\n')

        out.append(html_header(2, 'Lines of Code'))
        out.append('<img src="lines_of_code.png" />')

        rows = []
        for stamp in sorted(data.changes_by_date.keys()):
            rows.append('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
        self._write_file(path + '/lines_of_code.dat', ''.join(rows))

        out.append('</body></html>')
        self._write_page(path + '/lines.html', '<h1>Lines</h1>', ''.join(out))

    def _create_tags(self, data, path):
        """Write tags.html, listing the tags from newest to oldest."""
        out = []

        out.append('<dl>')
        out.append('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
        if len(data.tags) > 0:
            out.append('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (float(data.getTotalCommits()) / len(data.tags)))
        out.append('</dl>')

        out.append('<table>')
        out.append('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
        # sort the tags by date desc
        tags_sorted_by_date_desc = sorted(data.tags.keys(), key = lambda name: (data.tags[name]['date'], name), reverse = True)
        for tag in tags_sorted_by_date_desc:
            authorinfo = []
            authors_by_commits = list(getkeyssortedbyvalues(data.tags[tag]['authors']))
            authors_by_commits.reverse()
            for i in authors_by_commits:
                authorinfo.append('%s (%d)' % (self._esc(i), data.tags[tag]['authors'][i]))
            out.append('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (self._esc(tag), data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
        out.append('</table>')

        out.append('</body></html>')
        self._write_page(path + '/tags.html', '<h1>Tags</h1>', ''.join(out))

    def createGraphs(self, path):
        """Write the gnuplot scripts into *path* and run gnuplot on every *.plot file there."""
        print('Generating graphs...')

        # (base name, script body); GNUPLOT_COMMON is prepended to each script
        plots = (
            # hour of day
            ('hour_of_day', """
set output 'hour_of_day.png'
unset key
set xrange [0.5:24.5]
set xtics 4
set ylabel "Commits"
plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
"""),
            # day of week
            ('day_of_week', """
set output 'day_of_week.png'
unset key
set xrange [0.5:7.5]
set xtics 1
set ylabel "Commits"
plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid
"""),
            # Month of Year
            ('month_of_year', """
set output 'month_of_year.png'
unset key
set xrange [0.5:12.5]
set xtics 1
set ylabel "Commits"
plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
"""),
            # commits_by_year_month
            ('commits_by_year_month', """
set output 'commits_by_year_month.png'
unset key
set xdata time
set timefmt "%Y-%m"
set format x "%Y-%m"
set xtics rotate by 90 15768000
set bmargin 5
set ylabel "Commits"
plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
"""),
            # commits_by_year
            ('commits_by_year', """
set output 'commits_by_year.png'
unset key
set xtics 1
set ylabel "Commits"
set yrange [0:]
plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
"""),
            # Files by date
            ('files_by_date', """
set output 'files_by_date.png'
unset key
set xdata time
set timefmt "%Y-%m-%d"
set format x "%Y-%m-%d"
set ylabel "Files"
set xtics rotate by 90
set bmargin 6
plot 'files_by_date.dat' using 1:2 w histeps
"""),
            # Lines of Code
            ('lines_of_code', """
set output 'lines_of_code.png'
unset key
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set ylabel "Lines"
set xtics rotate by 90
set bmargin 6
plot 'lines_of_code.dat' using 1:2 w lines
"""),
        )
        for (name, script) in plots:
            self._write_file(path + '/' + name + '.plot', GNUPLOT_COMMON + script)

        # The scripts name their data and output files relatively, so gnuplot
        # has to run inside the output directory; the previous working
        # directory is restored afterwards.
        previous_dir = os.getcwd()
        os.chdir(path)
        try:
            for plotfile in sorted(glob.glob('*.plot')):
                out = getpipeoutput([gnuplot_cmd + ' "%s"' % plotfile])
                if len(out) > 0:
                    print(out)
        finally:
            os.chdir(previous_dir)

    def printHeader(self, f, title = ''):
        """Write the XHTML prologue and <head> to *f*; the *title* argument is unused."""
        f.write(
"""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
	<title>GitStats - %s</title>
	<link rel="stylesheet" href="gitstats.css" type="text/css" />
	<meta name="generator" content="GitStats %s" />
	<script type="text/javascript" src="sortable.js"></script>
</head>
<body>
""" % (self._esc(self.title), getversion()))

    def printNav(self, f):
        """Write the navigation list linking the six report pages to *f*."""
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
|
|
|
|
|
|
usage = """
Usage: gitstats [options] <gitpath> <outputpath>

Options:
"""

# Both the repository path and the output path are required.
if len(sys.argv) < 3:
    print(usage)
    sys.exit(0)

gitpath = sys.argv[1]
outputpath = os.path.abspath(sys.argv[2])
rundir = os.getcwd()

# Create the output directory; a failure (for instance because it already
# exists) is only fatal if no directory is there afterwards.
try:
    os.makedirs(outputpath)
except OSError:
    pass
if not os.path.isdir(outputpath):
    print('FATAL: Output path is not a directory or does not exist')
    sys.exit(1)

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The git commands are run in the current directory, so move into the repository.
os.chdir(gitpath)

cachefile = os.path.join(outputpath, 'gitstats.cache')

print('Collecting data...')
data = GitDataCollector()
data.loadCache(cachefile)
data.collect(gitpath)
print('Refining data...')
data.saveCache(cachefile)
data.refine()

os.chdir(rundir)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)

time_end = time.time()
exectime_internal = time_end - time_start
external_share = (100.0 * exectime_external) / exectime_internal
print('Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, external_share))
|