#!/usr/bin/env python # Copyright (c) 2007-2013 Heikki Hokkanen & others (see doc/author.txt) # GPLv2 / GPLv3 import datetime import getopt import glob import os import pickle import platform import re import shutil import subprocess import sys import time import zlib os.environ['LC_ALL'] = 'C' GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n' ON_LINUX = (platform.system() == 'Linux') WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun') exectime_internal = 0.0 exectime_external = 0.0 time_start = time.time() # By default, gnuplot is searched from path, but can be overridden with the # environment variable "GNUPLOT" gnuplot_cmd = 'gnuplot' if 'GNUPLOT' in os.environ: gnuplot_cmd = os.environ['GNUPLOT'] conf = { 'max_domains': 10, 'max_ext_length': 10, 'style': 'gitstats.css', 'max_authors': 20, 'authors_top': 5, 'commit_begin': '', 'commit_end': 'HEAD', 'linear_linestats': 1, 'project_name': '', 'merge_authors': {} } def getpipeoutput(cmds, quiet = False): global exectime_external start = time.time() if not quiet and ON_LINUX and os.isatty(1): print '>> ' + ' | '.join(cmds), sys.stdout.flush() p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True) p = p0 for x in cmds[1:]: p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True) p0 = p output = p.communicate()[0] end = time.time() if not quiet: if ON_LINUX and os.isatty(1): print '\r', print '[%.5f] >> %s' % (end - start, ' | '.join(cmds)) exectime_external += (end - start) return output.rstrip('\n') def getcommitrange(defaultrange = 'HEAD', end_only = False): if len(conf['commit_end']) > 0: if end_only or len(conf['commit_begin']) == 0: return conf['commit_end'] return '%s..%s' % (conf['commit_begin'], conf['commit_end']) return defaultrange def getkeyssortedbyvalues(dict): return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items()))) # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits') def 
getkeyssortedbyvaluekey(d, key): return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys()))) def getstatsummarycounts(line): numbers = re.findall('\d+', line) if len(numbers) == 1: # neither insertions nor deletions: may probably only happen for "0 files changed" numbers.append(0); numbers.append(0); elif len(numbers) == 2 and line.find('(+)') != -1: numbers.append(0); # only insertions were printed on line elif len(numbers) == 2 and line.find('(-)') != -1: numbers.insert(1, 0); # only deletions were printed on line return numbers VERSION = 0 def getversion(): global VERSION if VERSION == 0: gitstats_repo = os.path.dirname(os.path.abspath(__file__)) VERSION = getpipeoutput(["git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" % (gitstats_repo, gitstats_repo, getcommitrange('HEAD').split('\n')[0])]) return VERSION def getgitversion(): return getpipeoutput(['git --version']).split('\n')[0] def getgnuplotversion(): return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0] class DataCollector: """Manages data collection from a revision control repository.""" def __init__(self): self.stamp_created = time.time() self.cache = {} self.total_authors = 0 self.activity_by_hour_of_day = {} # hour -> commits self.activity_by_day_of_week = {} # day -> commits self.activity_by_month_of_year = {} # month [1-12] -> commits self.activity_by_hour_of_week = {} # weekday -> hour -> commits self.activity_by_hour_of_day_busiest = 0 self.activity_by_hour_of_week_busiest = 0 self.activity_by_year_week = {} # yy_wNN -> commits self.activity_by_year_week_peak = 0 self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed} self.total_commits = 0 self.total_files = 0 self.authors_by_commits = 0 # domains self.domains = {} # domain -> commits # author of the month self.author_of_month = {} # month -> author -> commits self.author_of_year = {} # year -> author -> commits 
self.commits_by_month = {} # month -> commits self.commits_by_year = {} # year -> commits self.lines_added_by_month = {} # month -> lines added self.lines_added_by_year = {} # year -> lines added self.lines_removed_by_month = {} # month -> lines removed self.lines_removed_by_year = {} # year -> lines removed self.first_commit_stamp = 0 self.last_commit_stamp = 0 self.last_active_day = None self.active_days = set() # lines self.total_lines = 0 self.total_lines_added = 0 self.total_lines_removed = 0 # size self.total_size = 0 # timezone self.commits_by_timezone = {} # timezone -> commits # tags self.tags = {} self.files_by_stamp = {} # stamp -> files # extensions self.extensions = {} # extension -> files, lines # line statistics self.changes_by_date = {} # stamp -> { files, ins, del } ## # This should be the main function to extract data from the repository. def collect(self, dir): self.dir = dir if len(conf['project_name']) == 0: self.projectname = os.path.basename(os.path.abspath(dir)) else: self.projectname = conf['project_name'] ## # Load cacheable data def loadCache(self, cachefile): if not os.path.exists(cachefile): return print 'Loading cache...' f = open(cachefile, 'rb') try: self.cache = pickle.loads(zlib.decompress(f.read())) except: # temporary hack to upgrade non-compressed caches f.seek(0) self.cache = pickle.load(f) f.close() ## # Produce any additional statistics from the extracted data. 
# NOTE(review): the formatting of this span was destroyed by a
# whitespace-mangling paste -- many statements are fused onto two physical
# lines, left byte-identical below.  Logical content, in order:
#  * remaining DataCollector methods: refine() (empty post-processing hook),
#    stub accessors (getAuthorInfo, getActivityByDayOfWeek,
#    getActivityByHourOfDay, getDomainInfo, getAuthors, getFirstCommitDate,
#    getLastCommitDate, getStampCreated, getTags, getTotalAuthors,
#    getTotalCommits, getTotalFiles, getTotalLOC) returning placeholder
#    values, presumably meant to be overridden by subclasses -- confirm;
#  * saveCache(): zlib-compresses the pickled self.cache and writes it to
#    "<cachefile>.tmp", then removes the old cache (ignoring OSError) and
#    renames the temp file into place;
#  * the start of GitDataCollector.collect(): counts authors via
#    `git shortlog -s | wc -l`, parses `git show-ref --tags` plus a
#    `git log --pretty=format:"%at %aN" -n 1` per tag into self.tags, then
#    begins walking tags newest-first with `git shortlog -s "<tag>" "^<prev>"`
#    to attribute per-tag commit counts to (possibly merged) authors.
# GitDataCollector.collect() continues past the end of this view (the text
# is truncated mid-comment at `# Outputs "`), so the code is deliberately
# left untouched here rather than reconstructed.
def refine(self): pass ## # : get a dictionary of author def getAuthorInfo(self, author): return None def getActivityByDayOfWeek(self): return {} def getActivityByHourOfDay(self): return {} # : get a dictionary of domains def getDomainInfo(self, domain): return None ## # Get a list of authors def getAuthors(self): return [] def getFirstCommitDate(self): return datetime.datetime.now() def getLastCommitDate(self): return datetime.datetime.now() def getStampCreated(self): return self.stamp_created def getTags(self): return [] def getTotalAuthors(self): return -1 def getTotalCommits(self): return -1 def getTotalFiles(self): return -1 def getTotalLOC(self): return -1 ## # Save cacheable data def saveCache(self, cachefile): print 'Saving cache...' tempfile = cachefile + '.tmp' f = open(tempfile, 'wb') #pickle.dump(self.cache, f) data = zlib.compress(pickle.dumps(self.cache)) f.write(data) f.close() try: os.remove(cachefile) except OSError: pass os.rename(tempfile, cachefile) class GitDataCollector(DataCollector): def collect(self, dir): DataCollector.collect(self, dir) self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l'])) #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l')) # tags lines = getpipeoutput(['git show-ref --tags']).split('\n') for line in lines: if len(line) == 0: continue (hash, tag) = line.split(' ') tag = tag.replace('refs/tags/', '') output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash]) if len(output) > 0: parts = output.split(' ') stamp = 0 try: stamp = int(parts[0]) except ValueError: stamp = 0 self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} } # collect info on tags, starting from latest tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), self.tags.items())))) prev = None for tag in 
reversed(tags_sorted_by_date_desc): cmd = 'git shortlog -s "%s"' % tag if prev != None: cmd += ' "^%s"' % prev output = getpipeoutput([cmd]) if len(output) == 0: continue prev = tag for line in output.split('\n'): parts = re.split('\s+', line, 2) commits = int(parts[1]) author = parts[2] if author in conf['merge_authors']: author = conf['merge_authors'][author] self.tags[tag]['commits'] += commits self.tags[tag]['authors'][author] = commits # Collect revision statistics # Outputs "