mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
[dev-scripts] Add csvcolumn_to_scurve.py to convert csv documents into an s-curve csv file ready for graphing.
I am upstreaming this for two reasons: 1. Traditionally I have done this by hand in a spreadsheet program. I would rather just have a program fix it up for me. 2. Multiple people have asked me how to produce this sort of graph and I would like to document it via a script. I hope it is useful to others. You use the script by invoking it as: ./csvcolumn_to_scurve.py <input_file> <before_column> <after_column> [output_file]
This commit is contained in:
55
utils/dev-scripts/csvcolumn_to_scurve.py
Executable file
55
utils/dev-scripts/csvcolumn_to_scurve.py
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# This is a simple script that reads in a csv file, selects a "before" and an
# "after" column, and emits the sorted after/before ratios as "s-curve" csv
# data ready for graphing.
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import sys
|
||||
|
||||
|
||||
def get_data(input_file, before_column, after_column):
    """Return the s-curve points for two columns of a csv file.

    Each data row of ``input_file`` (parsed with ``csv.DictReader``, so the
    first line must be a header) contributes the ratio ``after / before``.
    The ratios are sorted ascending and paired with their normalized rank.

    Args:
        input_file: An iterable of csv lines, e.g. an open text file.
        before_column: Header name of the baseline ("old") column.
        after_column: Header name of the comparison ("new") column.

    Returns:
        A list of ``(rank / (n - 1), ratio)`` tuples ordered by ratio, so the
        first point has x == 0.0 and the last has x == 1.0. No data rows
        gives ``[]``; a single data row gives ``[(0.0, ratio)]``.

    Raises:
        KeyError: If a column name is not present in the csv header.
        ValueError: If a cell cannot be converted to ``float``.
        TypeError: If a row is too short to contain the requested cell.
        ZeroDivisionError: If a ``before`` value is zero.
    """
    def ratios():
        for row in csv.DictReader(input_file):
            before = float(row[before_column])
            after = float(row[after_column])
            yield after / before

    ordered = sorted(ratios())
    # Normalize ranks onto [0, 1]. With fewer than two points there is no
    # span to normalize over, so clamp the divisor to 1 instead of dividing
    # by zero (a lone point then sits at x == 0.0).
    span = float(max(len(ordered) - 1, 1))
    return [(rank / span, ratio) for rank, ratio in enumerate(ordered)]
|
||||
|
||||
|
||||
def main():
    """Parse the command line and write the s-curve csv.

    Positional arguments: the input csv file, the "before" column name, the
    "after" column name, and an optional output file. When the output file
    is omitted the result is written to stdout.

    The output has a header row ``N/total,New/Old`` followed by one row per
    point returned by ``get_data``.
    """
    p = argparse.ArgumentParser(description="""

    A script that reads in a csv file, splices out selected before/after
    column, and then outputs a new csv file with that data in s-curve form. An
    s-curve is a graph where one sorts the output %-change and graphs the %-n
    vs %-change.

    NOTE: We assume that the csv has a csv header that maps to the before and
    after column names passed in.
    """)

    p.add_argument('input_file', type=argparse.FileType('r'))
    p.add_argument('before_column_name', type=str)
    p.add_argument('after_column_name', type=str)
    # Optional destination matching the documented usage; falls back to
    # stdout so existing three-argument invocations behave as before.
    p.add_argument('output_file', nargs='?', type=argparse.FileType('w'),
                   default=sys.stdout)

    args = p.parse_args()

    try:
        # get_data returns a fully materialized list, so the input can be
        # closed as soon as it returns.
        data = get_data(args.input_file, args.before_column_name,
                        args.after_column_name)
    finally:
        args.input_file.close()

    try:
        w = csv.DictWriter(args.output_file,
                           fieldnames=['N/total', 'New/Old'])
        w.writeheader()
        for n_over_total, new_over_old in data:
            w.writerow({'N/total': n_over_total, 'New/Old': new_over_old})
    finally:
        # Only close a file we opened; leave the process's stdout alone.
        if args.output_file is not sys.stdout:
            args.output_file.close()
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user