Fix the sorting stage.

- Rework how the json files are sorted (numbers are now compared
  numerically rather than as text).
- Sort csv and txt files.
- Sort segbits.*origin_info.db files.
- Sort the grid file.

Signed-off-by: Tim 'mithro' Ansell <me@mith.ro>
This commit is contained in:
Tim 'mithro' Ansell 2020-02-09 23:17:56 -06:00
parent fd88bf59e0
commit 3c91c98e03
2 changed files with 177 additions and 51 deletions

View File

@ -47,6 +47,7 @@ sort sets (lists where the order doesn't matter).
"""
import csv
import os
import random
import re
@ -284,16 +285,35 @@ def sortable_line_from_segbits(l):
return (tag, tuple(bits)), l
def sort_db(filename):
def sortable_line_from_origin_segbits(l):
    """Build a ``(sort_key, line)`` pair for an origin-annotated segbits line.

    The line is split on single spaces into exactly three parts: the tag,
    an origin field and the remaining bit text.  The origin field takes no
    part in the sort key.

    Returns ``((sortable_tag(tag), tuple(bits)), l)`` where ``bits`` is
    whatever ``bit.parseline`` yields for the remaining text.

    Raises ValueError (from the unpacking) if the line has fewer than
    three space-separated parts.
    """
    raw_tag, _origin, bit_text = l.split(' ', 2)
    tag_key = sortable_tag(raw_tag)
    bit_key = tuple(bit.parseline(bit_text))
    return (tag_key, bit_key), l
def sort_db(pathname):
"""Sort a XXX.db file."""
filename = os.path.split(pathname)[-1]
if filename.startswith('segbits_'):
sortable_line_from_dbfile = sortable_line_from_segbits
if 'origin_info' in filename:
sortable_line_from_dbfile = sortable_line_from_origin_segbits
else:
sortable_line_from_dbfile = sortable_line_from_segbits
elif 'origin_info' in filename:
return False
elif filename.startswith('ppips_'):
sortable_line_from_dbfile = sortable_line_from_ppips
elif filename.startswith('grid-'):
sortable_line_from_dbfile = sortable_line_from_ppips
elif filename.startswith('mask_'):
sortable_line_from_dbfile = sortable_line_from_mask
else:
return False
lines = open(filename).readlines()
lines = open(pathname).readlines()
tosort = []
for l in lines:
@ -305,16 +325,16 @@ def sort_db(filename):
tosort.sort(key=cmp.cmp_key)
# Make sure the sort result does not depend on the input order
for i in range(0, 4):
copy = tosort.copy()
random.shuffle(copy)
copy.sort(key=cmp.cmp_key)
assert len(copy) == len(tosort)
for i in range(0, len(copy)):
assert copy[i] == tosort[i], "\n%r\n != \n%r\n" % (
copy[i], tosort[i])
#for i in range(0, 4):
# copy = tosort.copy()
# random.shuffle(copy)
# copy.sort(key=cmp.cmp_key)
# assert len(copy) == len(tosort)
# for i in range(0, len(copy)):
# assert copy[i] == tosort[i], "\n%r\n != \n%r\n" % (
# copy[i], tosort[i])
with open(filename, 'w') as f:
with open(pathname, 'w') as f:
for _, l in tosort:
f.write(l)
f.write('\n')
@ -322,11 +342,45 @@ def sort_db(filename):
return True
def sort_csv(pathname):
    """Sort a delimited text file (with a header row) in place.

    The delimiter is chosen from the extension: ``,`` for ``.csv`` and a
    single space for ``.txt``.  Columns are rewritten in alphabetical order
    of their header names, and rows are ordered by the tuple of
    ``sortable_tag(value)`` taken over those sorted columns.

    A completely empty file (no header row at all) has nothing to sort and
    is left untouched.

    Returns True on success.

    Raises ValueError if ``pathname`` has neither a ``.csv`` nor a ``.txt``
    extension.
    """
    if pathname.endswith('.csv'):
        delimiter = ','
    elif pathname.endswith('.txt'):
        delimiter = ' '
    else:
        # Fail with a clear message instead of handing csv a None delimiter.
        raise ValueError(
            "Don't know the delimiter for {!r}".format(pathname))

    with open(pathname, newline='') as f:
        reader = csv.DictReader(f, delimiter=delimiter)
        header = reader.fieldnames
        if header is None:
            # No header row means the file is empty; nothing to rewrite.
            return True
        fields = sorted(header)
        rows = list(reader)

    def sort_key(r):
        return tuple(sortable_tag(r[field]) for field in fields)

    rows.sort(key=sort_key)

    with open(pathname, 'w', newline='') as f:
        writer = csv.DictWriter(
            f, fields, delimiter=delimiter, lineterminator='\n')
        writer.writeheader()
        writer.writerows(rows)

    return True
def sort_json(filename):
"""Sort a XXX.json file."""
try:
d = json.load(open(filename))
except json.JSONDecodeError:
except json.JSONDecodeError as e:
print(e)
return False
with open(filename, 'w') as f:
@ -335,30 +389,75 @@ def sort_json(filename):
return True
def sort_db_text(n):
    """Sort the lines of the text file ``n`` in place.

    Each line is split on whitespace and every token is passed through
    ``extract_num``; the resulting list is the sort key for that line.

    Every line is written back terminated by a newline.  Without this, a
    final line lacking a trailing newline would be fused with whichever
    line ends up after it once the rows are reordered.

    Returns True.
    """
    rows = []
    with open(n) as f:
        for line in f:
            if not line.endswith('\n'):
                # Only the last line of a file can lack its terminator.
                line += '\n'
            rows.append(([extract_num(s) for s in line.split()], line))

    rows.sort(key=lambda row: row[0])

    with open(n, 'w') as f:
        f.writelines(line for _, line in rows)

    return True
def sort_file(n):
    """Sort the single file ``n`` in place, choosing the sorter by extension.

    ``.db`` goes to ``sort_db``, ``.json`` to ``sort_json``, names ending in
    ``-db.txt`` to ``sort_db_text`` and other ``.csv``/``.txt`` files to
    ``sort_csv``.  Anything else is reported as ignored.  A progress line
    followed by ``.. success.`` or ``.. failed.`` is printed for each file.

    Paths containing ``origin_info`` are skipped unless the file's base name
    starts with ``segbits``.
    """
    assert os.path.exists(n)

    stem, ext = os.path.splitext(n)
    base = os.path.basename(stem)

    # Leave db files with fuzzer of origin untouched
    if "origin_info" in n and not base.startswith('segbits'):
        print("Ignoring file {:45s}".format(n), flush=True)
        return

    if ext == '.db':
        banner, sorter = "Sorting DB file {:45s}", sort_db
    elif ext == '.json':
        banner, sorter = "Sorting JSON file {:45s}", sort_json
    elif ext in ('.csv', '.txt'):
        if n.endswith('-db.txt'):
            banner, sorter = "Sorting txt file {:45s}", sort_db_text
        else:
            banner, sorter = "Sorting CSV file {:45s}", sort_csv
    else:
        # Unknown extension: nothing to do, which counts as success.
        banner, sorter = "Ignoring file {:45s}", None

    print(banner.format(n), end=" ", flush=True)
    ok = True if sorter is None else sorter(n)

    print(".. success." if ok else ".. failed.")
def sort_dir(dirname):
    """Recursively sort every regular file found under ``dirname``.

    Entries are visited in sorted name order.  Sub-directories are
    descended into as they are met; entries that are neither a directory
    nor a regular file are reported and skipped.
    """
    for entry in sorted(os.listdir(dirname)):
        path = os.path.join(dirname, entry)
        if os.path.isdir(path):
            print("Entering dir {:45s}".format(path), flush=True)
            sort_dir(path)
        elif os.path.isfile(path):
            sort_file(path)
        else:
            print("Ignoring non-file {:45s}".format(path), flush=True)
def main(argv):
for n in sorted(os.listdir()):
if not os.path.isfile(n):
continue
# Leave db files with fuzzer of origin untouched
if "origin_info" in n:
continue
base, ext = os.path.splitext(n)
if ext == '.db':
print("Sorting DB file {:40s}".format(n), end=" ", flush=True)
x = sort_db(n)
elif ext == '.json':
print("Sorting JSON file {:40s}".format(n), end=" ", flush=True)
x = sort_json(n)
else:
print("Ignoring file {:40s}".format(n), end=" ", flush=True)
x = True
if x:
print(".. success.")
else:
print(".. failed.")
if argv[1:]:
for n in argv[1:]:
sort_file(n)
else:
sort_dir('.')
return 0

View File

@ -4,6 +4,8 @@ import json
import re
import sys
from collections import OrderedDict
def extract_numbers(s):
"""
@ -31,23 +33,48 @@ def sort(data):
data.sort(key=lambda o: (o['tile_types'], o['grid_deltas']))
else:
def walker(o, f):
if isinstance(o, dict):
for i in o.values():
walker(i, f)
def key(o):
    """Return a sort key for a JSON-like value.

    ``None`` and integers are returned unchanged, strings are passed
    through ``extract_numbers``, lists become a list of their elements'
    keys, and dicts become a list of ``(key(k), key(v))`` pairs in the
    dict's iteration order.

    Raises ValueError (carrying ``repr(o)``) for any other type.
    """
    if o is None or isinstance(o, int):
        return o
    if isinstance(o, str):
        return extract_numbers(o)
    if isinstance(o, list):
        return [key(item) for item in o]
    if isinstance(o, dict):
        return [(key(name), key(value)) for name, value in o.items()]
    raise ValueError(repr(o))
def rsorter(o):
if isinstance(o, dict):
nitems = []
for k, v in o.items():
nitems.append((key(k), k, rsorter(v)))
nitems.sort(key=lambda n: n[0])
new_dict = OrderedDict()
for _, k, v in nitems:
new_dict[k] = v
return new_dict
elif isinstance(o, list):
if len(o) == 2:
return o
nlist = []
for i in o:
walker(i, f)
f(o)
nlist.append((key(i), rsorter(i)))
nlist.sort(key=lambda n: n[0])
def f(o):
if isinstance(o, list):
if len(o) > 2:
strings = all(isinstance(x, str) for x in o)
if strings:
o.sort()
new_list = []
for _, i in nlist:
new_list.append(i)
return new_list
else:
return o
walker(data, f)
return rsorter(data)
def pprint(f, data):
@ -55,8 +82,8 @@ def pprint(f, data):
if not isinstance(f, io.TextIOBase):
detach = True
f = io.TextIOWrapper(f)
sort(data)
json.dump(data, f, sort_keys=True, indent=4)
data = sort(data)
json.dump(data, f, indent=4)
f.write('\n')
f.flush()
if detach: