mirror of https://github.com/openXC7/prjxray.git
Fix the sorting stage.
- Rework how the json files are sorted (numbers are treated as numerics). - Sort csv and txt files. - Sort segbits.*origin_info.db files. - Sort the grid file. Signed-off-by: Tim 'mithro' Ansell <me@mith.ro>
This commit is contained in:
parent
fd88bf59e0
commit
3c91c98e03
171
utils/sort_db.py
171
utils/sort_db.py
|
|
@ -47,6 +47,7 @@ sort sets (lists where the order doesn't matter).
|
|||
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
|
|
@ -284,16 +285,35 @@ def sortable_line_from_segbits(l):
|
|||
return (tag, tuple(bits)), l
|
||||
|
||||
|
||||
def sort_db(filename):
|
||||
def sortable_line_from_origin_segbits(l):
    """Build the sort key for one line of a segbits origin_info file.

    The line is split on single spaces into exactly three parts: a tag, an
    origin field, and the remaining text holding the bits.  The origin field
    takes no part in the ordering.

    Returns a ``(key, line)`` pair where ``key`` is
    ``(sortable_tag(tag), tuple(bit.parseline(bits)))`` and ``line`` is the
    input, unchanged.

    Raises ValueError when the line has fewer than three space-separated
    parts.
    """
    tag_text, _origin, bits_text = l.split(' ', 2)
    sort_key = (sortable_tag(tag_text), tuple(bit.parseline(bits_text)))
    return sort_key, l
|
||||
|
||||
|
||||
def sort_db(pathname):
|
||||
"""Sort a XXX.db file."""
|
||||
filename = os.path.split(pathname)[-1]
|
||||
if filename.startswith('segbits_'):
|
||||
sortable_line_from_dbfile = sortable_line_from_segbits
|
||||
if 'origin_info' in filename:
|
||||
sortable_line_from_dbfile = sortable_line_from_origin_segbits
|
||||
else:
|
||||
sortable_line_from_dbfile = sortable_line_from_segbits
|
||||
elif 'origin_info' in filename:
|
||||
return False
|
||||
elif filename.startswith('ppips_'):
|
||||
sortable_line_from_dbfile = sortable_line_from_ppips
|
||||
elif filename.startswith('grid-'):
|
||||
sortable_line_from_dbfile = sortable_line_from_ppips
|
||||
elif filename.startswith('mask_'):
|
||||
sortable_line_from_dbfile = sortable_line_from_mask
|
||||
else:
|
||||
return False
|
||||
|
||||
lines = open(filename).readlines()
|
||||
lines = open(pathname).readlines()
|
||||
|
||||
tosort = []
|
||||
for l in lines:
|
||||
|
|
@ -305,16 +325,16 @@ def sort_db(filename):
|
|||
tosort.sort(key=cmp.cmp_key)
|
||||
|
||||
# Make sure the sort is stable
|
||||
for i in range(0, 4):
|
||||
copy = tosort.copy()
|
||||
random.shuffle(copy)
|
||||
copy.sort(key=cmp.cmp_key)
|
||||
assert len(copy) == len(tosort)
|
||||
for i in range(0, len(copy)):
|
||||
assert copy[i] == tosort[i], "\n%r\n != \n%r\n" % (
|
||||
copy[i], tosort[i])
|
||||
#for i in range(0, 4):
|
||||
# copy = tosort.copy()
|
||||
# random.shuffle(copy)
|
||||
# copy.sort(key=cmp.cmp_key)
|
||||
# assert len(copy) == len(tosort)
|
||||
# for i in range(0, len(copy)):
|
||||
# assert copy[i] == tosort[i], "\n%r\n != \n%r\n" % (
|
||||
# copy[i], tosort[i])
|
||||
|
||||
with open(filename, 'w') as f:
|
||||
with open(pathname, 'w') as f:
|
||||
for _, l in tosort:
|
||||
f.write(l)
|
||||
f.write('\n')
|
||||
|
|
@ -322,11 +342,45 @@ def sort_db(filename):
|
|||
return True
|
||||
|
||||
|
||||
def sort_csv(pathname):
    """Sort a delimited text table in place, by column name and by row.

    The delimiter is chosen from the file extension: a comma for ``.csv``
    and a single space for ``.txt``.  For any other extension the delimiter
    stays ``None`` and is handed to the ``csv`` module unchanged.

    The header columns are rewritten in sorted order, and the rows are
    ordered by the tuple of ``sortable_tag`` values taken in that same
    column order.  The file is rewritten with newline line terminators.

    A file without a header row (for example an empty file) has nothing to
    sort and is left untouched.

    Returns True.
    """
    if pathname.endswith('.csv'):
        delimiter = ','
    elif pathname.endswith('.txt'):
        delimiter = ' '
    else:
        delimiter = None

    with open(pathname, newline='') as f:
        reader = csv.DictReader(f, delimiter=delimiter)
        # fieldnames is None when the file has no first row at all; there is
        # no header to reorder and no row to sort in that case.
        if reader.fieldnames is None:
            return True
        fields = sorted(reader.fieldnames)
        rows = list(reader)

    def sort_key(row):
        # Compare rows column by column, following the sorted header order.
        return tuple(sortable_tag(row[field]) for field in fields)

    rows.sort(key=sort_key)

    with open(pathname, 'w', newline='') as f:
        writer = csv.DictWriter(
            f, fields, delimiter=delimiter, lineterminator='\n')
        writer.writeheader()
        writer.writerows(rows)

    return True
|
||||
|
||||
|
||||
def sort_json(filename):
|
||||
"""Sort a XXX.json file."""
|
||||
try:
|
||||
d = json.load(open(filename))
|
||||
except json.JSONDecodeError:
|
||||
except json.JSONDecodeError as e:
|
||||
print(e)
|
||||
return False
|
||||
|
||||
with open(filename, 'w') as f:
|
||||
|
|
@ -335,30 +389,75 @@ def sort_json(filename):
|
|||
return True
|
||||
|
||||
|
||||
def sort_db_text(n):
    """Sort the lines of a whitespace-separated text file in place.

    Each line is keyed by the list of ``extract_num`` results for its
    whitespace-separated fields, and the lines are written back in key
    order.

    A final line that lacks a trailing newline is given one before sorting,
    so it cannot be fused with whichever line ends up after it.

    Returns True.
    """
    rows = []
    with open(n) as f:
        for line in f:
            # Only the last line of a file can be missing its terminator.
            if not line.endswith('\n'):
                line += '\n'
            rows.append(([extract_num(s) for s in line.split()], line))

    rows.sort(key=lambda row: row[0])

    with open(n, 'w') as f:
        f.writelines(line for _, line in rows)

    return True
|
||||
|
||||
|
||||
def sort_file(n):
    """Sort a single database file in place, chosen by its extension.

    ``.db`` files go to ``sort_db``, ``.json`` files to ``sort_json``,
    files ending in ``-db.txt`` to ``sort_db_text`` and any other ``.csv``
    or ``.txt`` file to ``sort_csv``.  Files with any other extension are
    reported as ignored and counted as a success.

    Paths containing ``origin_info`` are left untouched unless the file's
    base name starts with ``segbits``.

    A progress line is printed for every file, followed by ``.. success.``
    or ``.. failed.`` according to the sorter's return value.  The path must
    exist (checked with an assertion).
    """
    assert os.path.exists(n)

    stem, ext = os.path.splitext(n)
    base = os.path.basename(stem)

    # Leave db files with fuzzer of origin untouched
    if "origin_info" in n and not base.startswith('segbits'):
        print("Ignoring file {:45s}".format(n), flush=True)
        return

    # Pick the progress message and the sorter; None means "nothing to do".
    if ext == '.db':
        message, sorter = "Sorting DB file {:45s}", sort_db
    elif ext == '.json':
        message, sorter = "Sorting JSON file {:45s}", sort_json
    elif ext in ('.csv', '.txt'):
        if n.endswith('-db.txt'):
            message, sorter = "Sorting txt file {:45s}", sort_db_text
        else:
            message, sorter = "Sorting CSV file {:45s}", sort_csv
    else:
        message, sorter = "Ignoring file {:45s}", None

    # The message is emitted before the sorter runs so that it is visible
    # even if the sorter raises.
    print(message.format(n), end=" ", flush=True)
    ok = True if sorter is None else sorter(n)

    print(".. success." if ok else ".. failed.")
|
||||
|
||||
|
||||
def sort_dir(dirname):
    """Recursively sort every file found under ``dirname``.

    Directory entries are visited in sorted name order.  Sub-directories are
    announced and descended into, regular files are passed to ``sort_file``
    and anything else is reported as a non-file and skipped.
    """
    for entry in sorted(os.listdir(dirname)):
        path = os.path.join(dirname, entry)

        if os.path.isdir(path):
            print("Entering dir {:45s}".format(path), flush=True)
            sort_dir(path)
        elif os.path.isfile(path):
            sort_file(path)
        else:
            print("Ignoring non-file {:45s}".format(path), flush=True)
|
||||
|
||||
|
||||
def main(argv):
|
||||
for n in sorted(os.listdir()):
|
||||
if not os.path.isfile(n):
|
||||
continue
|
||||
# Leave db files with fuzzer of origin untouched
|
||||
if "origin_info" in n:
|
||||
continue
|
||||
|
||||
base, ext = os.path.splitext(n)
|
||||
|
||||
if ext == '.db':
|
||||
print("Sorting DB file {:40s}".format(n), end=" ", flush=True)
|
||||
x = sort_db(n)
|
||||
elif ext == '.json':
|
||||
print("Sorting JSON file {:40s}".format(n), end=" ", flush=True)
|
||||
x = sort_json(n)
|
||||
else:
|
||||
print("Ignoring file {:40s}".format(n), end=" ", flush=True)
|
||||
x = True
|
||||
if x:
|
||||
print(".. success.")
|
||||
else:
|
||||
print(".. failed.")
|
||||
|
||||
if argv[1:]:
|
||||
for n in argv[1:]:
|
||||
sort_file(n)
|
||||
else:
|
||||
sort_dir('.')
|
||||
return 0
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ import json
|
|||
import re
|
||||
import sys
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
def extract_numbers(s):
|
||||
"""
|
||||
|
|
@ -31,23 +33,48 @@ def sort(data):
|
|||
data.sort(key=lambda o: (o['tile_types'], o['grid_deltas']))
|
||||
else:
|
||||
|
||||
def walker(o, f):
|
||||
if isinstance(o, dict):
|
||||
for i in o.values():
|
||||
walker(i, f)
|
||||
def key(o):
|
||||
if o is None:
|
||||
return None
|
||||
elif isinstance(o, str):
|
||||
return extract_numbers(o)
|
||||
elif isinstance(o, int):
|
||||
return o
|
||||
elif isinstance(o, list):
|
||||
return [key(i) for i in o]
|
||||
elif isinstance(o, dict):
|
||||
return [(key(k), key(v)) for k, v in o.items()]
|
||||
raise ValueError(repr(o))
|
||||
|
||||
def rsorter(o):
|
||||
if isinstance(o, dict):
|
||||
nitems = []
|
||||
for k, v in o.items():
|
||||
nitems.append((key(k), k, rsorter(v)))
|
||||
nitems.sort(key=lambda n: n[0])
|
||||
|
||||
new_dict = OrderedDict()
|
||||
for _, k, v in nitems:
|
||||
new_dict[k] = v
|
||||
return new_dict
|
||||
|
||||
elif isinstance(o, list):
|
||||
if len(o) == 2:
|
||||
return o
|
||||
|
||||
nlist = []
|
||||
for i in o:
|
||||
walker(i, f)
|
||||
f(o)
|
||||
nlist.append((key(i), rsorter(i)))
|
||||
nlist.sort(key=lambda n: n[0])
|
||||
|
||||
def f(o):
|
||||
if isinstance(o, list):
|
||||
if len(o) > 2:
|
||||
strings = all(isinstance(x, str) for x in o)
|
||||
if strings:
|
||||
o.sort()
|
||||
new_list = []
|
||||
for _, i in nlist:
|
||||
new_list.append(i)
|
||||
return new_list
|
||||
else:
|
||||
return o
|
||||
|
||||
walker(data, f)
|
||||
return rsorter(data)
|
||||
|
||||
|
||||
def pprint(f, data):
|
||||
|
|
@ -55,8 +82,8 @@ def pprint(f, data):
|
|||
if not isinstance(f, io.TextIOBase):
|
||||
detach = True
|
||||
f = io.TextIOWrapper(f)
|
||||
sort(data)
|
||||
json.dump(data, f, sort_keys=True, indent=4)
|
||||
data = sort(data)
|
||||
json.dump(data, f, indent=4)
|
||||
f.write('\n')
|
||||
f.flush()
|
||||
if detach:
|
||||
|
|
|
|||
Loading…
Reference in New Issue