# Patch summary. Text before the first "diff --git" header is skipped by
# patch tools, so these notes do not affect how the patch applies.
#
# utils/sort_db.py
#   * sort_db() takes a path, chooses the sort key from the base file name
#     (segbits_, segbits_ + origin_info, ppips_, grid-, mask_) and returns
#     False for any other name instead of failing on an unbound key function.
#   * The shuffle-and-resort self check in sort_db() is commented out.
#   * New helpers: sortable_line_from_origin_segbits(), sort_csv(),
#     sort_db_text(), sort_file(), sort_dir().
#   * main() sorts the files named in argv[1:], or walks '.' recursively when
#     no arguments are given.
#   * sort_json() prints the JSONDecodeError before returning False.
#
# utils/xjson.py
#   * sort() builds and returns a new, recursively sorted structure
#     (OrderedDict for dicts; lists of length 2 are returned untouched).
#   * pprint() serialises the value returned by sort() and no longer passes
#     sort_keys=True, so sort() must return the data on every branch; the
#     tileconn branch therefore ends with an explicit `return data`.
#   * key() raises ValueError for any value that is not None, str, int, list
#     or dict.
#
# NOTE(review): line breaks, indentation and blank context lines were restored
# from a whitespace-collapsed copy and checked only against the hunk line
# counts; confirm the context lines against the target files before applying.
diff --git a/utils/sort_db.py b/utils/sort_db.py
index c8698c27..c8c915da 100755
--- a/utils/sort_db.py
+++ b/utils/sort_db.py
@@ -47,6 +47,7 @@ sort sets (lists where the order doesn't matter).
 
 """
 
+import csv
 import os
 import random
 import re
@@ -284,16 +285,35 @@ def sortable_line_from_segbits(l):
     return (tag, tuple(bits)), l
 
 
-def sort_db(filename):
+def sortable_line_from_origin_segbits(l):
+    tag, origin, sbit = l.split(' ', 2)
+    tag = sortable_tag(tag)
+
+    bits = bit.parseline(sbit)
+
+    return (tag, tuple(bits)), l
+
+
+def sort_db(pathname):
     """Sort a XXX.db file."""
+    filename = os.path.split(pathname)[-1]
     if filename.startswith('segbits_'):
-        sortable_line_from_dbfile = sortable_line_from_segbits
+        if 'origin_info' in filename:
+            sortable_line_from_dbfile = sortable_line_from_origin_segbits
+        else:
+            sortable_line_from_dbfile = sortable_line_from_segbits
+    elif 'origin_info' in filename:
+        return False
     elif filename.startswith('ppips_'):
         sortable_line_from_dbfile = sortable_line_from_ppips
+    elif filename.startswith('grid-'):
+        sortable_line_from_dbfile = sortable_line_from_ppips
     elif filename.startswith('mask_'):
         sortable_line_from_dbfile = sortable_line_from_mask
+    else:
+        return False
 
-    lines = open(filename).readlines()
+    lines = open(pathname).readlines()
 
     tosort = []
     for l in lines:
@@ -305,16 +325,16 @@ def sort_db(filename):
     tosort.sort(key=cmp.cmp_key)
 
     # Make sure the sort is stable
-    for i in range(0, 4):
-        copy = tosort.copy()
-        random.shuffle(copy)
-        copy.sort(key=cmp.cmp_key)
-        assert len(copy) == len(tosort)
-        for i in range(0, len(copy)):
-            assert copy[i] == tosort[i], "\n%r\n != \n%r\n" % (
-                copy[i], tosort[i])
+    #for i in range(0, 4):
+    #    copy = tosort.copy()
+    #    random.shuffle(copy)
+    #    copy.sort(key=cmp.cmp_key)
+    #    assert len(copy) == len(tosort)
+    #    for i in range(0, len(copy)):
+    #        assert copy[i] == tosort[i], "\n%r\n != \n%r\n" % (
+    #            copy[i], tosort[i])
 
-    with open(filename, 'w') as f:
+    with open(pathname, 'w') as f:
         for _, l in tosort:
             f.write(l)
             f.write('\n')
@@ -322,11 +342,45 @@ def sort_db(filename):
     return True
 
 
+def sort_csv(pathname):
+    rows = []
+    fields = []
+    delimiter = None
+    with open(pathname, newline='') as f:
+        if pathname.endswith('.csv'):
+            delimiter = ','
+        elif pathname.endswith('.txt'):
+            delimiter = ' '
+        reader = csv.DictReader(f, delimiter=delimiter)
+        fields.extend(reader.fieldnames)
+        rows.extend(reader)
+        del reader
+
+    fields.sort()
+
+    def sort_key(r):
+        v = []
+        for field in fields:
+            v.append(sortable_tag(r[field]))
+        return tuple(v)
+
+    rows.sort(key=sort_key)
+
+    with open(pathname, 'w', newline='') as f:
+        writer = csv.DictWriter(
+            f, fields, delimiter=delimiter, lineterminator='\n')
+        writer.writeheader()
+        writer.writerows(rows)
+
+    return True
+
+
 def sort_json(filename):
     """Sort a XXX.json file."""
     try:
         d = json.load(open(filename))
-    except json.JSONDecodeError:
+    except json.JSONDecodeError as e:
+        print(e)
         return False
 
     with open(filename, 'w') as f:
@@ -335,30 +389,75 @@ def sort_json(filename):
     return True
 
 
+def sort_db_text(n):
+    rows = []
+    with open(n) as f:
+        for l in f:
+            rows.append(([extract_num(s) for s in l.split()], l))
+
+    rows.sort(key=lambda i: i[0])
+
+    with open(n, 'w') as f:
+        for l in rows:
+            f.write(l[-1])
+
+    return True
+
+
+def sort_file(n):
+
+    assert os.path.exists(n)
+
+    base, ext = os.path.splitext(n)
+    dirname, base = os.path.split(base)
+
+    # Leave db files with fuzzer of origin untouched
+    if "origin_info" in n and not base.startswith('segbits'):
+        print("Ignoring file {:45s}".format(n), flush=True)
+        return
+
+    if ext == '.db':
+        print("Sorting DB file {:45s}".format(n), end=" ", flush=True)
+        x = sort_db(n)
+    elif ext == '.json':
+        print("Sorting JSON file {:45s}".format(n), end=" ", flush=True)
+        x = sort_json(n)
+    elif ext in ('.csv', '.txt'):
+        if n.endswith('-db.txt'):
+            print("Sorting txt file {:45s}".format(n), end=" ", flush=True)
+            x = sort_db_text(n)
+        else:
+            print("Sorting CSV file {:45s}".format(n), end=" ", flush=True)
+            x = sort_csv(n)
+    else:
+        print("Ignoring file {:45s}".format(n), end=" ", flush=True)
+        x = True
+    if x:
+        print(".. success.")
+    else:
+        print(".. failed.")
+
+
+def sort_dir(dirname):
+    for n in sorted(os.listdir(dirname)):
+        n = os.path.join(dirname, n)
+        if os.path.isdir(n):
+            print("Entering dir {:45s}".format(n), flush=True)
+            sort_dir(n)
+            continue
+        elif not os.path.isfile(n):
+            print("Ignoring non-file {:45s}".format(n), flush=True)
+            continue
+
+        sort_file(n)
+
+
 def main(argv):
-    for n in sorted(os.listdir()):
-        if not os.path.isfile(n):
-            continue
-        # Leave db files with fuzzer of origin untouched
-        if "origin_info" in n:
-            continue
-
-        base, ext = os.path.splitext(n)
-
-        if ext == '.db':
-            print("Sorting DB file {:40s}".format(n), end=" ", flush=True)
-            x = sort_db(n)
-        elif ext == '.json':
-            print("Sorting JSON file {:40s}".format(n), end=" ", flush=True)
-            x = sort_json(n)
-        else:
-            print("Ignoring file {:40s}".format(n), end=" ", flush=True)
-            x = True
-        if x:
-            print(".. success.")
-        else:
-            print(".. failed.")
-
+    if argv[1:]:
+        for n in argv[1:]:
+            sort_file(n)
+    else:
+        sort_dir('.')
     return 0
 
 
diff --git a/utils/xjson.py b/utils/xjson.py
index 77eaf515..f216fb83 100755
--- a/utils/xjson.py
+++ b/utils/xjson.py
@@ -4,6 +4,8 @@ import json
 import re
 import sys
 
+from collections import OrderedDict
+
 
 def extract_numbers(s):
     """
@@ -31,23 +33,49 @@ def sort(data):
         data.sort(key=lambda o: (o['tile_types'], o['grid_deltas']))
+        return data
     else:
 
-        def walker(o, f):
-            if isinstance(o, dict):
-                for i in o.values():
-                    walker(i, f)
+        def key(o):
+            if o is None:
+                return None
+            elif isinstance(o, str):
+                return extract_numbers(o)
+            elif isinstance(o, int):
+                return o
             elif isinstance(o, list):
+                return [key(i) for i in o]
+            elif isinstance(o, dict):
+                return [(key(k), key(v)) for k, v in o.items()]
+            raise ValueError(repr(o))
+
+        def rsorter(o):
+            if isinstance(o, dict):
+                nitems = []
+                for k, v in o.items():
+                    nitems.append((key(k), k, rsorter(v)))
+                nitems.sort(key=lambda n: n[0])
+
+                new_dict = OrderedDict()
+                for _, k, v in nitems:
+                    new_dict[k] = v
+                return new_dict
+
+            elif isinstance(o, list):
+                if len(o) == 2:
+                    return o
+
+                nlist = []
                 for i in o:
-                    walker(i, f)
-            f(o)
+                    nlist.append((key(i), rsorter(i)))
+                nlist.sort(key=lambda n: n[0])
 
-        def f(o):
-            if isinstance(o, list):
-                if len(o) > 2:
-                    strings = all(isinstance(x, str) for x in o)
-                    if strings:
-                        o.sort()
+                new_list = []
+                for _, i in nlist:
+                    new_list.append(i)
+                return new_list
+            else:
+                return o
 
-        walker(data, f)
+        return rsorter(data)
 
 
 def pprint(f, data):
@@ -55,8 +83,8 @@ def pprint(f, data):
     if not isinstance(f, io.TextIOBase):
         detach = True
         f = io.TextIOWrapper(f)
-    sort(data)
-    json.dump(data, f, sort_keys=True, indent=4)
+    data = sort(data)
+    json.dump(data, f, indent=4)
     f.write('\n')
     f.flush()
     if detach: