Merge pull request #565 from mithro/formatdb

New `make formatdb` target
2019-01-30 17:47:02 +11:00 · 2019-01-30 17:47:02 +11:00 · 049d966ce9
parent ed28255558 df42e28558
commit 049d966ce9
5 changed files with 664 additions and 4 deletions
--- a/10
+++ b/10
@ -71,6 +71,16 @@ checkdb:
 		$(IN_ENV) python3 utils/checkdb.py --db-root $$DB; \
 	fi; done

+# Canonicalize every directory under database/ with utils/sort_db.py, then
+# re-run the checkdb target and regenerate database/Info.md while keeping the
+# commit information already recorded there (--keep).
+.PHONY: formatdb
+formatdb:
+	@for DB in database/*; do if [ -d "$$DB" ]; then \
+		echo ; \
+		echo "Formatting $$DB"; \
+		echo "============================"; \
+		($(IN_ENV) cd "$$DB" && python3 ../../utils/sort_db.py) || exit 1; \
+	fi; done
+	@$(MAKE) checkdb
+	$(IN_ENV) ./utils/info_md.py --keep
+
 clean:
 	$(MAKE) -C database clean
 	$(MAKE) -C fuzzers clean
--- a/requirements.txt
+++ b/requirements.txt
@ -1,12 +1,13 @@
 futures
 intervaltree
 numpy
+parse
 progressbar2
 pyjson5
+pytest
 pyyaml
 scipy
-sympy
-yapf==0.24.0
-textx
-pytest
 simplejson
+sympy
+textx
+yapf==0.24.0
--- a/utils/cmp.py
+++ b/utils/cmp.py
@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+"""
+Python 3 removed the 'cmp' function and raises a Type error when you try to
+compare different types. This module recreates Python 2 style 'cmp' function
+which produces a "total ordering" for mixed type lists.
+"""
+
+import functools
+import itertools
+
+
+# Private marker used to pad the shorter of two iterables. Using a dedicated
+# object (rather than None) keeps "ran out of items" distinct from a real
+# None element.
+_EXHAUSTED = object()
+
+
+def cmp(a, b):
+    """Three-way comparison producing a total ordering over mixed types.
+
+    Returns -1, 0 or 1 when ``a`` sorts before, equal to, or after ``b``.
+
+    * Two non-string iterables are compared element by element (so a list and
+      a tuple with equal items compare equal); a sequence that is a strict
+      prefix of another sorts first.
+    * Two values of the same type are compared with ``==``, ``<`` and ``>``.
+    * Anything else is ordered by comparing the class names of the operands.
+
+    Raises SystemError when two values of the same type are neither equal nor
+    ordered by ``<`` / ``>``.
+
+    >>> cmp(1, 1)
+    0
+    >>> cmp('A', 'A')
+    0
+    >>> cmp(None, None)
+    0
+    >>> cmp(('A', 'B'), ('A', 'B'))
+    0
+    >>> cmp(['A', 'B'], ('A', 'B'))
+    0
+    >>> cmp((1, 2), (1, 2))
+    0
+    >>> cmp((1, 2), [1, 2])
+    0
+
+    >>> cmp(1, 2)
+    -1
+    >>> cmp('A', 'B')
+    -1
+    >>> cmp(('A', 'B'), ('A', 'C'))
+    -1
+    >>> cmp(['A', 'B'], ('A', 'C'))
+    -1
+    >>> cmp((1, 2), (1, 3))
+    -1
+    >>> cmp((1, 2), [1, 3])
+    -1
+
+    >>> cmp(2, 1)
+    1
+    >>> cmp('B', 'A')
+    1
+    >>> cmp(('A', 'C'), ('A', 'B'))
+    1
+    >>> cmp(['A', 'C'], ('A', 'B'))
+    1
+    >>> cmp((1, 3), (1, 2))
+    1
+    >>> cmp((1, 3), [1, 2])
+    1
+
+    >>> cmp(1, None)
+    1
+    >>> cmp('A', None)
+    1
+    >>> cmp(('A', 'B'), None)
+    1
+    >>> cmp(['A', 'B'], None)
+    1
+    >>> cmp((1, 2), None)
+    1
+    >>> cmp((1, 2), None)
+    1
+
+    >>> cmp(None, 2)
+    -1
+    >>> cmp(None, 'B')
+    -1
+    >>> cmp(None, ('A', 'B'))
+    -1
+    >>> cmp(None, ('A', 'C'))
+    -1
+    >>> cmp(None, (1, 2))
+    -1
+    >>> cmp(None, [1, 3])
+    -1
+
+    >>> cmp(1, 'A')
+    -1
+    >>> cmp('A', 1)
+    1
+
+    >>> cmp(('A', 'B'), 1)
+    1
+    >>> cmp(1, ['A', 'B'])
+    -1
+
+    >>> cmp((1, 2), 1)
+    1
+    >>> cmp(1, (1, 2))
+    -1
+
+    >>> cmp('A', 'AA')
+    -1
+    >>> cmp('AA', 'A')
+    1
+
+    >>> cmp(b'A', b'A')
+    0
+    >>> cmp(b'A', b'AA')
+    -1
+    >>> cmp(b'AA', b'A')
+    1
+
+    >>> def bit(*args):
+    ...   return args
+    >>> a = ('CLBLL', 'L', 'SLICEL', ('X', 0), 'AFFMUX', 'XOR')
+    >>> b = ('CLBLL', 'L', 'SLICEL', ('X', 0), 'AFFMUX', ('F', 7))
+    >>> cmp(a, b)
+    -1
+    >>> cmp(b, a)
+    1
+
+    A trailing None is a real element, not padding:
+
+    >>> cmp((1,), (1, None))
+    -1
+    >>> cmp((1, None), (1,))
+    1
+
+    """
+    # Strings and bytes are iterable but must be compared as whole values.
+    if not isinstance(a, (str, bytes)) and not isinstance(b, (str, bytes)):
+        try:
+            pairs = itertools.zip_longest(
+                iter(a), iter(b), fillvalue=_EXHAUSTED)
+            for i, j in pairs:
+                # Whichever side runs out first is the shorter sequence and
+                # sorts first, regardless of what the extra element is.
+                if i is _EXHAUSTED:
+                    return -1
+                if j is _EXHAUSTED:
+                    return 1
+                r = cmp(i, j)
+                if r != 0:
+                    return r
+            return 0
+        except TypeError:
+            # At least one operand is not iterable; fall through to the
+            # scalar comparison below.
+            pass
+    if type(a) is type(b):
+        if a == b:
+            return 0
+        elif a < b:
+            return -1
+        elif a > b:
+            return 1
+        else:
+            # Same type but neither equal nor ordered.
+            raise SystemError
+    # Different types: order by class name so the result is deterministic.
+    return cmp(a.__class__.__name__, b.__class__.__name__)
+
+
+# Key function wrapping cmp() for use with sort()/sorted().
+cmp_key = functools.cmp_to_key(cmp)
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
--- a/utils/info_md.py
+++ b/utils/info_md.py
@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+
+import argparse
+import hashlib
+import os
+import parse as format_parser
+import subprocess
+import sys
+"""Module for generating the Info.md file found in the database directory."""
+
+# Template for the "Details" header of Info.md. main() fills it with
+# str.format() and, in --keep mode, also uses it as the pattern for reading
+# the commit fields back out of the existing file, so any change to this text
+# changes what existing files will match.
+info_md_header = """
+# Details
+
+Last updated on {human_date} ({iso8601_date}).
+
+Created using [Project X-Ray](https://github.com/SymbiFlow/prjxray) version [{commit_hash_short}](https://github.com/SymbiFlow/prjxray/commit/{commit_hash_long}).
+
+Latest commit was;
+```
+{commit_latest}
+```
+
+"""
+
+# Template emitted once per database directory: a heading, the settings
+# script (with its SHA-256) and the lead-in to the per-file checksum list.
+info_md_section = """
+
+## Database for [{part_line}]({part_line}/)
+
+### Settings
+
+Created using following [settings/{part_line}.sh (sha256: {settings_sha256})](https://github.com/SymbiFlow/prjxray/blob/{commit_hash_long}/settings/{part_line}.sh)
+```shell
+{settings_contents}
+```
+
+### [Results]({part_line}/)
+
+Results have checksums;
+
+"""
+
+# Template for one checksum bullet: "<sha256>  ./<path>" linked to the file.
+info_md_file = " * [`{file_sha256}  ./{file_short_path}`](./{file_short_path})\n"
+
+
+def sha256(s):
+    """Return the hexadecimal SHA-256 digest of the bytes-like value ``s``."""
+    return hashlib.sha256(s).hexdigest()
+
+
+def sha256_file(p):
+    """Return the hexadecimal SHA-256 digest of the file at path ``p``.
+
+    The file is read in binary mode in one go.
+    """
+    # Close the handle deterministically; this is called once per database
+    # file, so leaked handles would otherwise accumulate.
+    with open(p, 'rb') as f:
+        return sha256(f.read())
+
+
+def run(c):
+    """Run ``c`` through the shell and return its stdout as stripped text.
+
+    The output is decoded as UTF-8. A non-zero exit status raises
+    subprocess.CalledProcessError (from check_output).
+    """
+    raw_output = subprocess.check_output(c, shell=True)
+    text = raw_output.decode('utf-8')
+    return text.strip()
+
+
+def main(argv):
+    """Regenerate ``database/Info.md``.
+
+    The file is rebuilt from ``database/README.md``, a header carrying the
+    current UTC date plus commit details, and one section per non-hidden
+    directory under ``database`` showing that directory's settings script and
+    the SHA-256 of every file below it. All paths are relative to the current
+    working directory.
+
+    Args:
+        argv: Full argument vector, program name first (as ``sys.argv``).
+            ``--keep`` reuses the commit details already recorded in
+            ``database/Info.md`` instead of querying ``git log``; the dates
+            are refreshed either way.
+
+    Returns:
+        0 on success.
+
+    Raises:
+        FileNotFoundError: ``database/Info.md`` does not exist.
+        ValueError: ``--keep`` was given but the existing file does not match
+            the header template.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--keep',
+        default=False,
+        action="store_true",
+        help="""\
+Keep the existing commit information.
+""")
+    args = parser.parse_args(argv[1:])
+
+    info_md_filename = os.path.join('database', 'Info.md')
+    # Explicit check rather than assert so it still fires under ``python -O``.
+    if not os.path.exists(info_md_filename):
+        raise FileNotFoundError(info_md_filename)
+
+    with open('database/README.md') as f:
+        info_md = [f.read()]
+
+    v = {}
+    v['human_date'] = run('TZ=UTC date')
+    v['iso8601_date'] = run('TZ=UTC date --iso-8601=seconds')
+    if not args.keep:
+        v['commit_latest'] = run('git log -1')
+        v['commit_hash_short'] = run('git log -1 --pretty=%h')
+        v['commit_hash_long'] = run('git log -1 --pretty=%H')
+    else:
+        # Read the commit fields back out of the existing file by matching it
+        # against the same template that produced it.
+        with open(info_md_filename) as f:
+            result = format_parser.parse(
+                '{before}' + info_md_header + '{after}', f.read())
+        if (not result or not result['human_date']
+                or not result['iso8601_date']):
+            raise ValueError(
+                "could not recover commit details from %s" % info_md_filename)
+        v['commit_latest'] = result['commit_latest']
+        v['commit_hash_short'] = result['commit_hash_short']
+        v['commit_hash_long'] = result['commit_hash_long']
+
+    info_md.append(info_md_header.format(**v))
+
+    for part_line in sorted(os.listdir('database')):
+        if part_line.startswith('.'):
+            continue
+        part_path = os.path.join('database', part_line)
+
+        if not os.path.isdir(part_path):
+            continue
+
+        settings_path = os.path.join('settings', part_line + '.sh')
+        with open(settings_path, 'rb') as f:
+            settings_raw = f.read()
+
+        w = {}
+        w['commit_hash_long'] = v['commit_hash_long']
+        w['part_line'] = part_line
+        w['settings_contents'] = settings_raw.decode('utf-8')
+        w['settings_sha256'] = sha256(settings_raw)
+
+        info_md.append(info_md_section.format(**w))
+
+        # Collect every file below the directory (recursively) and emit the
+        # checksums in sorted path order so the output is reproducible.
+        files = []
+        for dirpath, dirnames, filenames in os.walk(part_path):
+            for f in filenames:
+                files.append(os.path.join(dirpath, f))
+
+        files.sort()
+        for p in files:
+            x = {}
+            x['file_short_path'] = os.path.join(
+                part_line, os.path.relpath(p, part_path))
+            x['file_sha256'] = sha256_file(p)
+            info_md.append(info_md_file.format(**x))
+
+    with open(info_md_filename, 'w') as f:
+        f.write("".join(info_md))
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
--- a/utils/sort_db.py
+++ b/utils/sort_db.py
@ -0,0 +1,363 @@
+#!/usr/bin/env python3
+"""
+Canonicalize the Project X-Ray database files by sorting. The aim is to reduce
+the diff output between runs to make it clearer what has changed.
+
+DB Files
+--------
+
+DB files are sorted into "natural" ordering. This is generally the order that a
+human would sort them in rather than how they sort as ASCII strings.
+
+For example with tags, a sequence of ABC1 to ABC12 would have the ASCII sort
+order of;
+
+    ABC1
+    ABC10
+    ABC11
+    ABC12
+    ABC2
+    ...
+    ABC9
+
+We instead sort them like the following;
+
+    ABC1
+    ABC2
+    ...
+    ABC9
+    ABC10
+    ABC11
+    ABC12
+
+For the segbit files, we sort the bit definitions ignoring any leading
+exclamation mark. Doing this generally makes it much easier to see patterns in
+the bit descriptions and you end up with output like the following for 1-hot
+encoded choices,
+
+    ABC.CHOICE1 22_15 !22_16 !22_17
+    ABC.CHOICE2 !22_15 22_16 !22_17
+    ABC.CHOICE3 !22_15 !22_16 22_17
+
+JSON Files
+----------
+
+For the JSON files, we run them through Python's pretty printing module and
+sort sets (lists where the order doesn't matter).
+
+"""
+
+import os
+import random
+import re
+import sys
+
+import json
+import xjson
+import cmp
+
+
+def split_all(s, chars):
+    """Split ``s`` on every separator in ``chars``.
+
+    Each separator is applied in turn to all pieces produced so far, so the
+    result is a flat list. Empty pieces are kept.
+
+    >>> split_all('a_b_c_d', '_. ')
+    ['a', 'b', 'c', 'd']
+    >>> split_all('a b c d', '_. ')
+    ['a', 'b', 'c', 'd']
+    >>> split_all('a.b.c.d', '_. ')
+    ['a', 'b', 'c', 'd']
+    >>> split_all('a_b.c d', '_. ')
+    ['a', 'b', 'c', 'd']
+    >>> split_all('a b_c.d', '_. ')
+    ['a', 'b', 'c', 'd']
+    """
+    pieces = [s]
+    for separator in chars:
+        pieces = [
+            part for piece in pieces for part in piece.split(separator)
+        ]
+    return pieces
+
+
+# Splits a string into a lazily-matched prefix and its trailing run of ASCII
+# digits (either part may be empty).
+NUM_REGEX = re.compile('^(.*?)([0-9]*)$')
+
+
+def extract_num(i):
+    """Turn a trailing number into an int so it sorts numerically.
+
+    Returns the input unchanged when it has no trailing digits, a bare int
+    when it is all digits, and a ``(prefix, int)`` tuple otherwise.
+
+    >>> extract_num('BLAH123')
+    ('BLAH', 123)
+    >>> extract_num('123')
+    123
+    >>> extract_num('BLAH')
+    'BLAH'
+    >>> extract_num('')
+    ''
+    """
+    prefix, digits = NUM_REGEX.match(i).groups()
+    if not digits:
+        return i
+    number = int(digits)
+    if prefix:
+        return (prefix, number)
+    return number
+
+
+class bit(tuple):
+    """Class representing a bit specifier.
+
+    A specifier is two underscore-separated fields with an optional leading
+    ``!``. It is stored as the tuple ``(first, second, mode)`` where ``mode``
+    is False when the ``!`` prefix was present, so instances sort by the two
+    fields first and by ``mode`` last.
+
+    >>> a = bit.parse("02_12")
+    >>> a
+    bit(2, 12, True)
+    >>> b = bit.parse("!02_03")
+    >>> b
+    bit(2, 3, False)
+    >>> b == a
+    False
+    >>> b < a
+    True
+    >>> str(a)
+    '02_12'
+    >>> str(b)
+    '!02_03'
+
+    >>> bit.parseline("!30_04 !31_00 !31_01 31_02")
+    [bit(30, 4, False), bit(31, 0, False), bit(31, 1, False), bit(31, 2, True)]
+
+    >>> bit.parseline("31_02 !31_00 !31_01 !30_04")
+    [bit(30, 4, False), bit(31, 0, False), bit(31, 1, False), bit(31, 2, True)]
+
+    Repeated, leading or trailing whitespace is ignored:
+
+    >>> bit.parseline("  31_02   !31_00 ")
+    [bit(31, 0, False), bit(31, 2, True)]
+    """
+
+    @classmethod
+    def parse(cls, s):
+        """Parse one specifier such as ``"02_12"`` or ``"!02_03"``.
+
+        Raises AssertionError unless the text holds exactly one underscore.
+        """
+        # startswith() instead of s[0] so an empty token reaches the
+        # assertion below rather than raising IndexError.
+        mode = not s.startswith('!')
+        s = s.replace('!', '')
+        assert '_' in s, s
+        a, b = s.split('_', 1)
+        assert '_' not in b, s
+        return cls((extract_num(a), extract_num(b), mode))
+
+    @classmethod
+    def parseline(cls, s):
+        """Parse whitespace-separated specifiers and return them sorted."""
+        # split() with no argument collapses runs of whitespace, matching how
+        # segbit_line_sort_bits tokenizes the same lines.
+        bits = [cls.parse(b) for b in s.split()]
+        bits.sort()
+        return bits
+
+    def __repr__(self):
+        return "bit" + tuple.__repr__(self)
+
+    def __str__(self):
+        """Format back to text, zero-padding both fields to two digits."""
+        s = self
+        return "{}{:02d}_{:02d}".format(['!', ''][s[2]], s[0], s[1])
+
+
+def convert_bit(i):
+    """Return a sortable ``bit`` for a bit pattern, or the text unchanged.
+
+    Text without an underscore (for example ``"always"``) is passed through.
+
+    >>> convert_bit("02_12")
+    bit(2, 12, True)
+    >>> convert_bit("!02_12")
+    bit(2, 12, False)
+    >>> convert_bit("!02_02")
+    bit(2, 2, False)
+    >>> convert_bit("always")
+    'always'
+    """
+    return bit.parse(i) if '_' in i else i
+
+
+def segbit_line_sort_bits(l):
+    """Return a segbit line with its bit specifiers in sorted order.
+
+    The first whitespace-separated token is the tag and is kept as is; the
+    remaining tokens are parsed as bits, sorted and re-formatted.
+
+    >>> segbit_line_sort_bits("A !28_35 !27_39 27_37")
+    'A 27_37 !27_39 !28_35'
+
+    >>> segbit_line_sort_bits("B !28_35 !27_39 !27_37")
+    'B !27_37 !27_39 !28_35'
+
+    >>> segbit_line_sort_bits("C 28_35 00_00 !27_37")
+    'C 00_00 !27_37 28_35'
+
+    """
+    tag, *raw_bits = l.split()
+    ordered = sorted(map(bit.parse, raw_bits))
+    return "{} {}".format(tag, " ".join(map(str, ordered)))
+
+
+def sortable_tag(t):
+    """Break a tag into a tuple of naturally-sortable components.
+
+    The tag is split on ``_``, ``.``, ``[`` and ``]``; empty pieces are
+    dropped and trailing numbers are converted with extract_num().
+
+    >>> sortable_tag("CLBLL_L.CLBLL_L_A.CLBLL_L_A1")
+    ('CLBLL', 'L', 'CLBLL', 'L', 'A', 'CLBLL', 'L', ('A', 1))
+
+    >>> sortable_tag("CLBLL_L.CLBLL_LOGIC_OUTS23.CLBLL_LL_DMUX")
+    ('CLBLL', 'L', 'CLBLL', 'LOGIC', ('OUTS', 23), 'CLBLL', 'LL', 'DMUX')
+
+    >>> sortable_tag("BRAM_L.RAMB18_Y0.INIT_B[9]")
+    ('BRAM', 'L', ('RAMB', 18), ('Y', 0), 'INIT', 'B', 9)
+
+    >>> sortable_tag("BRAM_L.RAMB18_Y0.READ_WIDTH_A_18")
+    ('BRAM', 'L', ('RAMB', 18), ('Y', 0), 'READ', 'WIDTH', 'A', 18)
+    """
+    components = []
+    for piece in split_all(t, '_.[]'):
+        if piece != '':
+            components.append(extract_num(piece))
+    return tuple(components)
+
+
+def sortable_line_from_mask(l):
+    """Build a ``(sort key, line)`` pair for a line of a mask_XXXX.db file.
+
+    The line must read ``bit <spec>``; the key is the parsed ``bit`` and the
+    line itself is returned unchanged.
+
+    Example lines from mask_XXX.db file
+    >>> a, b = sortable_line_from_mask("bit 00_00")
+    >>> a
+    bit(0, 0, True)
+    >>> b
+    'bit 00_00'
+
+    >>> a, b = sortable_line_from_mask("bit 09_39")
+    >>> a
+    bit(9, 39, True)
+    >>> b
+    'bit 09_39'
+    """
+    keyword, spec = l.split(' ', 1)
+    assert keyword == 'bit', keyword
+    sort_key = bit.parse(spec)
+    return sort_key, l
+
+
+def sortable_line_from_ppips(l):
+    """Build a ``(sort key, line)`` pair for a line of a ppips_XXX.db file.
+
+    The key is ``(sortable tag, remainder of the line)``; the line itself is
+    returned unchanged.
+
+    Example lines from ppips_XXX.db file
+    >>> a, b = sortable_line_from_ppips("CLBLL_L.CLBLL_L_A.CLBLL_L_A1 hint")
+    >>> a
+    (('CLBLL', 'L', 'CLBLL', 'L', 'A', 'CLBLL', 'L', ('A', 1)), 'hint')
+    >>> b
+    'CLBLL_L.CLBLL_L_A.CLBLL_L_A1 hint'
+
+    >>> a, b = sortable_line_from_ppips("CLBLL_L.CLBLL_LOGIC_OUTS23.CLBLL_LL_DMUX always")
+    >>> a
+    (('CLBLL', 'L', 'CLBLL', 'LOGIC', ('OUTS', 23), 'CLBLL', 'LL', 'DMUX'), 'always')
+    >>> b
+    'CLBLL_L.CLBLL_LOGIC_OUTS23.CLBLL_LL_DMUX always'
+    """
+    assert ' ' in l, repr(l)
+    raw_tag, ptype = l.split(' ', 1)
+    sort_key = (sortable_tag(raw_tag), ptype)
+    return sort_key, l
+
+
+def sortable_line_from_segbits(l):
+    """Build a ``(sort key, line)`` pair for a line of a segbits_XXX.db file.
+
+    The key is ``(sortable tag, tuple of sorted bits)``. Unlike the mask and
+    ppips variants, the returned line is rewritten with its bits sorted.
+
+    >>> (tag, bits), b = sortable_line_from_segbits("BRAM_L.RAMB18_Y0.INIT_B[9] 27_15")
+    >>> tag
+    ('BRAM', 'L', ('RAMB', 18), ('Y', 0), 'INIT', 'B', 9)
+    >>> bits
+    (bit(27, 15, True),)
+    >>> b
+    'BRAM_L.RAMB18_Y0.INIT_B[9] 27_15'
+
+    >>> (tag, bits), b = sortable_line_from_segbits("BRAM_L.RAMB18_Y0.READ_WIDTH_A_18 !28_35 !27_39 27_37")
+    >>> tag
+    ('BRAM', 'L', ('RAMB', 18), ('Y', 0), 'READ', 'WIDTH', 'A', 18)
+    >>> bits
+    (bit(27, 37, True), bit(27, 39, False), bit(28, 35, False))
+    >>> b
+    'BRAM_L.RAMB18_Y0.READ_WIDTH_A_18 27_37 !27_39 !28_35'
+    """
+    raw_tag, bit_text = l.split(' ', 1)
+    sort_key = (sortable_tag(raw_tag), tuple(bit.parseline(bit_text)))
+    canonical_line = segbit_line_sort_bits(l)
+    return sort_key, canonical_line
+
+
+def sort_db(filename):
+    """Sort a XXX.db file in place.
+
+    The line parser is chosen from the filename prefix (``segbits_``,
+    ``ppips_`` or ``mask_``). Blank lines are dropped, the remaining lines
+    are sorted by their parsed key and the file is rewritten.
+
+    Returns True when the file was rewritten, and False (leaving the file
+    untouched) when the prefix is not one of the recognised kinds.
+    """
+    if filename.startswith('segbits_'):
+        sortable_line_from_dbfile = sortable_line_from_segbits
+    elif filename.startswith('ppips_'):
+        sortable_line_from_dbfile = sortable_line_from_ppips
+    elif filename.startswith('mask_'):
+        sortable_line_from_dbfile = sortable_line_from_mask
+    else:
+        # Unknown kind of .db file: no parser applies, so report failure
+        # instead of failing later on an unbound local.
+        return False
+
+    with open(filename) as f:
+        lines = f.readlines()
+
+    tosort = []
+    for l in lines:
+        l = l.strip()
+        if not l:
+            continue
+        tosort.append(sortable_line_from_dbfile(l))
+
+    tosort.sort(key=cmp.cmp_key)
+
+    # Make sure the result does not depend on the input order: sorting a
+    # few shuffled copies must reproduce exactly the same sequence.
+    for _ in range(0, 4):
+        copy = tosort.copy()
+        random.shuffle(copy)
+        copy.sort(key=cmp.cmp_key)
+        assert len(copy) == len(tosort)
+        for got, expected in zip(copy, tosort):
+            assert got == expected, "\n%r\n != \n%r\n" % (got, expected)
+
+    with open(filename, 'w') as f:
+        for _, l in tosort:
+            f.write(l)
+            f.write('\n')
+
+    return True
+
+
+def sort_json(filename):
+    """Rewrite a XXX.json file in place using xjson.pprint.
+
+    Returns True when the file was rewritten, and False (leaving the file
+    untouched) when its contents cannot be decoded as JSON.
+    """
+    try:
+        # Context manager so the read handle is closed before the same path
+        # is reopened for writing below.
+        with open(filename) as f:
+            d = json.load(f)
+    except json.JSONDecodeError:
+        return False
+
+    with open(filename, 'w') as f:
+        xjson.pprint(f, d)
+
+    return True
+
+
+def main(argv):
+    """Canonicalize every regular file in the current directory.
+
+    ``.db`` files go through sort_db(), ``.json`` files through sort_json()
+    and anything else is reported as ignored. One status line is printed per
+    file. ``argv`` is accepted but not used. Always returns 0.
+    """
+    for entry in sorted(os.listdir()):
+        if not os.path.isfile(entry):
+            continue
+
+        extension = os.path.splitext(entry)[1]
+
+        if extension == '.db':
+            label, handler = "Sorting DB   file {:40s}", sort_db
+        elif extension == '.json':
+            label, handler = "Sorting JSON file {:40s}", sort_json
+        else:
+            label, handler = "Ignoring    file {:40s}", None
+
+        # Print the label before doing the work so progress is visible.
+        print(label.format(entry), end=" ", flush=True)
+        ok = True if handler is None else handler(entry)
+        print(".. success." if ok else ".. failed.")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))