timfuz: old massage routines added back in

Signed-off-by: John McMaster <johndmcmaster@gmail.com>
John McMaster 2018-08-24 14:56:50 -07:00
parent aeebb45b34
commit 343cd5b413
4 changed files with 385 additions and 129 deletions

View File

@@ -130,18 +130,18 @@ def Ab_ub_dt2d(eqns):
return list(A_ubd), list(b_ub)
# This significantly reduces runtime
def simplify_rows(A_ubd, b_ub):
def simplify_rows(Ads, b_ub):
'''Remove duplicate equations, taking highest delay'''
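# Hypothetical illustration (rows not from the original data): if the same
# coefficient row {'t0': 1, 't1': 1} is seen with delays 10 and 12, only the
# tighter bound t0 + t1 >= 12 is kept, since it subsumes the looser one.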
# dict of constants to highest delay
eqns = OrderedDict()
assert len(A_ubd) == len(b_ub), (len(A_ubd), len(b_ub))
assert len(Ads) == len(b_ub), (len(Ads), len(b_ub))
sys.stdout.write('SimpR ')
sys.stdout.flush()
progress = max(1, len(b_ub) // 100)  # integer step so the progress-dot modulo below can hit 0
zero_ds = 0
zero_es = 0
for loopi, (b, rowd) in enumerate(zip(b_ub, A_ubd)):
for loopi, (b, rowd) in enumerate(zip(b_ub, Ads)):
if loopi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
@@ -173,61 +173,6 @@ def simplify_rows(A_ubd, b_ub):
#return A_ub_np2d(A_ub_ret), b_ub_ret
return A_ubd_ret, b_ub_ret
def simplify_cols(names, A_ubd, b_ub):
'''
Remove unused columns
This is fairly straightforward in dictionary form now, since we only have to remove entries and adjust indices
Maybe we should use the names as keys? Then this step wouldn't be needed, since indices wouldn't have to be rebased
XXX: shuffles the name order around. Do we care?
'''
# First: find unused names
# use dict since no standard ordered set
used_cols = set()
names_ret = OrderedDict()
col_old2new = OrderedDict()
rows = len(b_ub)
cols = len(names)
sys.stdout.write('SimpC indexing ')
sys.stdout.flush()
progress = max(1, rows // 100)
for rowi, rowd in enumerate(A_ubd):
if rowi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
for coli in rowd.keys():
used_cols.add(coli)
for coli in range(cols):
if coli in used_cols:
names_ret[names[coli]] = None
col_old2new[coli] = len(col_old2new)
assert len(used_cols) == len(col_old2new)
print(' done')
# Create a new matrix, copying important values over
#A_ub_ret = np.zeros((4, 1))
#A_ub_ret[3][0] = 1.0
#A_ub_ret = np.zeros((rows, len(names_ret)))
A_ub_ret = [None] * rows
sys.stdout.write('SimpC creating ')
sys.stdout.flush()
progress = max(1, rows // 100)
for rowi, rowd_old in enumerate(A_ubd):
if rowi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
l = [(col_old2new[k], v) for k, v in rowd_old.items()]
A_ub_ret[rowi] = OrderedDict(l)
print(' done')
print('Simplify cols: %d => %d cols' % (len(names), len(names_ret)))
nr = list(names_ret.keys())
return nr, A_ub_ret, b_ub
def A_ubr_np2d(row):
'''Convert a single row'''
#d = {}
@@ -239,10 +184,10 @@ def A_ubr_np2d(row):
def A_ub_np2d(A_ub):
'''Convert A_ub entries in numpy matrix to dictionary / sparse form'''
A_ubd = [None] * len(A_ub)
Adi = [None] * len(A_ub)
for i, row in enumerate(A_ub):
A_ubd[i] = A_ubr_np2d(row)
return A_ubd
Adi[i] = A_ubr_np2d(row)
return Adi
def Ar_di2np(row_di, cols):
rownp = np.zeros(cols)
@@ -282,66 +227,15 @@ def Ab_np2d(A_ub, b_ub_inv):
b_ub = invb(b_ub_inv)
return A_ubd, b_ub
def sort_equations_(A_ubd, b_ub):
# Dictionaries aren't hashable for sorting even though they are comparable
return A_ub_t2d(sorted(A_ub_d2t(A_ubd)))
def sort_equations(A_ub, b_ub):
def sort_equations(Ads, b):
# Track each row together with its b value so they stay paired while sorting
# Hmm can't sort against np arrays
tosort = [(sorted(row.items()), b) for row, b in zip(A_ub, b_ub)]
tosort = [(sorted(row.items()), rowb) for row, rowb in zip(Ads, b)]
#res = sorted(tosort, key=lambda e: e[0])
res = sorted(tosort)
A_ubtr, b_ubr = zip(*res)
return [OrderedDict(rowt) for rowt in A_ubtr], b_ubr
def lte_const(row_ref, row_cmp):
'''Return True if every constant in row_cmp appears in row_ref with magnitude no larger than in row_ref'''
#return False
for k, vc in row_cmp.items():
vr = row_ref.get(k, None)
# Not in reference?
if vr is None:
return False
if vr < vc:
return False
return True
def shared_const(row_ref, row_cmp):
'''Return true if more constants are equal than not equal'''
#return False
matches = 0
unmatches = 0
ks = list(row_ref.keys()) + list(row_cmp.keys())
for k in ks:
vr = row_ref.get(k, None)
vc = row_cmp.get(k, None)
# At least one
if vr is not None and vc is not None:
if vc == vr:
matches += 1
else:
unmatches += 1
else:
unmatches += 1
# Will equation reduce if subtracted?
return matches > unmatches
def reduce_const(row_ref, row_cmp):
'''Subtract cmp constants from ref'''
#ret = {}
ret = OrderedDict()
ks = set(row_ref.keys())
ks.update(set(row_cmp.keys()))
for k in ks:
vr = row_ref.get(k, 0)
vc = row_cmp.get(k, 0)
res = vr - vc
if res:
ret[k] = res
return ret
def derive_eq_by_row(A_ubd, b_ub, verbose=0, col_lim=0, tweak=False):
'''
Derive equations by subtracting whole rows
@@ -510,13 +404,13 @@ def derive_eq_by_col(A_ubd, b_ub, verbose=0):
print('Derive col: %d => %d rows' % (len(b_ub), len(b_ub_ret)))
return A_ubd_ret, b_ub_ret
def col_dist(A_ubd, desc='of', names=[], lim=0):
def col_dist(Ads, desc='of', names=[], lim=0):
'''Print the frequency distribution of the number of elements per row'''
rows = len(A_ubd)
rows = len(Ads)
cols = len(names)
fs = {}
for row in A_ubd:
for row in Ads:
this_cols = len(row)
fs[this_cols] = fs.get(this_cols, 0) + 1
@@ -912,3 +806,21 @@ def run_sub_json(Ads, sub_json, verbose=False):
print("Sub: %u / %u rows changed" % (nsubs, nrows))
print("Sub: %u => %u cols" % (ncols_old, ncols_new))
def print_eqns(Ads, b, verbose=0, lim=3, label=''):
rows = len(b)
print('Sample equations (%s) from %d r' % (label, rows))
prints = 0
for rowi, row in enumerate(Ads):
if verbose or ((rowi < 10 or rowi % max(1, (rows // 20)) == 0) and (not lim or prints < lim)):
line = ' EQN: p%u: ' % rowi
for k, v in sorted(row.items()):
line += '%u*t%s ' % (v, k)
line += '= %d' % b[rowi]
print(line)
prints += 1
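# Sample of the line format produced above, with hypothetical names and delays:
# EQN: p0: 1*tCLBLL_L_A 2*tCLBLL_L_B = 17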
def print_eqns_np(A_ub, b_ub, verbose=0):
Adi = A_ub_np2d(A_ub)
print_eqns(Adi, b_ub, verbose=verbose)

View File

@@ -0,0 +1,335 @@
#!/usr/bin/env python3
from timfuz import simplify_rows, print_eqns, print_eqns_np, sort_equations, col_dist
import numpy as np
import math
import sys
import datetime
import os
import time
import copy
from collections import OrderedDict
def lte_const(row_ref, row_cmp):
'''Return True if every constant in row_cmp appears in row_ref with magnitude no larger than in row_ref'''
#return False
for k, vc in row_cmp.items():
vr = row_ref.get(k, None)
# Not in reference?
if vr is None:
return False
if vr < vc:
return False
return True
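# Doctest-style illustration (hypothetical rows, not part of the original tests):
# >>> lte_const({'t0': 2, 't1': 3}, {'t0': 1, 't1': 3})
# True
# >>> lte_const({'t0': 2}, {'t0': 1, 't1': 1})
# False
# The second case fails because t1 is missing from the reference row.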
def shared_const(row_ref, row_cmp):
'''Return true if more constants are equal than not equal'''
#return False
matches = 0
unmatches = 0
ks = list(row_ref.keys()) + list(row_cmp.keys())
for k in ks:
vr = row_ref.get(k, None)
vc = row_cmp.get(k, None)
# At least one
if vr is not None and vc is not None:
if vc == vr:
matches += 1
else:
unmatches += 1
else:
unmatches += 1
# Will equation reduce if subtracted?
return matches > unmatches
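# Worked example (hypothetical rows): comparing {'t0': 1, 't1': 1} against
# {'t0': 1, 't1': 1, 't2': 1} counts t0 and t1 twice each (ks concatenates both
# key lists), giving matches=4 vs unmatches=1 for t2, so this returns True.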
def reduce_const(row_ref, row_cmp):
'''Subtract cmp constants from ref'''
#ret = {}
ret = OrderedDict()
ks = set(row_ref.keys())
ks.update(set(row_cmp.keys()))
for k in ks:
vr = row_ref.get(k, 0)
vc = row_cmp.get(k, 0)
res = vr - vc
if res:
ret[k] = res
return ret
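# Doctest-style illustration (hypothetical rows):
# >>> reduce_const({'t0': 1, 't1': 1}, {'t0': 1})
# OrderedDict([('t1', 1)])
# Terms that cancel to 0 are dropped, so identical rows reduce to an empty dict.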
def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
'''
Derive equations by subtracting whole rows
Given equations like:
t0 >= 10
t0 + t1 >= 15
t0 + t1 + t2 >= 17
When I look at these, I think of a solution something like:
t0 = 10
t1 = 5
t2 = 2
However, linprog tends to choose solutions like:
t0 = 17
t1 = 0
t2 = 0
To this end, add additional constraints by finding equations that are subsets of other equations
How to do this in a reasonable time span?
Also equations are sparse, which makes this harder to compute
'''
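# Worked instance of the idea above: subtracting (t0 >= 10) from
# (t0 + t1 >= 15) yields the derived constraint t1 >= 5, which keeps linprog
# from assigning all of the delay to t0.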
rows = len(Ads)
assert rows == len(b)
# Index equations into hash maps so we can look up sparse elements more quickly
assert len(Ads) == len(b)
Ads_ret = copy.copy(Ads)
assert len(Ads) == len(Ads_ret)
#print('Finding subsets')
ltes = 0
scs = 0
b_ret = list(b)
sys.stdout.write('Deriving rows ')
sys.stdout.flush()
progress = max(1, rows // 100)
for row_refi, row_ref in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if col_lim and len(row_ref) > col_lim:
continue
for row_cmpi, row_cmp in enumerate(Ads):
if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim:
continue
# FIXME: this check was supposed to be removed
'''
Every element in row_cmp is in row_ref,
but this doesn't mean the constants are smaller
Filter these out
'''
# XXX: just reduce and filter out solutions with positive constants
# or actually are these also useful as is?
lte = lte_const(row_ref, row_cmp)
if lte:
ltes += 1
sc = 0 and shared_const(row_ref, row_cmp)  # disabled: the leading 0 short-circuits, so shared_const is never called
if sc:
scs += 1
if lte or sc:
if verbose:
print('')
print('match')
print(' ', row_ref, b[row_refi])
print(' ', row_cmp, b[row_cmpi])
# Reduce
A_new = reduce_const(row_ref, row_cmp)
# Did this actually significantly reduce the search space?
#if tweak and len(A_new) > 4 and len(A_new) > len(row_cmp) / 2:
if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2:
continue
b_new = b[row_refi] - b[row_cmpi]
# Definitely possible
# Maybe filter these out if they occur?
if verbose:
print(b_new)
# Also inverted sign
if b_new <= 0:
if verbose:
print("Unexpected b")
continue
if verbose:
print('OK')
Ads_ret.append(A_new)
b_ret.append(b_new)
print(' done')
#A_ub_ret = A_di2np(Ads2, cols=cols)
print('Derive row: %d => %d rows using %d lte, %d sc' % (len(b), len(b_ret), ltes, scs))
assert len(Ads_ret) == len(b_ret)
return Ads_ret, b_ret
def derive_eq_by_col(Ads, b_ub, verbose=0):
'''
Derive equations by subtracting out all bounded constants (ie "known" columns)
'''
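# Sketch of the substitution (hypothetical values): a single-term row such as
# 2*t0 >= 20 is first normalized to t0 >= 10, making t0 "known"; a row like
# 2*t0 + t1 >= 25 then reduces to t1 >= 25 - 2*10 = 5.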
rows = len(Ads)
# Index equations that have a single constraint; each one directly bounds a column
knowns = {}
sys.stdout.write('Derive col indexing ')
sys.stdout.flush()
progress = max(1, rows // 100)
for row_refi, row_refd in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if len(row_refd) == 1:
k, v = list(row_refd.items())[0]
# Reduce any constants to canonical form (note: normalizes row_refd and b_ub in place)
if v != 1:
row_refd[k] = 1
b_ub[row_refi] /= v
knowns[k] = b_ub[row_refi]
print(' done')
#knowns_set = set(knowns.keys())
print('%d constrained' % len(knowns))
'''
Now see what we can do
Rows that are already constrained: eliminate
TODO: maybe keep these if this would violate their constraint
Otherwise eliminate the original row and generate a simplified result now
'''
b_ret = []
Ads_ret = []
sys.stdout.write('Derive col main ')
sys.stdout.flush()
progress = max(1, rows // 100)
for row_refi, row_refd in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
# Reduce as much as possible
#row_new = {}
row_new = OrderedDict()
b_new = b_ub[row_refi]
# Copy over single entries
if len(row_refd) == 1:
row_new = row_refd
else:
for k, v in row_refd.items():
if k in knowns:
# Remove column and take out corresponding delay
b_new -= v * knowns[k]
# Copy over
else:
row_new[k] = v
# Possibly reduced all usable constants out
if len(row_new) == 0:
continue
if b_new <= 0:
continue
Ads_ret.append(row_new)
b_ret.append(b_new)
print(' done')
print('Derive col: %d => %d rows' % (len(b_ub), len(b_ret)))
return Ads_ret, b_ret
def massage_equations(Ads, b, verbose=False, derive_lim=3):
'''
Equation pipeline
Some operations may generate new equations
Simplify after these to avoid unnecessary overhead on redundant constraints
Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
Remove these columns as necessary to speed up solving
'''
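# Minimal usage sketch (assumed calling convention: Ads is a list of
# {col_name: coefficient} dicts and b the matching list of delay bounds):
#   Ads, b = massage_equations(Ads, b, verbose=False, derive_lim=3)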
def debug(what):
if verbose:
print('')
print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
col_dist(Ads, what)
check_feasible_d(Ads, b)  # assumed to come from timfuz; note it is not in the import list above
# Try to (intelligently) subtract equations to generate additional constraints
# This helps avoid putting all delay in a single shared variable
if derive_lim:
dstart = len(b)
# Original, simpler strategy (disabled; kept for reference)
if 0:
for di in range(derive_lim):
print('')
assert len(Ads) == len(b)
n_orig = len(b)
# Meat of the operation
# Focus on easy equations for the first pass to get a lot of easy derivations
col_lim = 12 if di == 0 else None
#col_lim = None
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b)))
debug("der_rows simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b)))
debug("der_cols simp")
if n_orig == len(b):
break
if 1:
# Each iteration one more column is allowed until all columns are included
# (and the system is stable)
col_lim = 15
di = 0
while True:
print('')
n_orig = len(b)
print('Loop %d, lim %d' % (di + 1, col_lim))
# Meat of the operation
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive row: %d => %d equations' % (n_orig, len(b)))
debug("der_rows simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b)))
debug("der_cols simp")
# Doesn't help computation, but helps debugging
Ads, b = sort_equations(Ads, b)
debug("loop done")
col_dist(Ads, 'derive done iter %d, lim %d' % (di, col_lim), lim=12)
rows = len(Ads)
if n_orig == len(b) and col_lim >= rows:
break
col_lim += col_lim // 5
di += 1
dend = len(b)
print('')
print('Derive net: %d => %d' % (dstart, dend))
print('')
# Was experimenting to see how much the higher-order columns really help
'''
cols_min_post = opts.get('cols_min_post', None)
cols_max_post = opts.get('cols_max_post', None)
# Filter input based on number of columns
if cols_min_post or cols_max_post:
Ads, b = filter_ncols(Ads=Ads, b=b, cols_min=cols_min_post, cols_max=cols_max_post)
debug("filter_ncals final")
'''
# Helps debug readability
Ads, b = sort_equations(Ads, b)
debug("final (sorted)")
return Ads, b

View File

@@ -169,7 +169,7 @@ def comb_corr_sets(state, verbose=False):
continue
# a grouping
group_name = "GROUP_%u" % row_i
group_name = "GRP_%u" % row_i
rowdsf = row_sym2dsf(rowsym, names)
state.subs[group_name] = rowdsf

View File

@@ -2,7 +2,8 @@
# https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.optimize.linprog.html
from scipy.optimize import linprog
from timfuz import Benchmark, Ar_di2np, Ar_ds2t, A_di2ds, A_ds2di, simplify_rows, loadc_Ads_b, index_names, A_ds2np, load_sub, run_sub_json
from timfuz import Benchmark, Ar_di2np, Ar_ds2t, A_di2ds, A_ds2di, simplify_rows, loadc_Ads_b, index_names, A_ds2np, load_sub, run_sub_json, A_ub_np2d, print_eqns, print_eqns_np
from timfuz_massage import massage_equations
import numpy as np
import glob
import json
@@ -65,12 +66,10 @@ def check_feasible(A_ub, b_ub):
print(' done')
def run_corner(Anp, b, names, verbose=False, opts={}, meta={}):
# Given timing scores for above delays (-ps)
assert type(Anp[0]) is np.ndarray, type(Anp[0])
assert type(b) is np.ndarray, type(b)
# Given timing scores for above delays (-ps)
names_orig = names
#check_feasible(Anp, b)
'''
@@ -91,6 +90,12 @@ def run_corner(Anp, b, names, verbose=False, opts={}, meta={}):
#A_ub = -1.0 * Anp
A_ub = [-1.0 * x for x in Anp]
if verbose:
print('')
print('A_ub b_ub')
print_eqns_np(A_ub, b_ub, verbose=verbose)
print('')
print('Creating misc constants...')
# Minimization function scalars
# Treat all logic elements as equally important
@@ -133,7 +138,7 @@ def run_corner(Anp, b, names, verbose=False, opts={}, meta={}):
print('')
# Now find smallest values for delay constants
# Due to input bounds (ex: column limit), some delay elements may get eliminated entirely
print('Running linprog w/ %d r, %d c (%d name)' % (rows, cols, len(names_orig)))
print('Running linprog w/ %d r, %d c (%d name)' % (rows, cols, len(names)))
res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=bounds, callback=callback,
options={"disp": True, 'maxiter': maxiter, 'bland': True, 'tol': 1e-6,})
nonzeros = 0
@@ -155,7 +160,7 @@ def run_corner(Anp, b, names, verbose=False, opts={}, meta={}):
print('Writing %s' % fn_out)
np.save(fn_out, (3, c, A_ub, b_ub, bounds, names, res, meta))
def run(fns_in, corner, sub_json=None, dedup=True, verbose=False):
def run(fns_in, corner, sub_json=None, dedup=True, massage=False, verbose=False):
Ads, b = loadc_Ads_b(fns_in, corner, ico=True)
# Remove duplicate rows
@@ -175,12 +180,15 @@ def run(fns_in, corner, sub_json=None, dedup=True, verbose=False):
else:
names = index_names(Ads)
if 0:
if verbose:
print('')
print_eqns(A_ubd, b_ub, verbose=verbose)
print_eqns(Ads, b, verbose=verbose)
print('')
col_dist(A_ubd, 'final', names)
#print
#col_dist(A_ubd, 'final', names)
if massage:
Ads, b = massage_equations(Ads, b)
print('Converting to numpy...')
names, Anp = A_ds2np(Ads)
@@ -195,6 +203,7 @@ def main():
)
parser.add_argument('--verbose', action='store_true', help='')
parser.add_argument('--massage', action='store_true', help='derive additional constraints via massage_equations')
parser.add_argument('--sub-json', help='Group substitutions to make fully ranked')
parser.add_argument('--corner', default="slow_max", help='')
parser.add_argument(
@@ -215,7 +224,7 @@ def main():
try:
run(sub_json=sub_json,
fns_in=fns_in, verbose=args.verbose, corner=args.corner)
fns_in=fns_in, verbose=args.verbose, corner=args.corner, massage=args.massage)
finally:
print('Exiting after %s' % bench)