timfuz: massage experiments

Signed-off-by: John McMaster <johndmcmaster@gmail.com>
2018-08-24 17:09:50 -07:00 · 2018-08-24 17:09:50 -07:00 · 6d78626239
parent 343cd5b413
commit 6d78626239
2 changed files with 279 additions and 96 deletions
--- a/experiments/timfuz/timfuz_massage.py
+++ b/experiments/timfuz/timfuz_massage.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python3

-from timfuz import simplify_rows, print_eqns, print_eqns_np, sort_equations, col_dist
+from timfuz import simplify_rows, print_eqns, print_eqns_np, sort_equations, col_dist, index_names
 import numpy as np
 import math
 import sys
@ -92,9 +92,9 @@ def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
    ltes = 0
    scs = 0
    b_ret = list(b)
-    sys.stdout.write('Deriving rows ')
+    sys.stdout.write('Deriving rows (%u) ' % rows)
    sys.stdout.flush()
-    progress = max(1, rows / 100)
+    progress = int(max(1, rows / 100))
    for row_refi, row_ref in enumerate(Ads):
        if row_refi % progress == 0:
            sys.stdout.write('.')
@ -152,6 +152,104 @@ def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
    assert len(Ads_ret) == len(b_ret)
    return Ads_ret, b_ret

+def derive_eq_by_near_row(Ads, b, verbose=0, col_lim=0, tweak=False):
+    '''
+    Derive equations by subtracting whole rows
+
+    Given equations like:
+    t0           >= 10
+    t0 + t1      >= 15
+    t0 + t1 + t2 >= 17
+
+    When I look at these, I think of a solution something like:
+    t0 = 10f
+    t1 = 5
+    t2 = 2
+
+    However, linprog tends to choose solutions like:
+    t0 = 17
+    t1 = 0
+    t2 = 0
+
+    To this end, add additional constraints by finding equations that are subsets of other equations
+    How to do this in a reasonable time span?
+    Also equations are sparse, which makes this harder to compute
+    '''
+    rows = len(Ads)
+    assert rows == len(b)
+    rowdelta = int(rows / 2)
+
+    # Index equations into hash maps so can lookup sparse elements quicker
+    assert len(Ads) == len(b)
+    Ads_ret = copy.copy(Ads)
+    assert len(Ads) == len(Ads_ret)
+
+    #print('Finding subsets')
+    ltes = 0
+    scs = 0
+    b_ret = list(b)
+    sys.stdout.write('Deriving rows (%u) ' % rows)
+    sys.stdout.flush()
+    progress = int(max(1, rows / 100))
+    for row_refi, row_ref in enumerate(Ads):
+        if row_refi % progress == 0:
+            sys.stdout.write('.')
+            sys.stdout.flush()
+        if col_lim and len(row_ref) > col_lim:
+            continue
+
+        #for row_cmpi, row_cmp in enumerate(Ads):
+        for row_cmpi in range(max(0, row_refi - rowdelta), min(len(Ads), row_refi + rowdelta)):
+            if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim:
+                continue
+            row_cmp = Ads[row_cmpi]
+            # FIXME: this check was supposed to be removed
+            '''
+            Every elements in row_cmp is in row_ref
+            but this doesn't mean the constants are smaller
+            Filter these out
+            '''
+            # XXX: just reduce and filter out solutions with positive constants
+            # or actually are these also useful as is?
+            lte = lte_const(row_ref, row_cmp)
+            if lte:
+                ltes += 1
+            sc = 0 and shared_const(row_ref, row_cmp)
+            if sc:
+                scs += 1
+            if lte or sc:
+                if verbose:
+                    print('')
+                    print('match')
+                    print('  ', row_ref, b[row_refi])
+                    print('  ', row_cmp, b[row_cmpi])
+                # Reduce
+                A_new = reduce_const(row_ref, row_cmp)
+                # Did this actually significantly reduce the search space?
+                #if tweak and len(A_new) > 4 and len(A_new) > len(row_cmp) / 2:
+                #if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2:
+                #    continue
+                b_new = b[row_refi] - b[row_cmpi]
+                # Definitely possible
+                # Maybe filter these out if they occur?
+                if verbose:
+                    print(b_new)
+                # Also inverted sign
+                if b_new <= 0:
+                    if verbose:
+                        print("Unexpected b")
+                    continue
+                if verbose:
+                    print('OK')
+                Ads_ret.append(A_new)
+                b_ret.append(b_new)
+    print(' done')
+
+    #A_ub_ret = A_di2np(Ads2, cols=cols)
+    print('Derive row: %d => %d rows using %d lte, %d sc' % (len(b), len(b_ret), ltes, scs))
+    assert len(Ads_ret) == len(b_ret)
+    return Ads_ret, b_ret
+
 def derive_eq_by_col(Ads, b_ub, verbose=0):
    '''
    Derive equations by subtracting out all bounded constants (ie "known" columns)
@ -224,7 +322,7 @@ def derive_eq_by_col(Ads, b_ub, verbose=0):
    print('Derive col: %d => %d rows' % (len(b_ub), len(b_ret)))
    return Ads_ret, b_ret

-def massage_equations(Ads, b, verbose=False, derive_lim=3):
+def massage_equations_old(Ads, b, verbose=False, derive_lim=3):
    '''
    Equation pipeline
    Some operations may generate new equations
@ -242,82 +340,43 @@ def massage_equations(Ads, b, verbose=False, derive_lim=3):

    # Try to (intelligently) subtract equations to generate additional constraints
    # This helps avoid putting all delay in a single shared variable
-    if derive_lim:
-        dstart = len(b)
+    dstart = len(b)

-        # Original simple
-        if 0:
-            for di in range(derive_lim):
-                print
-                assert len(Ads) == len(b)
-                n_orig = len(b)
+    # Original simple
+    for di in range(derive_lim):
+        print
+        assert len(Ads) == len(b)
+        n_orig = len(b)

-                # Meat of the operation
-                # Focus on easy equations for first pass to get a lot of easy derrivations
-                col_lim = 12 if di == 0 else None
-                #col_lim = None
-                Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim)
-                debug("der_rows")
-                # Run another simplify pass since new equations may have overlap with original
-                Ads, b = simplify_rows(Ads, b)
-                print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b)))
-                debug("der_rows simp")
+        # Meat of the operation
+        # Focus on easy equations for first pass to get a lot of easy derrivations
+        col_lim = 12 if di == 0 else None
+        #col_lim = None
+        Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim)
+        debug("der_rows")
+        # Run another simplify pass since new equations may have overlap with original
+        Ads, b = simplify_rows(Ads, b)
+        print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b)))
+        debug("der_rows simp")

-                n_orig2 = len(b)
-                # Meat of the operation
-                Ads, b = derive_eq_by_col(Ads, b)
-                debug("der_cols")
-                # Run another simplify pass since new equations may have overlap with original
-                Ads, b = simplify_rows(Ads, b)
-                print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b)))
-                debug("der_cols simp")
+        n_orig2 = len(b)
+        # Meat of the operation
+        Ads, b = derive_eq_by_col(Ads, b)
+        debug("der_cols")
+        # Run another simplify pass since new equations may have overlap with original
+        Ads, b = simplify_rows(Ads, b)
+        print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b)))
+        debug("der_cols simp")

-                if n_orig == len(b):
-                    break
+        if n_orig == len(b):
+            break

-        if 1:
-            # Each iteration one more column is allowed until all columns are included
-            # (and the system is stable)
-            col_lim = 15
-            di = 0
-            while True:
-                print
-                n_orig = len(b)

-                print('Loop %d, lim %d' % (di + 1, col_lim))
-                # Meat of the operation
-                Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True)
-                debug("der_rows")
-                # Run another simplify pass since new equations may have overlap with original
-                Ads, b = simplify_rows(Ads, b)
-                print('Derive row: %d => %d equations' % (n_orig, len(b)))
-                debug("der_rows simp")
-
-                n_orig2 = len(b)
-                # Meat of the operation
-                Ads, b = derive_eq_by_col(Ads, b)
-                debug("der_cols")
-                # Run another simplify pass since new equations may have overlap with original
-                Ads, b = simplify_rows(Ads, b)
-                print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b)))
-                debug("der_cols simp")
-
-                # Doesn't help computation, but helps debugging
-                Ads, b = sort_equations(Ads, b)
-                debug("loop done")
-                col_dist(Ads, 'derive done iter %d, lim %d' % (di, col_lim), lim=12)
-
-                rows = len(Ads)
-                if n_orig == len(b) and col_lim >= rows:
-                    break
-                col_lim += col_lim / 5
-                di += 1
-
-        dend = len(b)
-        print('')
-        print('Derive net: %d => %d' % (dstart, dend))
-        print('')
-        # Was experimentting to see how much the higher order columns really help
+    dend = len(b)
+    print('')
+    print('Derive net: %d => %d' % (dstart, dend))
+    print('')
+    # Was experimentting to see how much the higher order columns really help

    '''
    cols_min_post = opts.get('cols_min_post', None)
@ -333,3 +392,136 @@ def massage_equations(Ads, b, verbose=False, derive_lim=3):
    debug("final (sorted)")
    return Ads, b

+def massage_equations_old2(Ads, b, verbose=False):
+    '''
+    Equation pipeline
+    Some operations may generate new equations
+    Simplify after these to avoid unnecessary overhead on redundant constraints
+    Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
+    Remove these columns as necessary to speed up solving
+    '''
+
+    def debug(what):
+        if verbose:
+            print('')
+            print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
+            col_dist(Ads, what)
+            check_feasible_d(Ads, b)
+
+    # Try to (intelligently) subtract equations to generate additional constraints
+    # This helps avoid putting all delay in a single shared variable
+    dstart = len(b)
+    cols = len(index_names(Ads))
+
+    # Each iteration one more column is allowed until all columns are included
+    # (and the system is stable)
+    col_lim = 15
+    di = 0
+    while True:
+        print
+        n_orig = len(b)
+
+        print('Loop %d, lim %d' % (di + 1, col_lim))
+        # Meat of the operation
+        Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True)
+        debug("der_rows")
+        # Run another simplify pass since new equations may have overlap with original
+        Ads, b = simplify_rows(Ads, b)
+        print('Derive row: %d => %d equations' % (n_orig, len(b)))
+        debug("der_rows simp")
+
+        n_orig2 = len(b)
+        # Meat of the operation
+        Ads, b = derive_eq_by_col(Ads, b)
+        debug("der_cols")
+        # Run another simplify pass since new equations may have overlap with original
+        Ads, b = simplify_rows(Ads, b)
+        print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b)))
+        debug("der_cols simp")
+
+        # Doesn't help computation, but helps debugging
+        Ads, b = sort_equations(Ads, b)
+        debug("loop done")
+        col_dist(Ads, 'derive done iter %d, lim %d' % (di, col_lim), lim=12)
+
+        rows = len(Ads)
+        if n_orig == len(b) and col_lim >= cols:
+            break
+        col_lim += col_lim / 5
+        di += 1
+
+        dend = len(b)
+        print('')
+        print('Derive net: %d => %d' % (dstart, dend))
+        print('')
+        # Was experimentting to see how much the higher order columns really help
+
+    # Helps debug readability
+    Ads, b = sort_equations(Ads, b)
+    debug("final (sorted)")
+    print('')
+    print('Massage final: %d => %d rows' % (dstart, dend))
+    return Ads, b
+
+def massage_equations(Ads, b, verbose=False):
+    '''
+    Equation pipeline
+    Some operations may generate new equations
+    Simplify after these to avoid unnecessary overhead on redundant constraints
+    Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
+    Remove these columns as necessary to speed up solving
+    '''
+
+    def debug(what):
+        if verbose:
+            print('')
+            print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
+            col_dist(Ads, what)
+            check_feasible_d(Ads, b)
+
+    # Try to (intelligently) subtract equations to generate additional constraints
+    # This helps avoid putting all delay in a single shared variable
+    dstart = len(b)
+    cols = len(index_names(Ads))
+
+    # Each iteration one more column is allowed until all columns are included
+    # (and the system is stable)
+    print
+    n_orig = len(b)
+
+    # Meat of the operation
+    Ads, b = derive_eq_by_near_row(Ads, b, tweak=True)
+    debug("der_rows")
+    # Run another simplify pass since new equations may have overlap with original
+    Ads, b = simplify_rows(Ads, b)
+    print('Derive row: %d => %d equations' % (n_orig, len(b)))
+    debug("der_rows simp")
+
+    n_orig2 = len(b)
+    # Meat of the operation
+    Ads, b = derive_eq_by_col(Ads, b)
+    debug("der_cols")
+    # Run another simplify pass since new equations may have overlap with original
+    Ads, b = simplify_rows(Ads, b)
+    print('Derive col: %d => %d equations' % (n_orig2, len(b)))
+    debug("der_cols simp")
+
+    # Doesn't help computation, but helps debugging
+    Ads, b = sort_equations(Ads, b)
+    debug("loop done")
+    col_dist(Ads, 'derive done', lim=12)
+
+    rows = len(Ads)
+
+    dend = len(b)
+    print('')
+    print('Derive net: %d => %d' % (dstart, dend))
+    print('')
+    # Was experimentting to see how much the higher order columns really help
+
+    # Helps debug readability
+    Ads, b = sort_equations(Ads, b)
+    debug("final (sorted)")
+    print('')
+    print('Massage final: %d => %d rows' % (dstart, dend))
+    return Ads, b
--- a/experiments/timfuz/timfuz_rref.py
+++ b/experiments/timfuz/timfuz_rref.py
@ -5,7 +5,7 @@ Triaging tool to help understand where we need more timing coverage
 Finds correlated variables to help make better test cases
 '''

-from timfuz import Benchmark, Ar_di2np, loadc_Ads_b, index_names, A_ds2np
+from timfuz import Benchmark, Ar_di2np, loadc_Ads_b, index_names, A_ds2np, simplify_rows
 import numpy as np
 import glob
 import math
@ -57,7 +57,9 @@ class State(object):

    @staticmethod
    def load(fn_ins):
-        Ads, _b = loadc_Ads_b(fn_ins, corner=None, ico=True)
+        Ads, b = loadc_Ads_b(fn_ins, corner=None, ico=True)
+        print('Simplifying')
+        #Ads, b = simplify_rows(Ads, b)
        return State(Ads)

 def write_state(state, fout):
@ -70,18 +72,6 @@ def write_state(state, fout):
    }
    json.dump(j, fout, sort_keys=True, indent=4, separators=(',', ': '))

-def Adi2matrix(Adi, cols):
-    A_ub2 = [np.zeros(cols) for _i in range(cols)]
-
-    dst_rowi = 0
-    for row in Adi:
-        rownp = Ar_di2np(row, cols=len(names), sf=1)
-
-        A_ub2[dst_rowi] = np.add(A_ub2[dst_rowi], rownp)
-        dst_rowi = (dst_rowi + 1) % len(names)
-
-    return A_ub2
-
 def Anp2matrix(Anp):
    '''
    Original idea was to make into a square matrix
@ -119,27 +109,28 @@ def row_sym2dsf(rowsym, names):
            ret[name] = (int(num), int(den))
    return ret

-def comb_corr_sets(state, verbose=False):
+def state_rref(state, verbose=False):
    print('Converting rows to integer keys')
    names, Anp = A_ds2np(state.Ads)

    print('np: %u rows x %u cols' % (len(Anp), len(Anp[0])))
    print('Combining rows into matrix')
-    mnp = Anp2matrix(Anp)
+    #mnp = Anp2matrix(Anp)
+    mnp = Anp
    print('Matrix: %u rows x %u cols' % (len(mnp), len(mnp[0])))
    print('Converting np to sympy matrix')
    mfrac = fracm(mnp)
-    print('mfrac', type(mfrac[0][0]))
    msym = sympy.Matrix(mfrac)
-    print('Making rref')
-    rref, pivots = msym.rref()
-
-
    if verbose:
        print('names')
        print(names)
        print('Matrix')
        sympy.pprint(msym)
+    print('Making rref')
+    rref, pivots = msym.rref()
+
+
+    if verbose:
        print('Pivots')
        sympy.pprint(pivots)
        print('rref')
@ -189,7 +180,7 @@ def run(fout, fn_ins, verbose=0):
    print('Loading data')

    state = State.load(fn_ins)
-    comb_corr_sets(state, verbose=verbose)
+    state_rref(state, verbose=verbose)
    state.print_stats()
    if fout:
        write_state(state, fout)