diff --git a/experiments/timfuz/timfuz_massage.py b/experiments/timfuz/timfuz_massage.py index add24fe1..3c7b78b8 100644 --- a/experiments/timfuz/timfuz_massage.py +++ b/experiments/timfuz/timfuz_massage.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from timfuz import simplify_rows, print_eqns, print_eqns_np, sort_equations, col_dist +from timfuz import simplify_rows, print_eqns, print_eqns_np, sort_equations, col_dist, index_names import numpy as np import math import sys @@ -92,9 +92,9 @@ def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False): ltes = 0 scs = 0 b_ret = list(b) - sys.stdout.write('Deriving rows ') + sys.stdout.write('Deriving rows (%u) ' % rows) sys.stdout.flush() - progress = max(1, rows / 100) + progress = int(max(1, rows / 100)) for row_refi, row_ref in enumerate(Ads): if row_refi % progress == 0: sys.stdout.write('.') @@ -152,6 +152,104 @@ def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False): assert len(Ads_ret) == len(b_ret) return Ads_ret, b_ret +def derive_eq_by_near_row(Ads, b, verbose=0, col_lim=0, tweak=False): + ''' + Derive equations by subtracting whole rows + + Given equations like: + t0 >= 10 + t0 + t1 >= 15 + t0 + t1 + t2 >= 17 + + When I look at these, I think of a solution something like: + t0 = 10f + t1 = 5 + t2 = 2 + + However, linprog tends to choose solutions like: + t0 = 17 + t1 = 0 + t2 = 0 + + To this end, add additional constraints by finding equations that are subsets of other equations + How to do this in a reasonable time span? + Also equations are sparse, which makes this harder to compute + ''' + rows = len(Ads) + assert rows == len(b) + rowdelta = int(rows / 2) + + # Index equations into hash maps so can lookup sparse elements quicker + assert len(Ads) == len(b) + Ads_ret = copy.copy(Ads) + assert len(Ads) == len(Ads_ret) + + #print('Finding subsets') + ltes = 0 + scs = 0 + b_ret = list(b) + sys.stdout.write('Deriving rows (%u) ' % rows) + sys.stdout.flush() + progress = int(max(1, rows / 100)) + for row_refi, row_ref in enumerate(Ads): + if row_refi % progress == 0: + sys.stdout.write('.') + sys.stdout.flush() + if col_lim and len(row_ref) > col_lim: + continue + + #for row_cmpi, row_cmp in enumerate(Ads): + for row_cmpi in range(max(0, row_refi - rowdelta), min(len(Ads), row_refi + rowdelta)): + if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim: + continue + row_cmp = Ads[row_cmpi] + # FIXME: this check was supposed to be removed + ''' + Every elements in row_cmp is in row_ref + but this doesn't mean the constants are smaller + Filter these out + ''' + # XXX: just reduce and filter out solutions with positive constants + # or actually are these also useful as is? + lte = lte_const(row_ref, row_cmp) + if lte: + ltes += 1 + sc = 0 and shared_const(row_ref, row_cmp) + if sc: + scs += 1 + if lte or sc: + if verbose: + print('') + print('match') + print(' ', row_ref, b[row_refi]) + print(' ', row_cmp, b[row_cmpi]) + # Reduce + A_new = reduce_const(row_ref, row_cmp) + # Did this actually significantly reduce the search space? + #if tweak and len(A_new) > 4 and len(A_new) > len(row_cmp) / 2: + #if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2: + # continue + b_new = b[row_refi] - b[row_cmpi] + # Definitely possible + # Maybe filter these out if they occur? + if verbose: + print(b_new) + # Also inverted sign + if b_new <= 0: + if verbose: + print("Unexpected b") + continue + if verbose: + print('OK') + Ads_ret.append(A_new) + b_ret.append(b_new) + print(' done') + + #A_ub_ret = A_di2np(Ads2, cols=cols) + print('Derive row: %d => %d rows using %d lte, %d sc' % (len(b), len(b_ret), ltes, scs)) + assert len(Ads_ret) == len(b_ret) + return Ads_ret, b_ret + def derive_eq_by_col(Ads, b_ub, verbose=0): ''' Derive equations by subtracting out all bounded constants (ie "known" columns) @@ -224,7 +322,7 @@ def derive_eq_by_col(Ads, b_ub, verbose=0): print('Derive col: %d => %d rows' % (len(b_ub), len(b_ret))) return Ads_ret, b_ret -def massage_equations(Ads, b, verbose=False, derive_lim=3): +def massage_equations_old(Ads, b, verbose=False, derive_lim=3): ''' Equation pipeline Some operations may generate new equations @@ -242,82 +340,43 @@ def massage_equations(Ads, b, verbose=False, derive_lim=3): # Try to (intelligently) subtract equations to generate additional constraints # This helps avoid putting all delay in a single shared variable - if derive_lim: - dstart = len(b) + dstart = len(b) - # Original simple - if 0: - for di in range(derive_lim): - print - assert len(Ads) == len(b) - n_orig = len(b) + # Original simple + for di in range(derive_lim): + print + assert len(Ads) == len(b) + n_orig = len(b) - # Meat of the operation - # Focus on easy equations for first pass to get a lot of easy derrivations - col_lim = 12 if di == 0 else None - #col_lim = None - Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim) - debug("der_rows") - # Run another simplify pass since new equations may have overlap with original - Ads, b = simplify_rows(Ads, b) - print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b))) - debug("der_rows simp") + # Meat of the operation + # Focus on easy equations for first pass to get a lot of easy derrivations + col_lim = 12 if di == 0 else None + #col_lim = None + Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim) + debug("der_rows") + # Run another simplify pass since new equations may have overlap with original + Ads, b = simplify_rows(Ads, b) + print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b))) + debug("der_rows simp") - n_orig2 = len(b) - # Meat of the operation - Ads, b = derive_eq_by_col(Ads, b) - debug("der_cols") - # Run another simplify pass since new equations may have overlap with original - Ads, b = simplify_rows(Ads, b) - print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b))) - debug("der_cols simp") + n_orig2 = len(b) + # Meat of the operation + Ads, b = derive_eq_by_col(Ads, b) + debug("der_cols") + # Run another simplify pass since new equations may have overlap with original + Ads, b = simplify_rows(Ads, b) + print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b))) + debug("der_cols simp") - if n_orig == len(b): - break + if n_orig == len(b): + break - if 1: - # Each iteration one more column is allowed until all columns are included - # (and the system is stable) - col_lim = 15 - di = 0 - while True: - print - n_orig = len(b) - print('Loop %d, lim %d' % (di + 1, col_lim)) - # Meat of the operation - Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True) - debug("der_rows") - # Run another simplify pass since new equations may have overlap with original - Ads, b = simplify_rows(Ads, b) - print('Derive row: %d => %d equations' % (n_orig, len(b))) - debug("der_rows simp") - - n_orig2 = len(b) - # Meat of the operation - Ads, b = derive_eq_by_col(Ads, b) - debug("der_cols") - # Run another simplify pass since new equations may have overlap with original - Ads, b = simplify_rows(Ads, b) - print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b))) - debug("der_cols simp") - - # Doesn't help computation, but helps debugging - Ads, b = sort_equations(Ads, b) - debug("loop done") - col_dist(Ads, 'derive done iter %d, lim %d' % (di, col_lim), lim=12) - - rows = len(Ads) - if n_orig == len(b) and col_lim >= rows: - break - col_lim += col_lim / 5 - di += 1 - - dend = len(b) - print('') - print('Derive net: %d => %d' % (dstart, dend)) - print('') - # Was experimentting to see how much the higher order columns really help + dend = len(b) + print('') + print('Derive net: %d => %d' % (dstart, dend)) + print('') + # Was experimentting to see how much the higher order columns really help ''' cols_min_post = opts.get('cols_min_post', None) @@ -333,3 +392,136 @@ def massage_equations(Ads, b, verbose=False, derive_lim=3): debug("final (sorted)") return Ads, b +def massage_equations_old2(Ads, b, verbose=False): + ''' + Equation pipeline + Some operations may generate new equations + Simplify after these to avoid unnecessary overhead on redundant constraints + Similarly some operations may eliminate equations, potentially eliminating a column (ie variable) + Remove these columns as necessary to speed up solving + ''' + + def debug(what): + if verbose: + print('') + print_eqns(Ads, b, verbose=verbose, label=what, lim=20) + col_dist(Ads, what) + check_feasible_d(Ads, b) + + # Try to (intelligently) subtract equations to generate additional constraints + # This helps avoid putting all delay in a single shared variable + dstart = len(b) + cols = len(index_names(Ads)) + + # Each iteration one more column is allowed until all columns are included + # (and the system is stable) + col_lim = 15 + di = 0 + while True: + print + n_orig = len(b) + + print('Loop %d, lim %d' % (di + 1, col_lim)) + # Meat of the operation + Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True) + debug("der_rows") + # Run another simplify pass since new equations may have overlap with original + Ads, b = simplify_rows(Ads, b) + print('Derive row: %d => %d equations' % (n_orig, len(b))) + debug("der_rows simp") + + n_orig2 = len(b) + # Meat of the operation + Ads, b = derive_eq_by_col(Ads, b) + debug("der_cols") + # Run another simplify pass since new equations may have overlap with original + Ads, b = simplify_rows(Ads, b) + print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b))) + debug("der_cols simp") + + # Doesn't help computation, but helps debugging + Ads, b = sort_equations(Ads, b) + debug("loop done") + col_dist(Ads, 'derive done iter %d, lim %d' % (di, col_lim), lim=12) + + rows = len(Ads) + if n_orig == len(b) and col_lim >= cols: + break + col_lim += col_lim / 5 + di += 1 + + dend = len(b) + print('') + print('Derive net: %d => %d' % (dstart, dend)) + print('') + # Was experimentting to see how much the higher order columns really help + + # Helps debug readability + Ads, b = sort_equations(Ads, b) + debug("final (sorted)") + print('') + print('Massage final: %d => %d rows' % (dstart, dend)) + return Ads, b + +def massage_equations(Ads, b, verbose=False): + ''' + Equation pipeline + Some operations may generate new equations + Simplify after these to avoid unnecessary overhead on redundant constraints + Similarly some operations may eliminate equations, potentially eliminating a column (ie variable) + Remove these columns as necessary to speed up solving + ''' + + def debug(what): + if verbose: + print('') + print_eqns(Ads, b, verbose=verbose, label=what, lim=20) + col_dist(Ads, what) + check_feasible_d(Ads, b) + + # Try to (intelligently) subtract equations to generate additional constraints + # This helps avoid putting all delay in a single shared variable + dstart = len(b) + cols = len(index_names(Ads)) + + # Each iteration one more column is allowed until all columns are included + # (and the system is stable) + print + n_orig = len(b) + + # Meat of the operation + Ads, b = derive_eq_by_near_row(Ads, b, tweak=True) + debug("der_rows") + # Run another simplify pass since new equations may have overlap with original + Ads, b = simplify_rows(Ads, b) + print('Derive row: %d => %d equations' % (n_orig, len(b))) + debug("der_rows simp") + + n_orig2 = len(b) + # Meat of the operation + Ads, b = derive_eq_by_col(Ads, b) + debug("der_cols") + # Run another simplify pass since new equations may have overlap with original + Ads, b = simplify_rows(Ads, b) + print('Derive col: %d => %d equations' % (n_orig2, len(b))) + debug("der_cols simp") + + # Doesn't help computation, but helps debugging + Ads, b = sort_equations(Ads, b) + debug("loop done") + col_dist(Ads, 'derive done', lim=12) + + rows = len(Ads) + + dend = len(b) + print('') + print('Derive net: %d => %d' % (dstart, dend)) + print('') + # Was experimentting to see how much the higher order columns really help + + # Helps debug readability + Ads, b = sort_equations(Ads, b) + debug("final (sorted)") + print('') + print('Massage final: %d => %d rows' % (dstart, dend)) + return Ads, b diff --git a/experiments/timfuz/timfuz_rref.py b/experiments/timfuz/timfuz_rref.py index 31ecca0d..85122d54 100644 --- a/experiments/timfuz/timfuz_rref.py +++ b/experiments/timfuz/timfuz_rref.py @@ -5,7 +5,7 @@ Triaging tool to help understand where we need more timing coverage Finds correlated variables to help make better test cases ''' -from timfuz import Benchmark, Ar_di2np, loadc_Ads_b, index_names, A_ds2np +from timfuz import Benchmark, Ar_di2np, loadc_Ads_b, index_names, A_ds2np, simplify_rows import numpy as np import glob import math @@ -57,7 +57,9 @@ class State(object): @staticmethod def load(fn_ins): - Ads, _b = loadc_Ads_b(fn_ins, corner=None, ico=True) + Ads, b = loadc_Ads_b(fn_ins, corner=None, ico=True) + print('Simplifying') + #Ads, b = simplify_rows(Ads, b) return State(Ads) def write_state(state, fout): @@ -70,18 +72,6 @@ def write_state(state, fout): } json.dump(j, fout, sort_keys=True, indent=4, separators=(',', ': ')) -def Adi2matrix(Adi, cols): - A_ub2 = [np.zeros(cols) for _i in range(cols)] - - dst_rowi = 0 - for row in Adi: - rownp = Ar_di2np(row, cols=len(names), sf=1) - - A_ub2[dst_rowi] = np.add(A_ub2[dst_rowi], rownp) - dst_rowi = (dst_rowi + 1) % len(names) - - return A_ub2 - def Anp2matrix(Anp): ''' Original idea was to make into a square matrix @@ -119,27 +109,28 @@ def row_sym2dsf(rowsym, names): ret[name] = (int(num), int(den)) return ret -def comb_corr_sets(state, verbose=False): +def state_rref(state, verbose=False): print('Converting rows to integer keys') names, Anp = A_ds2np(state.Ads) print('np: %u rows x %u cols' % (len(Anp), len(Anp[0]))) print('Combining rows into matrix') - mnp = Anp2matrix(Anp) + #mnp = Anp2matrix(Anp) + mnp = Anp print('Matrix: %u rows x %u cols' % (len(mnp), len(mnp[0]))) print('Converting np to sympy matrix') mfrac = fracm(mnp) - print('mfrac', type(mfrac[0][0])) msym = sympy.Matrix(mfrac) - print('Making rref') - rref, pivots = msym.rref() - - if verbose: print('names') print(names) print('Matrix') sympy.pprint(msym) + print('Making rref') + rref, pivots = msym.rref() + + + if verbose: print('Pivots') sympy.pprint(pivots) print('rref') @@ -189,7 +180,7 @@ def run(fout, fn_ins, verbose=0): print('Loading data') state = State.load(fn_ins) - comb_corr_sets(state, verbose=verbose) + state_rref(state, verbose=verbose) state.print_stats() if fout: write_state(state, fout)