timfuz: massage experiments

Signed-off-by: John McMaster <johndmcmaster@gmail.com>
This commit is contained in:
John McMaster 2018-08-24 17:09:50 -07:00
parent 343cd5b413
commit 6d78626239
2 changed files with 279 additions and 96 deletions

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3
from timfuz import simplify_rows, print_eqns, print_eqns_np, sort_equations, col_dist
from timfuz import simplify_rows, print_eqns, print_eqns_np, sort_equations, col_dist, index_names
import numpy as np
import math
import sys
@ -92,9 +92,9 @@ def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
ltes = 0
scs = 0
b_ret = list(b)
sys.stdout.write('Deriving rows ')
sys.stdout.write('Deriving rows (%u) ' % rows)
sys.stdout.flush()
progress = max(1, rows / 100)
progress = int(max(1, rows / 100))
for row_refi, row_ref in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
@ -152,6 +152,104 @@ def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
assert len(Ads_ret) == len(b_ret)
return Ads_ret, b_ret
def derive_eq_by_near_row(Ads, b, verbose=0, col_lim=0, tweak=False):
'''
Derive equations by subtracting whole rows
Given equations like:
t0 >= 10
t0 + t1 >= 15
t0 + t1 + t2 >= 17
When I look at these, I think of a solution something like:
t0 = 10f
t1 = 5
t2 = 2
However, linprog tends to choose solutions like:
t0 = 17
t1 = 0
t2 = 0
To this end, add additional constraints by finding equations that are subsets of other equations
How to do this in a reasonable time span?
Also equations are sparse, which makes this harder to compute
'''
rows = len(Ads)
assert rows == len(b)
rowdelta = int(rows / 2)
# Index equations into hash maps so can lookup sparse elements quicker
assert len(Ads) == len(b)
Ads_ret = copy.copy(Ads)
assert len(Ads) == len(Ads_ret)
#print('Finding subsets')
ltes = 0
scs = 0
b_ret = list(b)
sys.stdout.write('Deriving rows (%u) ' % rows)
sys.stdout.flush()
progress = int(max(1, rows / 100))
for row_refi, row_ref in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if col_lim and len(row_ref) > col_lim:
continue
#for row_cmpi, row_cmp in enumerate(Ads):
for row_cmpi in range(max(0, row_refi - rowdelta), min(len(Ads), row_refi + rowdelta)):
if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim:
continue
row_cmp = Ads[row_cmpi]
# FIXME: this check was supposed to be removed
'''
Every elements in row_cmp is in row_ref
but this doesn't mean the constants are smaller
Filter these out
'''
# XXX: just reduce and filter out solutions with positive constants
# or actually are these also useful as is?
lte = lte_const(row_ref, row_cmp)
if lte:
ltes += 1
sc = 0 and shared_const(row_ref, row_cmp)
if sc:
scs += 1
if lte or sc:
if verbose:
print('')
print('match')
print(' ', row_ref, b[row_refi])
print(' ', row_cmp, b[row_cmpi])
# Reduce
A_new = reduce_const(row_ref, row_cmp)
# Did this actually significantly reduce the search space?
#if tweak and len(A_new) > 4 and len(A_new) > len(row_cmp) / 2:
#if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2:
# continue
b_new = b[row_refi] - b[row_cmpi]
# Definitely possible
# Maybe filter these out if they occur?
if verbose:
print(b_new)
# Also inverted sign
if b_new <= 0:
if verbose:
print("Unexpected b")
continue
if verbose:
print('OK')
Ads_ret.append(A_new)
b_ret.append(b_new)
print(' done')
#A_ub_ret = A_di2np(Ads2, cols=cols)
print('Derive row: %d => %d rows using %d lte, %d sc' % (len(b), len(b_ret), ltes, scs))
assert len(Ads_ret) == len(b_ret)
return Ads_ret, b_ret
def derive_eq_by_col(Ads, b_ub, verbose=0):
'''
Derive equations by subtracting out all bounded constants (ie "known" columns)
@ -224,7 +322,7 @@ def derive_eq_by_col(Ads, b_ub, verbose=0):
print('Derive col: %d => %d rows' % (len(b_ub), len(b_ret)))
return Ads_ret, b_ret
def massage_equations(Ads, b, verbose=False, derive_lim=3):
def massage_equations_old(Ads, b, verbose=False, derive_lim=3):
'''
Equation pipeline
Some operations may generate new equations
@ -242,82 +340,43 @@ def massage_equations(Ads, b, verbose=False, derive_lim=3):
# Try to (intelligently) subtract equations to generate additional constraints
# This helps avoid putting all delay in a single shared variable
if derive_lim:
dstart = len(b)
dstart = len(b)
# Original simple
if 0:
for di in range(derive_lim):
print
assert len(Ads) == len(b)
n_orig = len(b)
# Original simple
for di in range(derive_lim):
print
assert len(Ads) == len(b)
n_orig = len(b)
# Meat of the operation
# Focus on easy equations for first pass to get a lot of easy derrivations
col_lim = 12 if di == 0 else None
#col_lim = None
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b)))
debug("der_rows simp")
# Meat of the operation
# Focus on easy equations for first pass to get a lot of easy derrivations
col_lim = 12 if di == 0 else None
#col_lim = None
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b)))
debug("der_rows simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b)))
debug("der_cols simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b)))
debug("der_cols simp")
if n_orig == len(b):
break
if n_orig == len(b):
break
if 1:
# Each iteration one more column is allowed until all columns are included
# (and the system is stable)
col_lim = 15
di = 0
while True:
print
n_orig = len(b)
print('Loop %d, lim %d' % (di + 1, col_lim))
# Meat of the operation
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive row: %d => %d equations' % (n_orig, len(b)))
debug("der_rows simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b)))
debug("der_cols simp")
# Doesn't help computation, but helps debugging
Ads, b = sort_equations(Ads, b)
debug("loop done")
col_dist(Ads, 'derive done iter %d, lim %d' % (di, col_lim), lim=12)
rows = len(Ads)
if n_orig == len(b) and col_lim >= rows:
break
col_lim += col_lim / 5
di += 1
dend = len(b)
print('')
print('Derive net: %d => %d' % (dstart, dend))
print('')
# Was experimentting to see how much the higher order columns really help
dend = len(b)
print('')
print('Derive net: %d => %d' % (dstart, dend))
print('')
# Was experimentting to see how much the higher order columns really help
'''
cols_min_post = opts.get('cols_min_post', None)
@ -333,3 +392,136 @@ def massage_equations(Ads, b, verbose=False, derive_lim=3):
debug("final (sorted)")
return Ads, b
def massage_equations_old2(Ads, b, verbose=False):
'''
Equation pipeline
Some operations may generate new equations
Simplify after these to avoid unnecessary overhead on redundant constraints
Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
Remove these columns as necessary to speed up solving
'''
def debug(what):
if verbose:
print('')
print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
col_dist(Ads, what)
check_feasible_d(Ads, b)
# Try to (intelligently) subtract equations to generate additional constraints
# This helps avoid putting all delay in a single shared variable
dstart = len(b)
cols = len(index_names(Ads))
# Each iteration one more column is allowed until all columns are included
# (and the system is stable)
col_lim = 15
di = 0
while True:
print
n_orig = len(b)
print('Loop %d, lim %d' % (di + 1, col_lim))
# Meat of the operation
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive row: %d => %d equations' % (n_orig, len(b)))
debug("der_rows simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b)))
debug("der_cols simp")
# Doesn't help computation, but helps debugging
Ads, b = sort_equations(Ads, b)
debug("loop done")
col_dist(Ads, 'derive done iter %d, lim %d' % (di, col_lim), lim=12)
rows = len(Ads)
if n_orig == len(b) and col_lim >= cols:
break
col_lim += col_lim / 5
di += 1
dend = len(b)
print('')
print('Derive net: %d => %d' % (dstart, dend))
print('')
# Was experimentting to see how much the higher order columns really help
# Helps debug readability
Ads, b = sort_equations(Ads, b)
debug("final (sorted)")
print('')
print('Massage final: %d => %d rows' % (dstart, dend))
return Ads, b
def massage_equations(Ads, b, verbose=False):
'''
Equation pipeline
Some operations may generate new equations
Simplify after these to avoid unnecessary overhead on redundant constraints
Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
Remove these columns as necessary to speed up solving
'''
def debug(what):
if verbose:
print('')
print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
col_dist(Ads, what)
check_feasible_d(Ads, b)
# Try to (intelligently) subtract equations to generate additional constraints
# This helps avoid putting all delay in a single shared variable
dstart = len(b)
cols = len(index_names(Ads))
# Each iteration one more column is allowed until all columns are included
# (and the system is stable)
print
n_orig = len(b)
# Meat of the operation
Ads, b = derive_eq_by_near_row(Ads, b, tweak=True)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive row: %d => %d equations' % (n_orig, len(b)))
debug("der_rows simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive col: %d => %d equations' % (n_orig2, len(b)))
debug("der_cols simp")
# Doesn't help computation, but helps debugging
Ads, b = sort_equations(Ads, b)
debug("loop done")
col_dist(Ads, 'derive done', lim=12)
rows = len(Ads)
dend = len(b)
print('')
print('Derive net: %d => %d' % (dstart, dend))
print('')
# Was experimentting to see how much the higher order columns really help
# Helps debug readability
Ads, b = sort_equations(Ads, b)
debug("final (sorted)")
print('')
print('Massage final: %d => %d rows' % (dstart, dend))
return Ads, b

View File

@ -5,7 +5,7 @@ Triaging tool to help understand where we need more timing coverage
Finds correlated variables to help make better test cases
'''
from timfuz import Benchmark, Ar_di2np, loadc_Ads_b, index_names, A_ds2np
from timfuz import Benchmark, Ar_di2np, loadc_Ads_b, index_names, A_ds2np, simplify_rows
import numpy as np
import glob
import math
@ -57,7 +57,9 @@ class State(object):
@staticmethod
def load(fn_ins):
Ads, _b = loadc_Ads_b(fn_ins, corner=None, ico=True)
Ads, b = loadc_Ads_b(fn_ins, corner=None, ico=True)
print('Simplifying')
#Ads, b = simplify_rows(Ads, b)
return State(Ads)
def write_state(state, fout):
@ -70,18 +72,6 @@ def write_state(state, fout):
}
json.dump(j, fout, sort_keys=True, indent=4, separators=(',', ': '))
def Adi2matrix(Adi, cols):
A_ub2 = [np.zeros(cols) for _i in range(cols)]
dst_rowi = 0
for row in Adi:
rownp = Ar_di2np(row, cols=len(names), sf=1)
A_ub2[dst_rowi] = np.add(A_ub2[dst_rowi], rownp)
dst_rowi = (dst_rowi + 1) % len(names)
return A_ub2
def Anp2matrix(Anp):
'''
Original idea was to make into a square matrix
@ -119,27 +109,28 @@ def row_sym2dsf(rowsym, names):
ret[name] = (int(num), int(den))
return ret
def comb_corr_sets(state, verbose=False):
def state_rref(state, verbose=False):
print('Converting rows to integer keys')
names, Anp = A_ds2np(state.Ads)
print('np: %u rows x %u cols' % (len(Anp), len(Anp[0])))
print('Combining rows into matrix')
mnp = Anp2matrix(Anp)
#mnp = Anp2matrix(Anp)
mnp = Anp
print('Matrix: %u rows x %u cols' % (len(mnp), len(mnp[0])))
print('Converting np to sympy matrix')
mfrac = fracm(mnp)
print('mfrac', type(mfrac[0][0]))
msym = sympy.Matrix(mfrac)
print('Making rref')
rref, pivots = msym.rref()
if verbose:
print('names')
print(names)
print('Matrix')
sympy.pprint(msym)
print('Making rref')
rref, pivots = msym.rref()
if verbose:
print('Pivots')
sympy.pprint(pivots)
print('rref')
@ -189,7 +180,7 @@ def run(fout, fn_ins, verbose=0):
print('Loading data')
state = State.load(fn_ins)
comb_corr_sets(state, verbose=verbose)
state_rref(state, verbose=verbose)
state.print_stats()
if fout:
write_state(state, fout)