timfuz: old massage routines added back in

Signed-off-by: John McMaster <johndmcmaster@gmail.com>
John McMaster 2018-08-24 14:56:50 -07:00
parent aeebb45b34
commit 343cd5b413
4 changed files with 385 additions and 129 deletions

View File

@@ -130,18 +130,18 @@ def Ab_ub_dt2d(eqns):
return list(A_ubd), list(b_ub)
# This significantly reduces runtime
def simplify_rows(A_ubd, b_ub):
def simplify_rows(Ads, b_ub):
'''Remove duplicate equations, taking highest delay'''
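# Hypothetical illustration (rows not from the original data): if the same
# coefficient row {'t0': 1, 't1': 1} is seen with delays 10 and 12, only the
# tighter bound t0 + t1 >= 12 is kept, since it subsumes the looser one.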
# dict of constants to highest delay
eqns = OrderedDict()
assert len(A_ubd) == len(b_ub), (len(A_ubd), len(b_ub))
assert len(Ads) == len(b_ub), (len(Ads), len(b_ub))
sys.stdout.write('SimpR ')
sys.stdout.flush()
progress = max(1, len(b_ub) // 100)  # integer step so the progress-dot modulo below can hit 0
zero_ds = 0
zero_es = 0
for loopi, (b, rowd) in enumerate(zip(b_ub, A_ubd)):
for loopi, (b, rowd) in enumerate(zip(b_ub, Ads)):
if loopi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
@@ -173,61 +173,6 @@ def simplify_rows(A_ubd, b_ub):
#return A_ub_np2d(A_ub_ret), b_ub_ret
return A_ubd_ret, b_ub_ret
def simplify_cols(names, A_ubd, b_ub):
'''
Remove unused columns
This is fairly straightforward in dictionary form now, since we only have to remove entries and adjust indices
Maybe we should use the names as keys? Then this step wouldn't be needed, since indices wouldn't have to be rebased
XXX: shuffles the name order around. Do we care?
'''
# First: find unused names
# use dict since no standard ordered set
used_cols = set()
names_ret = OrderedDict()
col_old2new = OrderedDict()
rows = len(b_ub)
cols = len(names)
sys.stdout.write('SimpC indexing ')
sys.stdout.flush()
progress = max(1, rows // 100)
for rowi, rowd in enumerate(A_ubd):
if rowi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
for coli in rowd.keys():
used_cols.add(coli)
for coli in range(cols):
if coli in used_cols:
names_ret[names[coli]] = None
col_old2new[coli] = len(col_old2new)
assert len(used_cols) == len(col_old2new)
print(' done')
# Create a new matrix, copying important values over
#A_ub_ret = np.zeros((4, 1))
#A_ub_ret[3][0] = 1.0
#A_ub_ret = np.zeros((rows, len(names_ret)))
A_ub_ret = [None] * rows
sys.stdout.write('SimpC creating ')
sys.stdout.flush()
progress = max(1, rows // 100)
for rowi, rowd_old in enumerate(A_ubd):
if rowi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
l = [(col_old2new[k], v) for k, v in rowd_old.items()]
A_ub_ret[rowi] = OrderedDict(l)
print(' done')
print('Simplify cols: %d => %d cols' % (len(names), len(names_ret)))
nr = list(names_ret.keys())
return nr, A_ub_ret, b_ub
def A_ubr_np2d(row):
'''Convert a single row'''
#d = {}
@@ -239,10 +184,10 @@ def A_ubr_np2d(row):
def A_ub_np2d(A_ub):
'''Convert A_ub entries in numpy matrix to dictionary / sparse form'''
A_ubd = [None] * len(A_ub)
Adi = [None] * len(A_ub)
for i, row in enumerate(A_ub):
A_ubd[i] = A_ubr_np2d(row)
return A_ubd
Adi[i] = A_ubr_np2d(row)
return Adi
def Ar_di2np(row_di, cols):
rownp = np.zeros(cols)
@@ -282,66 +227,15 @@ def Ab_np2d(A_ub, b_ub_inv):
b_ub = invb(b_ub_inv)
return A_ubd, b_ub
def sort_equations_(A_ubd, b_ub):
# Dictionaries aren't hashable for sorting even though they are comparable
return A_ub_t2d(sorted(A_ub_d2t(A_ubd)))
def sort_equations(A_ub, b_ub):
def sort_equations(Ads, b):
# Track each row together with its b value so they stay paired while sorting
# Hmm can't sort against np arrays
tosort = [(sorted(row.items()), b) for row, b in zip(A_ub, b_ub)]
tosort = [(sorted(row.items()), rowb) for row, rowb in zip(Ads, b)]
#res = sorted(tosort, key=lambda e: e[0])
res = sorted(tosort)
A_ubtr, b_ubr = zip(*res)
return [OrderedDict(rowt) for rowt in A_ubtr], b_ubr
def lte_const(row_ref, row_cmp):
'''Return True if every constant in row_cmp appears in row_ref with magnitude no larger than in row_ref'''
#return False
for k, vc in row_cmp.items():
vr = row_ref.get(k, None)
# Not in reference?
if vr is None:
return False
if vr < vc:
return False
return True
def shared_const(row_ref, row_cmp):
'''Return true if more constants are equal than not equal'''
#return False
matches = 0
unmatches = 0
ks = list(row_ref.keys()) + list(row_cmp.keys())
for k in ks:
vr = row_ref.get(k, None)
vc = row_cmp.get(k, None)
# At least one
if vr is not None and vc is not None:
if vc == vr:
matches += 1
else:
unmatches += 1
else:
unmatches += 1
# Will equation reduce if subtracted?
return matches > unmatches
def reduce_const(row_ref, row_cmp):
'''Subtract cmp constants from ref'''
#ret = {}
ret = OrderedDict()
ks = set(row_ref.keys())
ks.update(set(row_cmp.keys()))
for k in ks:
vr = row_ref.get(k, 0)
vc = row_cmp.get(k, 0)
res = vr - vc
if res:
ret[k] = res
return ret
def derive_eq_by_row(A_ubd, b_ub, verbose=0, col_lim=0, tweak=False):
'''
Derive equations by subtracting whole rows
@@ -510,13 +404,13 @@ def derive_eq_by_col(A_ubd, b_ub, verbose=0):
print('Derive col: %d => %d rows' % (len(b_ub), len(b_ub_ret)))
return A_ubd_ret, b_ub_ret
def col_dist(A_ubd, desc='of', names=[], lim=0):
def col_dist(Ads, desc='of', names=[], lim=0):
'''Print the frequency distribution of the number of elements per row'''
rows = len(A_ubd)
rows = len(Ads)
cols = len(names)
fs = {}
for row in A_ubd:
for row in Ads:
this_cols = len(row)
fs[this_cols] = fs.get(this_cols, 0) + 1
@@ -912,3 +806,21 @@ def run_sub_json(Ads, sub_json, verbose=False):
print("Sub: %u / %u rows changed" % (nsubs, nrows))
print("Sub: %u => %u cols" % (ncols_old, ncols_new))
def print_eqns(Ads, b, verbose=0, lim=3, label=''):
rows = len(b)
print('Sample equations (%s) from %d r' % (label, rows))
prints = 0
for rowi, row in enumerate(Ads):
if verbose or ((rowi < 10 or rowi % max(1, (rows // 20)) == 0) and (not lim or prints < lim)):
line = ' EQN: p%u: ' % rowi
for k, v in sorted(row.items()):
line += '%u*t%s ' % (v, k)
line += '= %d' % b[rowi]
print(line)
prints += 1
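# Sample of the line format produced above, with hypothetical names and delays:
# EQN: p0: 1*tCLBLL_L_A 2*tCLBLL_L_B = 17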
def print_eqns_np(A_ub, b_ub, verbose=0):
Adi = A_ub_np2d(A_ub)
print_eqns(Adi, b_ub, verbose=verbose)

View File

@@ -0,0 +1,335 @@
#!/usr/bin/env python3
from timfuz import simplify_rows, print_eqns, print_eqns_np, sort_equations, col_dist
import numpy as np
import math
import sys
import datetime
import os
import time
import copy
from collections import OrderedDict
def lte_const(row_ref, row_cmp):
'''Return True if every constant in row_cmp appears in row_ref with magnitude no larger than in row_ref'''
#return False
for k, vc in row_cmp.items():
vr = row_ref.get(k, None)
# Not in reference?
if vr is None:
return False
if vr < vc:
return False
return True
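# Doctest-style illustration (hypothetical rows, not part of the original tests):
# >>> lte_const({'t0': 2, 't1': 3}, {'t0': 1, 't1': 3})
# True
# >>> lte_const({'t0': 2}, {'t0': 1, 't1': 1})
# False
# The second case fails because t1 is missing from the reference row.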
def shared_const(row_ref, row_cmp):
'''Return true if more constants are equal than not equal'''
#return False
matches = 0
unmatches = 0
ks = list(row_ref.keys()) + list(row_cmp.keys())
for k in ks:
vr = row_ref.get(k, None)
vc = row_cmp.get(k, None)
# At least one
if vr is not None and vc is not None:
if vc == vr:
matches += 1
else:
unmatches += 1
else:
unmatches += 1
# Will equation reduce if subtracted?
return matches > unmatches
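# Worked example (hypothetical rows): comparing {'t0': 1, 't1': 1} against
# {'t0': 1, 't1': 1, 't2': 1} counts t0 and t1 twice each (ks concatenates both
# key lists), giving matches=4 vs unmatches=1 for t2, so this returns True.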
def reduce_const(row_ref, row_cmp):
'''Subtract cmp constants from ref'''
#ret = {}
ret = OrderedDict()
ks = set(row_ref.keys())
ks.update(set(row_cmp.keys()))
for k in ks:
vr = row_ref.get(k, 0)
vc = row_cmp.get(k, 0)
res = vr - vc
if res:
ret[k] = res
return ret
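# Doctest-style illustration (hypothetical rows):
# >>> reduce_const({'t0': 1, 't1': 1}, {'t0': 1})
# OrderedDict([('t1', 1)])
# Terms that cancel to 0 are dropped, so identical rows reduce to an empty dict.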
def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
'''
Derive equations by subtracting whole rows
Given equations like:
t0 >= 10
t0 + t1 >= 15
t0 + t1 + t2 >= 17
When I look at these, I think of a solution something like:
t0 = 10
t1 = 5
t2 = 2
However, linprog tends to choose solutions like:
t0 = 17
t1 = 0
t2 = 0
To this end, add additional constraints by finding equations that are subsets of other equations
How to do this in a reasonable time span?
Also equations are sparse, which makes this harder to compute
'''
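# Worked instance of the idea above: subtracting (t0 >= 10) from
# (t0 + t1 >= 15) yields the derived constraint t1 >= 5, which keeps linprog
# from assigning all of the delay to t0.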
rows = len(Ads)
assert rows == len(b)
# Index equations into hash maps so we can look up sparse elements more quickly
assert len(Ads) == len(b)
Ads_ret = copy.copy(Ads)
assert len(Ads) == len(Ads_ret)
#print('Finding subsets')
ltes = 0
scs = 0
b_ret = list(b)
sys.stdout.write('Deriving rows ')
sys.stdout.flush()
progress = max(1, rows // 100)
for row_refi, row_ref in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if col_lim and len(row_ref) > col_lim:
continue
for row_cmpi, row_cmp in enumerate(Ads):
if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim:
continue
# FIXME: this check was supposed to be removed
'''
Every element in row_cmp is in row_ref,
but this doesn't mean the constants are smaller
Filter these out
'''
# XXX: just reduce and filter out solutions with positive constants
# or actually are these also useful as is?
lte = lte_const(row_ref, row_cmp)
if lte:
ltes += 1
sc = 0 and shared_const(row_ref, row_cmp)  # disabled: the leading 0 short-circuits, so shared_const is never called
if sc:
scs += 1
if lte or sc:
if verbose:
print('')
print('match')
print(' ', row_ref, b[row_refi])
print(' ', row_cmp, b[row_cmpi])
# Reduce
A_new = reduce_const(row_ref, row_cmp)
# Did this actually significantly reduce the search space?
#if tweak and len(A_new) > 4 and len(A_new) > len(row_cmp) / 2:
if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2:
continue
b_new = b[row_refi] - b[row_cmpi]
# Definitely possible
# Maybe filter these out if they occur?
if verbose:
print(b_new)
# Also inverted sign
if b_new <= 0:
if verbose:
print("Unexpected b")
continue
if verbose:
print('OK')
Ads_ret.append(A_new)
b_ret.append(b_new)
print(' done')
#A_ub_ret = A_di2np(Ads2, cols=cols)
print('Derive row: %d => %d rows using %d lte, %d sc' % (len(b), len(b_ret), ltes, scs))
assert len(Ads_ret) == len(b_ret)
return Ads_ret, b_ret
def derive_eq_by_col(Ads, b_ub, verbose=0):
'''
Derive equations by subtracting out all bounded constants (ie "known" columns)
'''
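# Sketch of the substitution (hypothetical values): a single-term row such as
# 2*t0 >= 20 is first normalized to t0 >= 10, making t0 "known"; a row like
# 2*t0 + t1 >= 25 then reduces to t1 >= 25 - 2*10 = 5.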
rows = len(Ads)
# Index equations that have a single constraint; each one directly bounds a column
knowns = {}
sys.stdout.write('Derive col indexing ')
sys.stdout.flush()
progress = max(1, rows // 100)
for row_refi, row_refd in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if len(row_refd) == 1:
k, v = list(row_refd.items())[0]
# Reduce any constants to canonical form (note: normalizes row_refd and b_ub in place)
if v != 1:
row_refd[k] = 1
b_ub[row_refi] /= v
knowns[k] = b_ub[row_refi]
print(' done')
#knowns_set = set(knowns.keys())
print('%d constrained' % len(knowns))
'''
Now see what we can do
Rows that are already constrained: eliminate
TODO: maybe keep these if this would violate their constraint
Otherwise eliminate the original row and generate a simplified result now
'''
b_ret = []
Ads_ret = []
sys.stdout.write('Derive col main ')
sys.stdout.flush()
progress = max(1, rows // 100)
for row_refi, row_refd in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
# Reduce as much as possible
#row_new = {}
row_new = OrderedDict()
b_new = b_ub[row_refi]
# Copy over single entries
if len(row_refd) == 1:
row_new = row_refd
else:
for k, v in row_refd.items():
if k in knowns:
# Remove column and take out corresponding delay
b_new -= v * knowns[k]
# Copy over
else:
row_new[k] = v
# Possibly reduced all usable constants out
if len(row_new) == 0:
continue
if b_new <= 0:
continue
Ads_ret.append(row_new)
b_ret.append(b_new)
print(' done')
print('Derive col: %d => %d rows' % (len(b_ub), len(b_ret)))
return Ads_ret, b_ret
def massage_equations(Ads, b, verbose=False, derive_lim=3):
'''
Equation pipeline
Some operations may generate new equations
Simplify after these to avoid unnecessary overhead on redundant constraints
Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
Remove these columns as necessary to speed up solving
'''
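# Minimal usage sketch (assumed calling convention: Ads is a list of
# {col_name: coefficient} dicts and b the matching list of delay bounds):
#   Ads, b = massage_equations(Ads, b, verbose=False, derive_lim=3)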
def debug(what):
if verbose:
print('')
print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
col_dist(Ads, what)
check_feasible_d(Ads, b)  # assumed to come from timfuz; note it is not in the import list above
# Try to (intelligently) subtract equations to generate additional constraints
# This helps avoid putting all delay in a single shared variable
if derive_lim:
dstart = len(b)
# Original, simpler strategy (disabled; kept for reference)
if 0:
for di in range(derive_lim):
print('')
assert len(Ads) == len(b)
n_orig = len(b)
# Meat of the operation
# Focus on easy equations for the first pass to get a lot of easy derivations
col_lim = 12 if di == 0 else None
#col_lim = None
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b)))
debug("der_rows simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b)))
debug("der_cols simp")
if n_orig == len(b):
break
if 1:
# Each iteration one more column is allowed until all columns are included
# (and the system is stable)
col_lim = 15
di = 0
while True:
print('')
n_orig = len(b)
print('Loop %d, lim %d' % (di + 1, col_lim))
# Meat of the operation
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive row: %d => %d equations' % (n_orig, len(b)))
debug("der_rows simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b)
print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b)))
debug("der_cols simp")
# Doesn't help computation, but helps debugging
Ads, b = sort_equations(Ads, b)
debug("loop done")
col_dist(Ads, 'derive done iter %d, lim %d' % (di, col_lim), lim=12)
rows = len(Ads)
if n_orig == len(b) and col_lim >= rows:
break
col_lim += col_lim // 5
di += 1
dend = len(b)
print('')
print('Derive net: %d => %d' % (dstart, dend))
print('')
# Was experimenting to see how much the higher-order columns really help
'''
cols_min_post = opts.get('cols_min_post', None)
cols_max_post = opts.get('cols_max_post', None)
# Filter input based on number of columns
if cols_min_post or cols_max_post:
Ads, b = filter_ncols(Ads=Ads, b=b, cols_min=cols_min_post, cols_max=cols_max_post)
debug("filter_ncals final")
'''
# Helps debug readability
Ads, b = sort_equations(Ads, b)
debug("final (sorted)")
return Ads, b

View File

@@ -169,7 +169,7 @@ def comb_corr_sets(state, verbose=False):
continue
# a grouping
group_name = "GROUP_%u" % row_i
group_name = "GRP_%u" % row_i
rowdsf = row_sym2dsf(rowsym, names)
state.subs[group_name] = rowdsf

View File

@@ -2,7 +2,8 @@
# https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.optimize.linprog.html
from scipy.optimize import linprog
from timfuz import Benchmark, Ar_di2np, Ar_ds2t, A_di2ds, A_ds2di, simplify_rows, loadc_Ads_b, index_names, A_ds2np, load_sub, run_sub_json
from timfuz import Benchmark, Ar_di2np, Ar_ds2t, A_di2ds, A_ds2di, simplify_rows, loadc_Ads_b, index_names, A_ds2np, load_sub, run_sub_json, A_ub_np2d, print_eqns, print_eqns_np
from timfuz_massage import massage_equations
import numpy as np
import glob
import json
@@ -65,12 +66,10 @@ def check_feasible(A_ub, b_ub):
print(' done')
def run_corner(Anp, b, names, verbose=False, opts={}, meta={}):
# Given timing scores for above delays (-ps)
assert type(Anp[0]) is np.ndarray, type(Anp[0])
assert type(b) is np.ndarray, type(b)
# Given timing scores for above delays (-ps)
names_orig = names
#check_feasible(Anp, b)
'''
@@ -91,6 +90,12 @@ def run_corner(Anp, b, names, verbose=False, opts={}, meta={}):
#A_ub = -1.0 * Anp
A_ub = [-1.0 * x for x in Anp]
if verbose:
print('')
print('A_ub b_ub')
print_eqns_np(A_ub, b_ub, verbose=verbose)
print('')
print('Creating misc constants...')
# Minimization function scalars
# Treat all logic elements as equally important
@@ -133,7 +138,7 @@ def run_corner(Anp, b, names, verbose=False, opts={}, meta={}):
print('')
# Now find smallest values for delay constants
# Due to input bounds (ex: column limit), some delay elements may get eliminated entirely
print('Running linprog w/ %d r, %d c (%d name)' % (rows, cols, len(names_orig)))
print('Running linprog w/ %d r, %d c (%d name)' % (rows, cols, len(names)))
res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=bounds, callback=callback,
options={"disp": True, 'maxiter': maxiter, 'bland': True, 'tol': 1e-6,})
nonzeros = 0
@@ -155,7 +160,7 @@ def run_corner(Anp, b, names, verbose=False, opts={}, meta={}):
print('Writing %s' % fn_out)
np.save(fn_out, (3, c, A_ub, b_ub, bounds, names, res, meta))
def run(fns_in, corner, sub_json=None, dedup=True, verbose=False):
def run(fns_in, corner, sub_json=None, dedup=True, massage=False, verbose=False):
Ads, b = loadc_Ads_b(fns_in, corner, ico=True)
# Remove duplicate rows
@@ -175,12 +180,15 @@ def run(fns_in, corner, sub_json=None, dedup=True, verbose=False):
else:
names = index_names(Ads)
if 0:
if verbose:
print('')
print_eqns(A_ubd, b_ub, verbose=verbose)
print_eqns(Ads, b, verbose=verbose)
print('')
col_dist(A_ubd, 'final', names)
#print
#col_dist(A_ubd, 'final', names)
if massage:
Ads, b = massage_equations(Ads, b)
print('Converting to numpy...')
names, Anp = A_ds2np(Ads)
@@ -195,6 +203,7 @@ def main():
)
parser.add_argument('--verbose', action='store_true', help='')
parser.add_argument('--massage', action='store_true', help='derive additional constraints via massage_equations')
parser.add_argument('--sub-json', help='Group substitutions to make fully ranked')
parser.add_argument('--corner', default="slow_max", help='')
parser.add_argument(
@@ -215,7 +224,7 @@ def main():
try:
run(sub_json=sub_json,
fns_in=fns_in, verbose=args.verbose, corner=args.corner)
fns_in=fns_in, verbose=args.verbose, corner=args.corner, massage=args.massage)
finally:
print('Exiting after %s' % bench)