prjxray/experiments/timfuz/timfuz_massage.py

534 lines
17 KiB
Python

#!/usr/bin/env python3
from timfuz import simplify_rows, print_eqns, print_eqns_np, sort_equations, col_dist, index_names
import numpy as np
import math
import sys
import datetime
import os
import time
import copy
from collections import OrderedDict
def lte_const(row_ref, row_cmp):
    '''
    Return True if row_cmp is "covered" by row_ref

    That is: every key in row_cmp also appears in row_ref (with a non-None
    value) and the row_cmp constant is not larger than the row_ref constant.
    An empty row_cmp is always covered.
    '''
    for name, cmp_val in row_cmp.items():
        ref_val = row_ref.get(name)
        # Reject when the reference lacks this column or its constant is too small
        if ref_val is None or ref_val < cmp_val:
            return False
    return True
def shared_const(row_ref, row_cmp):
    '''
    Return True if more constants are equal than not equal

    Keys are visited once per row they appear in, so a key present in both
    rows is counted twice (as two matches or two mismatches), while a key
    present in only one row counts as a single mismatch.
    '''
    matched = 0
    unmatched = 0
    for name in list(row_ref.keys()) + list(row_cmp.keys()):
        ref_val = row_ref.get(name)
        cmp_val = row_cmp.get(name)
        present_in_both = ref_val is not None and cmp_val is not None
        if present_in_both and cmp_val == ref_val:
            matched += 1
        else:
            # Either missing from one row or the constants differ
            unmatched += 1

    # Would subtracting the rows shrink the equation?
    return matched > unmatched
def reduce_const(row_ref, row_cmp):
    '''
    Subtract cmp constants from ref

    Returns a new OrderedDict mapping each key to (row_ref value - row_cmp
    value), treating a missing key as 0 and dropping keys whose difference
    is zero. Neither input is modified.

    Key order is deterministic: keys follow row_ref's iteration order,
    followed by keys that only appear in row_cmp (in row_cmp's order).
    Iterating a set of keys instead would make the order depend on string
    hash randomization, defeating the point of returning an OrderedDict.
    '''
    ret = OrderedDict()
    for k, vr in row_ref.items():
        res = vr - row_cmp.get(k, 0)
        if res:
            ret[k] = res
    for k, vc in row_cmp.items():
        # Keys shared with row_ref were already handled above
        if k in row_ref:
            continue
        res = 0 - vc
        if res:
            ret[k] = res
    return ret
def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
    '''
    Derive additional equations by subtracting whole rows from each other

    Given equations like:
        t0 >= 10
        t0 + t1 >= 15
        t0 + t1 + t2 >= 17
    an intuitive solution looks like:
        t0 = 10
        t1 = 5
        t2 = 2
    However, linprog tends to choose solutions like:
        t0 = 17
        t1 = 0
        t2 = 0
    To counter this, extra constraints are added by finding equations that
    are subsets of other equations and subtracting them.

    Every ordered pair of distinct rows (ref, cmp) is examined, so the cost
    is quadratic in the number of rows.

    Ads: list of sparse rows (dicts of column name => constant)
    b: list of row constants, same length as Ads
    verbose: print each candidate match when truthy
    col_lim: when truthy, rows with more than this many columns are skipped
    tweak: when truthy, drop derived rows that still have more than 8
        columns and more than half as many columns as the subtracted row

    Returns (Ads_ret, b_ret): shallow copies of the inputs with the derived
    rows appended. Only derived rows with a positive constant are kept.
    '''
    n_rows = len(Ads)
    assert n_rows == len(b)
    Ads_ret = copy.copy(Ads)
    assert len(Ads) == len(Ads_ret)
    b_ret = list(b)

    # Match counters reported in the summary line
    ltes = 0
    scs = 0

    def too_wide(row):
        # A falsy col_lim (0 / None) disables the width filter
        return col_lim and len(row) > col_lim

    sys.stdout.write('Deriving rows (%u) ' % n_rows)
    sys.stdout.flush()
    # Emit roughly one progress dot per percent of rows
    tick = max(1, n_rows // 100)

    for ref_i, ref_row in enumerate(Ads):
        if ref_i % tick == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if too_wide(ref_row):
            continue

        for cmp_i, cmp_row in enumerate(Ads):
            if cmp_i == ref_i or too_wide(cmp_row):
                continue

            # FIXME: this check was supposed to be removed
            # Every element in cmp_row being in ref_row doesn't mean the
            # constants are smaller, so lte_const filters on both
            # XXX: just reduce and filter out solutions with positive constants
            # or actually are these also useful as is?
            is_lte = lte_const(ref_row, cmp_row)
            if is_lte:
                ltes += 1
            # Shared-constant matching is short-circuited off
            is_sc = 0 and shared_const(ref_row, cmp_row)
            if is_sc:
                scs += 1
            if not (is_lte or is_sc):
                continue

            if verbose:
                print('')
                print('match')
                print(' ', ref_row, b[ref_i])
                print(' ', cmp_row, b[cmp_i])

            reduced = reduce_const(ref_row, cmp_row)
            # Did this actually significantly reduce the search space?
            if tweak and len(reduced) > 8 and len(reduced) > len(cmp_row) / 2:
                continue

            delta = b[ref_i] - b[cmp_i]
            if verbose:
                print(delta)
            # Non-positive constants are definitely possible; filter them out
            if delta <= 0:
                if verbose:
                    print("Unexpected b")
                continue
            if verbose:
                print('OK')

            Ads_ret.append(reduced)
            b_ret.append(delta)

    print(' done')
    print('Derive row: %d => %d rows using %d lte, %d sc' % (len(b), len(b_ret), ltes, scs))
    assert len(Ads_ret) == len(b_ret)
    return Ads_ret, b_ret
def derive_eq_by_near_row(Ads, b, verbose=0, col_lim=0, tweak=False):
    '''
    Derive equations by subtracting nearby rows from each other

    Same idea as whole-row derivation: given equations like
        t0 >= 10
        t0 + t1 >= 15
        t0 + t1 + t2 >= 17
    add constraints formed by subtracting an equation that is a subset of
    another, so the solver is discouraged from putting all delay into one
    shared variable.

    Unlike an all-pairs scan, each reference row is only compared against
    rows whose index lies in the half-open window
    [row_refi - rowdelta, row_refi + rowdelta), with rowdelta = int(rows / 2),
    clamped to the valid index range.

    Ads: list of sparse rows (dicts of column name => constant)
    b: list of row constants, same length as Ads
    verbose: print each candidate match when truthy
    col_lim: when truthy, rows with more than this many columns are skipped
    tweak: accepted for signature compatibility; currently unused because
        the corresponding filter is disabled

    Returns (Ads_ret, b_ret): shallow copies of the inputs with the derived
    rows appended. Only derived rows with a positive constant are kept.
    '''
    rows = len(Ads)
    assert rows == len(b)
    rowdelta = int(rows / 2)

    Ads_ret = copy.copy(Ads)
    assert len(Ads) == len(Ads_ret)

    ltes = 0
    scs = 0
    b_ret = list(b)
    sys.stdout.write('Deriving rows (%u) ' % rows)
    sys.stdout.flush()
    progress = int(max(1, rows / 100))
    for row_refi, row_ref in enumerate(Ads):
        if row_refi % progress == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if col_lim and len(row_ref) > col_lim:
            continue

        window_lo = max(0, row_refi - rowdelta)
        window_hi = min(len(Ads), row_refi + rowdelta)
        for row_cmpi in range(window_lo, window_hi):
            # Fetch the row before the width filter inspects it; testing
            # len(row_cmp) first would read an unbound or stale variable
            row_cmp = Ads[row_cmpi]
            if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim:
                continue

            # FIXME: this check was supposed to be removed
            # Every element in row_cmp being in row_ref doesn't mean the
            # constants are smaller, so lte_const filters on both
            # XXX: just reduce and filter out solutions with positive constants
            # or actually are these also useful as is?
            lte = lte_const(row_ref, row_cmp)
            if lte:
                ltes += 1
            # Shared-constant matching is short-circuited off
            sc = 0 and shared_const(row_ref, row_cmp)
            if sc:
                scs += 1
            if not (lte or sc):
                continue

            if verbose:
                print('')
                print('match')
                print(' ', row_ref, b[row_refi])
                print(' ', row_cmp, b[row_cmpi])

            # Reduce
            A_new = reduce_const(row_ref, row_cmp)
            # The "did this significantly reduce the search space" filter
            # (driven by tweak) is intentionally disabled here
            #if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2:
            #    continue

            b_new = b[row_refi] - b[row_cmpi]
            if verbose:
                print(b_new)
            # Non-positive constants are definitely possible; filter them out
            if b_new <= 0:
                if verbose:
                    print("Unexpected b")
                continue
            if verbose:
                print('OK')

            Ads_ret.append(A_new)
            b_ret.append(b_new)
    print(' done')

    print('Derive row: %d => %d rows using %d lte, %d sc' % (len(b), len(b_ret), ltes, scs))
    assert len(Ads_ret) == len(b_ret)
    return Ads_ret, b_ret
def derive_eq_by_col(Ads, b_ub, verbose=0):
    '''
    Derive equations by subtracting out all bounded constants (ie "known" columns)

    Pass 1 indexes every row that has exactly one column. Such a row is
    normalized in place to a coefficient of 1 (dividing its constant by the
    old coefficient) and its constant is recorded as the known value for
    that column. If several single-column rows name the same column, the
    last one wins.

    Pass 2 rebuilds the system: single-column rows are carried over as is;
    for every other row each known column is removed and
    coefficient * known value is subtracted from the row constant. Rows
    that end up with no columns, or with a constant <= 0, are dropped.

    NOTE: the normalization in pass 1 mutates the dicts in Ads and the
    entries of b_ub that belong to single-column rows.

    Ads: list of sparse rows (dicts of column name => constant)
    b_ub: mutable sequence of row constants, indexed like Ads
    verbose: accepted for signature compatibility; currently unused

    Returns (Ads_ret, b_ret) as new lists.
    '''
    rows = len(Ads)

    # Index equations with a single constraint
    knowns = {}
    sys.stdout.write('Derive col indexing ')
    sys.stdout.flush()
    # Integer step so the modulo test below fires at regular intervals
    progress = int(max(1, rows / 100))
    for row_refi, row_refd in enumerate(Ads):
        if row_refi % progress == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        if len(row_refd) == 1:
            k, v = next(iter(row_refd.items()))
            # Reduce any constants to canonical form
            if v != 1:
                row_refd[k] = 1
                b_ub[row_refi] /= v
            knowns[k] = b_ub[row_refi]
    print(' done')
    print('%d constrained' % len(knowns))

    # Now see what we can do
    # Rows that are already constrained: eliminate
    # TODO: maybe keep these if this would violate their constraint
    # Otherwise eliminate the original row and generate a simplified result now
    b_ret = []
    Ads_ret = []
    sys.stdout.write('Derive col main ')
    sys.stdout.flush()
    for row_refi, row_refd in enumerate(Ads):
        if row_refi % progress == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
        b_new = b_ub[row_refi]
        if len(row_refd) == 1:
            # Copy over single entries untouched (same dict object)
            row_new = row_refd
        else:
            # Reduce as much as possible
            row_new = OrderedDict()
            for k, v in row_refd.items():
                if k in knowns:
                    # Remove column and take out corresponding delay
                    b_new -= v * knowns[k]
                else:
                    row_new[k] = v

        # Possibly reduced all usable constants out
        if len(row_new) == 0:
            continue
        if b_new <= 0:
            continue

        Ads_ret.append(row_new)
        b_ret.append(b_new)
    print(' done')

    print('Derive col: %d => %d rows' % (len(b_ub), len(b_ret)))
    return Ads_ret, b_ret
# keep deriving until solution is (probably) stable
def massage_equations_old(Ads, b, verbose=False, derive_lim=3):
    '''
    Equation pipeline

    Some operations may generate new equations
    Simplify after these to avoid unnecessary overhead on redundant constraints
    Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
    Remove these columns as necessary to speed up solving

    Runs up to derive_lim rounds of: derive by row, simplify, derive by
    column, simplify. The first round limits row derivation to rows with at
    most 12 columns; later rounds have no column limit. Stops early once a
    round leaves the equation count unchanged, then sorts the equations.

    Ads: list of sparse rows (dicts of column name => constant)
    b: list of row constants, same length as Ads
    verbose: dump intermediate equation state after each step
    derive_lim: maximum number of derive rounds

    Returns the massaged (Ads, b).
    '''

    def debug(what):
        # NOTE(review): check_feasible_d is not among the names imported from
        # timfuz at the top of this file, so verbose mode may raise
        # NameError - confirm
        if verbose:
            print('')
            print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
            col_dist(Ads, what)
            check_feasible_d(Ads, b)

    # Try to (intelligently) subtract equations to generate additional constraints
    # This helps avoid putting all delay in a single shared variable
    dstart = len(b)

    # Original simple
    for di in range(derive_lim):
        # Blank separator line between rounds (a bare `print` is a no-op in Python 3)
        print('')
        assert len(Ads) == len(b)
        n_orig = len(b)

        # Meat of the operation
        # Focus on easy equations for first pass to get a lot of easy derivations
        col_lim = 12 if di == 0 else None
        Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim)
        debug("der_rows")
        # Run another simplify pass since new equations may have overlap with original
        Ads, b = simplify_rows(Ads, b)
        print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b)))
        debug("der_rows simp")

        n_orig2 = len(b)
        # Meat of the operation
        Ads, b = derive_eq_by_col(Ads, b)
        debug("der_cols")
        # Run another simplify pass since new equations may have overlap with original
        Ads, b = simplify_rows(Ads, b)
        print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b)))
        debug("der_cols simp")

        # Nothing changed this round: assume the system is stable
        if n_orig == len(b):
            break

    dend = len(b)
    print('')
    print('Derive net: %d => %d' % (dstart, dend))
    print('')
    # Was experimenting to see how much the higher order columns really help
    # (disabled; relies on an `opts` dict that this function does not receive)
    #cols_min_post = opts.get('cols_min_post', None)
    #cols_max_post = opts.get('cols_max_post', None)
    ## Filter input based on number of columns
    #if cols_min_post or cols_max_post:
    #    Ads, b = filter_ncols(Ads=Ads, b=b, cols_min=cols_min_post, cols_max=cols_max_post)
    #    debug("filter_ncals final")

    # Helps debug readability
    Ads, b = sort_equations(Ads, b)
    debug("final (sorted)")
    return Ads, b
# iteratively increasing column limit until all columns are added
def massage_equations_inc_col_lim(Ads, b, verbose=False):
    '''
    Equation pipeline

    Some operations may generate new equations
    Simplify after these to avoid unnecessary overhead on redundant constraints
    Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
    Remove these columns as necessary to speed up solving

    Each loop iteration runs: derive by row (with the current column limit
    and tweak=True), simplify, derive by column, simplify, sort. The column
    limit starts at 15 and grows by 20% per iteration (becoming a float).
    The loop ends once an iteration leaves the equation count unchanged and
    the limit has reached the number of column names counted before the loop.

    Ads: list of sparse rows (dicts of column name => constant)
    b: list of row constants, same length as Ads
    verbose: dump intermediate equation state after each step

    Returns the massaged (Ads, b).
    '''

    def debug(what):
        # NOTE(review): check_feasible_d is not among the names imported from
        # timfuz at the top of this file, so verbose mode may raise
        # NameError - confirm
        if verbose:
            print('')
            print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
            col_dist(Ads, what)
            check_feasible_d(Ads, b)

    # Try to (intelligently) subtract equations to generate additional constraints
    # This helps avoid putting all delay in a single shared variable
    dstart = len(b)
    cols = len(index_names(Ads))

    # Each iteration the column limit is raised until all columns are included
    # (and the system is stable)
    col_lim = 15
    di = 0
    while True:
        # Blank separator line between loops (a bare `print` is a no-op in Python 3)
        print('')
        n_orig = len(b)

        print('Loop %d, lim %d' % (di + 1, col_lim))
        # Meat of the operation
        Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True)
        debug("der_rows")
        # Run another simplify pass since new equations may have overlap with original
        Ads, b = simplify_rows(Ads, b)
        print('Derive row: %d => %d equations' % (n_orig, len(b)))
        debug("der_rows simp")

        n_orig2 = len(b)
        # Meat of the operation
        Ads, b = derive_eq_by_col(Ads, b)
        debug("der_cols")
        # Run another simplify pass since new equations may have overlap with original
        Ads, b = simplify_rows(Ads, b)
        print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b)))
        debug("der_cols simp")

        # Doesn't help computation, but helps debugging
        Ads, b = sort_equations(Ads, b)
        debug("loop done")
        col_dist(Ads, 'derive done iter %d, lim %d' % (di, col_lim), lim=12)

        # Stable and every column has been allowed in: done
        if n_orig == len(b) and col_lim >= cols:
            break
        # Grow the limit by 20%; true division makes col_lim a float from here on
        col_lim += col_lim / 5
        di += 1

    dend = len(b)
    print('')
    print('Derive net: %d => %d' % (dstart, dend))
    print('')

    # Helps debug readability
    Ads, b = sort_equations(Ads, b)
    debug("final (sorted)")
    print('')
    print('Massage final: %d => %d rows' % (dstart, dend))
    return Ads, b
# only derive based on nearby equations
# theory is they will be the best to diff
def massage_equations_near(Ads, b, verbose=False):
    '''
    Equation pipeline

    Some operations may generate new equations
    Simplify after these to avoid unnecessary overhead on redundant constraints
    Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
    Remove these columns as necessary to speed up solving

    Single pass: derive by nearby rows, simplify, derive by column,
    simplify, sort.

    Ads: list of sparse rows (dicts of column name => constant)
    b: list of row constants, same length as Ads
    verbose: dump intermediate equation state after each step

    Returns the massaged (Ads, b).
    '''

    def debug(what):
        # NOTE(review): check_feasible_d is not among the names imported from
        # timfuz at the top of this file, so verbose mode may raise
        # NameError - confirm
        if verbose:
            print('')
            print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
            col_dist(Ads, what)
            check_feasible_d(Ads, b)

    # Try to (intelligently) subtract equations to generate additional constraints
    # This helps avoid putting all delay in a single shared variable
    dstart = len(b)
    # Result is unused in this single-pass variant; the call is retained
    # because index_names is defined outside this file - TODO confirm it
    # has no side effects, then remove
    cols = len(index_names(Ads))

    # Blank separator line (a bare `print` is a no-op in Python 3)
    print('')
    n_orig = len(b)

    # Meat of the operation
    # tweak=True is passed through as before
    Ads, b = derive_eq_by_near_row(Ads, b, tweak=True)
    debug("der_rows")
    # Run another simplify pass since new equations may have overlap with original
    Ads, b = simplify_rows(Ads, b)
    print('Derive row: %d => %d equations' % (n_orig, len(b)))
    debug("der_rows simp")

    n_orig2 = len(b)
    # Meat of the operation
    Ads, b = derive_eq_by_col(Ads, b)
    debug("der_cols")
    # Run another simplify pass since new equations may have overlap with original
    Ads, b = simplify_rows(Ads, b)
    print('Derive col: %d => %d equations' % (n_orig2, len(b)))
    debug("der_cols simp")

    # Doesn't help computation, but helps debugging
    Ads, b = sort_equations(Ads, b)
    debug("loop done")
    col_dist(Ads, 'derive done', lim=12)

    dend = len(b)
    print('')
    print('Derive net: %d => %d' % (dstart, dend))
    print('')

    # Helps debug readability
    Ads, b = sort_equations(Ads, b)
    debug("final (sorted)")
    print('')
    print('Massage final: %d => %d rows' % (dstart, dend))
    return Ads, b
# Default pipeline: massage_equations is an alias for the variant that
# iteratively increases the column limit
massage_equations = massage_equations_inc_col_lim