Merge pull request #124 from mcmasterg/timfuz_cleanup

timfuz: misc cleanup
John McMaster 2018-09-25 09:35:36 -07:00 committed by GitHub
commit 31013a3e43
6 changed files with 204 additions and 410 deletions

View File

@@ -23,7 +23,7 @@ def gen_flat(fns_in, sub_json, corner=None):
if zero_row:
yield bound_name, 0
elif sub:
print('sub', sub)
#print('sub', sub)
# put entire delay into pivot
pivot = sub_json['pivots'][bound_name]
assert pivot not in group_zeros
@@ -32,6 +32,7 @@ def gen_flat(fns_in, sub_json, corner=None):
#for name in non_pivot:
# assert name not in nonzeros, (pivot, name, nonzeros)
group_zeros.update(non_pivot)
#print('yield PIVOT', pivot)
yield pivot, bound_bs
else:
nonzeros.add(bound_name)
@@ -45,10 +46,19 @@ def gen_flat(fns_in, sub_json, corner=None):
# XXX: how to best handle these?
# should they be fixed 0?
if zero_row:
print('zero_row', len(group_zeros), len(violations))
# ZERO names should always be zero
#print('ZEROs: %u' % len(sub_json['zero_names']))
for zero in sub_json['zero_names']:
#print('yield ZERO', zero)
yield zero, zero_row
for zero in group_zeros - violations:
real_zeros = group_zeros - violations
print(
'Zero candidates: %u w/ %u non-pivot conflicts => %u zeros as solved'
% (len(group_zeros), len(violations), len(real_zeros)))
# Only yield elements not already yielded
for zero in real_zeros:
#print('yield solve-0', zero)
yield zero, zero_row
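The hunk above boils down to set arithmetic over name groups; a minimal runnable sketch of the zero-yield bookkeeping (names illustrative, not from the commit):

    # Sketch of the zero-yield logic above, assuming zero_names,
    # group_zeros and violations are plain sets of variable names
    def yield_zeros(zero_names, group_zeros, violations, zero_row=0):
        # ZERO names are fixed at zero delay unconditionally
        for zero in zero_names:
            yield zero, zero_row
        # Non-pivot names that solved to zero, minus conflicting ones
        real_zeros = group_zeros - violations
        for zero in real_zeros:
            yield zero, zero_row

    # Two zero candidates, one non-pivot conflict => one zero as solved
    print(list(yield_zeros({'Z0'}, {'a', 'b'}, {'b'})))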

View File

@@ -4,8 +4,9 @@ TIMFUZ_DIR=$(XRAY_DIR)/fuzzers/007-timing
CORNER=slow_max
ALLOW_ZERO_EQN?=N
BADPRJ_OK?=N
BUILD_DIR?=build
all: build/$(CORNER)/timgrid-s.json
all: $(BUILD_DIR)/$(CORNER)/timgrid-vc.json $(BUILD_DIR)/$(CORNER)/qor.txt
run:
$(MAKE) clean
@@ -13,35 +14,38 @@ run:
touch run.ok
clean:
rm -rf specimen_[0-9][0-9][0-9]/ seg_clblx.segbits __pycache__ run.ok
rm -rf vivado*.log vivado_*.str vivado*.jou design *.bits *.dcp *.bit
rm -rf build
rm -rf $(BUILD_DIR)/$(CORNER)
.PHONY: all run clean
build/$(CORNER):
mkdir build/$(CORNER)
$(BUILD_DIR)/$(CORNER):
mkdir $(BUILD_DIR)/$(CORNER)
build/checksub:
# parent should have built this
$(BUILD_DIR)/checksub:
false
build/$(CORNER)/leastsq.csv: build/sub.json build/grouped.csv build/checksub build/$(CORNER)
$(BUILD_DIR)/$(CORNER)/leastsq.csv: $(BUILD_DIR)/sub.json $(BUILD_DIR)/grouped.csv $(BUILD_DIR)/checksub $(BUILD_DIR)/$(CORNER)
# Create a rough timing model that approximately fits the given paths
python3 $(TIMFUZ_DIR)/solve_leastsq.py --sub-json build/sub.json build/grouped.csv --corner $(CORNER) --out build/$(CORNER)/leastsq.csv.tmp
mv build/$(CORNER)/leastsq.csv.tmp build/$(CORNER)/leastsq.csv
python3 $(TIMFUZ_DIR)/solve_leastsq.py --sub-json $(BUILD_DIR)/sub.json $(BUILD_DIR)/grouped.csv --corner $(CORNER) --out $(BUILD_DIR)/$(CORNER)/leastsq.csv.tmp
mv $(BUILD_DIR)/$(CORNER)/leastsq.csv.tmp $(BUILD_DIR)/$(CORNER)/leastsq.csv
build/$(CORNER)/linprog.csv: build/$(CORNER)/leastsq.csv build/grouped.csv
$(BUILD_DIR)/$(CORNER)/linprog.csv: $(BUILD_DIR)/$(CORNER)/leastsq.csv $(BUILD_DIR)/grouped.csv
# Tweak rough timing model, making sure all constraints are satisfied
ALLOW_ZERO_EQN=$(ALLOW_ZERO_EQN) python3 $(TIMFUZ_DIR)/solve_linprog.py --sub-json build/sub.json --bounds-csv build/$(CORNER)/leastsq.csv --massage build/grouped.csv --corner $(CORNER) --out build/$(CORNER)/linprog.csv.tmp
mv build/$(CORNER)/linprog.csv.tmp build/$(CORNER)/linprog.csv
ALLOW_ZERO_EQN=$(ALLOW_ZERO_EQN) python3 $(TIMFUZ_DIR)/solve_linprog.py --sub-json $(BUILD_DIR)/sub.json --bounds-csv $(BUILD_DIR)/$(CORNER)/leastsq.csv --massage $(BUILD_DIR)/grouped.csv --corner $(CORNER) --out $(BUILD_DIR)/$(CORNER)/linprog.csv.tmp
mv $(BUILD_DIR)/$(CORNER)/linprog.csv.tmp $(BUILD_DIR)/$(CORNER)/linprog.csv
build/$(CORNER)/flat.csv: build/$(CORNER)/linprog.csv
$(BUILD_DIR)/$(CORNER)/flat.csv: $(BUILD_DIR)/$(CORNER)/linprog.csv
# Take separated variables and back-annotate them to the original timing variables
python3 $(TIMFUZ_DIR)/csv_group2flat.py --sub-json build/sub.json --corner $(CORNER) --out build/$(CORNER)/flat.csv.tmp build/$(CORNER)/linprog.csv
mv build/$(CORNER)/flat.csv.tmp build/$(CORNER)/flat.csv
python3 $(TIMFUZ_DIR)/csv_group2flat.py --sub-json $(BUILD_DIR)/sub.json --corner $(CORNER) --out $(BUILD_DIR)/$(CORNER)/flat.csv.tmp $(BUILD_DIR)/$(CORNER)/linprog.csv
mv $(BUILD_DIR)/$(CORNER)/flat.csv.tmp $(BUILD_DIR)/$(CORNER)/flat.csv
build/$(CORNER)/timgrid-s.json: build/$(CORNER)/flat.csv
$(BUILD_DIR)/$(CORNER)/timgrid-vc.json: $(BUILD_DIR)/$(CORNER)/flat.csv
# Final processing
# Insert timing delays into actual tile layouts
python3 $(TIMFUZ_DIR)/tile_annotate.py --timgrid-s $(TIMFUZ_DIR)/timgrid/build/timgrid-s.json --out build/$(CORNER)/timgrid-vc.json build/$(CORNER)/flat.csv
python3 $(TIMFUZ_DIR)/tile_annotate.py --timgrid-s $(TIMFUZ_DIR)/timgrid/build/timgrid-s.json --out $(BUILD_DIR)/$(CORNER)/timgrid-vc.json $(BUILD_DIR)/$(CORNER)/flat.csv
$(BUILD_DIR)/$(CORNER)/qor.txt: $(BUILD_DIR)/$(CORNER)/flat.csv
python3 $(TIMFUZ_DIR)/solve_qor.py --corner $(CORNER) --bounds-csv $(BUILD_DIR)/$(CORNER)/flat.csv specimen_*/timing3.csv >$(BUILD_DIR)/$(CORNER)/qor.txt.tmp
mv $(BUILD_DIR)/$(CORNER)/qor.txt.tmp $(BUILD_DIR)/$(CORNER)/qor.txt
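With the build directory now parameterized, the per-corner pipeline can target any output tree; a hypothetical invocation (paths assumed, not part of the commit):

    # Hypothetical: build the fast_max corner into an alternate tree
    make -f "$XRAY_DIR/fuzzers/007-timing/projects/corner.mk" \
        BUILD_DIR=build_test CORNER=fast_max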

View File

@@ -7,21 +7,33 @@ SPECIMENS_OK := $(addsuffix /OK,$(SPECIMENS))
CSVS := $(addsuffix /timing3.csv,$(SPECIMENS))
TIMFUZ_DIR=$(XRAY_DIR)/fuzzers/007-timing
RREF_CORNER=slow_max
# Allow an empty system of equations?
# for testing only on small projects
ALLOW_ZERO_EQN?=N
# Constrained projects may fail to build
# Set to Y to make a best effort to pull in the ones that did build
BADPRJ_OK?=N
# Set ZERO elements to zero delay (as they are expected to be)
RMZERO?=N
BUILD_DIR?=build
TIMGRID_VCS=build/fast_max/timgrid-vc.json build/fast_min/timgrid-vc.json build/slow_max/timgrid-vc.json build/slow_min/timgrid-vc.json
RREF_ARGS=
ifeq ($(RMZERO),Y)
RREF_ARGS+=--rm-zero
endif
all: build/timgrid-v.json
TIMGRID_VCS=$(BUILD_DIR)/fast_max/timgrid-vc.json $(BUILD_DIR)/fast_min/timgrid-vc.json $(BUILD_DIR)/slow_max/timgrid-vc.json $(BUILD_DIR)/slow_min/timgrid-vc.json
# make build/checksub first
build/fast_max/timgrid-vc.json: build/checksub
all: $(BUILD_DIR)/timgrid-v.json
# make $(BUILD_DIR)/checksub first
$(BUILD_DIR)/fast_max/timgrid-vc.json: $(BUILD_DIR)/checksub
$(MAKE) -f $(TIMFUZ_DIR)/projects/corner.mk CORNER=fast_max
build/fast_min/timgrid-vc.json: build/checksub
$(BUILD_DIR)/fast_min/timgrid-vc.json: $(BUILD_DIR)/checksub
$(MAKE) -f $(TIMFUZ_DIR)/projects/corner.mk CORNER=fast_min
build/slow_max/timgrid-vc.json: build/checksub
$(BUILD_DIR)/slow_max/timgrid-vc.json: $(BUILD_DIR)/checksub
$(MAKE) -f $(TIMFUZ_DIR)/projects/corner.mk CORNER=slow_max
build/slow_min/timgrid-vc.json: build/checksub
$(BUILD_DIR)/slow_min/timgrid-vc.json: $(BUILD_DIR)/checksub
$(MAKE) -f $(TIMFUZ_DIR)/projects/corner.mk CORNER=slow_min
$(SPECIMENS_OK):
@@ -36,7 +48,7 @@ run:
clean:
rm -rf specimen_[0-9][0-9][0-9]/ seg_clblx.segbits __pycache__ run.ok
rm -rf vivado*.log vivado_*.str vivado*.jou design *.bits *.dcp *.bit
rm -rf build
rm -rf $(BUILD_DIR)
.PHONY: all run clean
@@ -51,27 +63,27 @@ exist_csvs = \
done
# rref should be the same regardless of corner
build/sub.json: $(SPECIMENS_OK)
mkdir -p build
$(BUILD_DIR)/sub.json: $(SPECIMENS_OK)
mkdir -p $(BUILD_DIR)
# Discover which variables can be separated
# This is typically the longest running operation
\
csvs=$$(for f in $(CSVS); do if [ "$(BADPRJ_OK)" != 'Y' -o -f $$f ] ; then echo $$f; fi; done) ; \
python3 $(TIMFUZ_DIR)/rref.py --corner $(RREF_CORNER) --simplify --out build/sub.json.tmp $$csvs
mv build/sub.json.tmp build/sub.json
python3 $(TIMFUZ_DIR)/rref.py --corner $(RREF_CORNER) --simplify $(RREF_ARGS) --out $(BUILD_DIR)/sub.json.tmp $$csvs
mv $(BUILD_DIR)/sub.json.tmp $(BUILD_DIR)/sub.json
build/grouped.csv: $(SPECIMENS_OK) build/sub.json
$(BUILD_DIR)/grouped.csv: $(SPECIMENS_OK) $(BUILD_DIR)/sub.json
# Separate variables
\
csvs=$$(for f in $(CSVS); do if [ "$(BADPRJ_OK)" != 'Y' -o -f $$f ] ; then echo $$f; fi; done) ; \
python3 $(TIMFUZ_DIR)/csv_flat2group.py --sub-json build/sub.json --strict --out build/grouped.csv.tmp $$csvs
mv build/grouped.csv.tmp build/grouped.csv
python3 $(TIMFUZ_DIR)/csv_flat2group.py --sub-json $(BUILD_DIR)/sub.json --strict --out $(BUILD_DIR)/grouped.csv.tmp $$csvs
mv $(BUILD_DIR)/grouped.csv.tmp $(BUILD_DIR)/grouped.csv
build/checksub: build/grouped.csv build/sub.json
$(BUILD_DIR)/checksub: $(BUILD_DIR)/grouped.csv $(BUILD_DIR)/sub.json
# Verify sub.json produces a cleanly solvable system with no non-pivot leftover
python3 $(TIMFUZ_DIR)/checksub.py --sub-json build/sub.json build/grouped.csv
touch build/checksub
python3 $(TIMFUZ_DIR)/checksub.py --sub-json $(BUILD_DIR)/sub.json $(BUILD_DIR)/grouped.csv
touch $(BUILD_DIR)/checksub
build/timgrid-v.json: $(TIMGRID_VCS)
python3 $(TIMFUZ_DIR)/timgrid_vc2v.py --out build/timgrid-v.json $(TIMGRID_VCS)
$(BUILD_DIR)/timgrid-v.json: $(TIMGRID_VCS)
python3 $(TIMFUZ_DIR)/timgrid_vc2v.py --out $(BUILD_DIR)/timgrid-v.json $(TIMGRID_VCS)
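The new knobs compose at the top level as well; a hypothetical full run (flag values illustrative):

    # Hypothetical: full solve with zero-delay rows removed, tolerating
    # failed specimen projects, into a custom build tree
    make RMZERO=Y BADPRJ_OK=Y BUILD_DIR=build_rmzero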

View File

@@ -240,16 +240,16 @@ def simplify_rows(Ads, b_ub, remove_zd=False, corner=None):
sys.stdout.write('SimpR ')
sys.stdout.flush()
progress = int(max(1, len(b_ub) / 100))
# Equations with a total delay of zero
zero_ds = 0
# Equations with zero elements
# These should have zero delay
zero_es = 0
for loopi, (b, rowd) in enumerate(zip(b_ub, Ads)):
if loopi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
# TODO: elements have zero delay (ex: COUT)
# Remove these for now since they make me nervous
# Although they should just solve to 0
if remove_zd and not b:
zero_ds += 1
continue
@@ -260,6 +260,24 @@ def simplify_rows(Ads, b_ub, remove_zd=False, corner=None):
assert zero_es == 0, 'Unexpected zero element row with non-zero delay'
zero_es += 1
continue
'''
Reduce any constants to canonical form
this simplifies later steps that optimize on the assumption that there are no duplicate constants
Ex:
a = 10
2 a = 30
max corner will get simplified to
a = 15
Otherwise these are not identical equations and would not get simplified
Don't try handling the general case of non-trivial equations
'''
if len(rowd) == 1:
k, v = list(rowd.items())[0]
if v != 1:
rowd = {k: 1}
b = 1.0 * b / v
rowt = Ar_ds2t(rowd)
eqns[rowt] = minmax(eqns.get(rowt, T_UNK), b)
@@ -267,11 +285,12 @@ def simplify_rows(Ads, b_ub, remove_zd=False, corner=None):
print(' done')
print(
'Simplify rows: %d => %d rows w/ zd %d, ze %d' %
'Simplify rows: %d => %d rows w/ rm zd %d, rm ze %d' %
(len(b_ub), len(eqns), zero_ds, zero_es))
if len(eqns) == 0:
raise SimplifiedToZero()
A_ubd_ret, b_ub_ret = Ab_ub_dt2d(eqns)
#print_eqns(A_ubd_ret, b_ub_ret, verbose=True, label='Debug')
return A_ubd_ret, b_ub_ret
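The canonicalization above only touches trivial single-variable rows; a minimal standalone sketch of the same reduction (helper name hypothetical):

    # Sketch: reduce a single-variable row to coefficient 1 so duplicate
    # constraints on the same variable can merge under the corner min/max
    def canonicalize(rowd, b):
        if len(rowd) == 1:
            k, v = list(rowd.items())[0]
            if v != 1:
                return {k: 1}, 1.0 * b / v
        return rowd, b

    # {'a': 2} with bound 30 becomes {'a': 1} with bound 15.0, which can
    # now merge with an existing a = 10 row (max corner keeps 15.0)
    print(canonicalize({'a': 2}, 30))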
@@ -349,177 +368,6 @@ def sort_equations(Ads, b):
return [OrderedDict(rowt) for rowt in A_ubtr], b_ubr
def derive_eq_by_row(A_ubd, b_ub, verbose=0, col_lim=0, tweak=False):
'''
Derive equations by subtracting whole rows
Given equations like:
t0 >= 10
t0 + t1 >= 15
t0 + t1 + t2 >= 17
When I look at these, I think of a solution something like:
t0 = 10
t1 = 5
t2 = 2
However, linprog tends to choose solutions like:
t0 = 17
t1 = 0
t2 = 0
To this end, add additional constraints by finding equations that are subsets of other equations
How to do this in a reasonable time span?
Also equations are sparse, which makes this harder to compute
'''
rows = len(A_ubd)
assert rows == len(b_ub)
# Index equations into hash maps so we can look up sparse elements more quickly
assert len(A_ubd) == len(b_ub)
A_ubd_ret = copy.copy(A_ubd)
assert len(A_ubd) == len(A_ubd_ret)
#print('Finding subsets')
ltes = 0
scs = 0
b_ub_ret = list(b_ub)
sys.stdout.write('Deriving rows ')
sys.stdout.flush()
progress = max(1, rows / 100)
for row_refi, row_ref in enumerate(A_ubd):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if col_lim and len(row_ref) > col_lim:
continue
for row_cmpi, row_cmp in enumerate(A_ubd):
if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim:
continue
# FIXME: this check was supposed to be removed
'''
Every element in row_cmp is in row_ref
but this doesn't mean the constants are smaller
Filter these out
'''
# XXX: just reduce and filter out solutions with positive constants
# or actually are these also useful as is?
lte = lte_const(row_ref, row_cmp)
if lte:
ltes += 1
sc = 0 and shared_const(row_ref, row_cmp)
if sc:
scs += 1
if lte or sc:
if verbose:
print('')
print('match')
print(' ', row_ref, b_ub[row_refi])
print(' ', row_cmp, b_ub[row_cmpi])
# Reduce
A_new = reduce_const(row_ref, row_cmp)
# Did this actually significantly reduce the search space?
#if tweak and len(A_new) > 4 and len(A_new) > len(row_cmp) / 2:
if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2:
continue
b_new = b_ub[row_refi] - b_ub[row_cmpi]
# Definitely possible
# Maybe filter these out if they occur?
if verbose:
print(b_new)
# Also inverted sign
if b_new <= 0:
if verbose:
print("Unexpected b")
continue
if verbose:
print('OK')
A_ubd_ret.append(A_new)
b_ub_ret.append(b_new)
print(' done')
#A_ub_ret = A_di2np(A_ubd2, cols=cols)
print(
'Derive row: %d => %d rows using %d lte, %d sc' %
(len(b_ub), len(b_ub_ret), ltes, scs))
assert len(A_ubd_ret) == len(b_ub_ret)
return A_ubd_ret, b_ub_ret
def derive_eq_by_col(A_ubd, b_ub, verbose=0):
'''
Derive equations by subtracting out all bounded constants (i.e. "known" columns)
'''
rows = len(A_ubd)
# Find all entries where
# Index equations with a single constraint
knowns = {}
sys.stdout.write('Derive col indexing ')
#A_ubd = A_ub_np2d(A_ub)
sys.stdout.flush()
progress = max(1, rows / 100)
for row_refi, row_refd in enumerate(A_ubd):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if len(row_refd) == 1:
k, v = list(row_refd.items())[0]
# Reduce any constants to canonical form
if v != 1:
row_refd[k] = 1
b_ub[row_refi] /= v
knowns[k] = b_ub[row_refi]
print(' done')
#knowns_set = OrderedSet(knowns.keys())
print('%d constrained' % len(knowns))
'''
Now see what we can do
Rows that are already constrained: eliminate
TODO: maybe keep these if this would violate their constraint
Otherwise eliminate the original row and generate a simplified result now
'''
b_ub_ret = []
A_ubd_ret = []
sys.stdout.write('Derive col main ')
sys.stdout.flush()
progress = max(1, rows / 100)
for row_refi, row_refd in enumerate(A_ubd):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
# Reduce as much as possible
#row_new = {}
row_new = OrderedDict()
b_new = b_ub[row_refi]
# Copy over single entries
if len(row_refd) == 1:
row_new = row_refd
else:
for k, v in row_refd.items():
if k in knowns:
# Remove column and take out corresponding delay
b_new -= v * knowns[k]
# Copy over
else:
row_new[k] = v
# Possibly reduced all usable constants out
if len(row_new) == 0:
continue
if b_new <= 0:
continue
A_ubd_ret.append(row_new)
b_ub_ret.append(b_new)
print(' done')
print('Derive col: %d => %d rows' % (len(b_ub), len(b_ub_ret)))
return A_ubd_ret, b_ub_ret
def col_dist(Ads, desc='of', names=[], lim=0):
'''Print frequency distribution of number of elements in a given row'''
rows = len(Ads)

View File

@@ -24,30 +24,8 @@ def lte_const(row_ref, row_cmp):
return True
def shared_const(row_ref, row_cmp):
'''Return true if more constants are equal than not equal'''
#return False
matches = 0
unmatches = 0
ks = list(row_ref.keys()) + list(row_cmp.keys())
for k in ks:
vr = row_ref.get(k, None)
vc = row_cmp.get(k, None)
# At least one
if vr is not None and vc is not None:
if vc == vr:
matches += 1
else:
unmatches += 1
else:
unmatches += 1
# Will equation reduce if subtracted?
return matches > unmatches
def reduce_const(row_ref, row_cmp):
'''Subtract cmp constants from ref'''
def sub_rows(row_ref, row_cmp):
'''Do row_ref - row_cmp'''
#ret = {}
ret = OrderedDict()
ks = set(row_ref.keys())
@@ -61,10 +39,13 @@ def reduce_const(row_ref, row_cmp):
return ret
def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, cmp_heuristic=True):
'''
Derive equations by subtracting whole rows
cmp_heuristic: drop large generated rows since these are unlikely to constrain the solution
Given equations like:
t0 >= 10
t0 + t1 >= 15
@@ -94,11 +75,11 @@ def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
#print('Finding subsets')
ltes = 0
scs = 0
b_ret = list(b)
sys.stdout.write('Deriving rows (%u) ' % rows)
sys.stdout.flush()
progress = int(max(1, rows / 100))
b_warns = 0
for row_refi, row_ref in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
@@ -109,166 +90,64 @@ def derive_eq_by_row(Ads, b, verbose=0, col_lim=0, tweak=False):
for row_cmpi, row_cmp in enumerate(Ads):
if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim:
continue
# FIXME: this check was supposed to be removed
'''
Every element in row_cmp is in row_ref
but this doesn't mean the constants are smaller
Filter these out
'''
# XXX: just reduce and filter out solutions with positive constants
# or actually are these also useful as is?
lte = lte_const(row_ref, row_cmp)
if lte:
ltes += 1
sc = 0 and shared_const(row_ref, row_cmp)
if sc:
scs += 1
if lte or sc:
if verbose:
print('')
print('match')
print(' ', row_ref, b[row_refi])
print(' ', row_cmp, b[row_cmpi])
# Reduce
A_new = reduce_const(row_ref, row_cmp)
# Did this actually significantly reduce the search space?
#if tweak and len(A_new) > 4 and len(A_new) > len(row_cmp) / 2:
if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2:
continue
b_new = b[row_refi] - b[row_cmpi]
# Definitely possible
# Maybe filter these out if they occur?
if verbose:
print(b_new)
# Also inverted sign
if b_new <= 0:
if verbose:
print("Unexpected b")
continue
if verbose:
print('OK')
Ads_ret.append(A_new)
b_ret.append(b_new)
print(' done')
#A_ub_ret = A_di2np(Ads2, cols=cols)
print(
'Derive row: %d => %d rows using %d lte, %d sc' %
(len(b), len(b_ret), ltes, scs))
assert len(Ads_ret) == len(b_ret)
return Ads_ret, b_ret
def derive_eq_by_near_row(Ads, b, verbose=0, col_lim=0, tweak=False):
'''
Derive equations by subtracting whole rows
Given equations like:
t0 >= 10
t0 + t1 >= 15
t0 + t1 + t2 >= 17
When I look at these, I think of a solution something like:
t0 = 10
t1 = 5
t2 = 2
However, linprog tends to choose solutions like:
t0 = 17
t1 = 0
t2 = 0
To this end, add additional constraints by finding equations that are subsets of other equations
How to do this in a reasonable time span?
Also equations are sparse, which makes this harder to compute
'''
rows = len(Ads)
assert rows == len(b)
rowdelta = int(rows / 2)
# Index equations into hash maps so we can look up sparse elements more quickly
assert len(Ads) == len(b)
Ads_ret = copy.copy(Ads)
assert len(Ads) == len(Ads_ret)
#print('Finding subsets')
ltes = 0
scs = 0
b_ret = list(b)
sys.stdout.write('Deriving rows (%u) ' % rows)
sys.stdout.flush()
progress = int(max(1, rows / 100))
for row_refi, row_ref in enumerate(Ads):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if col_lim and len(row_ref) > col_lim:
continue
#for row_cmpi, row_cmp in enumerate(Ads):
for row_cmpi in range(max(0, row_refi - rowdelta),
min(len(Ads), row_refi + rowdelta)):
if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim:
if not lte_const(row_ref, row_cmp):
continue
row_cmp = Ads[row_cmpi]
# FIXME: this check was supposed to be removed
'''
Every element in row_cmp is in row_ref
but this doesn't mean the constants are smaller
Filter these out
'''
# XXX: just reduce and filter out solutions with positive constants
# or actually are these also useful as is?
lte = lte_const(row_ref, row_cmp)
if lte:
ltes += 1
sc = 0 and shared_const(row_ref, row_cmp)
if sc:
scs += 1
if lte or sc:
ltes += 1
if verbose:
print('')
print('match')
print(' ', row_ref, b[row_refi])
print(' ', row_cmp, b[row_cmpi])
# Reduce
row_new = sub_rows(row_ref, row_cmp)
# Did this actually significantly reduce the search space?
# should be a relatively small number of rows and be significantly smaller than at least one of them
def is_smaller_row():
# Keep any generally small rows
if len(row_new) <= 8:
return True
# And anything that reduced at least one row by half
# Ex: going from 120 and 100 element rows to a 20 element row
return len(row_new) <= len(row_cmp) / 2 or len(
row_new) <= len(row_ref) / 2
if cmp_heuristic and not is_smaller_row():
continue
b_new = b[row_refi] - b[row_cmpi]
# Definitely possible
# Maybe filter these out if they occur?
if verbose:
print(b_new)
# Also inverted sign
if b_new < 0:
if verbose:
print('')
print('match')
print(' ', row_ref, b[row_refi])
print(' ', row_cmp, b[row_cmpi])
# Reduce
A_new = reduce_const(row_ref, row_cmp)
# Did this actually significantly reduce the search space?
#if tweak and len(A_new) > 4 and len(A_new) > len(row_cmp) / 2:
#if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2:
# continue
b_new = b[row_refi] - b[row_cmpi]
# Definitely possible
# Maybe filter these out if they occur?
if verbose:
print(b_new)
# Also inverted sign
if b_new <= 0:
if verbose:
print("Unexpected b")
continue
if verbose:
print('OK')
Ads_ret.append(A_new)
b_ret.append(b_new)
print("Unexpected b")
b_warns += 1
continue
if verbose:
print('OK')
Ads_ret.append(row_new)
b_ret.append(b_new)
print(' done')
#A_ub_ret = A_di2np(Ads2, cols=cols)
print(
'Derive row: %d => %d rows using %d lte, %d sc' %
(len(b), len(b_ret), ltes, scs))
'Derive row: %d => %d rows using %d lte' % (len(b), len(b_ret), ltes))
print('Dropped %u invalid equations' % b_warns)
assert len(Ads_ret) == len(b_ret)
return Ads_ret, b_ret
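Concretely, the derivation subtracts a subset equation from a superset to expose a constraint on the leftover variables; a self-contained toy run (lte_const and sub_rows reimplemented here as sketches of the helpers above):

    def lte_const(row_ref, row_cmp):
        # Sketch: every variable of row_cmp appears in row_ref with an
        # equal-or-larger coefficient, so subtraction cannot go negative
        return all(row_ref.get(k, 0) >= v for k, v in row_cmp.items())

    def sub_rows(row_ref, row_cmp):
        # Sketch: row_ref - row_cmp, dropping variables that cancel
        out = dict(row_ref)
        for k, v in row_cmp.items():
            out[k] -= v
            if out[k] == 0:
                del out[k]
        return out

    # t0 + t1 >= 15 minus t0 >= 10 derives the extra constraint t1 >= 5
    ref, b_ref = {'t0': 1, 't1': 1}, 15
    cmp_row, b_cmp = {'t0': 1}, 10
    if lte_const(ref, cmp_row):
        print(sub_rows(ref, cmp_row), b_ref - b_cmp)  # {'t1': 1} 5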
def derive_eq_by_col(Ads, b_ub, verbose=0):
def derive_eq_by_col(Ads, b_ub, verbose=0, keep_orig=True):
'''
Derive equations by subtracting out all bounded constants (i.e. "known" columns)
XXX: is this now redundant with derive_row?
Seems like a degenerate case, although maybe quicker
'''
rows = len(Ads)
# Find all entries where
# Index equations with a single constraint
knowns = {}
sys.stdout.write('Derive col indexing ')
@@ -280,11 +159,11 @@ def derive_eq_by_col(Ads, b_ub, verbose=0):
sys.stdout.flush()
if len(row_refd) == 1:
k, v = list(row_refd.items())[0]
# Reduce any constants to canonical form
if v != 1:
row_refd[k] = 1
b_ub[row_refi] /= v
knowns[k] = b_ub[row_refi]
# assume simplify_equations handles de-duplicating
new_b = 1.0 * b_ub[row_refi] / v
assert k not in knowns, "Got new %s w/ val %u, old val %u" % (
k, new_b, knowns[k])
knowns[k] = new_b
print(' done')
#knowns_set = set(knowns.keys())
print('%d constrained' % len(knowns))
@@ -303,10 +182,15 @@ def derive_eq_by_col(Ads, b_ub, verbose=0):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
b_ref = b_ub[row_refi]
if keep_orig:
Ads_ret.append(row_refd)
b_ret.append(b_ref)
# Reduce as much as possible
#row_new = {}
row_new = OrderedDict()
b_new = b_ub[row_refi]
b_new = b_ref
# Copy over single entries
if len(row_refd) == 1:
row_new = row_refd
@@ -322,11 +206,13 @@ def derive_eq_by_col(Ads, b_ub, verbose=0):
# Possibly reduced all usable constants out
if len(row_new) == 0:
continue
if b_new <= 0:
# invalid?
if b_new < 0:
continue
Ads_ret.append(row_new)
b_ret.append(b_new)
if not keep_orig or row_new != row_refd:
Ads_ret.append(row_new)
b_ret.append(b_new)
print(' done')
print('Derive col: %d => %d rows' % (len(b_ub), len(b_ret)))
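derive_eq_by_col is the degenerate single-variable case: once a variable is pinned by a one-term row, it can be substituted out of every larger row; a minimal sketch (data illustrative):

    # Sketch: substitute a known single-variable bound out of a larger row
    knowns = {'t0': 10}              # from a one-term row t0 >= 10
    row, b = {'t0': 1, 't1': 2}, 16  # t0 + 2*t1 >= 16
    row_new = {k: v for k, v in row.items() if k not in knowns}
    b_new = b - sum(v * knowns[k] for k, v in row.items() if k in knowns)
    print(row_new, b_new)  # {'t1': 2} 6, i.e. 2*t1 >= 6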
@@ -358,12 +244,16 @@ def massage_equations(Ads, b, verbose=False, corner=None):
assert len(Ads) == len(b), 'Ads, b length mismatch'
def check_cols():
assert len(index_names(Ads)) == cols
def debug(what):
if verbose:
check_cols()
if 1 or verbose:
print('')
print_eqns(Ads, b, verbose=verbose, label=what, lim=20)
col_dist(Ads, what)
check_feasible_d(Ads, b)
#check_feasible_d(Ads, b)
# Try to (intelligently) subtract equations to generate additional constraints
# This helps avoid putting all delay in a single shared variable
@@ -380,21 +270,27 @@ def massage_equations(Ads, b, verbose=False, corner=None):
print('Loop %d, lim %d' % (di + 1, col_lim))
# Meat of the operation
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, tweak=True)
Ads, b = derive_eq_by_row(Ads, b, col_lim=col_lim, cmp_heuristic=True)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b, corner=corner)
print('Derive row: %d => %d equations' % (n_orig, len(b)))
debug("der_rows simp")
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b, corner=corner)
print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b)))
debug("der_cols simp")
# derive_cols is mostly degenerate case of derive_rows
# however, it will sub out single variables a lot faster if there are a lot of them
# linear vs above quadratic, might as well keep it in
if 1:
n_orig2 = len(b)
# Meat of the operation
Ads, b = derive_eq_by_col(Ads, b)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
Ads, b = simplify_rows(Ads, b, corner=corner)
print(
'Derive col %d: %d => %d equations' %
(di + 1, n_orig2, len(b)))
debug("der_cols simp")
# Doesn't help computation, but helps debugging
Ads, b = sort_equations(Ads, b)
@@ -419,4 +315,7 @@ def massage_equations(Ads, b, verbose=False, corner=None):
debug("final (sorted)")
print('')
print('Massage final: %d => %d rows' % (dstart, dend))
cols_end = len(index_names(Ads))
print('Massage final: %d => %d cols' % (cols, cols_end))
assert cols_end == cols
return Ads, b
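The new column-count assertion encodes an invariant worth stating: massaging may add or drop rows, but must never add or drop variables; a minimal check in the same spirit (index_names reimplemented as a sketch):

    def index_names(Ads):
        # Sketch: the set of variables referenced anywhere in the system
        names = set()
        for row in Ads:
            names.update(row.keys())
        return names

    Ads_in = [{'t0': 1}, {'t0': 1, 't1': 1}]
    Ads_out = Ads_in + [{'t1': 1}]  # a derived row adds no new variables
    assert index_names(Ads_out) == index_names(Ads_in)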

View File

@@ -64,8 +64,18 @@ def check_feasible(A_ub, b_ub):
def filter_bounds(Ads, b, bounds, corner):
'''Given min variable delays, remove rows that won't constrain solution'''
#assert len(bounds) > 0
'''
Given min variable delays, remove rows that won't constrain solution
Ex for max corner:
Given bounds:
a >= 10
b >= 1
c >= 0
Given equations:
a + b >= 10
a + c >= 100
The first equation is already satisfied
However, the second needs either an increase in a or an increase in c '''
if 'max' in corner:
# Keep delays possibly larger than current bound
@@ -84,13 +94,24 @@ def filter_bounds(Ads, b, bounds, corner):
ret_Ads = []
ret_b = []
unknowns = set()
for row_ds, row_b in zip(Ads, b):
# some variables get estimated at 0
est = sum([bounds.get(k, T_UNK) * v for k, v in row_ds.items()])
def getvar(k):
#return bounds.get(k, T_UNK)
ret = bounds.get(k, None)
if ret is not None:
return ret
unknowns.add(k)
return T_UNK
est = sum([getvar(k) * v for k, v in row_ds.items()])
# will this row potentially constrain us more?
if keep(row_b, est):
ret_Ads.append(row_ds)
ret_b.append(row_b)
if len(unknowns):
print('WARNING: encountered %u undefined bounds' % len(unknowns))
return ret_Ads, ret_b
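Applying the docstring's example to the max-corner keep rule gives the expected filtering; a minimal sketch (bounds and rows taken from the example above):

    # Max corner: keep a row only if its bound exceeds the estimate
    # obtained from the current per-variable bounds
    bounds = {'a': 10, 'b': 1, 'c': 0}
    rows = [({'a': 1, 'b': 1}, 10), ({'a': 1, 'c': 1}, 100)]
    kept = [(rowd, rb) for rowd, rb in rows
            if rb > sum(bounds.get(k, 0) * v for k, v in rowd.items())]
    print(kept)  # only a + c >= 100 survives; a + b >= 10 is satisfied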