timfuz: rref test

Signed-off-by: John McMaster <johndmcmaster@gmail.com>
John McMaster 2018-08-23 18:18:36 -07:00
parent 9d65b21ef4
commit b3f881a54c
24 changed files with 6186 additions and 0 deletions

experiments/timfuz/.gitignore

@ -0,0 +1,9 @@
/.Xil
/design/
/design.bit
/design.bits
/design.dcp
/usage_statistics_webtalk.*
/vivado*
/specimen_*
build_speed


@ -0,0 +1,22 @@
N := 1
SPECIMENS := $(addprefix specimen_,$(shell seq -f '%03.0f' $(N)))
SPECIMENS_OK := $(addsuffix /OK,$(SPECIMENS))
all: $(SPECIMENS_OK)
#python timfuz.py specimen_*/timing.txt
$(SPECIMENS_OK):
bash generate.sh $(subst /OK,,$@)
touch $@
run:
$(MAKE) clean
$(MAKE) all
touch run.ok
clean:
rm -rf specimen_[0-9][0-9][0-9]/ seg_clblx.segbits __pycache__ run.ok
rm -rf vivado*.log vivado_*.str vivado*.jou design *.bits *.dcp *.bit
.PHONY: all run clean


@ -0,0 +1,71 @@
Timing analysis fuzzer
This runs random designs through Vivado and extracts timing information in order to derive timing models.
While Vivado has more involved RC (SPICE?) models incorporating fanout and other effects,
for now we are shooting for simple, conservative models with a min and max timing delay.
*******************************************************************************
Background
*******************************************************************************
Vivado seems to associate each delay model with a "speed index".
In particular, we are currently looking at pips and wires, each of which has a speed index associated with it.
For every timing path, we record the total delay from one site to another, excluding site delays
(the timing analyzer provides an option to make this easy).
We then walk along the path and record all wires and pips in between.
These are converted to their associated speed indexes.
This gives an equation stating that a given series of speed indexes produced a certain total delay.
These equations are then fed into scipy.optimize.linprog to produce estimates for the delay models.
However, there are some complications. For example, given a system of equations like:
t0 = 5
t0 + t1 = 10
t0 + t1 + t2 = 12
the solver tends to put all of the delay in t0.
To get around this, we subtract equations from each other to derive per-element constraints (see the sketch below).
Some additional info here: https://github.com/SymbiFlow/prjxray/wiki/Timing
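As an illustration, here is a minimal sketch (not part of the fuzzer itself) of how the system above maps onto linprog.
linprog solves min c.x subject to A_ub.x <= b_ub, so the "sum of delays >= measured total" constraints are negated, mirroring the sign inversion done in timfuz.py:

import numpy as np
from scipy.optimize import linprog
# t0 >= 5, t0 + t1 >= 10, t0 + t1 + t2 >= 12
A_ub = -np.array([[1, 0, 0], [1, 1, 0], [1, 1, 1]], dtype=float)
b_ub = -np.array([5, 10, 12], dtype=float)
res = linprog(c=[1, 1, 1], A_ub=A_ub, b_ub=b_ub)  # x >= 0 by default
print(res.x)
# The optimum is degenerate: [5, 5, 2] and [12, 0, 0] both minimize the total,
# so the solver is free to dump everything into t0. Deriving t1 >= 5 and
# t2 >= 2 by subtraction adds the missing per-element lower bounds.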
*******************************************************************************
Quick start
*******************************************************************************
./speed.sh
python timfuz_delay.py --cols-max 9 timfuz_dat/s1_timing2.txt
This will report something like:
Delay on 36 / 162
Now add in some more data:
python timfuz_delay.py --cols-max 9 timfuz_dat/speed_json.json timfuz_dat/s*_timing2.txt
This should resolve a few more delay elements, say:
Delay on 57 / 185
*******************************************************************************
From scratch
*******************************************************************************
Roughly, the steps are:
Edit generate.tcl to uncomment speed_models2
Run "make N=1"
python speed_json.py specimen_001/speed_model.txt speed_json.json
Edit generate.tcl to comment out speed_models2
Run "make N=4" to generate some more timing data
Now run as in the quick start:
python timfuz_delay.py --cols-max 9 speed_json.json specimen_*/timing2.txt
*******************************************************************************
TODO:
*******************************************************************************
Verify elements are being imported correctly throughout the whole chain
Can any wires or similar be aggregated?
Ex: if a node consists of two wire delay models and that pair is never seen elsewhere
Look at virtual switchboxes. Can these be removed?
Look at suspicious elements like WIRE_RC_ZERO


@ -0,0 +1,71 @@
'''
pr0ntools
Benchmarking utility
Copyright 2010 John McMaster
'''
import time
def time_str(delta):
fraction = delta % 1
delta -= fraction
delta = int(delta)
seconds = delta % 60
delta //= 60
minutes = delta % 60
delta //= 60
hours = delta
return '%02d:%02d:%02d.%04d' % (hours, minutes, seconds, fraction * 10000)
class Benchmark:
start_time = None
end_time = None
def __init__(self, max_items = None):
# For the lazy
self.start_time = time.time()
self.end_time = None
self.max_items = max_items
self.cur_items = 0
def start(self):
self.start_time = time.time()
self.end_time = None
self.cur_items = 0
def stop(self):
self.end_time = time.time()
def advance(self, n = 1):
self.cur_items += n
def set_cur_items(self, n):
self.cur_items = n
def delta_s(self):
if self.end_time:
return self.end_time - self.start_time
else:
return time.time() - self.start_time
def __str__(self):
if self.end_time:
return time_str(self.end_time - self.start_time)
elif self.max_items:
cur_time = time.time()
delta_t = cur_time - self.start_time
rate_s = 'N/A'
if delta_t > 0.000001:
rate = self.cur_items / (delta_t)
rate_s = '%f items / sec' % rate
if rate == 0:
eta_str = 'inf'
else:
remaining = (self.max_items - self.cur_items) / rate
eta_str = time_str(remaining)
else:
eta_str = "indeterminate"
return '%d / %d, ETA: %s @ %s' % (self.cur_items, self.max_items, eta_str, rate_s)
else:
return time_str(time.time() - self.start_time)
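# Example usage (illustrative sketch, not part of the original module;
# "items" and "process" are placeholder names):
#   bench = Benchmark(max_items=len(items))
#   for item in items:
#       process(item)  # stand-in for real work
#       bench.advance()
#       print(bench)   # prints "cur / max, ETA: hh:mm:ss @ N items / sec"
#   bench.stop()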

experiments/timfuz/generate.sh

@ -0,0 +1,9 @@
#!/bin/bash
set -ex
source ${XRAY_GENHEADER}
python ../placelut_ff_fb.py --sdx 4 --sdy 4 >placelut.v
vivado -mode batch -source ../generate.tcl


@ -0,0 +1,279 @@
source ../../../utils/utils.tcl
proc pin_info {pin} {
set cell [get_cells -of_objects $pin]
set bel [get_bels -of_objects $cell]
set site [get_sites -of_objects $bel]
return "$site $bel"
}
proc pin_bel {pin} {
set cell [get_cells -of_objects $pin]
set bel [get_bels -of_objects $cell]
return $bel
}
proc build_design {} {
create_project -force -part $::env(XRAY_PART) design design
#read_verilog ../top.v
#read_verilog ../picorv32.v
#read_verilog ../oneblinkw.v
read_verilog placelut.v
synth_design -top top
puts "Locking pins"
set_property LOCK_PINS {I0:A1 I1:A2 I2:A3 I3:A4 I4:A5 I5:A6} \
[get_cells -quiet -filter {REF_NAME == LUT6} -hierarchical]
puts "Package stuff"
set_property -dict "PACKAGE_PIN $::env(XRAY_PIN_00) IOSTANDARD LVCMOS33" [get_ports clk]
set_property -dict "PACKAGE_PIN $::env(XRAY_PIN_01) IOSTANDARD LVCMOS33" [get_ports stb]
set_property -dict "PACKAGE_PIN $::env(XRAY_PIN_02) IOSTANDARD LVCMOS33" [get_ports di]
set_property -dict "PACKAGE_PIN $::env(XRAY_PIN_03) IOSTANDARD LVCMOS33" [get_ports do]
if {0 < 0} {
puts "pblocking"
create_pblock roi
set roipb [get_pblocks roi]
set_property EXCLUDE_PLACEMENT 1 $roipb
add_cells_to_pblock $roipb [get_cells roi]
resize_pblock $roipb -add "$::env(XRAY_ROI)"
puts "randplace"
randplace_pblock 150 $roipb
}
set_property CFGBVS VCCO [current_design]
set_property CONFIG_VOLTAGE 3.3 [current_design]
set_property BITSTREAM.GENERAL.PERFRAMECRC YES [current_design]
puts "dedicated route"
set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets clk_IBUF]
place_design
route_design
write_checkpoint -force design.dcp
# disable combinatorial loop DRC check
# set_property IS_ENABLED 0 [get_drc_checks {LUTLP-1}]
#write_bitstream -force design.bit
}
# Changed to group wires and nodes
# This allows tracing the full path along with pips
proc write_info3 {} {
set outdir "."
set fp [open "$outdir/timing3.txt" w]
# bel as site/bel, so don't bother with site
puts $fp "net src_bel dst_bel ico fast_max fast_min slow_max slow_min pips inodes wires"
set TIME_start [clock clicks -milliseconds]
set verbose 0
set equations 0
set site_src_nets 0
set site_dst_nets 0
set neti 0
set nets [get_nets -hierarchical]
set nnets [llength $nets]
foreach net $nets {
incr neti
#if {$neti >= 10} {
# puts "Debug break"
# break
#}
puts "Net $neti / $nnets: $net"
# The semantics of get_pins -leaf are kind of odd
# When no passthrough LUTs exist, it has no effect
# When a passthrough LUT is present:
# -w/o -leaf: some pins + passthrough LUT pins
# -w/ -leaf: different pins + passthrough LUT pins
# With the OUT filter this seems to be sufficient
set src_pin [get_pins -leaf -filter {DIRECTION == OUT} -of_objects $net]
set src_bel [pin_bel $src_pin]
set src_site [get_sites -of_objects $src_bel]
# Only one net driver
set src_site_pins [get_site_pins -filter {DIRECTION == OUT} -of_objects $net]
# Sometimes this is empty for reasons that escape me
# Omitting the direction filter doesn't help
if {[llength $src_site_pins] < 1} {
if $verbose {
puts " Ignoring site internal net"
}
incr site_src_nets
continue
}
set dst_site_pins_net [get_site_pins -filter {DIRECTION == IN} -of_objects $net]
if {[llength $dst_site_pins_net] < 1} {
puts " Skipping site internal source net"
incr site_dst_nets
continue
}
foreach src_site_pin $src_site_pins {
if $verbose {
puts "Source: $src_pin at site $src_site:$src_bel, spin $src_site_pin"
}
# Run with and without interconnect only
foreach ico "0 1" {
set ico_flag ""
if $ico {
set ico_flag "-interconnect_only"
set delays [get_net_delays $ico_flag -of_objects $net]
} else {
set delays [get_net_delays -of_objects $net]
}
foreach delay $delays {
set delaystr [get_property NAME $delay]
set dst_pins [get_property TO_PIN $delay]
set dst_pin [get_pins $dst_pins]
#puts " $delaystr: $src_pin => $dst_pin"
set dst_bel [pin_bel $dst_pin]
set dst_site [get_sites -of_objects $dst_bel]
if $verbose {
puts " Dest: $dst_pin at site $dst_site:$dst_bel"
}
set dst_site_pins [get_site_pins -of_objects $dst_pin]
# Some nets are internal
# But should this happen on dest if we've already filtered source?
if {"$dst_site_pins" eq ""} {
continue
}
# Also, apparently you can have multiple of these as well
foreach dst_site_pin $dst_site_pins {
set fast_max [get_property "FAST_MAX" $delay]
set fast_min [get_property "FAST_MIN" $delay]
set slow_max [get_property "SLOW_MAX" $delay]
set slow_min [get_property "SLOW_MIN" $delay]
# Want:
# Site / BEL at src
# Site / BEL at dst
# Pips in between
# Walk net, looking for interesting elements in between
set pips [get_pips -of_objects $net -from $src_site_pin -to $dst_site_pin]
if $verbose {
foreach pip $pips {
puts " PIP $pip"
}
}
set nodes [get_nodes -of_objects $net -from $src_site_pin -to $dst_site_pin]
#set wires [get_wires -of_objects $net -from $src_site_pin -to $dst_site_pin]
set wires [get_wires -of_objects $nodes]
# puts $fp "$net $src_bel $dst_bel $ico $fast_max $fast_min $slow_max $slow_min $pips"
puts -nonewline $fp "$net $src_bel $dst_bel $ico $fast_max $fast_min $slow_max $slow_min"
# Write pips w/ speed index
puts -nonewline $fp " "
set needspace 0
foreach pip $pips {
if $needspace {
puts -nonewline $fp "|"
}
set speed_index [get_property SPEED_INDEX $pip]
puts -nonewline $fp "$pip:$speed_index"
set needspace 1
}
# Write nodes
#set nodes_str [string map {" " "|"} $nodes]
#puts -nonewline $fp " $nodes_str"
puts -nonewline $fp " "
set needspace 0
foreach node $nodes {
if $needspace {
puts -nonewline $fp "|"
}
set nwires [llength [get_wires -of_objects $node]]
puts -nonewline $fp "$node:$nwires"
set needspace 1
}
# Write wires
puts -nonewline $fp " "
set needspace 0
foreach wire $wires {
if $needspace {
puts -nonewline $fp "|"
}
set speed_index [get_property SPEED_INDEX $wire]
puts -nonewline $fp "$wire:$speed_index"
set needspace 1
}
puts $fp ""
incr equations
break
}
}
}
}
}
close $fp
set TIME_taken [expr [clock clicks -milliseconds] - $TIME_start]
puts "Took ms: $TIME_taken"
puts "Generated $equations equations"
puts "Skipped $site_src_nets (+ $site_dst_nets) site nets"
}
proc pips_all {} {
set outdir "."
set fp [open "$outdir/pip_all.txt" w]
set items [get_pips]
puts "Items: [llength $items]"
set needspace 0
set properties [list_property [lindex $items 0]]
foreach item $items {
set needspace 0
foreach property $properties {
set val [get_property $property $item]
if {"$val" ne ""} {
if $needspace {
puts -nonewline $fp " "
}
puts -nonewline $fp "$property:$val"
set needspace 1
}
}
puts $fp ""
}
close $fp
}
proc wires_all {} {
set outdir "."
set fp [open "$outdir/wire_all.txt" w]
set items [get_wires]
puts "Items: [llength $items]"
set needspace 0
set properties [list_property [lindex $items 0]]
foreach item $items {
set needspace 0
foreach property $properties {
set val [get_property $property $item]
if {"$val" ne ""} {
if $needspace {
puts -nonewline $fp " "
}
puts -nonewline $fp "$property:$val"
set needspace 1
}
}
puts $fp ""
}
close $fp
}
build_design
#write_info2
write_info3
#wires_all
#pips_all


@ -0,0 +1,99 @@
'''
Verifies that node timing info is unique
'''
import re
def gen_nodes(fin):
for l in fin:
lj = {}
l = l.strip()
for kvs in l.split():
name, value = kvs.split(':')
'''
NAME:LIOB33_SING_X0Y199/IOB_IBUF0
IS_BAD:0
IS_COMPLETE:1
IS_GND:0 IS_INPUT_PIN:1 IS_OUTPUT_PIN:0 IS_PIN:1 IS_VCC:0
NUM_WIRES:2
PIN_WIRE:1
'''
if name in ('COST_CODE', 'SPEED_CLASS'):
value = int(value)
lj[name] = value
tile_type, xy, wname = re.match(r'(.*)_(X[0-9]*Y[0-9]*)/(.*)', lj['NAME']).groups()
lj['tile_type'] = tile_type
lj['xy'] = xy
lj['wname'] = wname
lj['l'] = l
yield lj
def run(node_fin, verbose=0):
refnodes = {}
nodei = 0
for nodei, anode in enumerate(gen_nodes(node_fin)):
def getk(anode):
return anode['wname']
#return (anode['tile_type'], anode['wname'])
if nodei % 1000 == 0:
print 'Check node %d' % nodei
# Existing node?
try:
refnode = refnodes[getk(anode)]
except KeyError:
# Set as reference
refnodes[getk(anode)] = anode
continue
# Verify equivalence
for k in (
'SPEED_CLASS',
'COST_CODE', 'COST_CODE_NAME',
'IS_BAD', 'IS_COMPLETE', 'IS_GND', 'IS_VCC',
):
if k in refnode and k in anode:
def fail():
print 'Mismatch on %s' % k
print refnode[k], anode[k]
print refnode['l']
print anode['l']
#assert 0
if k == 'SPEED_CLASS':
# Parameters known to affect SPEED_CLASS
# Verify at least one parameter is different
if refnode[k] != anode[k]:
for k2 in ('IS_PIN', 'IS_INPUT_PIN', 'IS_OUTPUT_PIN', 'PIN_WIRE', 'NUM_WIRES'):
if refnode[k2] != anode[k2]:
break
else:
if 0:
print
fail()
elif refnode[k] != anode[k]:
print
fail()
# A key in one but not the other?
elif k in refnode or k in anode:
assert 0
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(
description=
'Timing fuzzer'
)
parser.add_argument('--verbose', type=int, help='')
parser.add_argument(
'node_fn_in',
default='/dev/stdin',
nargs='?',
help='Input file')
args = parser.parse_args()
run(open(args.node_fn_in, 'r'), verbose=args.verbose)


@ -0,0 +1,37 @@
module roi (
input wire clk,
output wire out);
reg [23:0] counter;
assign out = counter[23] ^ counter[22] ^ counter[2] && counter[1] || counter[0];
always @(posedge clk) begin
counter <= counter + 1;
end
endmodule
module top(input wire clk, input wire stb, input wire di, output wire do);
localparam integer DIN_N = 0;
localparam integer DOUT_N = 1;
reg [DIN_N-1:0] din;
wire [DOUT_N-1:0] dout;
reg [DIN_N-1:0] din_shr;
reg [DOUT_N-1:0] dout_shr;
always @(posedge clk) begin
din_shr <= {din_shr, di};
dout_shr <= {dout_shr, din_shr[DIN_N-1]};
if (stb) begin
din <= din_shr;
dout_shr <= dout;
end
end
assign do = dout_shr[DOUT_N-1];
roi roi(
.clk(clk),
.out(dout[0])
);
endmodule

File diff suppressed because it is too large


@ -0,0 +1,89 @@
'''
Verifies that pip/wire timing info is invariant for a given name
'''
import re
def gen_wires(fin):
for l in fin:
lj = {}
l = l.strip()
for kvs in l.split():
name, value = kvs.split(':')
lj[name] = value
tile_type, xy, wname = re.match(r'(.*)_(X[0-9]*Y[0-9]*)/(.*)', lj['NAME']).groups()
lj['tile_type'] = tile_type
lj['xy'] = xy
lj['wname'] = wname
lj['l'] = l
yield lj
def run(node_fin, verbose=0):
refnodes = {}
nodei = 0
for nodei, anode in enumerate(gen_wires(node_fin)):
def getk(anode):
return anode['wname']
#return (anode['tile_type'], anode['wname'])
if nodei % 1000 == 0:
print 'Check node %d' % nodei
# Existing node?
try:
refnode = refnodes[getk(anode)]
except KeyError:
# Set as reference
refnodes[getk(anode)] = anode
continue
k_invariant = (
'CAN_INVERT',
'IS_BUFFERED_2_0',
'IS_BUFFERED_2_1',
'IS_DIRECTIONAL',
'IS_EXCLUDED_PIP',
'IS_FIXED_INVERSION',
'IS_INVERTED',
'IS_PSEUDO',
'IS_SITE_PIP',
'IS_TEST_PIP',
)
k_varies = (
'TILE',
)
# Verify equivalence
for k in k_invariant:
if k in refnode and k in anode:
def fail():
print 'Mismatch on %s' % k
print refnode[k], anode[k]
print refnode['l']
print anode['l']
#assert 0
if refnode[k] != anode[k]:
print
fail()
# A key in one but not the other?
elif k in refnode or k in anode:
assert 0
elif k not in k_varies:
assert 0
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(
description=
'Timing fuzzer'
)
parser.add_argument('--verbose', type=int, help='')
parser.add_argument(
'node_fn_in',
default='/dev/stdin',
nargs='?',
help='Input file')
args = parser.parse_args()
run(open(args.node_fn_in, 'r'), verbose=args.verbose)


@ -0,0 +1,91 @@
#!/usr/bin/env python
import argparse
parser = argparse.ArgumentParser(description='')
parser.add_argument('--sdx', default='8', help='')
parser.add_argument('--sdy', default='4', help='')
args = parser.parse_args()
'''
Generate in pairs
Fill up switchbox quad for now
Create random connections between the LUTs
See how much routing pressure we can generate
Start with non-random connections to the LFSR for solver comparison
Start at SLICE_X16Y102
'''
SBASE = (16, 102)
SDX = int(args.sdx, 0)
SDY = int(args.sdy, 0)
nlut = 4 * SDX * SDY
nin = 6 * nlut
nout = nlut
print('//placelut simple')
print('//SBASE: %s' % (SBASE,))
print('//SDX: %s' % (SDX,))
print('//SDY: %s' % (SDY,))
print('//nlut: %s' % (nlut,))
print('''\
module roi (
input wire clk,
input wire [%u:0] ins,
output wire [%u:0] outs);''') % (nin - 1, nout -1)
ini = 0
outi = 0
for lutx in xrange(SBASE[0], SBASE[0] + SDX):
for luty in xrange(SBASE[1], SBASE[1] + SDY):
loc = "SLICE_X%uY%u" % (lutx, luty)
for belc in 'ABCD':
bel = '%c6LUT' % belc
print('''\
(* KEEP, DONT_TOUCH, LOC="%s", BEL="%s" *)
LUT6 #(
.INIT(64'hBAD1DEA_1DEADCE0)
) %s (''') % (loc, bel, 'lut_x%uy%u_%c' % (lutx, luty, belc))
for i in xrange(6):
print('''\
.I%u(ins[%u]),''' % (i, ini))
ini += 1
print('''\
.O(outs[%u]));''') % (outi,)
outi += 1
assert nin == ini
assert nout == outi
print('''
endmodule
module top(input wire clk, input wire stb, input wire di, output wire do);
localparam integer DIN_N = %u;
localparam integer DOUT_N = %u;
reg [DIN_N-1:0] din;
wire [DOUT_N-1:0] dout;
reg [DIN_N-1:0] din_shr;
reg [DOUT_N-1:0] dout_shr;
always @(posedge clk) begin
din_shr <= {din_shr, di};
dout_shr <= {dout_shr, din_shr[DIN_N-1]};
if (stb) begin
din <= din_shr;
dout_shr <= dout;
end
end
assign do = dout_shr[DOUT_N-1];
roi roi(
.clk(clk),
.ins(din),
.outs(dout)
);
endmodule''') % (nin, nout)


@ -0,0 +1,103 @@
#!/usr/bin/env python
'''
Note: vivado will (by default) fail bitgen DRC on LUT feedback loops
Looks like can probably be disabled, but we actually don't need a bitstream for timing analysis
'''
import argparse
import random
random.seed()
parser = argparse.ArgumentParser(description='')
parser.add_argument('--sdx', default='8', help='')
parser.add_argument('--sdy', default='4', help='')
args = parser.parse_args()
'''
Generate in pairs
Fill up switchbox quad for now
Create random connections between the LUTs
See how much routing pressure we can generate
Start with non-random connections to the LFSR for solver comparison
Start at SLICE_X16Y102
'''
SBASE = (16, 102)
SDX = int(args.sdx, 0)
SDY = int(args.sdy, 0)
nlut = 4 * SDX * SDY
nin = 6 * nlut
nout = nlut
print('//placelut w/ feedback')
print('//SBASE: %s' % (SBASE,))
print('//SDX: %s' % (SDX,))
print('//SDY: %s' % (SDY,))
print('//nlut: %s' % (nlut,))
print('''\
module roi (
input wire clk,
input wire [%u:0] ins,
output wire [%u:0] outs);''') % (nin - 1, nout -1)
ini = 0
outi = 0
for lutx in xrange(SBASE[0], SBASE[0] + SDX):
for luty in xrange(SBASE[1], SBASE[1] + SDY):
loc = "SLICE_X%uY%u" % (lutx, luty)
for belc in 'ABCD':
bel = '%c6LUT' % belc
print('''\
(* KEEP, DONT_TOUCH, LOC="%s", BEL="%s" *)
LUT6 #(
.INIT(64'hBAD1DEA_1DEADCE0)
) %s (''') % (loc, bel, 'lut_x%uy%u_%c' % (lutx, luty, belc))
for i in xrange(6):
if random.randint(0, 9) < 1:
wfrom = 'ins[%u]' % ini
ini += 1
else:
wfrom = 'outs[%u]' % random.randint(0, nout - 1)
print('''\
.I%u(%s),''' % (i, wfrom))
print('''\
.O(outs[%u]));''') % (outi,)
outi += 1
#assert nin == ini
assert nout == outi
print('''
endmodule
module top(input wire clk, input wire stb, input wire di, output wire do);
localparam integer DIN_N = %u;
localparam integer DOUT_N = %u;
reg [DIN_N-1:0] din;
wire [DOUT_N-1:0] dout;
reg [DIN_N-1:0] din_shr;
reg [DOUT_N-1:0] dout_shr;
always @(posedge clk) begin
din_shr <= {din_shr, di};
dout_shr <= {dout_shr, din_shr[DIN_N-1]};
if (stb) begin
din <= din_shr;
dout_shr <= dout;
end
end
assign do = dout_shr[DOUT_N-1];
roi roi(
.clk(clk),
.ins(din),
.outs(dout)
);
endmodule''') % (nin, nout)


@ -0,0 +1,126 @@
#!/usr/bin/env python
'''
Note: vivado will (by default) fail bitgen DRC on LUT feedback loops
Looks like can probably be disabled, but we actually don't need a bitstream for timing analysis
ERROR: [Vivado 12-2285] Cannot set LOC property of instance 'roi/lut_x22y102_D', Instance roi/lut_x22y102_D can not be placed in D6LUT of site SLICE_X18Y103 because the bel is occupied by roi/lut_x18y103_D(port:). This could be caused by bel constraint conflict
Resolution: When using BEL constraints, ensure the BEL constraints are defined before the LOC constraints to avoid conflicts at a given site.
'''
import argparse
import random
random.seed()
parser = argparse.ArgumentParser(description='')
parser.add_argument('--sdx', default='8', help='')
parser.add_argument('--sdy', default='4', help='')
args = parser.parse_args()
'''
Generate in pairs
Fill up switchbox quad for now
Create random connections between the LUTs
See how much routing pressure we can generate
Start with non-random connections to the LFSR for solver comparison
Start at SLICE_X16Y102
'''
SBASE = (16, 102)
SDX = int(args.sdx, 0)
SDY = int(args.sdy, 0)
nlut = 4 * SDX * SDY
nin = 6 * nlut
nout = nlut
print('//placelut w/ FF + feedback')
print('//SBASE: %s' % (SBASE,))
print('//SDX: %s' % (SDX,))
print('//SDY: %s' % (SDY,))
print('//nlut: %s' % (nlut,))
print('''\
module roi (
input wire clk,
input wire [%u:0] ins,
output wire [%u:0] outs);''') % (nin - 1, nout -1)
ini = 0
outi = 0
for lutx in xrange(SBASE[0], SBASE[0] + SDX):
for luty in xrange(SBASE[1], SBASE[1] + SDY):
loc = "SLICE_X%uY%u" % (lutx, luty)
for belc in 'ABCD':
bel = '%c6LUT' % belc
name = 'lut_x%uy%u_%c' % (lutx, luty, belc)
print('''\
(* KEEP, DONT_TOUCH, LOC="%s", BEL="%s" *)
LUT6 #(
.INIT(64'hBAD1DEA_1DEADCE0)
) %s (''') % (loc, bel, name)
for i in xrange(6):
rval = random.randint(0, 9)
if rval < 3:
wfrom = 'ins[%u]' % ini
ini += 1
#elif rval < 6:
# wfrom = 'outsr[%u]' % random.randint(0, nout - 1)
else:
wfrom = 'outs[%u]' % random.randint(0, nout - 1)
print('''\
.I%u(%s),''' % (i, wfrom))
out_w = name + '_o'
print('''\
.O(%s));''') % (out_w,)
outs_w = "outs[%u]" % outi
if random.randint(0, 9) < 5:
print(' assign %s = %s;' % (outs_w, out_w))
else:
out_r = name + '_or'
print('''\
reg %s;
assign %s = %s;
always @(posedge clk) begin
%s = %s;
end
''' % (out_r, outs_w, out_r, out_r, out_w))
outi += 1
#assert nin == ini
assert nout == outi
print('''
endmodule
module top(input wire clk, input wire stb, input wire di, output wire do);
localparam integer DIN_N = %u;
localparam integer DOUT_N = %u;
reg [DIN_N-1:0] din;
wire [DOUT_N-1:0] dout;
reg [DIN_N-1:0] din_shr;
reg [DOUT_N-1:0] dout_shr;
always @(posedge clk) begin
din_shr <= {din_shr, di};
dout_shr <= {dout_shr, din_shr[DIN_N-1]};
if (stb) begin
din <= din_shr;
dout_shr <= dout;
end
end
assign do = dout_shr[DOUT_N-1];
roi roi(
.clk(clk),
.ins(din),
.outs(dout)
);
endmodule''') % (nin, nout)

experiments/timfuz/speed.sh

@ -0,0 +1,8 @@
#!/usr/bin/env bash
set -ex
mkdir -p build_speed
cd build_speed
vivado -mode batch -source ../speed.tcl
python ../speed_json.py speed_model.txt node.txt speed.json


@ -0,0 +1,182 @@
source ../../../utils/utils.tcl
proc pin_info {pin} {
set cell [get_cells -of_objects $pin]
set bel [get_bels -of_objects $cell]
set site [get_sites -of_objects $bel]
return "$site $bel"
}
proc pin_bel {pin} {
set cell [get_cells -of_objects $pin]
set bel [get_bels -of_objects $cell]
return $bel
}
proc build_design_full {} {
create_project -force -part $::env(XRAY_PART) design design
read_verilog ../top.v
read_verilog ../picorv32.v
synth_design -top top
set_property LOCK_PINS {I0:A1 I1:A2 I2:A3 I3:A4 I4:A5 I5:A6} \
[get_cells -quiet -filter {REF_NAME == LUT6} -hierarchical]
set_property -dict "PACKAGE_PIN $::env(XRAY_PIN_00) IOSTANDARD LVCMOS33" [get_ports clk]
set_property -dict "PACKAGE_PIN $::env(XRAY_PIN_01) IOSTANDARD LVCMOS33" [get_ports stb]
set_property -dict "PACKAGE_PIN $::env(XRAY_PIN_02) IOSTANDARD LVCMOS33" [get_ports di]
set_property -dict "PACKAGE_PIN $::env(XRAY_PIN_03) IOSTANDARD LVCMOS33" [get_ports do]
create_pblock roi
set roipb [get_pblocks roi]
set_property EXCLUDE_PLACEMENT 1 $roipb
add_cells_to_pblock $roipb [get_cells roi]
resize_pblock $roipb -add "$::env(XRAY_ROI)"
randplace_pblock 150 $roipb
set_property CFGBVS VCCO [current_design]
set_property CONFIG_VOLTAGE 3.3 [current_design]
set_property BITSTREAM.GENERAL.PERFRAMECRC YES [current_design]
set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets clk_IBUF]
place_design
route_design
write_checkpoint -force design.dcp
write_bitstream -force design.bit
}
proc build_design_synth {} {
create_project -force -part $::env(XRAY_PART) design design
read_verilog ../top.v
read_verilog ../picorv32.v
synth_design -top top
}
# WARNING: [Common 17-673] Cannot get value of property 'FORWARD' because this property is not valid in conjunction with other property setting on this object.
# WARNING: [Common 17-673] Cannot get value of property 'REVERSE' because this property is not valid in conjunction with other property setting on this object.
proc speed_models1 {} {
set outdir "."
set fp [open "$outdir/speed_model.txt" w]
# list_property [lindex [get_speed_models] 0]
set speed_models [get_speed_models]
set properties [list_property [lindex $speed_models 0]]
# "CLASS DELAY FAST_MAX FAST_MIN IS_INSTANCE_SPECIFIC NAME NAME_LOGICAL SLOW_MAX SLOW_MIN SPEED_INDEX TYPE"
puts $fp $properties
set needspace 0
foreach speed_model $speed_models {
foreach property $properties {
if $needspace {
puts -nonewline $fp " "
}
puts -nonewline $fp [get_property $property $speed_model]
set needspace 1
}
puts $fp ""
}
close $fp
}
proc speed_models2 {} {
set outdir "."
set fp [open "$outdir/speed_model.txt" w]
# list_property [lindex [get_speed_models] 0]
set speed_models [get_speed_models]
puts "Items: [llength $speed_models]"
set needspace 0
# Not all objects populate every property
# But they do all seem to share the same property list
set properties [list_property [lindex $speed_models 0]]
foreach speed_model $speed_models {
set needspace 0
foreach property $properties {
set val [get_property $property $speed_model]
if {"$val" ne ""} {
if $needspace {
puts -nonewline $fp " "
}
puts -nonewline $fp "$property:$val"
set needspace 1
}
}
puts $fp ""
}
close $fp
}
# For cost codes
# Items: 2663055
# Hmm, too much
# Let's filter out the items we really want
proc nodes_all {} {
set outdir "."
set fp [open "$outdir/node_all.txt" w]
set items [get_nodes]
puts "Items: [llength $items]"
set needspace 0
set properties [list_property [lindex $items 0]]
foreach item $items {
set needspace 0
foreach property $properties {
set val [get_property $property $item]
if {"$val" ne ""} {
if $needspace {
puts -nonewline $fp " "
}
puts -nonewline $fp "$property:$val"
set needspace 1
}
}
puts $fp ""
}
close $fp
}
# Only writes out items with unique cost codes
# (much faster)
proc nodes_unique_cc {} {
set outdir "."
set fp [open "$outdir/node.txt" w]
set items [get_nodes]
puts "Items: [llength $items]"
set needspace 0
set properties [list_property [lindex $items 0]]
set cost_codes_known [dict create]
set itemi 0
foreach item $items {
incr itemi
set cost_code [get_property COST_CODE $item]
if {[ dict exists $cost_codes_known $cost_code ]} {
continue
}
puts "Adding $cost_code @ item $itemi"
dict set cost_codes_known $cost_code 1
set needspace 0
foreach property $properties {
set val [get_property $property $item]
if {"$val" ne ""} {
if $needspace {
puts -nonewline $fp " "
}
puts -nonewline $fp "$property:$val"
set needspace 1
}
}
puts $fp ""
}
close $fp
}
build_design_full
speed_models2
nodes_unique_cc


@ -0,0 +1,98 @@
import json
def load_speed(fin):
speed_models = {}
speed_types = {}
for l in fin:
delay = {}
l = l.strip()
for kvs in l.split():
name, value = kvs.split(':')
name = name.lower()
if name in ('class',):
continue
if name in ('speed_index',):
value = int(value)
if name == 'type':
speed_types.setdefault(value, {})
delay[name] = value
delayk = delay['name']
if delayk in speed_models:
raise Exception("Duplicate name")
if "name" in delay and "name_logical" in delay:
# Always true
if delay['name'] != delay['name_logical']:
raise Exception("nope!")
# Found a counter example
if 0 and delay['name'] != delay['forward']:
# ('BSW_NONTLFW_TLRV', '_BSW_LONG_NONTLFORWARD')
print(delay['name'], delay['forward'])
raise Exception("nope!")
# Found a counter example
if 0 and delay['forward'] != delay['reverse']:
# _BSW_LONG_NONTLFORWARD _BSW_LONG_TLREVERSE
print(delay['forward'], delay['reverse'])
raise Exception("nope!")
speed_models[delayk] = delay
return speed_models, speed_types
def load_cost_code(fin):
# COST_CODE:4 COST_CODE_NAME:SLOWSINGLE
cost_codes = {}
for l in fin:
lj = {}
l = l.strip()
for kvs in l.split():
name, value = kvs.split(':')
name = name.lower()
lj[name] = value
cost_code = {
'name': lj['cost_code_name'],
'code': int(lj['cost_code']),
# Hmm is this unique per type?
#'speed_class': int(lj['speed_class']),
}
cost_codes[cost_code['name']] = cost_code
return cost_codes
def run(speed_fin, node_fin, fout, verbose=0):
print('Loading data')
speed_models, speed_types = load_speed(speed_fin)
cost_codes = load_cost_code(node_fin)
j = {
'speed_model': speed_models,
'speed_type': speed_types,
'cost_code': cost_codes,
}
json.dump(j, fout, sort_keys=True, indent=4, separators=(',', ': '))
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(
description=
'Timing fuzzer'
)
parser.add_argument('--verbose', type=int, help='')
parser.add_argument(
'speed_fn_in',
default='/dev/stdin',
nargs='?',
help='Input file')
parser.add_argument(
'node_fn_in',
default='/dev/stdin',
nargs='?',
help='Input file')
parser.add_argument(
'fn_out',
default='/dev/stdout',
nargs='?',
help='Output file')
args = parser.parse_args()
run(open(args.speed_fn_in, 'r'), open(args.node_fn_in, 'r'), open(args.fn_out, 'w'), verbose=args.verbose)


@ -0,0 +1,819 @@
#!/usr/bin/env python
# https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.optimize.linprog.html
from scipy.optimize import linprog
import math
import numpy as np
from collections import OrderedDict
import time
import re
import os
import datetime
import json
import copy
import sys
import random
import glob
from benchmark import Benchmark
NAME_ZERO = set([
"BSW_CLK_ZERO",
"BSW_ZERO",
"B_ZERO",
"C_CLK_ZERO",
"C_DSP_ZERO",
"C_ZERO",
"I_ZERO",
"O_ZERO",
"RC_ZERO",
"R_ZERO",
])
# csv index
corner_s2i = {
'fast_max': 0,
'fast_min': 1,
'slow_max': 2,
'slow_min': 3,
}
def print_eqns(A_ubd, b_ub, verbose=0, lim=3, label=''):
rows = len(b_ub)
print('Sample equations (%s) from %d r' % (label, rows))
prints = 0
#verbose = 1
for rowi, row in enumerate(A_ubd):
if verbose or ((rowi < 10 or rowi % max(1, (rows / 20)) == 0) and (not lim or prints < lim)):
line = ' EQN: p%u: ' % rowi
for k, v in sorted(row.items()):
line += '%u*t%d ' % (v, k)
line += '= %d' % b_ub[rowi]
print(line)
prints += 1
def print_name_eqns(A_ubd, b_ub, names, verbose=0, lim=3, label=''):
rows = len(b_ub)
print('Sample equations (%s) from %d r' % (label, rows))
prints = 0
#verbose = 1
for rowi, row in enumerate(A_ubd):
if verbose or ((rowi < 10 or rowi % max(1, (rows / 20)) == 0) and (not lim or prints < lim)):
line = ' EQN: p%u: ' % rowi
for k, v in sorted(row.items()):
line += '%u*%s ' % (v, names[k])
line += '= %d' % b_ub[rowi]
print(line)
prints += 1
def print_names(names, verbose=1):
print('Names: %d' % len(names))
for xi, name in enumerate(names):
print(' % 4u % -80s' % (xi, name))
def invb(b_ub):
#return [-b for b in b_ub]
return -np.array(b_ub)
def check_feasible_d(A_ubd, b_ub, names):
A_ub, b_ub_inv = Ab_d2np(A_ubd, b_ub, names)
check_feasible(A_ub, b_ub_inv)
def check_feasible(A_ub, b_ub):
sys.stdout.write('Check feasible ')
sys.stdout.flush()
rows = len(b_ub)
cols = len(A_ub[0])
progress = max(1, rows / 100)
# Choose a high arbitrary value for x
# Delays should be on the order of ns, so a 10 ns delay should be way above what any real element should be
xs = [10e3 for _i in range(cols)]
# FIXME: use the correct np function to do this for me
# Verify bounds
#b_res = np.matmul(A_ub, xs)
#print(type(A_ub), type(xs)
#A_ub = np.array(A_ub)
#xs = np.array(xs)
#b_res = np.matmul(A_ub, xs)
def my_mul(A_ub, xs):
#print('cols', cols
#print('rows', rows
ret = [None] * rows
for row in range(rows):
this = 0
for col in range(cols):
this += A_ub[row][col] * xs[col]
ret[row] = this
return ret
b_res = my_mul(A_ub, xs)
# Verify bound was respected
for rowi, (this_b, this_b_ub) in enumerate(zip(b_res, b_ub)):
if rowi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if this_b >= this_b_ub or this_b > 0:
print('% 4d Want res % 10.1f <= % 10.1f <= 0' % (rowi, this_b, this_b_ub))
raise Exception("Bad ")
print(' done')
def Ab_ub_dt2d(eqns):
'''Convert dict using the rows as keys into a list of dicts + b_ub list (ie return A_ub, b_ub)'''
#return [dict(rowt) for rowt in eqns]
rows = [(dict(rowt), b) for rowt, b in eqns.items()]
A_ubd, b_ub = zip(*rows)
return list(A_ubd), list(b_ub)
# This significantly reduces runtime
def simplify_rows(A_ubd, b_ub):
'''Remove duplicate equations, taking highest delay'''
# dict of constants to highest delay
eqns = OrderedDict()
assert len(A_ubd) == len(b_ub), (len(A_ubd), len(b_ub))
sys.stdout.write('SimpR ')
sys.stdout.flush()
progress = max(1, len(b_ub) / 100)
zero_ds = 0
zero_es = 0
for loopi, (b, rowd) in enumerate(zip(b_ub, A_ubd)):
if loopi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
# TODO: elements have zero delay (ex: COUT)
# Remove these for now since they make me nervous
# Although they should just solve to 0
if not b:
zero_ds += 1
continue
# A very few of these exist with very small values
# TODO: investigate, understand what these are
# Leaving these in can make the result unsolvable since there does not exist a set of constants to reach the delay
if len(rowd) == 0:
zero_es += 1
continue
rowt = Ar_ds2t(rowd)
eqns[rowt] = max(eqns.get(rowt, 0), b)
print(' done')
#A_ub_ret = eqns.keys()
A_ubd_ret, b_ub_ret = Ab_ub_dt2d(eqns)
print('Simplify rows: %d => %d w/ zd %d, ze %d' % (len(b_ub), len(b_ub_ret), zero_ds, zero_es))
#return A_ub_ret, b_ub_ret
#return A_ub_np2d(A_ub_ret), b_ub_ret
return A_ubd_ret, b_ub_ret
def simplify_cols(names, A_ubd, b_ub):
'''
Remove unused columns
This is fairly straightforward in dictionary form now, as we only have to remove entries and adjust indices
Maybe we should use the names as keys? Then this wouldn't be needed anymore, as indices wouldn't need to be rebased
XXX: shuffles the name order around. Do we care?
'''
# First: find unused names
# use dict since no standard ordered set
used_cols = set()
names_ret = OrderedDict()
col_old2new = OrderedDict()
rows = len(b_ub)
cols = len(names)
sys.stdout.write('SimpC indexing ')
sys.stdout.flush()
progress = max(1, rows / 100)
for rowi, rowd in enumerate(A_ubd):
if rowi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
for coli in rowd.keys():
used_cols.add(coli)
for coli in range(cols):
if coli in used_cols:
names_ret[names[coli]] = None
col_old2new[coli] = len(col_old2new)
assert len(used_cols) == len(col_old2new)
print(' done')
# Create a new matrix, copying important values over
#A_ub_ret = np.zeros((4, 1))
#A_ub_ret[3][0] = 1.0
#A_ub_ret = np.zeros((rows, len(names_ret)))
A_ub_ret = [None] * rows
sys.stdout.write('SimpC creating ')
sys.stdout.flush()
progress = max(1, rows / 100)
for rowi, rowd_old in enumerate(A_ubd):
if rowi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
l = [(col_old2new[k], v) for k, v in rowd_old.items()]
A_ub_ret[rowi] = OrderedDict(l)
print(' done')
print('Simplify cols: %d => %d cols' % (len(names), len(names_ret)))
nr = list(names_ret.keys())
return nr, A_ub_ret, b_ub
def A_ubr_np2d(row, sf=1):
'''Convert a single row'''
#d = {}
d = OrderedDict()
for coli, val in enumerate(row):
if val:
d[coli] = sf * val
return d
def A_ub_np2d(A_ub, sf=1):
'''Convert A_ub entries in numpy matrix to dictionary / sparse form'''
A_ubd = [None] * len(A_ub)
for i, row in enumerate(A_ub):
A_ubd[i] = A_ubr_np2d(row, sf=sf)
return A_ubd
# def Ar_ds2np(row_ds, names):
# Ar_di2np(row_di, cols, sf=1)
def Ar_di2np(row_di, cols, sf=1):
rownp = np.zeros(cols)
for coli, val in row_di.items():
# Sign inversion due to way solver works
rownp[coli] = sf * val
return rownp
# NOTE: sign inversion
def A_di2np(Adi, cols, sf=1):
'''Convert A_ub entries in dictionary / sparse to numpy matrix form'''
return [Ar_di2np(row_di, cols, sf=sf) for row_di in Adi]
def Ar_ds2t(rowd):
'''Convert a dictionary row into a tuple with (column number, value) tuples'''
return tuple(sorted(rowd.items()))
def A_ubr_t2d(rowt):
'''Convert a row of (column number, value) tuples back into a dictionary'''
return OrderedDict(rowt)
def A_ub_d2t(A_ubd):
'''Convert rows as dicts to rows as tuples'''
return [Ar_ds2t(rowd) for rowd in A_ubd]
def A_ub_t2d(A_ubd):
'''Convert rows as tuples to rows as dicts'''
return [OrderedDict(rowt) for rowt in A_ubd]
def Ab_d2np(A_ubd, b_ub, names):
A_ub = A_di2np(A_ubd, len(names))
b_ub_inv = invb(b_ub)
return A_ub, b_ub_inv
def Ab_np2d(A_ub, b_ub_inv):
A_ubd = A_ub_np2d(A_ub)
b_ub = invb(b_ub_inv)
return A_ubd, b_ub
def sort_equations_(A_ubd, b_ub):
# Dictionaries aren't hashable for sorting even though they are comparable
return A_ub_t2d(sorted(A_ub_d2t(A_ubd)))
def sort_equations(A_ub, b_ub):
# Track rows with value column
# Hmm can't sort against np arrays
tosort = [(sorted(row.items()), b) for row, b in zip(A_ub, b_ub)]
#res = sorted(tosort, key=lambda e: e[0])
res = sorted(tosort)
A_ubtr, b_ubr = zip(*res)
return [OrderedDict(rowt) for rowt in A_ubtr], b_ubr
def lte_const(row_ref, row_cmp):
'''Return True if every term in row_cmp appears in row_ref with an equal or larger constant (i.e. row_cmp <= row_ref term-wise)'''
#return False
for k, vc in row_cmp.items():
vr = row_ref.get(k, None)
# Not in reference?
if vr is None:
return False
if vr < vc:
return False
return True
def shared_const(row_ref, row_cmp):
'''Return true if more constants are equal than not equal'''
#return False
matches = 0
unmatches = 0
ks = list(row_ref.keys()) + list(row_cmp.keys())
for k in ks:
vr = row_ref.get(k, None)
vc = row_cmp.get(k, None)
# At least one
if vr is not None and vc is not None:
if vc == vr:
matches += 1
else:
unmatches += 1
else:
unmatches += 1
# Will equation reduce if subtracted?
return matches > unmatches
def reduce_const(row_ref, row_cmp):
'''Subtract cmp constants from ref'''
#ret = {}
ret = OrderedDict()
ks = set(row_ref.keys())
ks.update(set(row_cmp.keys()))
for k in ks:
vr = row_ref.get(k, 0)
vc = row_cmp.get(k, 0)
res = vr - vc
if res:
ret[k] = res
return ret
def derive_eq_by_row(A_ubd, b_ub, verbose=0, col_lim=0, tweak=False):
'''
Derive equations by subtracting whole rows
Given equations like:
t0 >= 10
t0 + t1 >= 15
t0 + t1 + t2 >= 17
When I look at these, I think of a solution something like:
t0 = 10
t1 = 5
t2 = 2
However, linprog tends to choose solutions like:
t0 = 17
t1 = 0
t2 = 0
To this end, add additional constraints by finding equations that are subsets of other equations
How to do this in a reasonable time span?
Also equations are sparse, which makes this harder to compute
'''
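# Worked example (illustrative) using the system from the docstring:
#   (t0 + t1 >= 15)      - (t0 >= 10)      =>  t1 >= 5
#   (t0 + t1 + t2 >= 17) - (t0 + t1 >= 15) =>  t2 >= 2
# These per-element lower bounds keep the solver from lumping all of the
# delay into a single shared variable.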
rows = len(A_ubd)
assert rows == len(b_ub)
# Index equations into hash maps so can lookup sparse elements quicker
assert len(A_ubd) == len(b_ub)
A_ubd_ret = copy.copy(A_ubd)
assert len(A_ubd) == len(A_ubd_ret)
#print('Finding subsets')
ltes = 0
scs = 0
b_ub_ret = list(b_ub)
sys.stdout.write('Deriving rows ')
sys.stdout.flush()
progress = max(1, rows / 100)
for row_refi, row_ref in enumerate(A_ubd):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if col_lim and len(row_ref) > col_lim:
continue
for row_cmpi, row_cmp in enumerate(A_ubd):
if row_refi == row_cmpi or col_lim and len(row_cmp) > col_lim:
continue
# FIXME: this check was supposed to be removed
'''
Every elements in row_cmp is in row_ref
but this doesn't mean the constants are smaller
Filter these out
'''
# XXX: just reduce and filter out solutions with positive constants
# or actually are these also useful as is?
lte = lte_const(row_ref, row_cmp)
if lte:
ltes += 1
sc = 0 and shared_const(row_ref, row_cmp)
if sc:
scs += 1
if lte or sc:
if verbose:
print('')
print('match')
print(' ', row_ref, b_ub[row_refi])
print(' ', row_cmp, b_ub[row_cmpi])
# Reduce
A_new = reduce_const(row_ref, row_cmp)
# Did this actually significantly reduce the search space?
#if tweak and len(A_new) > 4 and len(A_new) > len(row_cmp) / 2:
if tweak and len(A_new) > 8 and len(A_new) > len(row_cmp) / 2:
continue
b_new = b_ub[row_refi] - b_ub[row_cmpi]
# Definitely possible
# Maybe filter these out if they occur?
if verbose:
print(b_new)
# Also inverted sign
if b_new <= 0:
if verbose:
print("Unexpected b")
continue
if verbose:
print('OK')
A_ubd_ret.append(A_new)
b_ub_ret.append(b_new)
print(' done')
#A_ub_ret = A_di2np(A_ubd2, cols=cols)
print('Derive row: %d => %d rows using %d lte, %d sc' % (len(b_ub), len(b_ub_ret), ltes, scs))
assert len(A_ubd_ret) == len(b_ub_ret)
return A_ubd_ret, b_ub_ret
def derive_eq_by_col(A_ubd, b_ub, verbose=0):
'''
Derive equations by subtracting out all bounded constants (ie "known" columns)
'''
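# Illustrative example: if t0 is constrained alone (t0 >= 10, a "known"
# column), then a row like t0 + t1 >= 15 reduces to t1 >= 5 by deleting
# the t0 column and subtracting coefficient * bound from b.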
rows = len(A_ubd)
# Find all entries where
# Index equations with a single constraint
knowns = {}
sys.stdout.write('Derive col indexing ')
#A_ubd = A_ub_np2d(A_ub)
sys.stdout.flush()
progress = max(1, rows / 100)
for row_refi, row_refd in enumerate(A_ubd):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
if len(row_refd) == 1:
k, v = list(row_refd.items())[0]
# Reduce any constants to canonical form
if v != 1:
row_refd[k] = 1
b_ub[row_refi] /= v
knowns[k] = b_ub[row_refi]
print(' done')
#knowns_set = set(knowns.keys())
print('%d constrained' % len(knowns))
'''
Now see what we can do
Rows that are already constrained: eliminate
TODO: maybe keep these if this would violate their constraint
Otherwise eliminate the original row and generate a simplified result now
'''
b_ub_ret = []
A_ubd_ret = []
sys.stdout.write('Derive col main ')
sys.stdout.flush()
progress = max(1, rows / 100)
for row_refi, row_refd in enumerate(A_ubd):
if row_refi % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
# Reduce as much as possible
#row_new = {}
row_new = OrderedDict()
b_new = b_ub[row_refi]
# Copy over single entries
if len(row_refd) == 1:
row_new = row_refd
else:
for k, v in row_refd.items():
if k in knowns:
# Remove column and take out corresponding delay
b_new -= v * knowns[k]
# Copy over
else:
row_new[k] = v
# Possibly reduced all usable constants out
if len(row_new) == 0:
continue
if b_new <= 0:
continue
A_ubd_ret.append(row_new)
b_ub_ret.append(b_new)
print(' done')
print('Derive col: %d => %d rows' % (len(b_ub), len(b_ub_ret)))
return A_ubd_ret, b_ub_ret
def col_dist(A_ubd, desc='of', names=[], lim=0):
'''Print the frequency distribution of the number of elements in a given row'''
rows = len(A_ubd)
cols = len(names)
fs = {}
for row in A_ubd:
this_cols = len(row)
fs[this_cols] = fs.get(this_cols, 0) + 1
print('Col count distribution (%s) for %dr x %dc w/ %d freqs' % (desc, rows, cols, len(fs)))
prints = 0
for i, (k, v) in enumerate(sorted(fs.items())):
if lim == 0 or (lim and prints < lim or i == len(fs) - 1):
print(' %d: %d' % (k, v))
prints += 1
if lim and prints == lim:
print(' ...')
def name_dist(A_ubd, desc='of', names=[], lim=0):
'''Print the frequency distribution of the number of times an element appears'''
rows = len(A_ubd)
cols = len(names)
fs = {i: 0 for i in range(len(names))}
for row in A_ubd:
for k in row.keys():
fs[k] += 1
print('Name count distribution (%s) for %dr x %dc' % (desc, rows, cols))
prints = 0
for namei, name in enumerate(names):
if lim == 0 or (lim and prints < lim or namei == len(fs) - 1):
print(' %s: %d' % (name, fs[namei]))
prints += 1
if lim and prints == lim:
print(' ...')
fs2 = {}
for v in fs.values():
fs2[v] = fs2.get(v, 0) + 1
prints = 0
print('Distribution distribution (%d items)'% len(fs2))
for i, (k, v) in enumerate(sorted(fs2.items())):
if lim == 0 or (lim and prints < lim or i == len(fs2) - 1):
print(' %s: %s' % (k, v))
prints += 1
if lim and prints == lim:
print(' ...')
zeros = fs2.get(0, 0)
if zeros:
raise Exception("%d names without equation" % zeros)
def filter_ncols(A_ubd, b_ub, cols_min=0, cols_max=0):
'''Only keep equations with a few delay elements'''
A_ubd_ret = []
b_ub_ret = []
#print('Removing large rows')
for rowd, b in zip(A_ubd, b_ub):
if (not cols_min or len(rowd) >= cols_min) and (not cols_max or len(rowd) <= cols_max):
A_ubd_ret.append(rowd)
b_ub_ret.append(b)
print('Filter ncols w/ %d <= cols <= %d: %d ==> %d rows' % (cols_min, cols_max, len(b_ub), len(b_ub_ret)))
assert len(b_ub_ret)
return A_ubd_ret, b_ub_ret
def preprocess(A_ubd, b_ub, opts, names, verbose=0):
def debug(what):
if verbose:
print('')
print_eqns(A_ubd, b_ub, verbose=verbose, label=what, lim=20)
col_dist(A_ubd, what, names)
check_feasible_d(A_ubd, b_ub, names)
col_dist(A_ubd, 'pre-filt', names, lim=12)
debug('pre-filt')
need_simpc = 0
# Input set may have redundant constraints
A_ubd, b_ub = simplify_rows(A_ubd=A_ubd, b_ub=b_ub)
debug("simp_rows")
cols_min_pre = opts.get('cols_min_pre', None)
cols_max_pre = opts.get('cols_max_pre', None)
# Filter input based on number of columns
if cols_min_pre or cols_max_pre:
A_ubd, b_ub = filter_ncols(A_ubd=A_ubd, b_ub=b_ub, cols_min=cols_min_pre, cols_max=cols_max_pre)
debug("filt_ncols")
need_simpc = 1
# Limit input rows, mostly for quick full run checks
row_limit = opts.get('row_limit', None)
if row_limit:
before_rows = len(b_ub)
A_ubd = A_ubd[0:row_limit]
b_ub = b_ub[0:row_limit]
print('Row limit %d => %d rows' % (before_rows, len(b_ub)))
need_simpc = 1
if need_simpc:
names, A_ubd, b_ub = simplify_cols(names=names, A_ubd=A_ubd, b_ub=b_ub)
debug("simp_cols")
return A_ubd, b_ub, names
def massage_equations(A_ubd, b_ub, opts, names, verbose=0):
'''
Equation pipeline
Some operations may generate new equations
Simplify after these to avoid unnecessary overhead on redundant constraints
Similarly some operations may eliminate equations, potentially eliminating a column (ie variable)
Remove these columns as necessary to speed up solving
'''
def debug(what):
if verbose:
print('')
print_eqns(A_ubd, b_ub, verbose=verbose, label=what, lim=20)
col_dist(A_ubd, what, names)
check_feasible_d(A_ubd, b_ub, names)
A_ubd, b_ub, names = preprocess(A_ubd, b_ub, opts, names, verbose=verbose)
# Try to (intelligently) subtract equations to generate additional constraints
# This helps avoid putting all delay in a single shared variable
derive_lim = opts.get('derive_lim', None)
if derive_lim:
dstart = len(b_ub)
# Original simple
if 0:
for di in range(derive_lim):
print('')
assert len(A_ubd) == len(b_ub)
n_orig = len(b_ub)
# Meat of the operation
# Focus on easy equations for the first pass to get a lot of easy derivations
col_lim = 12 if di == 0 else None
#col_lim = None
A_ubd, b_ub = derive_eq_by_row(A_ubd, b_ub, col_lim=col_lim)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
A_ubd, b_ub = simplify_rows(A_ubd, b_ub)
print('Derive row %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig, len(b_ub)))
debug("der_rows simp")
n_orig2 = len(b_ub)
# Meat of the operation
A_ubd, b_ub = derive_eq_by_col(A_ubd, b_ub)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
A_ubd, b_ub = simplify_rows(A_ubd=A_ubd, b_ub=b_ub)
print('Derive col %d / %d: %d => %d equations' % (di + 1, derive_lim, n_orig2, len(b_ub)))
debug("der_cols simp")
if n_orig == len(b_ub):
break
if 1:
# Each iteration one more column is allowed until all columns are included
# (and the system is stable)
col_lim = 15
di = 0
while True:
print('')
n_orig = len(b_ub)
print('Loop %d, lim %d' % (di + 1, col_lim))
# Meat of the operation
A_ubd, b_ub = derive_eq_by_row(A_ubd, b_ub, col_lim=col_lim, tweak=True)
debug("der_rows")
# Run another simplify pass since new equations may have overlap with original
A_ubd, b_ub = simplify_rows(A_ubd, b_ub)
print('Derive row: %d => %d equations' % (n_orig, len(b_ub)))
debug("der_rows simp")
n_orig2 = len(b_ub)
# Meat of the operation
A_ubd, b_ub = derive_eq_by_col(A_ubd, b_ub)
debug("der_cols")
# Run another simplify pass since new equations may have overlap with original
A_ubd, b_ub = simplify_rows(A_ubd=A_ubd, b_ub=b_ub)
print('Derive col %d: %d => %d equations' % (di + 1, n_orig2, len(b_ub)))
debug("der_cols simp")
# Doesn't help computation, but helps debugging
names, A_ubd, b_ub = simplify_cols(names=names, A_ubd=A_ubd, b_ub=b_ub)
A_ubd, b_ub = sort_equations(A_ubd, b_ub)
debug("loop done")
col_dist(A_ubd, 'derive done iter %d, lim %d' % (di, col_lim), names, lim=12)
rows = len(A_ubd)
if n_orig == len(b_ub) and col_lim >= rows:
break
col_lim += col_lim / 5
di += 1
dend = len(b_ub)
print('')
print('Derive net: %d => %d' % (dstart, dend))
print('')
# Was experimenting to see how much the higher-order columns really help
cols_min_post = opts.get('cols_min_post', None)
cols_max_post = opts.get('cols_max_post', None)
# Filter input based on number of columns
if cols_min_post or cols_max_post:
A_ubd, b_ub = filter_ncols(A_ubd=A_ubd, b_ub=b_ub, cols_min=cols_min_post, cols_max=cols_max_post)
debug("filter_ncols final")
names, A_ubd, b_ub = simplify_cols(names=names, A_ubd=A_ubd, b_ub=b_ub)
debug("simp_cols final")
# Helps debug readability
A_ubd, b_ub = sort_equations(A_ubd, b_ub)
debug("final (sorted)")
return names, A_ubd, b_ub
def Ar_di2ds(rowA, names):
row = OrderedDict()
for k, v in rowA.items():
row[names[k]] = v
return row
def A_di2ds(Adi, names):
rows = []
for row_di in Adi:
rows.append(Ar_di2ds(row_di, names))
return rows
def Ar_ds2di(row_ds, names):
def keyi(name):
if name not in names:
names[name] = len(names)
return names[name]
row_di = OrderedDict()
for k, v in row_ds.items():
row_di[keyi(k)] = v
return row_di
def A_ds2di(rows):
names = OrderedDict()
A_ubd = []
for row_ds in rows:
A_ubd.append(Ar_ds2di(row_ds, names))
return list(names.keys()), A_ubd
def A_ds2np(Ads):
names, Adi = A_ds2di(Ads)
return names, A_di2np(Adi, len(names))
def loadc_Ads_mkb(fns, mkb, filt):
bs = []
Ads = []
for fn in fns:
with open(fn, 'r') as f:
# skip header
f.readline()
for l in f:
cols = l.split(',')
ico = bool(int(cols[0]))
corners = cols[1]
vars = cols[2:]
corners = [int(x) for x in corners.split()]
def mkvar(x):
i, var = x.split()
return (var, int(i))
vars = OrderedDict([mkvar(var) for var in vars])
if not filt(ico, corners, vars):
continue
bs.append(mkb(corners))
Ads.append(vars)
return Ads, bs
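# Example input row (hedged; WIRE_A / PIP_B are placeholder names for real
# WIRE_*/PIP_* speed models):
#   "1,10 8 12 9,2 WIRE_A 1 PIP_B"
# parses to ico=True, corners=[10, 8, 12, 9],
# vars=OrderedDict([('WIRE_A', 2), ('PIP_B', 1)])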
def loadc_Ads_b(fns, corner, ico=None):
corner = corner or "slow_max"
corneri = corner_s2i[corner]
if ico is not None:
filt = lambda ico_row, corners, vars: ico_row == ico
else:
filt = lambda ico, corners, vars: True
def mkb(val):
return val[corneri]
return loadc_Ads_mkb(fns, mkb, filt)
def index_names(Ads):
names = set()
for row_ds in Ads:
for k1 in row_ds.keys():
names.add(k1)
return names


@ -0,0 +1,259 @@
#!/usr/bin/env python3
from timfuz import Benchmark, Ar_di2np, Ar_ds2t, A_di2ds, A_ds2di, simplify_rows, loadc_Ads_b, index_names, A_ds2np
import numpy as np
import glob
import json
import math
from collections import OrderedDict
import random
# check for issues that may be due to round off error
STRICT = 1
def Adi2matrix_random(A_ubd, b_ub, names):
# random assignment
# was making some empty rows
A_ret = [np.zeros(len(names)) for _i in range(len(names))]
b_ret = np.zeros(len(names))
for row, b in zip(A_ubd, b_ub):
# Randomly assign to a row
dst_rowi = random.randint(0, len(names) - 1)
rownp = Ar_di2np(row, cols=len(names), sf=1)
A_ret[dst_rowi] = np.add(A_ret[dst_rowi], rownp)
b_ret[dst_rowi] += b
return A_ret, b_ret
def Ads2matrix_linear(Ads, b):
names, Adi = A_ds2di(Ads)
cols = len(names)
rows_out = len(b)
A_ret = [np.zeros(cols) for _i in range(rows_out)]
b_ret = np.zeros(rows_out)
dst_rowi = 0
for row_di, row_b in zip(Adi, b):
row_np = Ar_di2np(row_di, cols)
A_ret[dst_rowi] = np.add(A_ret[dst_rowi], row_np)
b_ret[dst_rowi] += row_b
dst_rowi = (dst_rowi + 1) % rows_out
return A_ret, b_ret
def row_sub_syms(row, sub_json, verbose=False):
if 0 and verbose:
print("")
print(row.items())
delsyms = 0
for k in sub_json['drop_names']:
try:
del row[k]
delsyms += 1
except KeyError:
pass
if verbose:
print("Deleted %u symbols" % delsyms)
if verbose:
print('Checking pivots')
print(sorted(row.items()))
for group, pivot in sorted(sub_json['pivots'].items()):
if pivot not in row:
continue
n = row[pivot]
print(' pivot %u %s' % (n, pivot))
for group, pivot in sorted(sub_json['pivots'].items()):
if pivot not in row:
continue
# take the sub out n times
# note constants may be negative
n = row[pivot]
if verbose:
print('pivot %i %s' % (n, pivot))
for subk, subv in sorted(sub_json['subs'][group].items()):
oldn = row.get(subk, 0)
rown = oldn - n * subv
if verbose:
print(" %s: %d => %d" % (subk, oldn, rown))
if rown == 0:
# only becomes zero if didn't previously exist
del row[subk]
if verbose:
print(" del")
else:
row[subk] = rown
row[group] = n
assert pivot not in row
# after all constants are applied, the row should end up positive?
if STRICT:
for k, v in sorted(row.items()):
assert v > 0, (k, v)
def run_sub_json(Ads, sub_json, verbose=False):
nrows = 0
nsubs = 0
ncols_old = 0
ncols_new = 0
print('Subbing %u rows' % len(Ads))
prints = set()
for rowi, row in enumerate(Ads):
if 0 and verbose:
print(row)
if verbose:
print('')
print('Row %u w/ %u elements' % (rowi, len(row)))
row_orig = dict(row)
row_sub_syms(row, sub_json, verbose=verbose)
nrows += 1
if row_orig != row:
nsubs += 1
if verbose:
rowt = Ar_ds2t(row)
if rowt not in prints:
print('row', row)
prints.add(rowt)
ncols_old += len(row_orig)
ncols_new += len(row)
if verbose:
print('')
print("Sub: %u / %u rows changed" % (nsubs, nrows))
print("Sub: %u => %u cols" % (ncols_old, ncols_new))
def pmatrix(Anp, s):
import sympy
msym = sympy.Matrix(Anp)
print(s)
sympy.pprint(msym)
def pds(Ads, s):
names, Anp = A_ds2np(Ads)
pmatrix(Anp, s)
print('Names: %s' % (names,))
def run(fns_in, sub_json=None, verbose=False, corner=None):
Ads, b = loadc_Ads_b(fns_in, corner, ico=True)
# Remove duplicate rows
# is this necessary?
# maybe better to just add them into the matrix directly
#Ads, b = simplify_rows(Ads, b)
if sub_json:
print('Subbing JSON %u rows' % len(Ads))
#pds(Ads, 'Orig')
names_old = index_names(Ads)
run_sub_json(Ads, sub_json, verbose=verbose)
names_new = index_names(Ads)
print("Sub: %u => %u names" % (len(names_old), len(names_new)))
print(names_new)
print('Subbed JSON %u rows' % len(Ads))
names = names_new
#pds(Ads, 'Sub')
else:
names = index_names(Ads)
# Squash into a matrix
# A_ub2, b_ub2 = Adi2matrix_random(A_ubd, b, names)
Amat, _bmat = Ads2matrix_linear(Ads, b)
#pmatrix(Amat, 'Matrix')
'''
The matrix must be full rank to even be considered reasonable
Even then, floating point error could *possibly* make a deficient matrix appear full rank, although probably not since we have whole numbers
Hence the slogdet check
'''
print('')
# https://docs.scipy.org/doc/numpy-dev/reference/generated/numpy.linalg.matrix_rank.html
print('rank: %s / %d col' % (np.linalg.matrix_rank(Amat), len(names)))
if 0:
# https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.linalg.slogdet.html
sign, logdet = np.linalg.slogdet(Amat)
# If the determinant is zero, then sign will be 0 and logdet will be -Inf
if sign == 0 and logdet == float('-inf'):
print('slogdet :( : 0')
else:
print('slogdet :) : %s, %s' % (sign, logdet))
def load_sub(fn):
delta = 0.001
j = json.load(open(fn, 'r'))
# convert groups to use integer constants
# beware of roundoff error
# if we round poorly here, it won't give incorrect results later, but may make it fail to find a good solution
if 'pivots' in j:
print('pivots: using existing')
else:
print('pivots: guessing')
pivots = OrderedDict()
j['pivots'] = pivots
for name, vals in sorted(j['subs'].items()):
pivot = None
for k, v in vals.items():
if STRICT:
vi = int(round(v))
assert abs(vi - v) < delta
vals[k] = vi
else:
vals[k] = float(v)
# there may be more than one acceptable pivot
# take the first
if v == 1 and pivot is None:
pivot = k
assert pivot is not None
pivots[name] = pivot
return j
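# Illustrative sub.json shape accepted above (names and values hypothetical):
# {
#   "subs":   {"GROUP_0": {"PIP_A": 1, "WIRE_B": 2}},
#   "pivots": {"GROUP_0": "PIP_A"}    # optional; guessed from subs when absent
# }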
def main():
import argparse
parser = argparse.ArgumentParser(
description=
'Check sub.json solution feasibility'
)
parser.add_argument('--verbose', action='store_true', help='')
parser.add_argument('--sub-json', help='')
parser.add_argument('--corner', default="slow_max", help='')
parser.add_argument(
'fns_in',
nargs='*',
help='timing3.txt input files')
args = parser.parse_args()
bench = Benchmark()
fns_in = args.fns_in
if not fns_in:
fns_in = glob.glob('specimen_*/timing3.txt')
sub_json = None
if args.sub_json:
sub_json = load_sub(args.sub_json)
try:
run(sub_json=sub_json,
fns_in=fns_in, verbose=args.verbose, corner=args.corner)
finally:
print('Exiting after %s' % bench)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,60 @@
#!/usr/bin/env python3
from timfuz import Benchmark, simplify_rows, loadc_Ads_b
import glob
def run(fout, fns_in, corner, verbose=0):
Ads, b = loadc_Ads_b(fns_in, corner, ico=True)
Ads, b = simplify_rows(Ads, b)
fout.write('ico,fast_max fast_min slow_max slow_min,rows...\n')
for row_b, row_ds in zip(b, Ads):
# write in same format, but just stick to this corner
out_b = [str(row_b) for _i in range(4)]
ico = '1'
items = [ico, ' '.join(out_b)]
for k, v in sorted(row_ds.items()):
items.append('%u %s' % (v, k))
fout.write(','.join(items) + '\n')
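# Illustrative output line (hypothetical values): a path of 123 ps crossing
# element "a" once and "b" twice, with interconnect-only mode forced to 1:
#   1,123 123 123 123,1 a,2 b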
def main():
import argparse
parser = argparse.ArgumentParser(
description=
'Create a .csv with a single process corner'
)
parser.add_argument('--verbose', type=int, help='')
parser.add_argument('--auto-name', action='store_true', help='timing3.csv => timing3c.csv')
parser.add_argument('--out', default=None, help='Output csv')
parser.add_argument('--corner', help='Corner to select: fast_max, fast_min, slow_max, or slow_min')
parser.add_argument(
'fns_in',
nargs='*',
help='timing3.csv input files')
args = parser.parse_args()
bench = Benchmark()
fnout = args.out
if fnout is None:
if args.auto_name:
assert len(args.fns_in) == 1
fnin = args.fns_in[0]
fnout = fnin.replace('timing3.csv', 'timing3c.csv')
assert fnout != fnin, 'Expect timing3.csv in'
else:
fnout = '/dev/stdout'
print("Writing to %s" % fnout)
fout = open(fnout, 'w')
fns_in = args.fns_in
if not fns_in:
fns_in = glob.glob('specimen_*/timing3.csv')
run(fout=fout,
fns_in=fns_in, corner=args.corner, verbose=args.verbose)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,272 @@
#!/usr/bin/env python3
from timfuz import Benchmark, row_di2ds, A_di2ds
import numpy as np
import glob
import math
import json
import re
import sympy
import sys
from collections import OrderedDict
# Speed index sentinel: marks entries with no associated speed model
SI_NONE = 0xFFFF
PREFIX_W = 'WIRE_'
PREFIX_P = 'PIP_'
#PREFIX_W = ''
#PREFIX_P = ''
def parse_pip(s):
# Entries like
# CLK_BUFG_REBUF_X60Y117/CLK_BUFG_REBUF.CLK_BUFG_REBUF_R_CK_GCLK0_BOT<<->>CLK_BUFG_REBUF_R_CK_GCLK0_TOP
# Convert to (site, instance, speed_index)
pipstr, speed_index = s.split(':')
site, instance = pipstr.split('/')
#type, pip_junction, pip = others.split('.')
#return (site, type, pip_junction, pip)
return site, instance, int(speed_index)
def parse_node(s):
node, nwires = s.split(':')
return node, int(nwires)
def parse_wire(s):
# CLBLM_R_X3Y80/CLBLM_M_D6:952
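# parses to ('CLBLM_R_X3Y80', 'CLBLM_M_D6', 952)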
wirestr, speed_index = s.split(':')
site, instance = wirestr.split('/')
return site, instance, int(speed_index)
# FIXME: these actually have a delay element
# Probably need to put these back in
def remove_virtual_pips(pips):
# filtering disabled for now; remove the early return to re-enable
return pips
# a list, not a lazy filter, since pips are iterated more than once downstream
return [pip for pip in pips if not re.match(r'CLBL[LM]_[LR]_', pip[0])]
def load_timing3(f, name='file'):
# Header: net src_bel dst_bel ico fast_max fast_min slow_max slow_min pips nodes wires
f.readline()
ret = []
bads = 0
for l in f:
# FIXME: hack
if 0 and 'CLK' in l:
continue
l = l.strip()
if not l:
continue
parts = l.split(' ')
# FIXME: deal with these nodes
if len(parts) != 11:
bads += 1
continue
net, src_bel, dst_bel, ico, fast_max, fast_min, slow_max, slow_min, pips, nodes, wires = parts
pips = pips.split('|')
nodes = nodes.split('|')
wires = wires.split('|')
ret.append({
'net': net,
'src_bel': src_bel,
'dst_bel': dst_bel,
'ico': int(ico),
# ps
'fast_max': int(fast_max),
'fast_min': int(fast_min),
'slow_max': int(slow_max),
'slow_min': int(slow_min),
'pips': remove_virtual_pips([parse_pip(pip) for pip in pips]),
'nodes': [parse_node(node) for node in nodes],
'wires': [parse_wire(wire) for wire in wires],
'line': l,
})
print(' load %s: %d bad, %d good' % (name, bads, len(ret)))
#assert 0
return ret
def load_speed_json(f):
j = json.load(f)
# Index speed indexes to names
speed_i2s = {}
for k, v in j['speed_model'].items():
i = v['speed_index']
if i != SI_NONE:
speed_i2s[i] = k
return j, speed_i2s
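# Expected speed.json shape (abridged, illustrative):
# {"speed_model": {"<element name>": {"speed_index": 123, ...}, ...}}
# entries whose speed_index is SI_NONE (0xFFFF) are skipped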
# Verify the nodes and wires really do line up
def vals2Adi_check(vals, names):
print('Checking')
for val in vals:
node_wires = 0
for _node, wiresn in val['nodes']:
node_wires += wiresn
assert node_wires == len(val['wires'])
print('Done')
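# development-only check: intentionally halt here after verifying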
assert 0
def vals2Adi(vals, speed_i2s,
name_tr={}, name_drop=[],
verbose=False):
def pip2speed(pip):
_site, _name, speed_index = pip
return PREFIX_P + speed_i2s[speed_index]
def wire2speed(wire):
_site, _name, speed_index = wire
return PREFIX_W + speed_i2s[speed_index]
# Want this ordered
names = OrderedDict()
print('Creating matrix w/ tr: %d, drop: %d' % (len(name_tr), len(name_drop)))
# Take sites out entirely using handy "interconnect only" option
#vals = filter(lambda x: str(x).find('SLICE') >= 0, vals)
# First index all of the given pip types
# Start out as set then convert to list to keep matrix order consistent
sys.stdout.write('Indexing delay elements ')
sys.stdout.flush()
progress = max(1, len(vals) // 100)
for vali, val in enumerate(vals):
if vali % progress == 0:
sys.stdout.write('.')
sys.stdout.flush()
odl = [(pip2speed(pip), None) for pip in val['pips']]
names.update(OrderedDict(odl))
odl = [(wire2speed(wire), None) for wire in val['wires']]
names.update(OrderedDict(odl))
print(' done')
# Apply transform
orig_names = len(names)
for k in (list(name_drop) + list(name_tr.keys())):
if k in names:
del names[k]
else:
print('WARNING: failed to remove %s' % k)
names.update(OrderedDict([(name, None) for name in name_tr.values()]))
print('Names tr %d => %d' % (orig_names, len(names)))
# Make unique list
names = list(names.keys())
name_s2i = {}
for namei, name in enumerate(names):
name_s2i[name] = namei
if verbose:
for name in names:
print('NAME: ', name)
for name in name_drop:
print('DROP: ', name)
for l, r in name_tr.items():
print('TR: %s => %s' % (l, r))
# Now create a matrix with all of these delays
# Each row needs len(names) elements
# 2 means the element appears twice, 0 (absent key) means absent
# (could hit same pip twice)
print('Creating delay element matrix w/ %d names' % len(names))
Adi = [None for _i in range(len(vals))]
for vali, val in enumerate(vals):
def add_name(name):
if name in name_drop:
return
name = name_tr.get(name, name)
namei = name_s2i[name]
row_di[namei] = row_di.get(namei, 0) + 1
# Start with 0 occurrences
#row = [0 for _i in range(len(names))]
row_di = {}
#print('pips: ', val['pips'])
for pip in val['pips']:
add_name(pip2speed(pip))
for wire in val['wires']:
add_name(wire2speed(wire))
#A_ub.append(row)
Adi[vali] = row_di
return Adi, names
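# Illustrative row encoding (hypothetical): a path crossing PIP_X twice and
# WIRE_Y once, with names = ['PIP_X', 'WIRE_Y'], yields row_di = {0: 2, 1: 1}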
# TODO: load directly as Ads
# remove names_tr, names_drop
def vals2Ads(vals, speed_i2s, verbose=False):
Adi, names = vals2Adi(vals, speed_i2s, verbose=False)
return A_di2ds(Adi, names)
def load_Ads(speed_json_f, f_ins):
print('Loading data')
_speedj, speed_i2s = load_speed_json(speed_json_f)
vals = []
for avals in [load_timing3(f_in, name) for f_in, name in f_ins]:
vals.extend(avals)
Ads = vals2Ads(vals, speed_i2s)
def mkb(val):
return (val['fast_max'], val['fast_min'], val['slow_max'], val['slow_min'])
b = [mkb(val) for val in vals]
ico = [val['ico'] for val in vals]
return Ads, b, ico
def run(speed_json_f, fout, f_ins, verbose=0, corner=None):
Ads, bs, ico = load_Ads(speed_json_f, f_ins)
fout.write('ico,fast_max fast_min slow_max slow_min,rows...\n')
for row_bs, row_ds, row_ico in zip(bs, Ads, ico):
# like: 123 456 120 450, 1 a, 2 b
# first column has delay corners, followed by delay element count
items = [str(row_ico), ' '.join([str(x) for x in row_bs])]
for k, v in sorted(row_ds.items()):
items.append('%u %s' % (v, k))
fout.write(','.join(items) + '\n')
def main():
import argparse
parser = argparse.ArgumentParser(
description=
'Convert obscure timing3.txt into more readable but roughly equivalent timing3.csv'
)
parser.add_argument('--verbose', type=int, help='')
parser.add_argument('--auto-name', action='store_true', help='timing3.txt => timing3.csv')
parser.add_argument('--speed-json', default='build_speed/speed.json',
help='Provides speed index to name translation')
parser.add_argument('--out', default=None, help='Output csv')
parser.add_argument(
'fns_in',
nargs='*',
help='timing3.txt input files')
args = parser.parse_args()
bench = Benchmark()
fnout = args.out
if fnout is None:
if args.auto_name:
assert len(args.fns_in) == 1
fnin = args.fns_in[0]
fnout = fnin.replace('.txt', '.csv')
assert fnout != fnin, 'Expect .txt in'
else:
fnout = '/dev/stdout'
print("Writing to %s" % fnout)
fout = open(fnout, 'w')
fns_in = args.fns_in
if not fns_in:
fns_in = glob.glob('specimen_*/timing3.txt')
run(speed_json_f=open(args.speed_json, 'r'), fout=fout,
f_ins=[(open(fn_in, 'r'), fn_in) for fn_in in fns_in], verbose=args.verbose)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,206 @@
#!/usr/bin/env python3
'''
Triaging tool to help understand where we need more timing coverage
Finds correlated variables to help make better test cases
'''
from timfuz import Benchmark, Ar_di2np, loadc_Ads_b, index_names, A_ds2np
import numpy as np
import glob
import math
import json
import sympy
from collections import OrderedDict
STRICT = 1
class State(object):
def __init__(self, Ads, drop_names=[]):
self.Ads = Ads
self.names = index_names(self.Ads)
# known zero delay elements
self.drop_names = set(drop_names)
# active names in rows
# includes sub symbols, excludes symbols that have been substituted out
self.base_names = set(self.names)
self.names = set(self.base_names)
# List of variable substitutions
# k => dict of v:n entries that it came from
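# e.g. {'GROUP_0': {'PIP_A': 1, 'WIRE_B': 2}} (illustrative names)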
self.subs = {}
self.verbose = True
def print_stats(self):
print("Stats")
print(" Substitutions: %u" % len(self.subs))
if self.subs:
print(" Largest: %u" % max([len(x) for x in self.subs.values()]))
print(" Rows: %u" % len(self.Ads))
print(" Cols (in): %u" % (len(self.base_names) + len(self.drop_names)))
print(" Cols (preprocessed): %u" % len(self.base_names))
print(" Drop names: %u" % len(self.drop_names))
print(" Cols (out): %u" % len(self.names))
assert len(self.names) >= len(self.subs)
@staticmethod
def load(fn_ins):
Ads, _b = loadc_Ads_b(fn_ins, corner=None, ico=True)
return State(Ads)
def write_state(state, fout):
j = {
'names': dict([(x, None) for x in state.names]),
'drop_names': list(state.drop_names),
'base_names': list(state.base_names),
'subs': dict([(name, values) for name, values in state.subs.items()]),
'pivots': state.pivots,
}
json.dump(j, fout, sort_keys=True, indent=4, separators=(',', ': '))
def Adi2matrix(Adi, cols):
A_ub2 = [np.zeros(cols) for _i in range(cols)]
dst_rowi = 0
for row in Adi:
rownp = Ar_di2np(row, cols=cols, sf=1)
A_ub2[dst_rowi] = np.add(A_ub2[dst_rowi], rownp)
dst_rowi = (dst_rowi + 1) % cols
return A_ub2
def Anp2matrix(Anp):
ncols = len(Anp[0])
A_ub2 = [np.zeros(ncols) for _i in range(ncols)]
dst_rowi = 0
for rownp in Anp:
A_ub2[dst_rowi] = np.add(A_ub2[dst_rowi], rownp)
dst_rowi = (dst_rowi + 1) % ncols
return A_ub2
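# Folding sketch: with ncols = 2 and input rows r0, r1, r2 the result is
# [r0 + r2, r1]. Summed rows stay inside the original row space, which keeps
# the matrix square (tractable for sympy rref) at the cost of possibly
# hiding rank through cancellation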
def row_np2ds(rownp, names):
ret = {}
assert len(rownp) == len(names), (len(rownp), len(names))
for namei, name in enumerate(names):
v = rownp[namei]
if v:
ret[name] = v
return ret
def comb_corr_sets(state, verbose=False):
print('Converting rows to integer keys')
names, Anp = A_ds2np(state.Ads)
print('np: %u rows x %u cols' % (len(Anp), len(Anp[0])))
print('Combining rows into matrix')
mnp = Anp2matrix(Anp)
print('Matrix: %u rows x %u cols' % (len(mnp), len(mnp[0])))
print('Converting np to sympy matrix')
msym = sympy.Matrix(mnp)
print('Making rref')
rref, pivots = msym.rref()
if verbose:
print('names')
print(names)
print('Matrix')
sympy.pprint(msym)
print('Pivots')
sympy.pprint(pivots)
print('rref')
sympy.pprint(rref)
state.pivots = {}
def row_solved(rownp, row_pivot):
for ci, c in enumerate(rownp):
if ci == row_pivot:
continue
if c != 0:
return False
return True
rrefnp = np.array(rref).astype(np.float64)
print('Computing groups w/ rref %u row x %u col' % (len(rrefnp), len(rrefnp[0])))
#print(rrefnp)
# rows that have a single 1 are okay
# anything else requires substitution (unless all 0)
# pivots may be fewer than the rows
# remaining rows should be 0s
for row_i, (row_pivot, rownp) in enumerate(zip(pivots, rrefnp)):
rowds = row_np2ds(rownp, names)
# boring cases: solved variable, not fully ranked
#if sum(rowds.values()) == 1:
if row_solved(rownp, row_pivot):
continue
# a grouping
group_name = "GROUP_%u" % row_i
if STRICT:
delta = 0.001
rowds_store = {}
for k, v in rowds.items():
vi = int(round(v))
error = abs(vi - v)
assert error < delta, (error, delta)
rowds_store[k] = vi
else:
rowds_store = rowds
state.subs[group_name] = rowds_store
# Add the new symbol
state.names.add(group_name)
# Remove substituted symbols
# Note: symbols may appear multiple times
state.names.difference_update(set(rowds.keys()))
pivot_name = names[row_pivot]
state.pivots[group_name] = pivot_name
if verbose:
print("%s (%s): %s" % (group_name, pivot_name, rowds))
return state
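# Illustrative outcome (hypothetical names): if rref leaves the row
# PIP_A + WIRE_B with pivot PIP_A, then subs gains
# {'GROUP_i': {'PIP_A': 1, 'WIRE_B': 1}} and pivots['GROUP_i'] = 'PIP_A',
# i.e. the two elements only ever appear together and are solved as one lump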
def run(fout, fn_ins, verbose=0):
print('Loading data')
state = State.load(fn_ins)
comb_corr_sets(state, verbose=verbose)
state.print_stats()
if fout:
write_state(state, fout)
def main():
import argparse
parser = argparse.ArgumentParser(
description=
'Timing fuzzer'
)
parser.add_argument('--verbose', action='store_true', help='')
parser.add_argument('--speed-json', default='build_speed/speed.json',
help='Provides speed index to name translation')
parser.add_argument('--out', help='Output sub.json substitution result')
parser.add_argument(
'fns_in',
nargs='*',
help='timing3.csv input files')
args = parser.parse_args()
bench = Benchmark()
fout = None
if args.out:
fout = open(args.out, 'w')
fns_in = args.fns_in
if not fns_in:
fns_in = glob.glob('specimen_*/timing3.csv')
try:
run(fout=fout,
fn_ins=fns_in, verbose=args.verbose)
finally:
print('Exiting after %s' % bench)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,98 @@
#!/usr/bin/env python3
# NOTE: imports below are reconstructed to make this excerpt self-contained;
# the helper functions are assumed to come from the project's timfuz module
import datetime
import os
import sys
import time
import numpy as np
from scipy.optimize import linprog
from timfuz import Ab_d2np, check_feasible, col_dist, massage_equations, print_eqns
def run_corner(A_ubd, b_ub, names, verbose=0, opts={}, meta={}):
# Given timing scores for above delays (-ps)
names_orig = names
#print_eqns(A_ub, b_ub, verbose=verbose)
names, A_ubd, b_ub = massage_equations(A_ubd, b_ub, opts, names, verbose=verbose)
print('')
print_eqns(A_ubd, b_ub, verbose=verbose)
print('')
col_dist(A_ubd, 'final', names)
A_ub, b_ub = Ab_d2np(A_ubd, b_ub, names)
# It's having trouble giving me solutions as this gets bigger
# Make a terrible baseline guess to confirm we aren't doing something bad
#print_names(names, verbose=verbose)
check_feasible(A_ub=A_ub, b_ub=b_ub)
'''
Be mindful of signs
We have constraints like (timing1 and timing2 are constants):
delay1 + delay2 + delay4 >= timing1
delay2 + delay3 >= timing2
But linprog needs them in compliant (<=) form:
-delay1 + -delay2 + -delay4 <= -timing1
-delay2 + -delay3 <= -timing2
'''
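# e.g. (hypothetical) a 5000 ps bound on delay1 + delay2 becomes the
# A_ub row [-1, -1, 0, ...] with b_ub entry -5000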
rows = len(A_ub)
cols = len(A_ub[0])
# Minimization function scalars
# Treat all logic elements as equally important
c = [1 for _i in range(len(names))]
# Delays cannot be negative
# (this is also the default constraint)
#bounds = [(0, None) for _i in range(len(names))]
# Also you can provide one to apply to all
bounds = (0, None)
# Seems to take about rows + 3 iterations
# Give some margin
#maxiter = int(1.1 * rows + 100)
#maxiter = max(1000, int(1000 * rows + 1000))
# Most of the time I want it to just keep going unless I ^C it
maxiter = 1000000
if verbose >= 2:
print('b_ub', b_ub)
print('Unique delay elements: %d' % len(names))
print(' # delay minimization weights: %d' % len(c))
print(' # delay constraints: %d' % len(bounds))
print('Input paths')
print(' # timing scores: %d' % len(b_ub))
print(' Rows: %d' % rows)
tlast = [time.time()]
iters = [0]
printn = [0]
def callback(xk, **kwargs):
iters[0] = kwargs['nit']
if time.time() - tlast[0] > 1.0:
sys.stdout.write('I:%d ' % kwargs['nit'])
tlast[0] = time.time()
printn[0] += 1
if printn[0] % 10 == 0:
sys.stdout.write('\n')
sys.stdout.flush()
print('')
# Now find smallest values for delay constants
# Due to input bounds (ex: column limit), some delay elements may get eliminated entirely
print('Running linprog w/ %d r, %d c (%d name)' % (rows, cols, len(names_orig)))
res = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=bounds, callback=callback,
options={"disp": True, 'maxiter': maxiter, 'bland': True, 'tol': 1e-6,})
nonzeros = 0
print('Ran %d iters' % iters[0])
if res.success:
print('Result sample (%d elements)' % (len(res.x)))
plim = 3
for xi, (name, x) in enumerate(zip(names, res.x)):
nonzero = x >= 0.001
if nonzero:
nonzeros += 1
#if nonzero and (verbose >= 1 or xi > 30):
if nonzero and (verbose or ((nonzeros < 100 or nonzeros % 20 == 0) and nonzeros <= plim)):
print(' % 4u % -80s % 10.1f' % (xi, name, x))
print('Delay on %d / %d' % (nonzeros, len(res.x)))
if not os.path.exists('res'):
os.mkdir('res')
fn_out = 'res/%s' % datetime.datetime.utcnow().isoformat().split('.')[0]
print('Writing %s' % fn_out)
np.save(fn_out, (3, c, A_ub, b_ub, bounds, names, res, meta))

109
experiments/timfuz/top.v Normal file
View File

@ -0,0 +1,109 @@
//move some stuff to minitests/ncy0
`define SEED 32'h12345678
module top(input clk, stb, di, output do);
localparam integer DIN_N = 42;
localparam integer DOUT_N = 79;
reg [DIN_N-1:0] din;
wire [DOUT_N-1:0] dout;
reg [DIN_N-1:0] din_shr;
reg [DOUT_N-1:0] dout_shr;
always @(posedge clk) begin
din_shr <= {din_shr, di};
dout_shr <= {dout_shr, din_shr[DIN_N-1]};
if (stb) begin
din <= din_shr;
dout_shr <= dout;
end
end
assign do = dout_shr[DOUT_N-1];
roi #(.DIN_N(DIN_N), .DOUT_N(DOUT_N))
roi (
.clk(clk),
.din(din),
.dout(dout)
);
endmodule
module roi(input clk, input [DIN_N-1:0] din, output [DOUT_N-1:0] dout);
parameter integer DIN_N = -1;
parameter integer DOUT_N = -1;
/*
//Take out for now to make sure LUTs are more predictable
picorv32 picorv32 (
.clk(clk),
.resetn(din[0]),
.mem_valid(dout[0]),
.mem_instr(dout[1]),
.mem_ready(din[1]),
.mem_addr(dout[33:2]),
.mem_wdata(dout[66:34]),
.mem_wstrb(dout[70:67]),
.mem_rdata(din[33:2])
);
*/
/*
randluts randluts (
.din(din[41:34]),
.dout(dout[78:71])
);
*/
randluts #(.N(150)) randluts (
.din(din[41:34]),
.dout(dout[78:71])
);
endmodule
module randluts(input [7:0] din, output [7:0] dout);
parameter integer N = 250;
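// xorshift32 below is Marsaglia's 32-bit xorshift PRNG; it deterministically
// scrambles the seed so LUT contents and wiring are random but reproducible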
function [31:0] xorshift32(input [31:0] xorin);
begin
xorshift32 = xorin;
xorshift32 = xorshift32 ^ (xorshift32 << 13);
xorshift32 = xorshift32 ^ (xorshift32 >> 17);
xorshift32 = xorshift32 ^ (xorshift32 << 5);
end
endfunction
function [63:0] lutinit(input [7:0] a, b);
begin
lutinit[63:32] = xorshift32(xorshift32(xorshift32(xorshift32({a, b} ^ `SEED))));
lutinit[31: 0] = xorshift32(xorshift32(xorshift32(xorshift32({b, a} ^ `SEED))));
end
endfunction
wire [(N+1)*8-1:0] nets;
assign nets[7:0] = din;
assign dout = nets[(N+1)*8-1:N*8];
genvar i, j;
generate
for (i = 0; i < N; i = i+1) begin:is
for (j = 0; j < 8; j = j+1) begin:js
localparam integer k = xorshift32(xorshift32(xorshift32(xorshift32((i << 20) ^ (j << 10) ^ `SEED)))) & 255;
(* KEEP, DONT_TOUCH *)
LUT6 #(
.INIT(lutinit(i, j))
) lut (
.I0(nets[8*i+(k+0)%8]),
.I1(nets[8*i+(k+1)%8]),
.I2(nets[8*i+(k+2)%8]),
.I3(nets[8*i+(k+3)%8]),
.I4(nets[8*i+(k+4)%8]),
.I5(nets[8*i+(k+5)%8]),
.O(nets[8*i+8+j])
);
end
end
endgenerate
endmodule

View File

@ -0,0 +1,92 @@
'''
Verifies that node timing info is unique
'''
import re
def gen_wires(fin):
for l in fin:
lj = {}
l = l.strip()
for kvs in l.split():
name, value = kvs.split(':')
lj[name] = value
tile_type, xy, wname = re.match(r'(.*)_(X[0-9]*Y[0-9]*)/(.*)', lj['NAME']).groups()
lj['tile_type'] = tile_type
lj['xy'] = xy
lj['wname'] = wname
lj['l'] = l
yield lj
def run(node_fin, verbose=0):
refnodes = {}
nodei = 0
for nodei, anode in enumerate(gen_wires(node_fin)):
def getk(anode):
return anode['wname']
#return (anode['tile_type'], anode['wname'])
if nodei % 1000 == 0:
print('Check node %d' % nodei)
# Existing node?
try:
refnode = refnodes[getk(anode)]
except KeyError:
# Set as reference
refnodes[getk(anode)] = anode
continue
k_invariant = (
'COST_CODE',
'IS_INPUT_PIN',
'IS_OUTPUT_PIN',
'IS_PART_OF_BUS',
'NUM_INTERSECTS',
'NUM_TILE_PORTS',
'SPEED_INDEX',
'TILE_PATTERN_OFFSET',
)
k_varies = (
'ID_IN_TILE_TYPE',
'IS_CONNECTED',
'NUM_DOWNHILL_PIPS',
'NUM_PIPS',
'NUM_UPHILL_PIPS',
'TILE_NAME',
)
# Verify equivalence
for k in k_invariant:
if k in refnode and k in anode:
def fail():
print('Mismatch on %s' % k)
print(refnode[k], anode[k])
print(refnode['l'])
print(anode['l'])
#assert 0
if refnode[k] != anode[k]:
print('')
fail()
# A key in one but not the other?
elif k in refnode or k in anode:
assert 0
elif k not in k_varies:
assert 0
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(
description=
'Timing fuzzer'
)
parser.add_argument('--verbose', type=int, help='')
parser.add_argument(
'node_fn_in',
default='/dev/stdin',
nargs='?',
help='Input file')
args = parser.parse_args()
run(open(args.node_fn_in, 'r'), verbose=args.verbose)