mirror of https://github.com/VLSIDA/OpenRAM.git
Added a function to get and scale all of the data, rather than just a sampled portion
This commit is contained in:
parent
dcd20a250a
commit
5f4a2f0231
@@ -1,4 +1,12 @@
#import diversipy as dp
#
# Copyright (c) 2016-2019 Regents of the University of California and The Board
# of Regents for the Oklahoma Agricultural and Mechanical College
# (acting for and on behalf of Oklahoma State University)
# All rights reserved.
#

import debug

import csv
import math
import numpy as np
@@ -131,6 +139,11 @@ def rescale_data(data, old_maxs, old_mins, new_maxs, new_mins):
    return data_new_scaling

def sample_from_file(num_samples, file_name, sample_dir=None):
    """
    Get a portion of the data from CSV file and scale it based on max/min of dataset.
    Duplicate samples are trimmed.
    """

    if sample_dir:
        maxs,mins,avgs = get_max_min_from_datasets(sample_dir)
    else:
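sample_from_file leans on two helpers that sit outside this hunk, get_data and get_max_min_from_datasets. For reference only, a minimal column-major CSV reader in the spirit of get_data could look like the sketch below; the helper name, the header row, and the all-numeric columns are assumptions, not something this commit defines.

import csv

def read_feature_lists(file_name):
    """Read a numeric CSV into one list per feature (column-major), skipping the header row."""
    with open(file_name, newline='') as f:
        reader = csv.reader(f)
        next(reader)  # assumed header row of feature names
        rows = [[float(v) for v in row] for row in reader if row]
    # transpose point rows into per-feature lists
    return [list(col) for col in zip(*rows)]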
@@ -142,10 +155,7 @@ def sample_from_file(num_samples, file_name, sample_dir=None):
    # Get algorithms sample points, assuming hypercube for now
    num_labels = 1
    inp_dims = len(all_data) - num_labels
-   #samples = dp.hycusampling.lhd_matrix(num_samples, inp_dims)/num_samples
-   #samples = dp.hycusampling.halton(num_samples, inp_dims)
-   #samples = dp.hycusampling.random_uniform(num_samples, inp_dims)
-   samples = None
+   samples = np.random.rand(num_samples, inp_dims)

    # Scale data from file
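The diversipy calls (Latin hypercube, Halton) are dropped here in favor of plain uniform sampling with np.random.rand. If stratified coverage of the unit hypercube were still wanted without the extra dependency, a small Latin-hypercube-style design can be built with numpy alone; this is an illustration, not part of the commit.

import numpy as np

def latin_hypercube(num_samples, num_dims, seed=None):
    """One point per equal-width bin along every dimension, bins shuffled independently."""
    rng = np.random.default_rng(seed)
    # jitter each bin index into its bin, then normalize to [0, 1)
    pts = (np.arange(num_samples) + rng.random((num_dims, num_samples))) / num_samples
    for d in range(num_dims):
        rng.shuffle(pts[d])  # decouple the dimensions from one another
    return pts.T  # shape (num_samples, num_dims)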
@@ -169,6 +179,30 @@ def sample_from_file(num_samples, file_name, sample_dir=None):
    return np.asarray(sampled_data), np.asarray(unused_new_scaling)

def get_scaled_data(file_name, sample_dir=None):
    """Get data from CSV file and scale it based on max/min of dataset"""

    if sample_dir:
        maxs,mins,avgs = get_max_min_from_datasets(sample_dir)
    else:
        maxs,mins,avgs = [], [], []

    # Get data
    all_data = get_data(file_name)

    # Data is scaled by max/min and data format is changed to points vs feature lists
    self_scaled_data = [[] for _ in range(len(all_data[0]))]
    self_maxs,self_mins = [],[]
    for feature_list in all_data:
        max_val = max(feature_list)
        self_maxs.append(max_val)
        min_val = min(feature_list)
        self_mins.append(min_val)
        for i in range(len(feature_list)):
            self_scaled_data[i].append((feature_list[i]-min_val)/(max_val-min_val))

    return np.asarray(self_scaled_data)

def unscale_data(data, ref_dir, pos=None):
    if ref_dir:
        maxs,mins,avgs = get_max_min_from_datasets(ref_dir)
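get_scaled_data is the new function named in the commit message: it scales every feature column to [0, 1] by that column's own max/min and transposes the feature lists into points. The same transformation can be written in a few vectorized numpy operations; a sketch for comparison, assuming no feature column is constant (which would divide by zero, just as in the loop above).

import numpy as np

def min_max_scale(feature_lists):
    """Scale column-major data to [0, 1] per feature and return points as rows."""
    arr = np.asarray(feature_lists, dtype=float).T  # rows = points, columns = features
    mins = arr.min(axis=0)
    maxs = arr.max(axis=0)
    return (arr - mins) / (maxs - mins)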
@@ -587,7 +587,11 @@ class lib:
        if self.use_model:
            #FIXME: ML models only designed for delay. Cannot produce all values for Lib
            d = linear_regression()
-           char_results = d.get_prediction()
+           model_inputs = [OPTS.num_words,
+                           OPTS.word_size,
+                           OPTS.words_per_row,
+                           self.sram.width * self.sram.height]
+           char_results = d.get_prediction(model_inputs)

            #self.d = elmore(self.sram, self.sp_file, self.corner)
            # char_results = self.d.analytical_delay(self.slews,self.loads)
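get_prediction now receives the raw configuration (word count, word size, words per row, SRAM area) instead of taking no arguments. Since the regression is trained on scaled data, the query point would presumably be scaled with the same dataset max/min before prediction; a hedged sketch of that step, where the helper name and the call site are assumptions rather than code from this commit.

import numpy as np

def scale_query(model_inputs, maxs, mins):
    """Scale one query point with the same max/min used for the training data."""
    q = np.asarray(model_inputs, dtype=float)
    lo = np.asarray(mins, dtype=float)
    hi = np.asarray(maxs, dtype=float)
    return (q - lo) / (hi - lo)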
@@ -19,13 +19,18 @@ data_dir = tech_path+'/'+OPTS.tech_name+relative_data_path

class linear_regression():

-   def get_prediction(self):
+   def __init__(self):
+       self.model = None
+
+   def get_prediction(self, model_inputs):

        train_sets = []
        test_sets = []

        file_path = data_dir +'/'+data_filename
-       num_points_train = 5
+       num_points_train = 7
+
+       samples = get_scaled_data(file_path, data_dir)

        non_ip_samples, unused_samples = sample_from_file(num_points_train, file_path, data_dir)
        nip_features_subset, nip_labels_subset = non_ip_samples[:, :-1], non_ip_samples[:,-1:]
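The [:, :-1] / [:, -1:] slicing above splits each sampled row into its input features and a single trailing label column while keeping both arrays 2-D; a tiny illustration with dummy data.

import numpy as np

samples = np.random.rand(8, 5)                       # 8 points: 4 features + 1 label (dummy data)
features, labels = samples[:, :-1], samples[:, -1:]
# features.shape == (8, 4), labels.shape == (8, 1)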
@@ -46,7 +51,7 @@ class linear_regression():
        new_error = self.run_model(train_x, train_y, test_x, test_y, data_dir)
        debug.info(1, "Model Error: {}".format(new_error))

-   def run_model(x,y,test_x,test_y, reference_dir):
+   def train_model(self, x,y,test_x,test_y, reference_dir):
        model = LinearRegression()
        model.fit(x, y)
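train_model (renamed from run_model and given a self parameter) is built around scikit-learn's LinearRegression; the fit/predict flow it relies on is the standard one, shown here with dummy data for illustration.

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
x = rng.random((20, 4))                          # 20 scaled training points, 4 features (dummy data)
y = x @ np.array([0.5, 1.0, -0.25, 2.0]) + 0.1   # synthetic linear labels
test_x = rng.random((5, 4))

model = LinearRegression().fit(x, y)
pred = model.predict(test_x)                     # shape (5,)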
@@ -56,9 +61,9 @@ class linear_regression():
        unscaled_labels = unscale_data(test_y.tolist(), reference_dir)
        unscaled_preds = unscale_data(pred.tolist(), reference_dir)
        unscaled_labels, unscaled_preds = (list(t) for t in zip(*sorted(zip(unscaled_labels, unscaled_preds))))
-       avg_error = abs_error(unscaled_labels, unscaled_preds)
-       max_error = max_error(unscaled_labels, unscaled_preds)
-       min_error = min_error(unscaled_labels, unscaled_preds)
+       avg_err = abs_error(unscaled_labels, unscaled_preds)
+       max_err = max_error(unscaled_labels, unscaled_preds)
+       min_err = min_error(unscaled_labels, unscaled_preds)

-       errors = {"avg_error": avg_error, "max_error":max_error, "min_error":min_error}
+       errors = {"avg_error": avg_err, "max_error":max_err, "min_error":min_err}
        return errors
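The abs_error, max_error, and min_error helpers that feed the errors dict are defined elsewhere in the module and are not shown in this diff; plausible implementations over the unscaled label/prediction lists might look like the following sketch, which is an assumption rather than the repository's code.

import numpy as np

def abs_error(labels, preds):
    """Mean absolute error between unscaled labels and predictions."""
    return float(np.mean(np.abs(np.asarray(labels) - np.asarray(preds))))

def max_error(labels, preds):
    return float(np.max(np.abs(np.asarray(labels) - np.asarray(preds))))

def min_error(labels, preds):
    return float(np.min(np.abs(np.asarray(labels) - np.asarray(preds))))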