diff --git a/compiler/characterizer/analytical_util.py b/compiler/characterizer/analytical_util.py
index dacc1f94..b93f463d 100644
--- a/compiler/characterizer/analytical_util.py
+++ b/compiler/characterizer/analytical_util.py
@@ -1,4 +1,12 @@
-#import diversipy as dp
+#
+# Copyright (c) 2016-2019 Regents of the University of California and The Board
+# of Regents for the Oklahoma Agricultural and Mechanical College
+# (acting for and on behalf of Oklahoma State University)
+# All rights reserved.
+#
+
+import debug
+
 import csv
 import math
 import numpy as np
@@ -131,6 +139,11 @@ def rescale_data(data, old_maxs, old_mins, new_maxs, new_mins):
     return data_new_scaling
 
 def sample_from_file(num_samples, file_name, sample_dir=None):
+    """
+    Get a portion of the data from CSV file and scale it based on max/min of dataset.
+    Duplicate samples are trimmed.
+    """
+
     if sample_dir:
         maxs,mins,avgs = get_max_min_from_datasets(sample_dir)
     else:
@@ -142,10 +155,7 @@ def sample_from_file(num_samples, file_name, sample_dir=None):
     # Get algorithms sample points, assuming hypercube for now
     num_labels = 1
     inp_dims = len(all_data) - num_labels
-    #samples = dp.hycusampling.lhd_matrix(num_samples, inp_dims)/num_samples
-    #samples = dp.hycusampling.halton(num_samples, inp_dims)
-    #samples = dp.hycusampling.random_uniform(num_samples, inp_dims)
-    samples = None
+    samples = np.random.rand(num_samples, inp_dims)
 
 
     # Scale data from file
@@ -169,6 +179,33 @@ def sample_from_file(num_samples, file_name, sample_dir=None):
 
     return np.asarray(sampled_data), np.asarray(unused_new_scaling)
 
+def get_scaled_data(file_name, sample_dir=None):
+    """Get data from CSV file and scale it based on max/min of dataset"""
+
+    if sample_dir:
+        maxs,mins,avgs = get_max_min_from_datasets(sample_dir)
+    else:
+        maxs,mins,avgs = [], [], []
+
+    # Get data
+    all_data = get_data(file_name)
+
+    # Data is scaled by max/min and data format is changed to points vs feature lists
+    self_scaled_data = [[] for _ in range(len(all_data[0]))]
+    self_maxs,self_mins = [],[]
+    for feature_list in all_data:
+        max_val = max(feature_list)
+        self_maxs.append(max_val)
+        min_val = min(feature_list)
+        self_mins.append(min_val)
+        val_range = max_val - min_val
+        for i in range(len(feature_list)):
+            # A constant feature has zero range; scale it to 0 instead of dividing by zero
+            scaled = (feature_list[i]-min_val)/val_range if val_range else 0.0
+            self_scaled_data[i].append(scaled)
+
+    return np.asarray(self_scaled_data)
+
 def unscale_data(data, ref_dir, pos=None):
     if ref_dir:
         maxs,mins,avgs = get_max_min_from_datasets(ref_dir)
diff --git a/compiler/characterizer/lib.py b/compiler/characterizer/lib.py
index e6f3f305..0204de62 100644
--- a/compiler/characterizer/lib.py
+++ b/compiler/characterizer/lib.py
@@ -587,7 +587,11 @@ class lib:
         if self.use_model:
             #FIXME: ML models only designed for delay. Cannot produce all values for Lib
             d = linear_regression()
-            char_results = d.get_prediction()
+            model_inputs = [OPTS.num_words,
+                            OPTS.word_size,
+                            OPTS.words_per_row,
+                            self.sram.width * self.sram.height]
+            char_results = d.get_prediction(model_inputs)
             #self.d = elmore(self.sram, self.sp_file, self.corner)
             # char_results = self.d.analytical_delay(self.slews,self.loads)
 
diff --git a/compiler/characterizer/linear_regression.py b/compiler/characterizer/linear_regression.py
index b0d0ab8f..8c69f6be 100644
--- a/compiler/characterizer/linear_regression.py
+++ b/compiler/characterizer/linear_regression.py
@@ -19,13 +19,18 @@ data_dir = tech_path+'/'+OPTS.tech_name+relative_data_path
 
 class linear_regression():
 
-    def get_prediction(self):
+    def __init__(self):
+        self.model = None
+
+    def get_prediction(self, model_inputs):
 
         train_sets = []
         test_sets = []
 
         file_path = data_dir +'/'+data_filename
-        num_points_train = 5
+        num_points_train = 7
+
+        samples = get_scaled_data(file_path, data_dir)
 
         non_ip_samples, unused_samples = sample_from_file(num_points_train, file_path, data_dir)
         nip_features_subset, nip_labels_subset = non_ip_samples[:, :-1], non_ip_samples[:,-1:]
@@ -46,7 +51,7 @@ class linear_regression():
-        new_error = self.run_model(train_x, train_y, test_x, test_y, data_dir)
+        new_error = self.train_model(train_x, train_y, test_x, test_y, data_dir)
         debug.info(1, "Model Error: {}".format(new_error))
 
-    def run_model(x,y,test_x,test_y, reference_dir):
+    def train_model(self, x,y,test_x,test_y, reference_dir):
         model = LinearRegression()
         model.fit(x, y)
 
@@ -56,9 +61,9 @@ class linear_regression():
         unscaled_labels = unscale_data(test_y.tolist(), reference_dir)
         unscaled_preds = unscale_data(pred.tolist(), reference_dir)
         unscaled_labels, unscaled_preds = (list(t) for t in zip(*sorted(zip(unscaled_labels, unscaled_preds))))
-        avg_error = abs_error(unscaled_labels, unscaled_preds)
-        max_error = max_error(unscaled_labels, unscaled_preds)
-        min_error = min_error(unscaled_labels, unscaled_preds)
+        avg_err = abs_error(unscaled_labels, unscaled_preds)
+        max_err = max_error(unscaled_labels, unscaled_preds)
+        min_err = min_error(unscaled_labels, unscaled_preds)
 
-        errors = {"avg_error": avg_error, "max_error":max_error, "min_error":min_error}
+        errors = {"avg_error": avg_err, "max_error":max_err, "min_error":min_err}
         return errors
\ No newline at end of file