def scale_data_and_transform(data):
    """
    Max/min scale a dataset and transpose it from feature lists to datapoints.

    data is a list of feature lists (one inner list per feature, all the same
    length). Returns a list of datapoints (one inner list per sample) where
    every value is scaled to [0, 1] by its own feature's max and min.
    """

    scaled_data = [[] for _ in range(len(data[0]))]
    for feature_list in data:
        max_val = max(feature_list)
        min_val = min(feature_list)
        # Guard: a constant feature has max == min and would otherwise
        # raise ZeroDivisionError; map it to 0.0 instead.
        span = max_val - min_val
        for i, value in enumerate(feature_list):
            scaled_data[i].append((value - min_val) / span if span else 0.0)
    return scaled_data

def get_scaled_data(file_name, sample_dir=None):
    """
    Load the dataset in file_name, max/min scale it, and split it into
    model features and labels.

    Returns (features, labels) as numpy arrays, where labels is the last
    column of the scaled data and features are all the other columns.
    sample_dir is accepted for interface compatibility but unused here.
    """

    # get_data is a project helper that reads the CSV into feature lists —
    # presumably one list per column; verify against analytical_util.get_data.
    all_data = get_data(file_name)

    # Data is scaled by max/min and changed from feature lists to points.
    self_scaled_data = scale_data_and_transform(all_data)

    samples = np.asarray(self_scaled_data)
    features, labels = samples[:, :-1], samples[:, -1:]
    return features, labels
def scale_input_datapoint(point, data_dir):
    """
    Scale a single input datapoint the same way the training data was scaled.

    Input data has no output (label) column and needs to be scaled like the
    model inputs were during training, using the per-feature max/min of the
    reference datasets under data_dir. Returns the scaled point as a list.
    """

    # avgs is returned by the helper but intentionally unused here —
    # scaling is pure max/min; confirm against get_max_min_from_datasets.
    maxs, mins, avgs = get_max_min_from_datasets(data_dir)
    debug.info(1, "maxs={}".format(maxs))
    debug.info(1, "mins={}".format(mins))
    debug.info(1, "point={}".format(point))

    scaled_point = []
    for feature, mx, mn in zip(point, maxs, mins):
        # Guard: a constant feature (mx == mn) would otherwise raise
        # ZeroDivisionError; map it to 0.0, matching the training-side scaling.
        rng = mx - mn
        scaled_point.append((feature - mn) / rng if rng else 0.0)
    return scaled_point
def train_model(self, features, labels):
    """
    Supervised training of the model (method of linear_regression).

    Fits a scikit-learn LinearRegression on the already-scaled features and
    labels and stores it on self.model for later use by model_prediction.
    """

    self.model = LinearRegression()
    self.model.fit(features, labels)

def model_prediction(self, features):
    """
    Run the trained model on already-scaled features and return the scaled
    prediction (method of linear_regression).

    NOTE: the prediction is NOT unscaled here — the model is trained on
    scaled values, and the caller is responsible for unscaling the result
    (e.g. via unscale_data).
    """

    pred = self.model.predict(features)
    debug.info(1, "pred={}".format(pred))
    return pred