Changed the analytical_util functions to expect multiple output columns in the data. Changed train_models to account for multiple outputs when reading in the data.

2021-05-24 12:03:26 -07:00 · 2021-05-24 12:03:26 -07:00 · 53503f40d2
parent a4cb539f72
commit 53503f40d2
2 changed files with 32 additions and 15 deletions
--- a/compiler/characterizer/analytical_util.py
+++ b/compiler/characterizer/analytical_util.py
@ -14,7 +14,7 @@ import os
 process_transform = {'SS':0.0, 'TT': 0.5, 'FF':1.0}
-def get_data_names(file_name):
+def get_data_names(file_name, exclude_area=True):
    """
    Returns just the data names in the first row of the CSV
    """
@ -25,7 +25,17 @@ def get_data_names(file_name):
        # reader is iterable not a list, probably  a better way to do this
        for row in csv_reader:
            # Return names from first row
-            return row[0].split(',')
+            names = row[0].split(',')
            break
    if exclude_area:
        try:
            area_ind = names.index('area')
        except ValueError:
            area_ind = -1
        if area_ind != -1:    
            names = names[:area_ind] + names[area_ind+1:]
    return names        
 def get_data(file_name):
    """
@ -41,7 +51,6 @@ def get_data(file_name):
            if row_iter == 1:
                feature_names = row[0].split(',')
                input_list = [[] for _ in range(len(feature_names)-removed_items)]
                scaled_list = [[] for _ in range(len(feature_names)-removed_items)]
                try:
                    # Save to remove area
                    area_ind = feature_names.index('area')
@ -237,9 +246,8 @@ def get_scaled_data(file_name):
    # Data is scaled by max/min and data format is changed to points vs feature lists
    self_scaled_data = scale_data_and_transform(all_data)
-    samples = np.asarray(self_scaled_data)
+    data_np = np.asarray(self_scaled_data)
-    features, labels = samples[:, :-1], samples[:,-1:]
+    return data_np
    return features, labels
 def scale_data_and_transform(data):
    """
--- a/compiler/characterizer/regression_model.py
+++ b/compiler/characterizer/regression_model.py
@ -13,7 +13,8 @@ import debug
 import math
-relative_data_path = "/sim_data"
+relative_data_path = "sim_data"
 data_file = "sim_data.csv"
 data_fnames = ["rise_delay.csv",
               "fall_delay.csv",
               "rise_slew.csv",
@ -41,7 +42,7 @@ if OPTS.sim_data_path == None:
 else:
    data_dir = OPTS.sim_data_path 
-data_paths = {dname:data_dir +'/'+fname for dname, fname in zip(lib_dnames, data_fnames)}
+data_path = data_dir + '/' + data_file
 class regression_model(simulation):
@ -65,6 +66,8 @@ class regression_model(simulation):
                        self.temperature]  
                        # Area removed for now
                        # self.sram.width * self.sram.height,
        # Include above inputs, plus load and slew which are added below
        self.num_inputs = len(model_inputs)+2
        self.create_measurement_names()
        models = self.train_models()
@ -135,12 +138,18 @@ class regression_model(simulation):
        """
        Generate and return models
        """
        self.output_names = get_data_names(data_path)[self.num_inputs:]
        data = get_scaled_data(data_path)
        features, labels = data[:, :self.num_inputs], data[:,self.num_inputs:]
        output_num = 0
        models = {}
-        for dname, dpath in data_paths.items():
+        for o_name in self.output_names:
-            features, labels = get_scaled_data(dpath)
+            output_label = labels[:,output_num]
-            model = self.generate_model(features, labels)
+            model = self.generate_model(features, output_label)
-            models[dname] = model
+            models[o_name] = model
-            self.save_model(dname, model)
+            output_num+=1
        return models
    # Fixme - only will work for sklearn regression models