mirror of https://github.com/VLSIDA/OpenRAM.git
Data scaling is only dependent on a single file rather than a directory now.
This commit is contained in:
parent
fc55cd194d
commit
393a9ca0d8
|
|
@ -110,6 +110,24 @@ def get_max_min_from_datasets(dir):
|
||||||
avgs = [s/total_count for s in sums]
|
avgs = [s/total_count for s in sums]
|
||||||
return maxs,mins,avgs
|
return maxs,mins,avgs
|
||||||
|
|
||||||
|
def get_max_min_from_file(path):
    """Return the per-feature maximums, minimums and averages of one data file.

    Args:
        path: Path of the data file to read.

    Returns:
        A tuple ``(maxs, mins, avgs)`` of three lists holding one entry per
        feature list yielded by ``get_data(path)``. If ``path`` is not an
        existing file, a warning is logged and three empty lists are returned.
    """
    if not os.path.isfile(path):
        debug.warning("Input file not found: {}".format(path))
        return [], [], []

    # NOTE(review): get_data is defined elsewhere; this assumes it returns an
    # iterable of per-feature value lists -- confirm against its definition.
    data = get_data(path)

    # Get max, min, and average of every feature list in this single file
    data_max, data_min, avgs = [], [], []
    for feature_list in data:
        data_max.append(max(feature_list))
        data_min.append(min(feature_list))
        # Divide by this feature's own sample count rather than by the length
        # of whichever list happened to be iterated last; the result is the
        # same whenever all feature lists have equal length.
        avgs.append(sum(feature_list) / len(feature_list))

    return data_max, data_min, avgs
|
||||||
|
|
||||||
def get_data_and_scale(file_name, sample_dir):
|
def get_data_and_scale(file_name, sample_dir):
|
||||||
maxs,mins,avgs = get_max_min_from_datasets(sample_dir)
|
maxs,mins,avgs = get_max_min_from_datasets(sample_dir)
|
||||||
|
|
||||||
|
|
@ -179,11 +197,11 @@ def sample_from_file(num_samples, file_name, sample_dir=None):
|
||||||
|
|
||||||
return np.asarray(sampled_data), np.asarray(unused_new_scaling)
|
return np.asarray(sampled_data), np.asarray(unused_new_scaling)
|
||||||
|
|
||||||
def get_scaled_data(file_name, sample_dir=None):
|
def get_scaled_data(file_name):
|
||||||
"""Get data from CSV file and scale it based on max/min of dataset"""
|
"""Get data from CSV file and scale it based on max/min of dataset"""
|
||||||
|
|
||||||
if sample_dir:
|
if file_name:
|
||||||
maxs,mins,avgs = get_max_min_from_datasets(sample_dir)
|
maxs,mins,avgs = get_max_min_from_file(file_name)
|
||||||
else:
|
else:
|
||||||
maxs,mins,avgs = [], [], []
|
maxs,mins,avgs = [], [], []
|
||||||
|
|
||||||
|
|
@ -210,12 +228,12 @@ def scale_data_and_transform(data):
|
||||||
scaled_data[i].append((feature_list[i]-min_val)/(max_val-min_val))
|
scaled_data[i].append((feature_list[i]-min_val)/(max_val-min_val))
|
||||||
return scaled_data
|
return scaled_data
|
||||||
|
|
||||||
def scale_input_datapoint(point, data_dir):
|
def scale_input_datapoint(point, file_path):
|
||||||
"""
|
"""
|
||||||
Input data has no output and needs to be scaled like the model inputs during
|
Input data has no output and needs to be scaled like the model inputs during
|
||||||
training.
|
training.
|
||||||
"""
|
"""
|
||||||
maxs, mins, avgs = get_max_min_from_datasets(data_dir)
|
maxs, mins, avgs = get_max_min_from_file(file_path)
|
||||||
debug.info(1, "maxs={}".format(maxs))
|
debug.info(1, "maxs={}".format(maxs))
|
||||||
debug.info(1, "mins={}".format(mins))
|
debug.info(1, "mins={}".format(mins))
|
||||||
debug.info(1, "point={}".format(point))
|
debug.info(1, "point={}".format(point))
|
||||||
|
|
@ -225,9 +243,9 @@ def scale_input_datapoint(point, data_dir):
|
||||||
scaled_point.append((feature-mn)/(mx-mn))
|
scaled_point.append((feature-mn)/(mx-mn))
|
||||||
return scaled_point
|
return scaled_point
|
||||||
|
|
||||||
def unscale_data(data, ref_dir, pos=None):
|
def unscale_data(data, file_path, pos=None):
|
||||||
if ref_dir:
|
if file_path:
|
||||||
maxs,mins,avgs = get_max_min_from_datasets(ref_dir)
|
maxs,mins,avgs = get_max_min_from_file(file_path)
|
||||||
else:
|
else:
|
||||||
print("Must provide reference data to unscale")
|
print("Must provide reference data to unscale")
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@ -25,12 +25,12 @@ class linear_regression():
|
||||||
def get_prediction(self, model_inputs):
|
def get_prediction(self, model_inputs):
|
||||||
|
|
||||||
file_path = data_dir +'/'+data_filename
|
file_path = data_dir +'/'+data_filename
|
||||||
scaled_inputs = np.asarray([scale_input_datapoint(model_inputs, data_dir)])
|
scaled_inputs = np.asarray([scale_input_datapoint(model_inputs, file_path)])
|
||||||
|
|
||||||
features, labels = get_scaled_data(file_path, data_dir)
|
features, labels = get_scaled_data(file_path)
|
||||||
self.train_model(features, labels)
|
self.train_model(features, labels)
|
||||||
scaled_pred = self.model_prediction(scaled_inputs)
|
scaled_pred = self.model_prediction(scaled_inputs)
|
||||||
pred = unscale_data(scaled_pred.tolist(), data_dir)
|
pred = unscale_data(scaled_pred.tolist(), file_path)
|
||||||
debug.info(1,"Unscaled Prediction = {}".format(pred))
|
debug.info(1,"Unscaled Prediction = {}".format(pred))
|
||||||
return pred
|
return pred
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue