Data Sources
- The model uses CSV files containing historical environmental data specific to a city and roof type.
The model takes historical climate data as input, which includes:
- Outdoor Temperature (in °C)
- Vapor Pressure (in Pa)
- Wind Speed (in m/s)
- Sky Temperature (in °C)
- Global Irradiation (in W/m²)
The model predicts the following indoor parameters:
- Indoor Temperature (in °C)
- Indoor Vapor Pressure (in Pa)
Data Loading
- Reads feature and label data from CSV files.
- Features include outdoor environmental parameters such as temperature, vapor pressure, wind speed, sky temperature, and global irradiation.
- Labels consist of indoor temperature and vapor pressure.
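- The feature data's index is overwritten with the label data's DateTime index so the two tables align row by row; both files must therefore contain the same number of rows in the same order.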
def load_data(self):
    """Load outdoor-weather features and indoor-climate labels from CSV.

    Two files are read, both indexed by their parsed ``DateTime`` column:

    * ``data/weather_data/{city_name}_EnergyPlus.csv`` (features)
    * ``data/energyPlus/eplusout_{city_name}_{roof_type}_{past_time_steps}_{area}.csv``
      (labels)

    Reads ``self.city_name``, ``self.roof_type``, ``self.past_time_steps``
    and ``self.area`` to build the paths.

    Returns:
        tuple: ``(features, labels)`` DataFrames. ``features`` holds the five
        outdoor columns followed by the two indoor label columns;
        ``labels`` holds only the two indoor columns.

    Raises:
        ValueError: Raised by pandas when the two files have a different
            number of rows, because the label index is assigned onto the
            feature frame positionally.
    """
    # Build each path once so the log line and the read can never disagree.
    feature_path = f"data/weather_data/{self.city_name}_EnergyPlus.csv"
    label_path = (
        f"data/energyPlus/eplusout_{self.city_name}_{self.roof_type}"
        f"_{self.past_time_steps}_{self.area}.csv"
    )
    print(f"Loading feature data from: {feature_path}")
    print(f"Loading label data from: {label_path}")

    feature_data = pd.read_csv(
        feature_path,
        parse_dates=["DateTime"],  # Parse the "DateTime" column as dates
        index_col="DateTime",  # Use the "DateTime" column as the index
    )
    label_data = pd.read_csv(
        label_path,
        parse_dates=["DateTime"],  # Parse the "DateTime" column as dates
        index_col="DateTime",  # Use the "DateTime" column as the index
    )

    # Use the index of the label data for the feature data. This is a
    # positional overwrite, not a join: row k of the weather file is paired
    # with row k of the label file regardless of the original timestamps.
    feature_data.index = label_data.index

    feature_columns = [
        "Outdoor global irradiation(W m⁻²)",
        "Outdoor air temperature(°C)",
        "Outdoor vapor pressure(Pa)",
        "Outdoor wind speed(m s⁻¹)",
        "Sky temperature(°C)",
    ]
    label_columns = [
        "ZoneAirTemperature °C",
        "Indoor Vapor Pressure(Pa)",
    ]

    # Extract the selected columns from the feature and label data
    labels = label_data[label_columns]

    # The indoor label columns are appended to the outdoor features, so the
    # model input contains both.
    features = pd.concat([feature_data[feature_columns], labels], axis=1)

    # "window_opening_actual" is not among the selected label columns, so a
    # strict drop would raise KeyError; tolerate its absence.
    labels = labels.drop(columns=["window_opening_actual"], errors="ignore")

    return features, labels
Dataset Creation
- Transforms the data into a format that can be processed by the model, creating a time series dataset with past data points to predict future values.
def create_dataset(self, X, y):
    """Build sliding-window input/target pairs for sequence forecasting.

    For every start position ``i`` the input sample is
    ``X[i : i + past_time_steps]`` and its target is the block of ``y`` that
    immediately follows it,
    ``y[i + past_time_steps : i + past_time_steps + forecast_steps]``.

    Args:
        X: Sliceable sequence of input rows (e.g. a 2-D array).
        y: Sliceable sequence of target rows, aligned row-for-row with ``X``.

    Returns:
        tuple: ``(Xs, ys)`` numpy arrays with one entry per window. Both are
        empty when ``X`` is shorter than
        ``past_time_steps + forecast_steps``.
    """
    window = self.past_time_steps
    horizon = self.forecast_steps

    Xs, ys = [], []
    # The last valid start leaves room for a full input window plus a full
    # forecast horizon, hence the "+ 1" on the exclusive upper bound.
    for i in range(len(X) - window - horizon + 1):
        target_start = i + window
        # Past observations that form the model input.
        Xs.append(X[i:target_start])
        # Future observations, starting right where the input window ends.
        ys.append(y[target_start:target_start + horizon])

    # Convert the lists to numpy arrays and return them
    return np.array(Xs), np.array(ys)
Data Preprocessing
- Scales the features and labels using RobustScaler, which is more robust to outliers than the MinMaxScaler it replaces.
- Splits the data into training, validation, and test sets.
- Saves the scaler objects for later use in scaling new data during model deployment.
- Transforms the scaled data back into the time series format.
def preprocess_data(self, features, labels):
# Replace the original MinMaxScaler with RobustScaler
scaler_features = RobustScaler()
scaler_labels = RobustScaler()
# Convert the features and labels to numpy arrays
features = features.to_numpy()
labels = labels.to_numpy()
# Print the shapes of the features and labels
print(f"features shape: {features.shape}")
print(f"labels shape: {labels.shape}")
# Create the dataset using the create_dataset method
features, labels = self.create_dataset(features, labels)
# Initialize empty lists for training, validation, and test sets for both features and labels
features_train, features_val, features_test = [], [], []
labels_train, labels_val, labels_test = [], [], []
# Loop over the features and labels and split them into training, validation, and test sets
for i in range(0, len(features) - self.window_size, self.window_size):
train_size = int(self.window_size * 0.6)
val_size = int(self.window_size * 0.2)
test_size = self.window_size - train_size - val_size
features_train.append(features[i: i + train_size])
features[i + train_size: i + train_size + val_size])
features[i + train_size + val_size: i + self.window_size]
labels_train.append(labels[i: i + train_size])
labels[i + train_size: i + train_size + val_size])
labels[i + train_size + val_size: i + self.window_size])
# Concatenate the lists to form numpy arrays
features_train = np.concatenate(features_train)
features_val = np.concatenate(features_val)
features_test = np.concatenate(features_test)
labels_train = np.concatenate(labels_train)
labels_val = np.concatenate(labels_val)
labels_test = np.concatenate(labels_test)
# Reshape the data for scaling
features_train = features_train.reshape(-1, features_train.shape[-1])
features_val = features_val.reshape(-1, features_val.shape[-1])
features_test = features_test.reshape(-1, features_test.shape[-1])
# Reshape labels for scaling
labels_train = labels_train.reshape(-1, labels_train.shape[-1])
labels_val = labels_val.reshape(-1, labels_val.shape[-1])
labels_test = labels_test.reshape(-1, labels_test.shape[-1])
# Fit the scalers to the training data
# Save the scalers for future use
os.makedirs("data/pth/", exist_ok=True)
dump(scaler_features, "data/pth/scaler_features.joblib")
dump(scaler_labels, "data/pth/scaler_labels.joblib")
# Transform the training, validation, and test data using the fitted scalers
features_train = scaler_features.transform(features_train)
features_val = scaler_features.transform(features_val)
features_test = scaler_features.transform(features_test)
labels_train = scaler_labels.transform(labels_train)
labels_val = scaler_labels.transform(labels_val)
labels_test = scaler_labels.transform(labels_test)
# Reshape the features back to their original shape
features_train = features_train.reshape(
-1, self.past_time_steps, features_train.shape[-1]
features_val = features_val.reshape(
-1, self.past_time_steps, features_val.shape[-1]
features_test = features_test.reshape(
-1, self.past_time_steps, features_test.shape[-1]
# Reshape labels back to their original shape
labels_train = labels_train.reshape(
-1, self.forecast_steps, labels_train.shape[-1]
labels_val = labels_val.reshape(-1,
self.forecast_steps, labels_val.shape[-1])
labels_test = labels_test.reshape(
-1, self.forecast_steps, labels_test.shape[-1]
# Return the preprocessed data and the label scaler
return (