import pandas as pd import numpy as np from statsmodels.tsa.statespace.sarimax import SARIMAX from statsmodels.graphics.tsaplots import plot_acf, plot_pacf import matplotlib.pyplot as plt from datetime import datetime, timedelta import plotly.express as px from tools.algo.humidity import absolute_humidity import pickle def sarima(file, col_key='상대습도', future_hours=24): df = pd.read_csv(file) ah = absolute_humidity(df[col_key], df["기온"]) df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M')) df["절대습도"] = ah # Use the data up to the last 'future_hours' for fitting model = SARIMAX(df[col_key].iloc[:-future_hours], order=(1, 0, 2), seasonal_order=(0, 1, 2, 24)) model_fit = model.fit() print(model_fit.summary()) # Forecast the next 'future_hours' and append to df forecast_values = model_fit.forecast(steps=future_hours) forecast_index = [df['관측시각'].iloc[-1] + timedelta(hours=i) for i in range(1, future_hours + 1)] forecast_series = pd.Series(data=forecast_values, index=forecast_index, name='forecast') # Forecast the next 'future_hours' forecast_values = model_fit.forecast(steps=future_hours) forecast_index = [df['관측시각'].iloc[-1] + timedelta(hours=i) for i in range(1, future_hours + 1)] forecast_df = pd.DataFrame({ '관측시각': forecast_index, 'forecast': forecast_values }) forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False) with open('sarima_model.pkl', 'wb') as pkl_file: pickle.dump(model_fit, pkl_file) def forecast_from_saved_model(file, future_hours=24): # Load the saved model with open('sarima_model.pkl', 'rb') as pkl_file: loaded_model = pickle.load(pkl_file) df = pd.read_csv(file) df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M')) # Forecast the next 'future_hours' using the loaded model forecast_values = loaded_model.forecast(steps=future_hours) forecast_index = [df['관측시각'].iloc[-1] + timedelta(hours=i) for i in range(1, future_hours + 1)] forecast_df = pd.DataFrame({ '관측시각': forecast_index, 'forecast': forecast_values }) forecast_df.to_csv(f"{file.split('.')[0]}_forecast.csv", index=False) return forecast_df if __name__ == "__main__": df = pd.read_csv("/home/juni/PycharmProjects/failure_analysis/data/weather/202007010000_202308310000_f.csv") ah = absolute_humidity(df["상대습도"], df["기온"]) df['관측시각'] = df['관측시각'].apply(lambda x: datetime.strptime(f"{x}", '%Y%m%d%H%M')) df["절대습도"] = ah # fig = go.Figure() # # fig.add_trace( # go.Scatter(x=df["관측시각"], y=df["절대습도"]) # ) # fig.add_trace( # go.Scatter(x=df["관측시각"], y=signal.savgol_filter( # df["절대습도"],72,3) # )) # fig.show() log_df = np.log(df["상대습도"]) diff_1 = (log_df.diff(periods=1).iloc[1:]) diff_2 = diff_1.diff(periods=1).iloc[1:] plot_acf(diff_2) plot_pacf(diff_2) plt.show() model = SARIMAX(df["상대습도"], order=(2,0,2), seasonal_order=(1,1,2,24)) model_fit = model.fit() # ARIMA_model = pm.auto_arima(df['절대습도'], # start_p=1, # start_q=1, # test='adf', # use adftest to find optimal 'd' # max_p=3, max_q=3, # maximum p and q # m=24, # frequency of series (if m==1, seasonal is set to FALSE automatically) # d=None, # let model determine 'd' # D=2, #order of the seasonal differencing # seasonal=True, # No Seasonality for standard ARIMA # trace=False, # logs # error_action='warn', # shows errors ('ignore' silences these) # suppress_warnings=False, # stepwise=True) print(model_fit.summary()) df['forecast'] = model_fit.predict(start=-100, end=-1, dynamic=True) # df[['절대습도', 'forecast']].plot(figsize=(12, 8)) fig = px.line(df[['상대습도', 'forecast']]) fig.show()