SARIMA

Download as pdf or txt
Download as pdf or txt
You are on page 1of 8

SARIMA

February 5, 2024

[558]: import pandas as pd


import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX

[559]: data = pd.read_csv("GuestNights.csv")


projected_df = pd.read_csv("ProjectedGuestNights.csv", thousands=',')
capacity_df = pd.read_csv("MonthlyCapacity.csv")

data.head(), projected_df.head(), capacity_df.head()

[559]: ( Month Total_Court_Hours_Used Guest_Nights Hours_per_Night


0 nov-05 889 22,222 0.040
1 dic-05 1,797 29,450 0.061
2 ene-06 938 20,390 0.046
3 feb-06 1,506 35,848 0.042
4 mar-06 2,824 50,434 0.056,
Month_Year Projected_Guest_Nights
0 feb-07 59950
1 mar-07 83887
2 abr-07 86475
3 may-07 34514
4 jun-07 36221,
Month Days Days_Missed Total_Hours_Available
0 January 31 3 252
1 February 28 3 225
2 March 31 2 290
3 April 30 1 319
4 May* 31 2 255)

[560]: # Diccionario para traducir meses del español al inglés


spanish_to_english = {
'ene': 'Jan', 'feb': 'Feb', 'mar': 'Mar', 'abr': 'Apr', 'may': 'May', 'jun':
↪ 'Jun',

'jul': 'Jul', 'ago': 'Aug', 'sep': 'Sep', 'oct': 'Oct', 'nov': 'Nov', 'dic':
↪ 'Dec'

}
# Traducir los meses y convertir la columna a tipo datetime

1
data['Month'] = data['Month'].apply(lambda x: x.replace(x[:3],␣
↪spanish_to_english[x[:3]]))

projected_df['Month_Year'] = projected_df['Month_Year'].apply(lambda x: x.
↪replace(x[:3], spanish_to_english[x[:3]]))

capacity_df['Month'] = capacity_df['Month'].apply(lambda x: x.replace('*', ''))

[561]: projected_df['Month_Year'] = pd.to_datetime(projected_df['Month_Year'],␣


↪format='%b-%y', errors='coerce')

data['Month'] = pd.to_datetime(data['Month'], format='%b-%y', errors='coerce')


data.set_index('Month', inplace=True)
data.index = pd.DatetimeIndex(data.index, freq='MS')

[562]: data.sort_index(inplace=True)
data.head(20)

[562]: Total_Court_Hours_Used Guest_Nights Hours_per_Night


Month
2005-11-01 889 22,222 0.040
2005-12-01 1,797 29,450 0.061
2006-01-01 938 20,390 0.046
2006-02-01 1,506 35,848 0.042
2006-03-01 2,824 50,434 0.056
2006-04-01 2,885 51,515 0.056
2006-05-01 1,196 20,986 0.057
2006-06-01 1,459 22,798 0.064
2006-07-01 1,090 17,298 0.063
2006-08-01 840 9,551 0.088
2006-09-01 457 9,924 0.046
2006-10-01 629 16,139 0.039
2006-11-01 2,416 35,526 0.068
2006-12-01 3,409 41,576 0.082
2007-01-01 2,825 33,632 0.084

[563]: # Seleccionamos la serie temporal de interés


time_series = data['Hours_per_Night']

[564]: # Ajustamos el modelo SARIMA


sarima_model = SARIMAX(time_series,
order=(1, 1, 1),
seasonal_order=(1, 1, 1, 12),
enforce_stationarity=True,
enforce_invertibility=False)

[565]: # Ajustamos el modelo SARIMA


sarima_result = sarima_model.fit(disp=False)

C:\Users\sk8ta\anaconda3\Lib\site-
packages\statsmodels\tsa\statespace\sarimax.py:866: UserWarning: Too few

2
observations to estimate starting parameters for ARMA and trend. All parameters
except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
C:\Users\sk8ta\anaconda3\Lib\site-
packages\statsmodels\tsa\statespace\sarimax.py:866: UserWarning: Too few
observations to estimate starting parameters for seasonal ARMA. All parameters
except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'

[566]: # Obtenemos las predicciones para los próximos 24 meses


pred_start_date = time_series.index[-1] + pd.DateOffset(months=1)
pred_end_date = pred_start_date + pd.DateOffset(months=24)
sarima_prediction = sarima_result.get_prediction(start=pred_start_date,␣
↪end=pred_end_date)

prediction_mean = sarima_prediction.predicted_mean
prediction_conf_int = sarima_prediction.conf_int()

[567]: # Visualizamos la serie original y las predicciones


plt.figure(figsize=(10, 5))
plt.plot(time_series, label='Observed')
plt.plot(prediction_mean, label='SARIMA Predictions', color='red')
plt.fill_between(prediction_mean.index,
prediction_conf_int.iloc[:, 0],
prediction_conf_int.iloc[:, 1], color='pink', alpha=0.3)
plt.legend()
plt.title('SARIMA Predictions for Court Hours Used per Guest-Night')
plt.show()

3
[568]: # Mostrar las predicciones
prediction_mean, prediction_conf_int

# Creamos un DataFrame para mostrar los resultados en la tabla solicitada


predictions_table = pd.DataFrame({
'Año': prediction_mean.index.year,
'Mes': prediction_mean.index.month,
'Prediction Mean': prediction_mean.values,
'Lower Hours_per_Night': prediction_conf_int.iloc[:, 0].values,
'Upper Hours_per_Night': prediction_conf_int.iloc[:, 1].values
})

# Formateamos el mes para que muestre el nombre en lugar del número


predictions_table['Mes'] = predictions_table['Mes'].apply(lambda x: pd.
↪to_datetime(x, format='%m').strftime('%B'))

predictions_table

[568]: Año Mes Prediction Mean Lower Hours_per_Night \


0 2007 February 0.067419 0.049640
1 2007 March 0.086831 0.069111
2 2007 April 0.084503 0.064804
3 2007 May 0.086504 0.066057
4 2007 June 0.093073 0.071510
5 2007 July 0.092259 0.069804
6 2007 August 0.117178 0.093795
7 2007 September 0.075215 0.050973
8 2007 October 0.068194 0.043100
9 2007 November 0.097215 0.071324
10 2007 December 0.111122 0.084441
11 2008 January 0.113344 0.085897
12 2008 February 0.096599 0.059748
13 2008 March 0.116081 0.078467
14 2008 April 0.113723 0.073164
15 2008 May 0.115738 0.073439
16 2008 June 0.122301 0.077937
17 2008 July 0.121489 0.075319
18 2008 August 0.146407 0.098429
19 2008 September 0.104445 0.054756
20 2008 October 0.097423 0.046060
21 2008 November 0.126445 0.073486
22 2008 December 0.140350 0.085827
23 2009 January 0.142575 0.086531
24 2009 February 0.125828 0.061686

Upper Hours_per_Night
0 0.085197

4
1 0.104551
2 0.104201
3 0.106951
4 0.114637
5 0.114714
6 0.140561
7 0.099458
8 0.093288
9 0.123107
10 0.137803
11 0.140791
12 0.133449
13 0.153696
14 0.154282
15 0.158036
16 0.166665
17 0.167659
18 0.194386
19 0.154133
20 0.148786
21 0.179403
22 0.194873
23 0.198619
24 0.189969

[569]: # Extraemos el año y el mes en columnas separadas


projected_df['Año'] = projected_df['Month_Year'].dt.year
projected_df['Mes'] = projected_df['Month_Year'].dt.month_name()
capacity_df.rename(columns = {'Month':'Mes'}, inplace = True)

# Ahora combinamos usando 'Month_Year' como clave


result_df = projected_df.merge(predictions_table, on=['Año', 'Mes'],␣
↪how='inner')

result_df = result_df.merge(capacity_df, on=['Mes'], how='inner')

[570]: # Convertir las columnas a tipo numérico


result_df['Projected_Guest_Nights'] = pd.
↪to_numeric(result_df['Projected_Guest_Nights'], errors='coerce')

result_df['Prediction Mean'] = pd.to_numeric(result_df['Prediction Mean'],␣


↪errors='coerce')

# Ahora realiza la multiplicación


result_df['Total_Hours_Lower'] = result_df['Projected_Guest_Nights'] *␣
↪result_df['Lower Hours_per_Night']

result_df['Total_Hours_Mean'] = result_df['Projected_Guest_Nights'] *␣
↪result_df['Prediction Mean']

5
result_df['Total_Hours_Upper'] = result_df['Projected_Guest_Nights'] *␣
↪result_df['Upper Hours_per_Night']

result_df["Courts_Needed_Lower"] = result_df["Total_Hours_Lower"] /␣
↪result_df["Total_Hours_Available"]

result_df["Courts_Needed_Mean"] = result_df["Total_Hours_Mean"] /␣
↪result_df["Total_Hours_Available"]

result_df["Courts_Needed_Upper"] = result_df["Total_Hours_Upper"] /␣
↪result_df["Total_Hours_Available"]

result_df = result_df[["Año", "Mes", "Days", "Total_Hours_Available",␣


↪"Projected_Guest_Nights",

"Lower Hours_per_Night", "Total_Hours_Lower",␣


↪"Courts_Needed_Lower",

"Prediction Mean", "Total_Hours_Mean",␣


↪"Courts_Needed_Mean",

"Upper Hours_per_Night", "Total_Hours_Upper",␣


↪"Courts_Needed_Upper"]]

[571]: result_df

[571]: Año Mes Days Total_Hours_Available Projected_Guest_Nights \


0 2007 February 28 225 59950
1 2008 February 28 225 95620
2 2007 March 31 290 83887
3 2008 March 31 290 130696
4 2007 April 30 319 86475
5 2008 April 30 319 129111
6 2007 May 31 255 34514
7 2008 May 31 255 53218
8 2007 June 30 255 36221
9 2008 June 30 255 55197
10 2007 July 31 255 27180
11 2008 July 31 255 38819
12 2007 August 31 255 18509
13 2008 August 31 255 24627
14 2007 September 30 224 16293
15 2008 September 30 224 21717
16 2007 October 31 290 30330
17 2008 October 31 290 37083
18 2007 November 30 243 57741
19 2008 November 30 243 73580
20 2007 December 31 252 66462
21 2008 December 31 252 83773
22 2008 January 31 252 53992

Lower Hours_per_Night Total_Hours_Lower Courts_Needed_Lower \

6
0 0.049640 2975.921010 13.226316
1 0.059748 5713.091665 25.391519
2 0.069111 5797.529697 19.991482
3 0.078467 10255.300393 35.363105
4 0.064804 5603.932765 17.567187
5 0.073164 9446.234662 29.612021
6 0.066057 2279.901109 8.940789
7 0.073439 3908.299256 15.326664
8 0.071510 2590.151412 10.157457
9 0.077937 4301.891580 16.870163
10 0.069804 1897.275596 7.440296
11 0.075319 2923.808792 11.465917
12 0.093795 1736.055230 6.808060
13 0.098429 2424.001082 9.505887
14 0.050973 830.497506 3.707578
15 0.054756 1189.136496 5.308645
16 0.043100 1307.224993 4.507672
17 0.046060 1708.057218 5.889852
18 0.071324 4118.341661 16.947908
19 0.073486 5407.130969 22.251568
20 0.084441 5612.096229 22.270223
21 0.085827 7189.956850 28.531575
22 0.085897 4637.741404 18.403736

Prediction Mean Total_Hours_Mean Courts_Needed_Mean \


0 0.067419 4041.754989 17.963356
1 0.096599 9236.756453 41.052251
2 0.086831 7283.990828 25.117210
3 0.116081 15171.382782 52.315113
4 0.084503 7307.372585 22.907124
5 0.113723 14682.864274 46.027788
6 0.086504 2985.611885 11.708282
7 0.115738 6159.319818 24.154195
8 0.093073 3371.207969 13.220423
9 0.122301 6750.638864 26.473094
10 0.092259 2507.603915 9.833741
11 0.121489 4716.085880 18.494454
12 0.117178 2168.851848 8.505301
13 0.146407 3605.568419 14.139484
14 0.075215 1225.483070 5.470907
15 0.104445 2268.225327 10.126006
16 0.068194 2068.326657 7.132161
17 0.097423 3612.744743 12.457740
18 0.097215 5613.318566 23.100076
19 0.126445 9303.810495 38.287286
20 0.111122 7385.366113 29.307008
21 0.140350 11757.542021 46.656913

7
22 0.113344 6119.673238 24.284418

Upper Hours_per_Night Total_Hours_Upper Courts_Needed_Upper


0 0.085197 5107.588968 22.700395
1 0.133449 12760.421242 56.712983
2 0.104551 8770.451959 30.242938
3 0.153696 20087.465171 69.267121
4 0.104201 9010.812406 28.247061
5 0.154282 19919.493886 62.443555
6 0.106951 3691.322661 14.475775
7 0.158036 8410.340379 32.981727
8 0.114637 4152.264527 16.283390
9 0.166665 9199.386149 36.076024
10 0.114714 3117.932235 12.227185
11 0.167659 6508.362969 25.522992
12 0.140561 2601.648466 10.202543
13 0.194386 4787.135756 18.773081
14 0.099458 1620.468634 7.234235
15 0.154133 3347.314157 14.943367
16 0.093288 2829.428321 9.756649
17 0.148786 5517.432268 19.025629
18 0.123107 7108.295470 29.252245
19 0.179403 13200.490022 54.323004
20 0.137803 9158.635997 36.343794
21 0.194873 16325.127192 64.782251
22 0.140791 7601.605072 30.165099

[572]: result_df.to_csv('SARIMA_results.csv', encoding='utf-8')

You might also like