# Parse the raise dates, then count how many requests were raised on each
# date, ordered chronologically.
records["DATE_RAISED"] = pd.to_datetime(records["DATE_RAISED"])
record_by_date = (
    records.groupby("DATE_RAISED")["TYPE"]
    .count()
    .sort_index()
)

# Overview: the complete series.
ax = record_by_date.plot(figsize=(25, 10))
ax.set_ylabel('Number of requests')
ax.grid(visible=True, which='both')

# Close-up: observations 100..129, drawn on a separate figure.
plt.figure()
ax = record_by_date.iloc[100:130].plot(figsize=(25, 10))
ax.set_ylabel('Number of requests')
ax.grid(visible=True, which='both')
填充缺失值
让我们检查一下数据是否包含了所有的日期。
# Check whether every calendar day between the first and the last observation
# is present in the index of the series.
start_date = record_by_date.index.min()
end_date = record_by_date.index.max()

# Create a complete daily date range for the period of interest.
date_range = pd.date_range(start=start_date, end=end_date, freq="D")

# Compare the date range to the index of the time series: keep the days of
# the complete range that do not occur in the index.
missing_dates = date_range[~date_range.isin(record_by_date.index)]

# len() is used on purpose: the truth value of a pandas Index is ambiguous.
if len(missing_dates) > 0:
    print("Missing dates:", missing_dates)
else:
    print("No missing dates")
正如所预期的那样，数据缺少一些日期的值。让我们用相邻日期的平均值填充这些值。
# Reindex onto a complete daily calendar. Days that are absent from the data
# become NaN (not 0), so a placeholder value can never leak into the average
# computed for a neighbouring missing day.
idx = pd.date_range(
    start=record_by_date.index.min(),
    end=record_by_date.index.max(),
    freq="D",
)
record_by_date = record_by_date.reindex(idx)

# Fill each gap from the surrounding observations. For an isolated missing
# day, linear interpolation is exactly the mean of the previous and the next
# day; for a run of consecutive missing days it moves linearly between the
# nearest known values. The first and last dates always exist (they define
# the range), so every gap has a known value on both sides. The result is a
# float series whenever at least one day had to be filled.
record_by_date = record_by_date.interpolate(method="linear")
from scipy.fft import fft, fftfreq

# Calculate the Fourier transform of the daily series.
n_samples = len(record_by_date)
yf = fft(record_by_date.to_numpy())

# Frequencies (cycles per sample) of the non-negative half of the spectrum.
# fftfreq returns the exact bin frequencies k / n_samples, so an index into
# yf maps to the frequency that bin really represents.
xf = fftfreq(n_samples, d=1.0)[: n_samples // 2]

# Find the dominant frequency. The first element of the FFT is dropped from
# the search because it is the DC component (the average value of the
# signal). argmax runs on the slice starting at 1, hence the "+ 1" to turn
# its result back into an index that is valid for yf and xf.
# Raises ValueError if the series has fewer than 4 samples (empty slice).
idx = np.argmax(np.abs(yf[1 : n_samples // 2])) + 1
freq = xf[idx]
period = 1 / freq
print(f"The period of the time series is {period}")
输出为：The period of the time series is 7.030927835051545。这与我们使用 ACF 和目视检查发现的每周周期相似。
from scipy.signal import periodogram

# Estimate the power spectral density. periodogram is called with its default
# sampling frequency, so `freq` is expressed in cycles per sample.
freq, power = periodogram(record_by_date)

# The period is the reciprocal of the frequency that carries the most power.
period = 1 / freq[np.argmax(power)]
print(f"The period of the time series is {period}")

# Plot the spectrum.
plt.plot(freq, power)
plt.xlabel('Frequency (Hz)')
plt.ylabel('Power spectral density')
plt.show()