191 lines
7.4 KiB
Python
191 lines
7.4 KiB
Python
"""
|
|
The tsnet.postprocessing.detect_cusum module contains function to perform
|
|
Cumulative sum algorithm (CUSUM) to detect abrupt changes in data.
|
|
|
|
"""
|
|
|
|
import numpy as np
|
|
import matplotlib.dates as mdates
|
|
|
|
def detect_cusum(time, x, threshold, drift, show,
|
|
ending=True, ax=None):
|
|
|
|
"""
|
|
Parameters
|
|
----------
|
|
time : 1D array-like
|
|
time.
|
|
x : 1D array_like
|
|
data.
|
|
threshold : positive number, optional (default = 1)
|
|
amplitude threshold for the change in the data.
|
|
drift : positive number, optional (default = 0)
|
|
drift term that prevents any change in the absence of change.
|
|
ending : bool, optional (default = False)
|
|
True (1) to estimate when the change ends; False (0) otherwise.
|
|
show : bool, optional (default = True)
|
|
True (1) plots data in matplotlib figure, False (0) don't plot.
|
|
ax : a matplotlib.axes.Axes instance, optional (default = None).
|
|
|
|
Returns
|
|
-------
|
|
tai : 1D array_like, int
|
|
index of when the change started.
|
|
taf : 1D array_like, int
|
|
index of when the change ended (if `ending` is True).
|
|
amp : 1D array_like, float
|
|
amplitude of changes (if `ending` is True).
|
|
|
|
Notes
|
|
-----
|
|
Tuning of the CUSUM algorithm according to Gustafsson (2000)[1]_:
|
|
Start with a very large `threshold`.
|
|
Choose `drift` to one half of the expected change, or adjust `drift` such
|
|
that `g` = 0 more than 50% of the time.
|
|
Then set the `threshold` so the required number of false alarms (this can
|
|
be done automatically) or delay for detection is obtained.
|
|
If faster detection is sought, try to decrease `drift`.
|
|
If fewer false alarms are wanted, try to increase `drift`.
|
|
If there is a subset of the change times that does not make sense,
|
|
try to increase `drift`.
|
|
|
|
Note that by default repeated sequential changes, i.e., changes that have
|
|
the same beginning (`tai`) are not deleted because the changes were
|
|
detected by the alarm (`ta`) at different instants. This is how the
|
|
classical CUSUM algorithm operates.
|
|
|
|
If you want to delete the repeated sequential changes and keep only the
|
|
beginning of the first sequential change, set the parameter `ending` to
|
|
True. In this case, the index of the ending of the change (`taf`) and the
|
|
amplitude of the change (or of the total amplitude for a repeated
|
|
sequential change) are calculated and only the first change of the repeated
|
|
sequential changes is kept. In this case, it is likely that `ta`, `tai`,
|
|
and `taf` will have less values than when `ending` was set to False.
|
|
|
|
See this IPython Notebook [2]_.
|
|
|
|
References
|
|
----------
|
|
.. [1] Gustafsson (2000) Adaptive Filtering and Change Detection.
|
|
.. [2] hhttp://nbviewer.ipython.org/github/demotu/BMC/blob/master/notebooks/DetectCUSUM.ipynb
|
|
|
|
"""
|
|
|
|
x = np.atleast_1d(x).astype('float64')
|
|
time = np.atleast_1d(time).astype('float64')
|
|
gp, gn = np.zeros(x.size), np.zeros(x.size)
|
|
gp_real, gn_real = np.zeros(x.size), np.zeros(x.size)
|
|
ta, tai, taf = np.array([[], [], []], dtype=int)
|
|
tap, tan = 0, 0
|
|
amp = np.array([])
|
|
|
|
# # Find changes (online form)
|
|
# for i in range(1, x.size):
|
|
# s = x[i] - x[i-1]
|
|
# gp[i] = gp[i-1] + s - drift # cumulative sum for + change
|
|
# gn[i] = gn[i-1] - s - drift # cumulative sum for - change
|
|
# if gp[i] < 0:
|
|
# gp[i], tap = 0, i
|
|
# if gn[i] < 0:
|
|
# gn[i], tan = 0, i
|
|
# if gp[i] > threshold or gn[i] > threshold: # change detected!
|
|
# ta = np.append(ta, i) # alarm index
|
|
# tai = np.append(tai, tap if gp[i] > threshold else tan) # start
|
|
# gp[i], gn[i] = 0, 0 # reset alarm
|
|
# # THE CLASSICAL CUSUM ALGORITHM ENDS HERE
|
|
|
|
# Find changes (online form)
|
|
# add gp_real and gn_rel to track the actual amplitude of the transient
|
|
for i in range(1, x.size):
|
|
s = x[i] - x[i-1]
|
|
gp[i] = gp[i-1] + s - drift # cumulative sum for + change
|
|
gp_real[i] = gp_real[i-1] + s
|
|
gn[i] = gn[i-1] - s - drift # cumulative sum for - change
|
|
gn_real[i] = gn_real[i-1] - s
|
|
|
|
if gp[i] < 0:
|
|
gp[i], gp_real[i], tap = 0, 0, i
|
|
if gn[i] < 0:
|
|
gn[i], gn_real[i], tan = 0, 0, i
|
|
if gp_real[i] > threshold or gn_real[i] > threshold: # change detected!
|
|
ta = np.append(ta, i) # alarm index
|
|
tai = np.append(tai, tap if gp_real[i] > threshold else tan) # start
|
|
gp[i], gn[i] = 0, 0 # reset alarm
|
|
gp_real[i], gn_real[i] = 0, 0
|
|
# THE CLASSICAL CUSUM ALGORITHM ENDS HERE
|
|
|
|
# Estimation of when the change ends (offline form)
|
|
if tai.size and ending:
|
|
tai2, _, _ = detect_cusum(time[::-1],x[::-1], threshold, drift, ending=False, show=False)
|
|
taf = x.size - tai2[::-1] - 1
|
|
# Eliminate repeated changes, changes that have the same beginning
|
|
tai, ind = np.unique(tai, return_index=True)
|
|
ta = ta[ind]
|
|
# taf = np.unique(taf, return_index=False) # corect later
|
|
if tai.size != taf.size:
|
|
if tai.size < taf.size:
|
|
taf = taf[[np.argmax(taf >= i) for i in ta]]
|
|
else:
|
|
ind = [np.argmax(i >= ta[::-1])-1 for i in taf]
|
|
ta = ta[ind]
|
|
tai = tai[ind]
|
|
# Delete intercalated changes (the ending of the change is after
|
|
# the beginning of the next change)
|
|
ind = taf[:-1] - tai[1:] > 0
|
|
if ind.any():
|
|
ta = ta[~np.append(False, ind)]
|
|
tai = tai[~np.append(False, ind)]
|
|
taf = taf[~np.append(ind, False)]
|
|
# Amplitude of changes
|
|
amp = x[taf] - x[tai]
|
|
if show:
|
|
_plot(time, x, threshold, drift, ending, ax, ta, tai, taf,
|
|
gp_real, gn_real)
|
|
|
|
return tai, taf, amp
|
|
|
|
|
|
def _plot(time, x, threshold, drift, ending, ax, ta, tai, taf, gp, gn):
|
|
"""Plot results of the detect_cusum function, see its help."""
|
|
|
|
try:
|
|
import matplotlib.pyplot as plt
|
|
except ImportError:
|
|
print('matplotlib is not available.')
|
|
else:
|
|
if ax is None:
|
|
fig, ax = plt.subplots(1, 1, figsize=(8,4), dpi=100, facecolor='w', edgecolor='k')
|
|
|
|
ax.plot(time , x, 'k-', lw=2)
|
|
if len(ta):
|
|
ax.plot(time[tai], x[tai], '>', mfc='r', mec='r', ms=10,
|
|
label='Start')
|
|
|
|
if ending:
|
|
ax.plot(time[taf], x[taf], '<', mfc='r', mec='r', ms=10,
|
|
label='End')
|
|
ax.legend(loc='best', framealpha=.5, numpoints=1)
|
|
ax.set_xlim([time[0], time[-1]])
|
|
ax.set_xlabel('Time [s]', fontsize=14)
|
|
ax.set_ylabel('Pressure Head [m]', fontsize=14)
|
|
ymin, ymax = x[np.isfinite(x)].min(), x[np.isfinite(x)].max()
|
|
yrange = ymax - ymin if ymax > ymin else 1
|
|
ax.set_ylim(ymin - 0.1*yrange, ymax + 0.1*yrange)
|
|
|
|
# ax2.plot(time, gp, 'y-', label='+')
|
|
# ax2.plot(time, gn, 'm-', label='-')
|
|
# ax2.set_xlim([time[0], time[-1]])
|
|
# ax2.set_xlabel('Time', fontsize=14)
|
|
# ax2.set_ylim(-0.01*threshold, 1.1*threshold)
|
|
# ax2.axhline(threshold, color='r')
|
|
# ax2.set_ylabel('Cumulative Sum (c)', fontsize=14)
|
|
# # ax2.set_title('Time series of the cumulative sums of ' +
|
|
# # 'positive and negative changes')
|
|
# ax2.set_title('(b)', fontsize=14)
|
|
# ax2.legend(loc='best', framealpha=.5, numpoints=1)
|
|
# plt.tight_layout()
|
|
plt.show()
|
|
|
|
|
|
|