diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py
index 62318942da..5e8897a208 100644
--- a/pvlib/clearsky.py
+++ b/pvlib/clearsky.py
@@ -579,9 +579,10 @@ def _calc_stats(data, samples_per_window, sample_interval, H):
"""
data_mean = data.values[H].mean(axis=0)
- data_mean = _to_centered_series(data_mean, data.index, samples_per_window)
+ data_mean = _to_centered_series(data_mean, data.index, samples_per_window,
+ H)
data_max = data.values[H].max(axis=0)
- data_max = _to_centered_series(data_max, data.index, samples_per_window)
+ data_max = _to_centered_series(data_max, data.index, samples_per_window, H)
# shift to get forward difference, .diff() is backward difference instead
data_diff = data.diff().shift(-1)
data_slope = data_diff / sample_interval
@@ -594,30 +595,48 @@ def _slope_nstd_windowed(slopes, data, H, samples_per_window, sample_interval):
with np.errstate(divide='ignore', invalid='ignore'):
nstd = slopes[H[:-1, ]].std(ddof=1, axis=0) \
/ data.values[H].mean(axis=0)
- return _to_centered_series(nstd, data.index, samples_per_window)
+ return _to_centered_series(nstd, data.index, samples_per_window, H)
def _max_diff_windowed(data, H, samples_per_window):
raw = np.diff(data)
raw = np.abs(raw[H[:-1, ]]).max(axis=0)
- return _to_centered_series(raw, data.index, samples_per_window)
+ return _to_centered_series(raw, data.index, samples_per_window, H)
def _line_length_windowed(data, H, samples_per_window,
sample_interval):
raw = np.sqrt(np.diff(data)**2. + sample_interval**2.)
raw = np.sum(raw[H[:-1, ]], axis=0)
- return _to_centered_series(raw, data.index, samples_per_window)
+ return _to_centered_series(raw, data.index, samples_per_window, H)
-def _to_centered_series(vals, idx, samples_per_window):
- vals = np.pad(vals, ((0, len(idx) - len(vals)),), mode='constant',
- constant_values=np.nan)
- shift = samples_per_window // 2 # align = 'center' only
- return pd.Series(index=idx, data=vals).shift(shift)
+def _to_centered_series(vals, idx, samples_per_window, H):
+ # Get center of interval using zero-indexing, round down to nearest
+ # index if there is an even number of rows
+ if samples_per_window % 2 == 0:
+ center_row = samples_per_window//2 - 1
+ else:
+ center_row = samples_per_window//2
+ try:
+ # Maintain tz that is stripped when idx is put in H
+ if idx.tz is not None:
+ c = pd.DatetimeIndex(idx.values[H][center_row, :],
+ tz='UTC').tz_convert(idx.tz)
+ else:
+ c = idx.values[H][center_row, :]
+ # If the index is a range
+ except AttributeError:
+ c = idx.values[H][center_row, :]
-def _clear_sample_index(clear_windows, samples_per_window, align, H):
+ # Assign summary values for each interval to the indices of the center row
+ centered = pd.Series(index=idx, dtype='object')
+ centered.loc[c] = vals
+ return centered
+
+
+def _clear_sample_index(clear_windows, samples_per_window, gaps, H, align):
"""
Returns indices of clear samples in clear windows
"""
@@ -635,12 +654,22 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H):
# shift = - (samples_per_window // 2)
# else:
# shift = 0
- shift = -(samples_per_window // 2)
- idx = clear_windows.shift(shift)
+
+ # When samples_per_window is even, the row on which the window is
+ # centered is samples_per_window // 2 - 1, not samples_per_window // 2
+ if samples_per_window % 2 == 0:
+ shift = -(samples_per_window // 2 - 1)
+ else:
+ shift = -(samples_per_window // 2)
+ clear_cols = clear_windows.shift(shift)
# drop rows at the end corresponding to windows past the end of data
- idx = idx.drop(clear_windows.index[1 - samples_per_window:])
- idx = idx.astype(bool) # shift changed type to object
- clear_samples = np.unique(H[:, idx])
+ clear_cols = clear_cols.drop(clear_windows.index[1 - samples_per_window:])
+ clear_cols = clear_cols.astype(bool) # shift changed type to object
+ # Boolean mask, True for window columns that have no temporal gaps
+ gap_cols = [True if c not in gaps else False for c in range(0,
+ len(clear_windows) - (samples_per_window - 1))]
+ mask = np.logical_and(clear_cols, gap_cols)
+ clear_samples = np.unique(H[:, mask])
return clear_samples
@@ -756,8 +785,6 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False,
------
ValueError
If measured is not a Series and times is not provided
- NotImplementedError
- If timestamps are not equally spaced
References
----------
@@ -784,6 +811,11 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False,
* option to return individual test components and clearsky scaling
parameter
* uses centered windows (Matlab function uses left-aligned windows)
+
+ 2023-03-24 - This algorithm accepts data with skipped or missing
+ timestamps. The DatetimeIndex (either times or index of measured)
+ provided must still be regular, i.e. the intervals between points
+ are equal in length except where data is missing.
"""
if times is None:
@@ -803,6 +835,13 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False,
if not isinstance(clearsky, pd.Series):
clear = pd.Series(clearsky, index=times)
+ # This clause is designed to address cases where measured has missing time
+ # steps - if this is the case, clear should be set to have the same
+ # missing time intervals as measured. Not doing this may cause issues with
+ # arrays of different lengths when evaluating comparison criteria and
+ # when indexing the Hankel matrix to construct clear_samples
+ elif len(clearsky.index) != len(times):
+ clear = pd.Series(clearsky, index=times)
else:
clear = clearsky
@@ -828,6 +867,19 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False,
H = hankel(np.arange(samples_per_window),
np.arange(samples_per_window-1, len(times)))
+ # Identify intervals with missing indices
+ time_h = times.values[H]
+ # Get maximum time step (in minutes) between consecutive Timestamps
+ # for each column
+ time_h_diff_max = np.max(np.diff(time_h, axis=0) /
+ np.timedelta64(1, '60s'), axis=0)
+ # Get column indices where max time step > sample_interval
+ gaps = np.ravel(np.argwhere(time_h_diff_max > sample_interval))
+ # Get column indices where at least one of the values is a NaN
+ gaps = set().union(*[
+ gaps, np.ravel(np.argwhere(np.isnan(meas\
+ .values[H].mean(axis=0))))])
+
# calculate measurement statistics
meas_mean, meas_max, meas_slope_nstd, meas_slope = _calc_stats(
meas, samples_per_window, sample_interval, H)
@@ -853,26 +905,55 @@ def detect_clearsky(measured, clearsky, times=None, infer_limits=False,
line_diff = meas_line_length - clear_line_length
slope_max_diff = _max_diff_windowed(
meas - scaled_clear, H, samples_per_window)
+
# evaluate comparison criteria
- c1 = np.abs(meas_mean - alpha*clear_mean) < mean_diff
- c2 = np.abs(meas_max - alpha*clear_max) < max_diff
- c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length)
+ # Condition 1
+ c1 = np.abs(meas_mean - alpha*clear_mean)
+ c1_where_nan = c1[c1.isna()].index
+ c1 = c1 < mean_diff
+ # Condition 2
+ c2 = np.abs(meas_max - alpha*clear_max)
+ c2_where_nan = c2[c2.isna()].index
+ c2 = c2 < max_diff
+ # Condition 3a & 3b
+ c3_where_nan = line_diff[line_diff.isna()].index
+ c3a = line_diff > lower_line_length
+ c3b = line_diff < upper_line_length
+ c3 = np.logical_and(c3a, c3b)
+ # Condition 4
+ c4_where_nan = meas_slope_nstd[meas_slope_nstd.isna()].index
c4 = meas_slope_nstd < var_diff
+ # Condition 5
+ c5_where_nan = slope_max_diff[slope_max_diff.isna()].index
c5 = slope_max_diff < slope_dev
- c6 = (clear_mean != 0) & ~np.isnan(clear_mean)
- clear_windows = c1 & c2 & c3 & c4 & c5 & c6
+ # Condition 6
+ c6 = clear_mean != 0
+ c6_where_nan = clear_mean[clear_mean.isna()].index
+
+ # np.logical_and() maintains NaNs
+ clear_windows = pd.Series(
+ index=times, data=np.logical_and.reduce([c1, c2, c3, c4, c5, c6]))
+ windows_where_nan = pd.DatetimeIndex(set().union(*[
+ c1_where_nan,c2_where_nan, c3_where_nan, c4_where_nan, c5_where_nan,
+ c6_where_nan]))
+ clear_windows[windows_where_nan] = np.nan
# create array to return
- clear_samples = np.full_like(meas, False, dtype='bool')
+ # dtype='bool' removed because it typecast NaNs to False values
+ clear_samples = np.full_like(meas, False)
# find the samples contained in any window classified as clear
- idx = _clear_sample_index(clear_windows, samples_per_window, 'center',
- H)
+ idx = _clear_sample_index(clear_windows, samples_per_window, gaps, H,
+ 'center')
clear_samples[idx] = True
+ # Assign NaN to datapoints that were originally NaNs
+ where_nan = np.argwhere(np.isnan(meas.values))
+ clear_samples[where_nan] = np.nan
+
# find a new alpha
previous_alpha = alpha
- clear_meas = meas[clear_samples]
- clear_clear = clear[clear_samples]
+ clear_meas = meas[idx]
+ clear_clear = clear[idx]
def rmse(alpha):
return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2))
diff --git a/pvlib/data/detect_clearsky_data_missing1.csv b/pvlib/data/detect_clearsky_data_missing1.csv
new file mode 100644
index 0000000000..333b3b2a90
--- /dev/null
+++ b/pvlib/data/detect_clearsky_data_missing1.csv
@@ -0,0 +1,64 @@
+# latitude: 35.04
+# longitude: -106.62
+# elevation: 1619.0
+,CS,GHI,Clear or not
+2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0
+2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0
+2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0
+2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0
+2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0
+2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0
+2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0
+2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0
+2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0
+2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0
+2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0
+2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0
+2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0
+2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0
+2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0
+2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0
+2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0
+2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0
+2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0
+2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0
+2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0
+2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0
+2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0
+2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0
+2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0
+2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0
+2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0
+2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0
+2020-01-01 11:28:00-07:00,565.5572210969589,537.2793600421109,1.0
+2020-01-01 11:29:00-07:00,566.336220321224,538.0194093051628,1.0
+2020-01-01 11:31:00-07:00,567.8385900341821,539.446660532473,1.0
+2020-01-01 11:32:00-07:00,568.5619273418005,540.1338309747105,1.0
+2020-01-01 11:33:00-07:00,569.2666778570792,540.8033439642253,1.0
+2020-01-01 11:34:00-07:00,569.9528257924741,541.4551845028503,1.0
+2020-01-01 11:35:00-07:00,570.6203557904192,542.0893380008982,1.0
+2020-01-01 11:36:00-07:00,571.2692529152785,542.7057902695145,1.0
+2020-01-01 11:37:00-07:00,571.8995026579759,543.304527525077,1.0
+2020-01-01 11:38:00-07:00,572.5110909343163,543.8855363876004,1.0
+2020-01-01 11:39:00-07:00,573.1040040834109,544.4488038792404,1.0
+2020-01-01 11:40:00-07:00,573.6782292505169,544.994317787991,1.0
+2020-01-01 11:41:00-07:00,574.2337528522768,545.522065209663,1.0
+2020-01-01 11:42:00-07:00,574.7705628911892,546.0320347466297,1.0
+2020-01-01 11:43:00-07:00,575.2886474013371,546.5242150312703,1.0
+2020-01-01 11:44:00-07:00,575.7879948384899,546.9985950965654,1.0
+2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0
+2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0
+2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0
+2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0
+2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0
+2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0
+2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0
+2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0
+2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0
+2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0
+2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0
+2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0
+2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0
+2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0
+2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0
+2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0
diff --git a/pvlib/data/detect_clearsky_data_missing2.csv b/pvlib/data/detect_clearsky_data_missing2.csv
new file mode 100644
index 0000000000..88e9367af0
--- /dev/null
+++ b/pvlib/data/detect_clearsky_data_missing2.csv
@@ -0,0 +1,64 @@
+# latitude: 35.04
+# longitude: -106.62
+# elevation: 1619.0
+,CS,GHI,Clear or not
+2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0
+2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0
+2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0
+2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0
+2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0
+2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0
+2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0
+2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0
+2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0
+2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0
+2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0
+2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0
+2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0
+2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0
+2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0
+2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0
+2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0
+2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0
+2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0
+2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0
+2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0
+2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0
+2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0
+2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0
+2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0
+2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0
+2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0
+2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0
+2020-01-01 11:28:00-07:00,565.5572210969589,537.2793600421109,1.0
+2020-01-01 11:29:00-07:00,566.336220321224,538.0194093051628,1.0
+2020-01-01 11:31:00-07:00,567.8385900341821,539.446660532473,0.0
+2020-01-01 11:32:00-07:00,568.5619273418005,540.1338309747105,0.0
+2020-01-01 11:33:00-07:00,569.2666778570792,540.8033439642253,0.0
+2020-01-01 11:34:00-07:00,569.9528257924741,541.4551845028503,0.0
+2020-01-01 11:35:00-07:00,570.6203557904192,300.0,0.0
+2020-01-01 11:36:00-07:00,571.2692529152785,200.0,0.0
+2020-01-01 11:37:00-07:00,571.8995026579759,250.0,0.0
+2020-01-01 11:38:00-07:00,572.5110909343163,310.0,0.0
+2020-01-01 11:39:00-07:00,573.1040040834109,330.0,0.0
+2020-01-01 11:40:00-07:00,573.6782292505169,544.994317787991,1.0
+2020-01-01 11:41:00-07:00,574.2337528522768,545.522065209663,1.0
+2020-01-01 11:42:00-07:00,574.7705628911892,546.0320347466297,1.0
+2020-01-01 11:43:00-07:00,575.2886474013371,546.5242150312703,1.0
+2020-01-01 11:44:00-07:00,575.7879948384899,546.9985950965654,1.0
+2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0
+2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0
+2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0
+2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0
+2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0
+2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0
+2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0
+2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0
+2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0
+2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0
+2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0
+2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0
+2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0
+2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0
+2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0
+2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0
diff --git a/pvlib/data/detect_clearsky_data_missing3.csv b/pvlib/data/detect_clearsky_data_missing3.csv
new file mode 100644
index 0000000000..7b703cb6b2
--- /dev/null
+++ b/pvlib/data/detect_clearsky_data_missing3.csv
@@ -0,0 +1,50 @@
+# latitude: 35.04
+# longitude: -106.62
+# elevation: 1619.0
+,CS,GHI,Clear or not
+2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0
+2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0
+2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0
+2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0
+2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0
+2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0
+2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0
+2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0
+2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0
+2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0
+2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0
+2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0
+2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0
+2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0
+2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0
+2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0
+2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0
+2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0
+2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0
+2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0
+2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0
+2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0
+2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0
+2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0
+2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0
+2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0
+2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0
+2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0
+2020-01-01 11:28:00-07:00,565.5572210969589,537.2793600421109,1.0
+2020-01-01 11:29:00-07:00,566.336220321224,538.0194093051628,1.0
+2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0
+2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0
+2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0
+2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0
+2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0
+2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0
+2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0
+2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0
+2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0
+2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0
+2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0
+2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0
+2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0
+2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0
+2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0
+2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0
diff --git a/pvlib/data/detect_clearsky_data_nans1.csv b/pvlib/data/detect_clearsky_data_nans1.csv
new file mode 100644
index 0000000000..d4aabff89e
--- /dev/null
+++ b/pvlib/data/detect_clearsky_data_nans1.csv
@@ -0,0 +1,65 @@
+# latitude: 35.04
+# longitude: -106.62
+# elevation: 1619.0
+,CS,GHI,Clear or not
+2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0
+2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0
+2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0
+2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0
+2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0
+2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0
+2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0
+2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0
+2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0
+2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0
+2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0
+2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0
+2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0
+2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0
+2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0
+2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0
+2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0
+2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0
+2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0
+2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0
+2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0
+2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0
+2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0
+2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0
+2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0
+2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0
+2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0
+2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0
+2020-01-01 11:28:00-07:00,565.5572210969589,537.2793600421109,1.0
+2020-01-01 11:29:00-07:00,566.336220321224,538.0194093051628,1.0
+2020-01-01 11:30:00-07:00,567.0966821472473,,
+2020-01-01 11:31:00-07:00,567.8385900341821,539.446660532473,1.0
+2020-01-01 11:32:00-07:00,568.5619273418005,540.1338309747105,1.0
+2020-01-01 11:33:00-07:00,569.2666778570792,540.8033439642253,1.0
+2020-01-01 11:34:00-07:00,569.9528257924741,541.4551845028503,1.0
+2020-01-01 11:35:00-07:00,570.6203557904192,542.0893380008982,1.0
+2020-01-01 11:36:00-07:00,571.2692529152785,542.7057902695145,1.0
+2020-01-01 11:37:00-07:00,571.8995026579759,543.304527525077,1.0
+2020-01-01 11:38:00-07:00,572.5110909343163,543.8855363876004,1.0
+2020-01-01 11:39:00-07:00,573.1040040834109,544.4488038792404,1.0
+2020-01-01 11:40:00-07:00,573.6782292505169,544.994317787991,1.0
+2020-01-01 11:41:00-07:00,574.2337528522768,545.522065209663,1.0
+2020-01-01 11:42:00-07:00,574.7705628911892,546.0320347466297,1.0
+2020-01-01 11:43:00-07:00,575.2886474013371,546.5242150312703,1.0
+2020-01-01 11:44:00-07:00,575.7879948384899,546.9985950965654,1.0
+2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0
+2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0
+2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0
+2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0
+2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0
+2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0
+2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0
+2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0
+2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0
+2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0
+2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0
+2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0
+2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0
+2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0
+2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0
+2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0
diff --git a/pvlib/data/detect_clearsky_data_nans2.csv b/pvlib/data/detect_clearsky_data_nans2.csv
new file mode 100644
index 0000000000..5f91388b73
--- /dev/null
+++ b/pvlib/data/detect_clearsky_data_nans2.csv
@@ -0,0 +1,65 @@
+# latitude: 35.04
+# longitude: -106.62
+# elevation: 1619.0
+,CS,GHI,Clear or not
+2020-01-01 11:00:00-07:00,536.302172709558,509.4870640740801,1.0
+2020-01-01 11:01:00-07:00,537.591516239233,510.7119404272713,1.0
+2020-01-01 11:02:00-07:00,538.8629773699926,511.91982850149293,1.0
+2020-01-01 11:03:00-07:00,540.1165267379411,513.110700401044,1.0
+2020-01-01 11:04:00-07:00,541.3521354397624,514.2845286677742,1.0
+2020-01-01 11:05:00-07:00,542.5697750193947,515.441286268425,1.0
+2020-01-01 11:06:00-07:00,543.7694182656351,516.5809473523533,1.0
+2020-01-01 11:07:00-07:00,544.9510360161104,517.7034842153048,1.0
+2020-01-01 11:08:00-07:00,546.1146019716558,518.808871873073,1.0
+2020-01-01 11:09:00-07:00,547.2600894760479,519.8970850022455,1.0
+2020-01-01 11:10:00-07:00,548.3874723122931,520.9680986966785,1.0
+2020-01-01 11:11:00-07:00,549.4967247102148,522.021888474704,1.0
+2020-01-01 11:12:00-07:00,550.5878213431366,523.0584302759797,1.0
+2020-01-01 11:13:00-07:00,551.6607373248604,524.0777004586174,1.0
+2020-01-01 11:14:00-07:00,552.7154482165139,525.0796758056882,1.0
+2020-01-01 11:15:00-07:00,553.7519300136697,526.0643335129862,1.0
+2020-01-01 11:16:00-07:00,554.7701591532945,527.0316511956297,1.0
+2020-01-01 11:17:00-07:00,555.7701131753,527.981607516535,1.0
+2020-01-01 11:18:00-07:00,556.7517680497913,528.9141796473017,1.0
+2020-01-01 11:19:00-07:00,557.7151022068979,529.829347096553,1.0
+2020-01-01 11:20:00-07:00,558.6600938303893,530.7270891388698,1.0
+2020-01-01 11:21:00-07:00,559.5867215409612,531.6073854639131,1.0
+2020-01-01 11:22:00-07:00,560.4949643935382,532.4702161738613,1.0
+2020-01-01 11:23:00-07:00,561.3848018765772,533.3155617827483,1.0
+2020-01-01 11:24:00-07:00,562.2562139096694,534.1434032141859,1.0
+2020-01-01 11:25:00-07:00,563.1091808493264,534.9537218068601,1.0
+2020-01-01 11:26:00-07:00,563.9436834785143,535.7464993045886,1.0
+2020-01-01 11:27:00-07:00,564.7597030125947,536.5217178619649,1.0
+2020-01-01 11:28:00-07:00,565.5572210969589,,
+2020-01-01 11:29:00-07:00,566.336220321224,,
+2020-01-01 11:30:00-07:00,567.0966821472473,538.7418480398849,0.0
+2020-01-01 11:31:00-07:00,567.8385900341821,539.446660532473,0.0
+2020-01-01 11:32:00-07:00,568.5619273418005,540.1338309747105,0.0
+2020-01-01 11:33:00-07:00,569.2666778570792,540.8033439642253,0.0
+2020-01-01 11:34:00-07:00,569.9528257924741,300.0,0.0
+2020-01-01 11:35:00-07:00,570.6203557904192,200.0,0.0
+2020-01-01 11:36:00-07:00,571.2692529152785,250.0,0.0
+2020-01-01 11:37:00-07:00,571.8995026579759,310.0,0.0
+2020-01-01 11:38:00-07:00,572.5110909343163,330.0,0.0
+2020-01-01 11:39:00-07:00,573.1040040834109,544.4488038792404,1.0
+2020-01-01 11:40:00-07:00,573.6782292505169,544.994317787991,1.0
+2020-01-01 11:41:00-07:00,574.2337528522768,545.522065209663,1.0
+2020-01-01 11:42:00-07:00,574.7705628911892,546.0320347466297,1.0
+2020-01-01 11:43:00-07:00,575.2886474013371,546.5242150312703,1.0
+2020-01-01 11:44:00-07:00,575.7879948384899,546.9985950965654,1.0
+2020-01-01 11:45:00-07:00,576.2685940834351,547.4551643792634,1.0
+2020-01-01 11:46:00-07:00,576.7304344361013,547.8939127142962,1.0
+2020-01-01 11:47:00-07:00,577.1735056190306,548.3148303380791,1.0
+2020-01-01 11:48:00-07:00,577.5977977761287,548.7179078873222,1.0
+2020-01-01 11:49:00-07:00,578.0033014716739,549.1031363980902,1.0
+2020-01-01 11:50:00-07:00,578.3900076930416,549.4705073083895,1.0
+2020-01-01 11:51:00-07:00,578.7579080863377,549.8200126820208,1.0
+2020-01-01 11:52:00-07:00,579.1069939850056,550.1516442857553,1.0
+2020-01-01 11:53:00-07:00,579.4372578890877,550.4653949946334,1.0
+2020-01-01 11:54:00-07:00,579.7486924650192,550.7612578417682,1.0
+2020-01-01 11:55:00-07:00,580.0412908007218,551.0392262606856,1.0
+2020-01-01 11:56:00-07:00,580.3150464019636,551.2992940818655,1.0
+2020-01-01 11:57:00-07:00,580.569953194532,551.5414555348053,1.0
+2020-01-01 11:58:00-07:00,580.8060055234938,551.7657052473191,1.0
+2020-01-01 11:59:00-07:00,581.0231981526458,551.9720382450136,1.0
+2020-01-01 12:00:00-07:00,581.2215262659929,552.1604499526933,1.0
diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py
index c2ef607f0f..3fc3dbf58d 100644
--- a/pvlib/tests/test_clearsky.py
+++ b/pvlib/tests/test_clearsky.py
@@ -8,7 +8,7 @@
import pytest
from numpy.testing import assert_allclose
-from .conftest import assert_frame_equal, assert_series_equal
+from .conftest import assert_frame_equal, assert_series_equal, DATA_DIR
from pvlib.location import Location
from pvlib import clearsky
@@ -16,9 +16,6 @@
from pvlib import atmosphere
from pvlib import irradiance
-from .conftest import DATA_DIR
-
-
def test_ineichen_series():
times = pd.date_range(start='2014-06-24', end='2014-06-25', freq='3h',
tz='America/Phoenix')
@@ -656,21 +653,89 @@ def test_detect_clearsky_arrays(detect_clearsky_data):
assert (clear_samples == expected['Clear or not'].values).all()
-def test_detect_clearsky_irregular_times(detect_clearsky_data):
- expected, cs = detect_clearsky_data
- times = cs.index.values.copy()
- times[0] += 10**9
- times = pd.DatetimeIndex(times)
- with pytest.raises(NotImplementedError):
- clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values,
- times, 10)
+def test_detect_clearsky_missing_index1():
+ # Test for an isolated missing index
+ data_file = DATA_DIR / 'detect_clearsky_data_missing1.csv'
+ data = pd.read_csv(
+ data_file, index_col=0, parse_dates=True, comment='#')
+ meas, cs, expected = data['GHI'], data['CS'], data['Clear or not']
+ clear_samples = clearsky.detect_clearsky(
+ meas, cs)
+ assert_series_equal(expected, clear_samples, check_dtype=False,
+ check_names=False)
-def test_detect_clearsky_missing_index(detect_clearsky_data):
- expected, cs = detect_clearsky_data
- with pytest.raises(ValueError):
- clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values)
+def test_detect_clearsky_missing_index2():
+ # Test for a missing index followed by an overcast period
+ data_file = DATA_DIR / 'detect_clearsky_data_missing2.csv'
+ data = pd.read_csv(
+ data_file, index_col=0, parse_dates=True, comment='#')
+ meas, cs, expected = data['GHI'], data['CS'], data['Clear or not']
+ clear_samples = clearsky.detect_clearsky(
+ meas, cs)
+ assert_series_equal(expected, clear_samples, check_dtype=False,
+ check_names=False)
+
+def test_detect_clearsky_missing_index3():
+ # Test for 15 consecutive missing indices
+ data_file = DATA_DIR / 'detect_clearsky_data_missing3.csv'
+ data = pd.read_csv(
+ data_file, index_col=0, parse_dates=True, comment='#')
+ meas, cs, expected = data['GHI'], data['CS'], data['Clear or not']
+ clear_samples = clearsky.detect_clearsky(
+ meas, cs)
+ assert_series_equal(expected, clear_samples, check_dtype=False,
+ check_names=False)
+
+def test_detect_clearsky_nans1():
+ # Test for 1 NaN value - should mark as NaN
+ data_file = DATA_DIR / 'detect_clearsky_data_nans1.csv'
+ data = pd.read_csv(
+ data_file, index_col=0, parse_dates=True, comment='#')
+ meas, cs, expected = data['GHI'], data['CS'], data['Clear or not']
+ clear_samples = clearsky.detect_clearsky(
+ meas, cs)
+ assert_series_equal(expected, clear_samples, check_dtype=False,
+ check_names=False)
+
+
+def test_detect_clearsky_nans2():
+ # Test for 1 NaN value - should mark as NaN; TODO: say how nans2 differs
+ data_file = DATA_DIR / 'detect_clearsky_data_nans2.csv'
+ data = pd.read_csv(
+ data_file, index_col=0, parse_dates=True, comment='#')
+ meas, cs, expected = data['GHI'], data['CS'], data['Clear or not']
+ clear_samples = clearsky.detect_clearsky(
+ meas, cs)
+ assert_series_equal(expected, clear_samples, check_dtype=False,
+ check_names=False)
+
+def test_detect_clearsky_diff_index_lengths(detect_clearsky_data):
+ '''
+ Intended to test the ``elif`` branch of the following if/elif/else clauses
+
+ if not isinstance(clear_sky, pd.Series):
+ clear = pd.Series(clear_sky, index=times)
+ # This clause is designed to address cases where measured has missing time
+ # steps - if this is the case, clear should be set to have the same
+ # missing time intervals as measured. Not doing this may cause issues with
+ # arrays of different lengths when evaluating comparison criteria and
+ # when indexing the Hankel matrix to construct clear_samples
+ elif len(clear_sky.index) != len(times):
+ clear = pd.Series(clear_sky, index=times)
+ else:
+ clear = clear_sky
+ '''
+ expected, cs = detect_clearsky_data
+ expected.drop(index=expected.index[10], inplace=True)
+ clear_samples = clearsky.detect_clearsky(
+ expected['GHI'], cs['ghi'], times=expected.index,
+ window_length=10)
+ new_expected = np.array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
+ 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
+ 1., 1., 1., 0., 0., 0., 0.])
+ assert (clear_samples.values == new_expected).all()
def test_detect_clearsky_not_enough_data(detect_clearsky_data):
expected, cs = detect_clearsky_data
@@ -705,7 +770,7 @@ def test__line_length_windowed(detect_clearsky_helper_data):
expected['line_length'] = sqt + sqt.shift(-1)
result = clearsky._line_length_windowed(
x, H, samples_per_window, sample_interval)
- assert_series_equal(result, expected['line_length'])
+ assert_series_equal(result, expected['line_length'], check_dtype=False)
def test__max_diff_windowed(detect_clearsky_helper_data):
@@ -714,7 +779,7 @@ def test__max_diff_windowed(detect_clearsky_helper_data):
expected['max_diff'] = pd.Series(
data=[np.nan, 3., 5., 7., 9., 11., np.nan], index=x.index)
result = clearsky._max_diff_windowed(x, H, samples_per_window)
- assert_series_equal(result, expected['max_diff'])
+ assert_series_equal(result, expected['max_diff'], check_dtype=False)
def test__calc_stats(detect_clearsky_helper_data):
@@ -736,10 +801,11 @@ def test__calc_stats(detect_clearsky_helper_data):
result = clearsky._calc_stats(
x, samples_per_window, sample_interval, H)
res_mean, res_max, res_slope_nstd, res_slope = result
- assert_series_equal(res_mean, expected['mean'])
- assert_series_equal(res_max, expected['max'])
- assert_series_equal(res_slope_nstd, expected['slope_nstd'])
- assert_series_equal(res_slope, expected['slope'])
+ assert_series_equal(res_mean, expected['mean'], check_dtype=False)
+ assert_series_equal(res_max, expected['max'], check_dtype=False)
+ assert_series_equal(res_slope_nstd, expected['slope_nstd'],
+ check_dtype=False)
+ assert_series_equal(res_slope, expected['slope'], check_dtype=False)
def test_bird():
diff --git a/pvlib/tests/test_tools.py b/pvlib/tests/test_tools.py
index 583141a726..f695cea61c 100644
--- a/pvlib/tests/test_tools.py
+++ b/pvlib/tests/test_tools.py
@@ -1,9 +1,8 @@
import pytest
-
from pvlib import tools
import numpy as np
import pandas as pd
-
+from .conftest import DATA_DIR
@pytest.mark.parametrize('keys, input_dict, expected', [
(['a', 'b'], {'a': 1, 'b': 2, 'c': 3}, {'a': 1, 'b': 2}),
@@ -91,6 +90,15 @@ def test__golden_sect_DataFrame_nans():
assert np.allclose(x, expected, atol=1e-8, equal_nan=True)
+def test_get_sample_intervals():
+ data_file = DATA_DIR / 'detect_clearsky_data_missing3.csv'
+ data = pd.read_csv(
+ data_file, index_col=0, parse_dates=True, comment='#')
+ sample_interval, samples_per_window = tools._get_sample_intervals(
+ data.index, 10)
+ assert np.allclose(sample_interval, 1)
+ assert np.allclose(samples_per_window, 10)
+
def test_degrees_to_index_1():
"""Test that _degrees_to_index raises an error when something other than
'latitude' or 'longitude' is passed."""
diff --git a/pvlib/tools.py b/pvlib/tools.py
index fe1b79a5f1..9d2775703b 100644
--- a/pvlib/tools.py
+++ b/pvlib/tools.py
@@ -389,20 +389,10 @@ def _get_sample_intervals(times, win_length):
sky detection functions
"""
deltas = np.diff(times.values) / np.timedelta64(1, '60s')
-
- # determine if we can proceed
- if times.inferred_freq and len(np.unique(deltas)) == 1:
- sample_interval = times[1] - times[0]
- sample_interval = sample_interval.seconds / 60 # in minutes
- samples_per_window = int(win_length / sample_interval)
- return sample_interval, samples_per_window
- else:
- message = (
- 'algorithm does not yet support unequal time intervals. consider '
- 'resampling your data and checking for gaps from missing '
- 'periods, leap days, etc.'
- )
- raise NotImplementedError(message)
+ vals, counts = np.unique(deltas, return_counts=True)
+ sample_interval = vals[np.argmax(counts)]
+ samples_per_window = int(win_length / sample_interval)
+ return sample_interval, samples_per_window
def _degrees_to_index(degrees, coordinate):