Skip to content

Commit 7a0767c

Browse files
committed
[ML] Improve forecasting for time series with step changes (#2591)
We model the level of a time series which we've observed having step discontinuities via a Markov process for forecasting. Specifically, we estimate the historical step size distribution and the distribution of the steps in time and as a function of the time series value. For this second part we use an online naive Bayes model to estimate the probability that at any given point in a roll out for forecasting we will get a step. This approach generally works well unless, when we roll out, we're in the tails of the distribution of values we've observed for the time series historically. In this case, our prediction probabilities are very sensitive to the tail behaviour of the distributions we fit to the time series values where we saw a step, and sometimes we predict far too many steps as a result. We can detect this case: when we're in the tails of the time series value distribution. This change does this and stops predicting changes in such cases, which avoids pathologies. This fixes #2466.
1 parent 8285074 commit 7a0767c

File tree

11 files changed

+309
-107
lines changed

11 files changed

+309
-107
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@
2828

2929
//=== Regressions
3030

31+
== {es} version 7.17.15
32+
33+
=== Enhancements
34+
35+
* Improve forecasting for time series with step changes. (See {ml-pull}2591[#2591],
36+
issue: {ml-issue}2466[#2466]).
37+
3138
== {es} version 7.17.13
3239

3340
=== Enhancements

include/maths/common/CNaiveBayes.h

Lines changed: 60 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -154,19 +154,43 @@ class MATHS_COMMON_EXPORT CNaiveBayesFeatureDensityFromPrior final
154154
TPriorPtr m_Prior;
155155
};
156156

157+
//! \brief Enables using custom feature weights in class prediction.
158+
class CNaiveBayesFeatureWeight {
159+
public:
160+
virtual ~CNaiveBayesFeatureWeight() = default;
161+
virtual void add(std::size_t class_, double logLikelihood) = 0;
162+
virtual double calculate() const = 0;
163+
};
164+
157165
//! \brief Implements a Naive Bayes classifier.
158166
class MATHS_COMMON_EXPORT CNaiveBayes {
159167
public:
168+
using TDoubleDoublePr = std::pair<double, double>;
160169
using TDoubleSizePr = std::pair<double, std::size_t>;
161170
using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
171+
using TDoubleSizePrVecDoublePr = std::pair<TDoubleSizePrVec, double>;
162172
using TDouble1Vec = core::CSmallVector<double, 1>;
163173
using TDouble1VecVec = std::vector<TDouble1Vec>;
164-
using TOptionalDouble = boost::optional<double>;
174+
using TFeatureWeightProvider = std::function<CNaiveBayesFeatureWeight&()>;
175+
176+
private:
177+
//! \brief All features have unit weight in class prediction.
178+
class CUnitFeatureWeight : public CNaiveBayesFeatureWeight {
179+
public:
180+
void add(std::size_t, double) override {}
181+
double calculate() const override { return 1.0; }
182+
};
183+
184+
class CUnitFeatureWeightProvider {
185+
public:
186+
CUnitFeatureWeight& operator()() const { return m_UnitWeight; }
187+
188+
private:
189+
mutable CUnitFeatureWeight m_UnitWeight;
190+
};
165191

166192
public:
167-
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
168-
double decayRate = 0.0,
169-
TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
193+
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate = 0.0);
170194
CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
171195
const SDistributionRestoreParams& params,
172196
core::CStateRestoreTraverser& traverser);
@@ -184,6 +208,9 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
184208
//! Check if any training data has been added.
185209
bool initialized() const;
186210

211+
//! Get the number of classes.
212+
std::size_t numberClasses() const;
213+
187214
//! This can be used to optionally seed the class counts
188215
//! with \p counts. These are added on to data class counts
189216
//! to compute the class posterior probabilities.
@@ -210,27 +237,53 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
210237
//!
211238
//! \param[in] n The number of class probabilities to estimate.
212239
//! \param[in] x The feature values.
240+
//! \param[in] weightProvider Computes a feature weight from the class
241+
//! conditional log-likelihood of the feature value. It should be in
242+
//! the range [0,1]. The smaller the value the less impact the feature
243+
//! has on class selection.
244+
//! \return The class probabilities and the minimum feature weight.
213245
//! \note \p x size should be equal to the number of features.
214246
//! A missing feature is indicated by passing an empty vector
215247
//! for that feature.
216-
TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const;
248+
TDoubleSizePrVecDoublePr highestClassProbabilities(
249+
std::size_t n,
250+
const TDouble1VecVec& x,
251+
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
217252

218253
//! Get the probability of the class labeled \p label for \p x.
219254
//!
220255
//! \param[in] label The label of the class of interest.
221256
//! \param[in] x The feature values.
257+
//! \param[in] weightProvider Computes a feature weight from the class
258+
//! conditional log-likelihood of the feature value. It should be in
259+
//! the range [0,1]. The smaller the value the less impact the feature
260+
//! has on class selection.
261+
//! \return The class probabilities and the minimum feature weight.
262+
//! It is small if the feature value is in the tails of the class conditional distributions.
222263
//! \note \p x size should be equal to the number of features.
223264
//! A missing feature is indicated by passing an empty vector
224265
//! for that feature.
225-
double classProbability(std::size_t label, const TDouble1VecVec& x) const;
266+
TDoubleDoublePr classProbability(std::size_t label,
267+
const TDouble1VecVec& x,
268+
const TFeatureWeightProvider& weightProvider =
269+
CUnitFeatureWeightProvider{}) const;
226270

227271
//! Get the probabilities of all the classes for \p x.
228272
//!
229273
//! \param[in] x The feature values.
274+
//! \param[in] weightProvider Computes a feature weight from the class
275+
//! conditional log-likelihood of the feature value. It should be in
276+
//! the range [0,1]. The smaller the value the less impact the feature
277+
//! has on class selection.
278+
//! \return The class probabilities and the minimum feature weight.
279+
//! A missing feature is indicated by passing an empty vector
280+
//! for that feature.
230281
//! \note \p x size should be equal to the number of features.
231282
//! A missing feature is indicated by passing an empty vector
232283
//! for that feature.
233-
TDoubleSizePrVec classProbabilities(const TDouble1VecVec& x) const;
284+
TDoubleSizePrVecDoublePr
285+
classProbabilities(const TDouble1VecVec& x,
286+
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
234287

235288
//! Debug the memory used by this object.
236289
void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const;
@@ -298,13 +351,6 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
298351
bool validate(const TDouble1VecVec& x) const;
299352

300353
private:
301-
//! It is not always appropriate to use features with very low
302-
//! probability in all classes to discriminate: the class choice
303-
//! will be very sensitive to the underlying conditional density
304-
//! model. This is a cutoff (for the minimum maximum class log
305-
//! likelihood) in order to use a feature.
306-
TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;
307-
308354
//! Controls the rate at which data are aged out.
309355
double m_DecayRate;
310356

lib/core/CStateRestoreTraverser.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ namespace core {
1818
CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) {
1919
}
2020

21-
CStateRestoreTraverser::~CStateRestoreTraverser() {
22-
}
21+
CStateRestoreTraverser::~CStateRestoreTraverser() = default;
2322

2423
bool CStateRestoreTraverser::haveBadState() const {
2524
return m_BadState;

lib/maths/common/CNaiveBayes.cc

Lines changed: 58 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@ namespace {
3636
const core::TPersistenceTag PRIOR_TAG{"a", "prior"};
3737
const core::TPersistenceTag CLASS_LABEL_TAG{"b", "class_label"};
3838
const core::TPersistenceTag CLASS_MODEL_TAG{"c", "class_model"};
39-
const core::TPersistenceTag MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG{
40-
"d", "min_max_likelihood_to_use_feature"};
4139
const core::TPersistenceTag COUNT_TAG{"e", "count"};
4240
const core::TPersistenceTag CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"f", "conditional_density_from_prior"};
4341
}
@@ -135,24 +133,26 @@ std::string CNaiveBayesFeatureDensityFromPrior::print() const {
135133
return result;
136134
}
137135

138-
CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
139-
double decayRate,
140-
TOptionalDouble minMaxLogLikelihoodToUseFeature)
141-
: m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature},
142-
m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
136+
CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate)
137+
: m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
143138
}
144139

145140
CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
146141
const SDistributionRestoreParams& params,
147142
core::CStateRestoreTraverser& traverser)
148143
: m_DecayRate{params.s_DecayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
149-
traverser.traverseSubLevel(std::bind(&CNaiveBayes::acceptRestoreTraverser, this,
150-
std::cref(params), std::placeholders::_1));
144+
// If we persist before we create class conditional distributions we will
145+
// not have anything to restore and hasSubLevel will be false. Trying to
146+
// restore sets the traverser state to bad so we need to handle explicitly.
147+
if (traverser.hasSubLevel() && traverser.traverseSubLevel([&](auto& traverser_) {
148+
return this->acceptRestoreTraverser(params, traverser_);
149+
}) == false) {
150+
traverser.setBadState();
151+
}
151152
}
152153

153154
CNaiveBayes::CNaiveBayes(const CNaiveBayes& other)
154-
: m_MinMaxLogLikelihoodToUseFeature{other.m_MinMaxLogLikelihoodToUseFeature},
155-
m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} {
155+
: m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} {
156156
for (const auto& class_ : other.m_ClassConditionalDensities) {
157157
m_ClassConditionalDensities.emplace(class_.first, class_.second);
158158
}
@@ -170,9 +170,6 @@ bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams& param
170170
std::ref(class_), std::cref(params),
171171
std::placeholders::_1)),
172172
m_ClassConditionalDensities.emplace(label, std::move(class_)))
173-
RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, double value,
174-
core::CStringUtils::stringToType(traverser.value(), value),
175-
m_MinMaxLogLikelihoodToUseFeature.reset(value))
176173
} while (traverser.next());
177174
return true;
178175
}
@@ -195,12 +192,6 @@ void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter& inserter) c
195192
std::ref(class_->second),
196193
std::placeholders::_1));
197194
}
198-
199-
if (m_MinMaxLogLikelihoodToUseFeature) {
200-
inserter.insertValue(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG,
201-
*m_MinMaxLogLikelihoodToUseFeature,
202-
core::CIEEE754::E_SinglePrecision);
203-
}
204195
}
205196

206197
CNaiveBayes& CNaiveBayes::operator=(const CNaiveBayes& other) {
@@ -215,26 +206,29 @@ void CNaiveBayes::swap(CNaiveBayes& other) {
215206
std::swap(m_DecayRate, other.m_DecayRate);
216207
m_Exemplar.swap(other.m_Exemplar);
217208
m_ClassConditionalDensities.swap(other.m_ClassConditionalDensities);
218-
std::swap(m_MinMaxLogLikelihoodToUseFeature, other.m_MinMaxLogLikelihoodToUseFeature);
219209
}
220210

221211
bool CNaiveBayes::initialized() const {
222-
return m_ClassConditionalDensities.size() > 0 &&
212+
return m_ClassConditionalDensities.empty() == false &&
223213
std::all_of(m_ClassConditionalDensities.begin(),
224214
m_ClassConditionalDensities.end(),
225215
[](const std::pair<std::size_t, CClass>& class_) {
226216
return class_.second.initialized();
227217
});
228218
}
229219

220+
std::size_t CNaiveBayes::numberClasses() const {
221+
return m_ClassConditionalDensities.size();
222+
}
223+
230224
void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec& counts) {
231225
for (const auto& count : counts) {
232226
m_ClassConditionalDensities.emplace(count.second, CClass{count.first});
233227
}
234228
}
235229

236230
void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& x) {
237-
if (!this->validate(x)) {
231+
if (this->validate(x) == false) {
238232
return;
239233
}
240234

@@ -249,7 +243,7 @@ void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec&
249243

250244
bool updateCount{false};
251245
for (std::size_t i = 0; i < x.size(); ++i) {
252-
if (x[i].size() > 0) {
246+
if (x[i].empty() == false) {
253247
class_.conditionalDensities()[i]->add(x[i]);
254248
updateCount = true;
255249
}
@@ -280,62 +274,74 @@ void CNaiveBayes::propagateForwardsByTime(double time) {
280274
}
281275
}
282276

283-
CNaiveBayes::TDoubleSizePrVec
284-
CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const {
285-
TDoubleSizePrVec p(this->classProbabilities(x));
277+
CNaiveBayes::TDoubleSizePrVecDoublePr
278+
CNaiveBayes::highestClassProbabilities(std::size_t n,
279+
const TDouble1VecVec& x,
280+
const TFeatureWeightProvider& weightProvider) const {
281+
auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider);
286282
n = std::min(n, p.size());
287-
std::sort(p.begin(), p.begin() + n, std::greater<TDoubleSizePr>());
288-
return TDoubleSizePrVec{p.begin(), p.begin() + n};
283+
std::sort(p.begin(), p.begin() + n, std::greater<>());
284+
return {TDoubleSizePrVec{p.begin(), p.begin() + n}, minFeatureWeight};
289285
}
290286

291-
double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec& x) const {
292-
TDoubleSizePrVec p(this->classProbabilities(x));
287+
CNaiveBayes::TDoubleDoublePr
288+
CNaiveBayes::classProbability(std::size_t label,
289+
const TDouble1VecVec& x,
290+
const TFeatureWeightProvider& weightProvider) const {
291+
auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider);
293292
auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) {
294293
return p_.second == label;
295294
});
296-
return i == p.end() ? 0.0 : i->first;
295+
return {i == p.end() ? 0.0 : i->first, minFeatureWeight};
297296
}
298297

299-
CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecVec& x) const {
300-
if (!this->validate(x)) {
301-
return {};
298+
CNaiveBayes::TDoubleSizePrVecDoublePr
299+
CNaiveBayes::classProbabilities(const TDouble1VecVec& x,
300+
const TFeatureWeightProvider& weightProvider) const {
301+
if (this->validate(x) == false) {
302+
return {{}, 0.0};
302303
}
303304
if (m_ClassConditionalDensities.empty()) {
304305
LOG_ERROR(<< "Trying to compute class probabilities without supplying training data");
305-
return {};
306+
return {{}, 0.0};
306307
}
307308

308309
using TDoubleVec = std::vector<double>;
309-
using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;
310310

311311
TDoubleSizePrVec p;
312312
p.reserve(m_ClassConditionalDensities.size());
313313
for (const auto& class_ : m_ClassConditionalDensities) {
314314
p.emplace_back(CTools::fastLog(class_.second.count()), class_.first);
315315
}
316+
double minFeatureWeight{1.0};
316317

317318
TDoubleVec logLikelihoods;
318319
for (std::size_t i = 0; i < x.size(); ++i) {
319-
if (x[i].size() > 0) {
320-
TMaxAccumulator maxLogLikelihood;
320+
if (x[i].empty() == false) {
321+
auto& featureWeight = weightProvider();
321322
logLikelihoods.clear();
322323
for (const auto& class_ : m_ClassConditionalDensities) {
323324
const auto& density = class_.second.conditionalDensities()[i];
324325
double logLikelihood{density->logValue(x[i])};
325326
double logMaximumLikelihood{density->logMaximumValue()};
326-
maxLogLikelihood.add(logLikelihood - logMaximumLikelihood);
327327
logLikelihoods.push_back(logLikelihood);
328+
featureWeight.add(class_.first, logLikelihood - logMaximumLikelihood);
328329
}
329-
double weight{1.0};
330-
if (m_MinMaxLogLikelihoodToUseFeature) {
331-
weight = CTools::logisticFunction(
332-
(maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) /
333-
std::fabs(*m_MinMaxLogLikelihoodToUseFeature),
334-
0.1);
335-
}
330+
331+
// We compute the class c_i probability using
332+
//
333+
// p(c_i | x) = exp(sum_i{w_j * log(L(x_j | c_i))}) / Z * p(c_i).
334+
//
335+
// Any feature whose weight < 1 has its significance dropped in class
336+
// selection, effectively we use the w_i'th root of the log-likelihood
337+
// which tends to 1 for all values if w_i is small enough. This can be
338+
// used to ignore features for which x is in the extreme tails of the
339+
// class conditional distribution.
340+
double featureWeight_{featureWeight.calculate()};
336341
for (std::size_t j = 0; j < logLikelihoods.size(); ++j) {
337-
p[j].first += weight * logLikelihoods[j];
342+
p[j].first += featureWeight_ * logLikelihoods[j];
338343
}
344+
minFeatureWeight = std::min(minFeatureWeight, featureWeight_);
339345
}
340346
}
341347

@@ -349,7 +355,7 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV
349355
pc.first /= Z;
350356
}
351357

352-
return p;
358+
return {std::move(p), minFeatureWeight};
353359
}
354360

355361
void CNaiveBayes::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const {
@@ -363,8 +369,7 @@ std::size_t CNaiveBayes::memoryUsage() const {
363369
core::CMemory::dynamicSize(m_ClassConditionalDensities);
364370
}
365371

366-
uint64_t CNaiveBayes::checksum(uint64_t seed) const {
367-
CChecksum::calculate(seed, m_MinMaxLogLikelihoodToUseFeature);
372+
std::uint64_t CNaiveBayes::checksum(std::uint64_t seed) const {
368373
CChecksum::calculate(seed, m_DecayRate);
369374
CChecksum::calculate(seed, m_Exemplar);
370375
return CChecksum::calculate(seed, m_ClassConditionalDensities);
@@ -386,7 +391,7 @@ std::string CNaiveBayes::print() const {
386391
bool CNaiveBayes::validate(const TDouble1VecVec& x) const {
387392
auto class_ = m_ClassConditionalDensities.begin();
388393
if (class_ != m_ClassConditionalDensities.end() &&
389-
class_->second.conditionalDensities().size() > 0 &&
394+
class_->second.conditionalDensities().empty() == false &&
390395
class_->second.conditionalDensities().size() != x.size()) {
391396
LOG_ERROR(<< "Unexpected feature vector: " << core::CContainerPrinter::print(x));
392397
return false;
@@ -423,7 +428,7 @@ bool CNaiveBayes::CClass::acceptRestoreTraverser(const SDistributionRestoreParam
423428
void CNaiveBayes::CClass::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
424429
inserter.insertValue(COUNT_TAG, m_Count, core::CIEEE754::E_SinglePrecision);
425430
for (const auto& density : m_ConditionalDensities) {
426-
if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get())) {
431+
if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get()) != nullptr) {
427432
inserter.insertLevel(CONDITIONAL_DENSITY_FROM_PRIOR_TAG,
428433
std::bind(&CNaiveBayesFeatureDensity::acceptPersistInserter,
429434
density.get(), std::placeholders::_1));

0 commit comments

Comments
 (0)