Skip to content

Commit 5441e73

Browse files
authored
[ML] Improve forecasting for time series with step changes (#2591)
We model the level of a time series which we've observed having step discontinuities via a Markov process for forecasting. Specifically, we estimate the historical step size distribution and the distribution of the steps in time and as a function of the time series value. For this second part we use an online naive Bayes model to estimate the probability that at any given point in a roll out for forecasting we will get a step. This approach generally works well unless we're in the tails of the distribution of values we've observed for the time series historically when we roll out. In this case, our prediction probabilities are very sensitive to the tail behaviour of the distributions we fit to the time series values where we saw a step and sometimes we predict far too many steps as a result. We can detect this case: when we're in the tails of the time series value distribution. This change does this and stops predicting changes in such cases, which avoids pathologies. This fixes #2466.
1 parent 37330c3 commit 5441e73

File tree

11 files changed

+293
-103
lines changed

11 files changed

+293
-103
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
=== Enhancements
3434

3535
* Upgrade Boost libraries to version 1.83. (See {ml-pull}2560[#2560].)
36+
* Improve forecasting for time series with step changes. (See {ml-pull}2591[#2591],
37+
issue: {ml-issue}2466[#2466]).
3638

3739
=== Bug Fixes
3840

include/maths/common/CNaiveBayes.h

Lines changed: 60 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -154,19 +154,43 @@ class MATHS_COMMON_EXPORT CNaiveBayesFeatureDensityFromPrior final
154154
TPriorPtr m_Prior;
155155
};
156156

157+
//! \brief Enables using custom feature weights in class prediction.
158+
class CNaiveBayesFeatureWeight {
159+
public:
160+
virtual ~CNaiveBayesFeatureWeight() = default;
161+
virtual void add(std::size_t class_, double logLikelihood) = 0;
162+
virtual double calculate() const = 0;
163+
};
164+
157165
//! \brief Implements a Naive Bayes classifier.
158166
class MATHS_COMMON_EXPORT CNaiveBayes {
159167
public:
168+
using TDoubleDoublePr = std::pair<double, double>;
160169
using TDoubleSizePr = std::pair<double, std::size_t>;
161170
using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
171+
using TDoubleSizePrVecDoublePr = std::pair<TDoubleSizePrVec, double>;
162172
using TDouble1Vec = core::CSmallVector<double, 1>;
163173
using TDouble1VecVec = std::vector<TDouble1Vec>;
164-
using TOptionalDouble = std::optional<double>;
174+
using TFeatureWeightProvider = std::function<CNaiveBayesFeatureWeight&()>;
175+
176+
private:
177+
//! \brief All features have unit weight in class prediction.
178+
class CUnitFeatureWeight : public CNaiveBayesFeatureWeight {
179+
public:
180+
void add(std::size_t, double) override {}
181+
double calculate() const override { return 1.0; }
182+
};
183+
184+
class CUnitFeatureWeightProvider {
185+
public:
186+
CUnitFeatureWeight& operator()() const { return m_UnitWeight; }
187+
188+
private:
189+
mutable CUnitFeatureWeight m_UnitWeight;
190+
};
165191

166192
public:
167-
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
168-
double decayRate = 0.0,
169-
TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
193+
explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate = 0.0);
170194
CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
171195
const SDistributionRestoreParams& params,
172196
core::CStateRestoreTraverser& traverser);
@@ -184,6 +208,9 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
184208
//! Check if any training data has been added.
185209
bool initialized() const;
186210

211+
//! Get the number of classes.
212+
std::size_t numberClasses() const;
213+
187214
//! This can be used to optionally seed the class counts
188215
//! with \p counts. These are added on to data class counts
189216
//! to compute the class posterior probabilities.
@@ -210,27 +237,53 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
210237
//!
211238
//! \param[in] n The number of class probabilities to estimate.
212239
//! \param[in] x The feature values.
240+
//! \param[in] weightProvider Computes a feature weight from the class
241+
//! conditional log-likelihood of the feature value. It should be in
242+
//! the range [0,1]. The smaller the value the less impact the feature
243+
//! has on class selection.
244+
//! \return The class probabilities and the minimum feature weight.
213245
//! \note \p x size should be equal to the number of features.
214246
//! A missing feature is indicated by passing an empty vector
215247
//! for that feature.
216-
TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const;
248+
TDoubleSizePrVecDoublePr highestClassProbabilities(
249+
std::size_t n,
250+
const TDouble1VecVec& x,
251+
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
217252

218253
//! Get the probability of the class labeled \p label for \p x.
219254
//!
220255
//! \param[in] label The label of the class of interest.
221256
//! \param[in] x The feature values.
257+
//! \param[in] weightProvider Computes a feature weight from the class
258+
//! conditional log-likelihood of the feature value. It should be in
259+
//! the range [0,1]. The smaller the value the less impact the feature
260+
//! has on class selection.
261+
//! \return The class probabilities and the minimum feature weight.
262+
//! The weights are computed from the class conditional log-likelihoods.
222263
//! \note \p x size should be equal to the number of features.
223264
//! A missing feature is indicated by passing an empty vector
224265
//! for that feature.
225-
double classProbability(std::size_t label, const TDouble1VecVec& x) const;
266+
TDoubleDoublePr classProbability(std::size_t label,
267+
const TDouble1VecVec& x,
268+
const TFeatureWeightProvider& weightProvider =
269+
CUnitFeatureWeightProvider{}) const;
226270

227271
//! Get the probabilities of all the classes for \p x.
228272
//!
229273
//! \param[in] x The feature values.
274+
//! \param[in] weightProvider Computes a feature weight from the class
275+
//! conditional log-likelihood of the feature value. It should be in
276+
//! the range [0,1]. The smaller the value the less impact the feature
277+
//! has on class selection.
278+
//! \return The class probabilities and the minimum feature weight.
279+
//! A missing feature is indicated by passing an empty vector
280+
//! for that feature.
230281
//! \note \p x size should be equal to the number of features.
231282
//! A missing feature is indicated by passing an empty vector
232283
//! for that feature.
233-
TDoubleSizePrVec classProbabilities(const TDouble1VecVec& x) const;
284+
TDoubleSizePrVecDoublePr
285+
classProbabilities(const TDouble1VecVec& x,
286+
const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
234287

235288
//! Debug the memory used by this object.
236289
void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const;
@@ -298,13 +351,6 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
298351
bool validate(const TDouble1VecVec& x) const;
299352

300353
private:
301-
//! It is not always appropriate to use features with very low
302-
//! probability in all classes to discriminate: the class choice
303-
//! will be very sensitive to the underlying conditional density
304-
//! model. This is a cutoff (for the minimum maximum class log
305-
//! likelihood) in order to use a feature.
306-
TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;
307-
308354
//! Controls the rate at which data are aged out.
309355
double m_DecayRate;
310356

lib/core/CStateRestoreTraverser.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ namespace core {
1818
CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) {
1919
}
2020

21-
CStateRestoreTraverser::~CStateRestoreTraverser() {
22-
}
21+
CStateRestoreTraverser::~CStateRestoreTraverser() = default;
2322

2423
bool CStateRestoreTraverser::haveBadState() const {
2524
return m_BadState;

lib/maths/common/CNaiveBayes.cc

Lines changed: 52 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,6 @@ namespace {
4040
const core::TPersistenceTag PRIOR_TAG{"a", "prior"};
4141
const core::TPersistenceTag CLASS_LABEL_TAG{"b", "class_label"};
4242
const core::TPersistenceTag CLASS_MODEL_TAG{"c", "class_model"};
43-
const core::TPersistenceTag MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG{
44-
"d", "min_max_likelihood_to_use_feature"};
4543
const core::TPersistenceTag COUNT_TAG{"e", "count"};
4644
const core::TPersistenceTag CONDITIONAL_DENSITY_FROM_PRIOR_TAG{"f", "conditional_density_from_prior"};
4745
}
@@ -141,27 +139,26 @@ std::string CNaiveBayesFeatureDensityFromPrior::print() const {
141139
return result;
142140
}
143141

144-
CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
145-
double decayRate,
146-
TOptionalDouble minMaxLogLikelihoodToUseFeature)
147-
: m_MinMaxLogLikelihoodToUseFeature{minMaxLogLikelihoodToUseFeature},
148-
m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
142+
CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate)
143+
: m_DecayRate{decayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
149144
}
150145

151146
CNaiveBayes::CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
152147
const SDistributionRestoreParams& params,
153148
core::CStateRestoreTraverser& traverser)
154149
: m_DecayRate{params.s_DecayRate}, m_Exemplar{exemplar.clone()}, m_ClassConditionalDensities{2} {
155-
if (traverser.traverseSubLevel([&](auto& traverser_) {
150+
// If we persist before we create class conditional distributions we will
151+
// not have anything to restore and hasSubLevel will be false. Trying to
152+
// restore sets the traverser state to bad so we need to handle explicitly.
153+
if (traverser.hasSubLevel() && traverser.traverseSubLevel([&](auto& traverser_) {
156154
return this->acceptRestoreTraverser(params, traverser_);
157155
}) == false) {
158156
traverser.setBadState();
159157
}
160158
}
161159

162160
CNaiveBayes::CNaiveBayes(const CNaiveBayes& other)
163-
: m_MinMaxLogLikelihoodToUseFeature{other.m_MinMaxLogLikelihoodToUseFeature},
164-
m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} {
161+
: m_DecayRate{other.m_DecayRate}, m_Exemplar{other.m_Exemplar->clone()} {
165162
for (const auto& class_ : other.m_ClassConditionalDensities) {
166163
m_ClassConditionalDensities.emplace(class_.first, class_.second);
167164
}
@@ -178,9 +175,6 @@ bool CNaiveBayes::acceptRestoreTraverser(const SDistributionRestoreParams& param
178175
return class_.acceptRestoreTraverser(params, traverser_);
179176
}),
180177
m_ClassConditionalDensities.emplace(label, std::move(class_)))
181-
RESTORE_SETUP_TEARDOWN(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG, double value,
182-
core::CStringUtils::stringToType(traverser.value(), value),
183-
m_MinMaxLogLikelihoodToUseFeature.emplace(value))
184178
} while (traverser.next());
185179
return true;
186180
}
@@ -203,12 +197,6 @@ void CNaiveBayes::acceptPersistInserter(core::CStatePersistInserter& inserter) c
203197
class_->second.acceptPersistInserter(inserter_);
204198
});
205199
}
206-
207-
if (m_MinMaxLogLikelihoodToUseFeature) {
208-
inserter.insertValue(MIN_MAX_LOG_LIKELIHOOD_TO_USE_FEATURE_TAG,
209-
*m_MinMaxLogLikelihoodToUseFeature,
210-
core::CIEEE754::E_SinglePrecision);
211-
}
212200
}
213201

214202
CNaiveBayes& CNaiveBayes::operator=(const CNaiveBayes& other) {
@@ -223,26 +211,29 @@ void CNaiveBayes::swap(CNaiveBayes& other) {
223211
std::swap(m_DecayRate, other.m_DecayRate);
224212
m_Exemplar.swap(other.m_Exemplar);
225213
m_ClassConditionalDensities.swap(other.m_ClassConditionalDensities);
226-
std::swap(m_MinMaxLogLikelihoodToUseFeature, other.m_MinMaxLogLikelihoodToUseFeature);
227214
}
228215

229216
bool CNaiveBayes::initialized() const {
230-
return m_ClassConditionalDensities.size() > 0 &&
217+
return m_ClassConditionalDensities.empty() == false &&
231218
std::all_of(m_ClassConditionalDensities.begin(),
232219
m_ClassConditionalDensities.end(),
233220
[](const std::pair<std::size_t, CClass>& class_) {
234221
return class_.second.initialized();
235222
});
236223
}
237224

225+
std::size_t CNaiveBayes::numberClasses() const {
226+
return m_ClassConditionalDensities.size();
227+
}
228+
238229
void CNaiveBayes::initialClassCounts(const TDoubleSizePrVec& counts) {
239230
for (const auto& count : counts) {
240231
m_ClassConditionalDensities.emplace(count.second, CClass{count.first});
241232
}
242233
}
243234

244235
void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec& x) {
245-
if (!this->validate(x)) {
236+
if (this->validate(x) == false) {
246237
return;
247238
}
248239

@@ -257,7 +248,7 @@ void CNaiveBayes::addTrainingDataPoint(std::size_t label, const TDouble1VecVec&
257248

258249
bool updateCount{false};
259250
for (std::size_t i = 0; i < x.size(); ++i) {
260-
if (x[i].size() > 0) {
251+
if (x[i].empty() == false) {
261252
class_.conditionalDensities()[i]->add(x[i]);
262253
updateCount = true;
263254
}
@@ -288,62 +279,74 @@ void CNaiveBayes::propagateForwardsByTime(double time) {
288279
}
289280
}
290281

291-
CNaiveBayes::TDoubleSizePrVec
292-
CNaiveBayes::highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const {
293-
TDoubleSizePrVec p(this->classProbabilities(x));
282+
CNaiveBayes::TDoubleSizePrVecDoublePr
283+
CNaiveBayes::highestClassProbabilities(std::size_t n,
284+
const TDouble1VecVec& x,
285+
const TFeatureWeightProvider& weightProvider) const {
286+
auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider);
294287
n = std::min(n, p.size());
295288
std::sort(p.begin(), p.begin() + n, std::greater<>());
296-
return TDoubleSizePrVec{p.begin(), p.begin() + n};
289+
return {TDoubleSizePrVec{p.begin(), p.begin() + n}, minFeatureWeight};
297290
}
298291

299-
double CNaiveBayes::classProbability(std::size_t label, const TDouble1VecVec& x) const {
300-
TDoubleSizePrVec p(this->classProbabilities(x));
292+
CNaiveBayes::TDoubleDoublePr
293+
CNaiveBayes::classProbability(std::size_t label,
294+
const TDouble1VecVec& x,
295+
const TFeatureWeightProvider& weightProvider) const {
296+
auto[p, minFeatureWeight] = this->classProbabilities(x, weightProvider);
301297
auto i = std::find_if(p.begin(), p.end(), [label](const TDoubleSizePr& p_) {
302298
return p_.second == label;
303299
});
304-
return i == p.end() ? 0.0 : i->first;
300+
return {i == p.end() ? 0.0 : i->first, minFeatureWeight};
305301
}
306302

307-
CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecVec& x) const {
308-
if (!this->validate(x)) {
309-
return {};
303+
CNaiveBayes::TDoubleSizePrVecDoublePr
304+
CNaiveBayes::classProbabilities(const TDouble1VecVec& x,
305+
const TFeatureWeightProvider& weightProvider) const {
306+
if (this->validate(x) == false) {
307+
return {{}, 0.0};
310308
}
311309
if (m_ClassConditionalDensities.empty()) {
312310
LOG_ERROR(<< "Trying to compute class probabilities without supplying training data");
313-
return {};
311+
return {{}, 0.0};
314312
}
315313

316314
using TDoubleVec = std::vector<double>;
317-
using TMaxAccumulator = CBasicStatistics::SMax<double>::TAccumulator;
318315

319316
TDoubleSizePrVec p;
320317
p.reserve(m_ClassConditionalDensities.size());
321318
for (const auto& class_ : m_ClassConditionalDensities) {
322319
p.emplace_back(CTools::fastLog(class_.second.count()), class_.first);
323320
}
321+
double minFeatureWeight{1.0};
324322

325323
TDoubleVec logLikelihoods;
326324
for (std::size_t i = 0; i < x.size(); ++i) {
327-
if (x[i].size() > 0) {
328-
TMaxAccumulator maxLogLikelihood;
325+
if (x[i].empty() == false) {
326+
auto& featureWeight = weightProvider();
329327
logLikelihoods.clear();
330328
for (const auto& class_ : m_ClassConditionalDensities) {
331329
const auto& density = class_.second.conditionalDensities()[i];
332330
double logLikelihood{density->logValue(x[i])};
333331
double logMaximumLikelihood{density->logMaximumValue()};
334-
maxLogLikelihood.add(logLikelihood - logMaximumLikelihood);
335332
logLikelihoods.push_back(logLikelihood);
333+
featureWeight.add(class_.first, logLikelihood - logMaximumLikelihood);
336334
}
337-
double weight{1.0};
338-
if (m_MinMaxLogLikelihoodToUseFeature) {
339-
weight = CTools::logisticFunction(
340-
(maxLogLikelihood[0] - *m_MinMaxLogLikelihoodToUseFeature) /
341-
std::fabs(*m_MinMaxLogLikelihoodToUseFeature),
342-
0.1);
343-
}
335+
336+
// We compute the class c_i probability using
337+
//
338+
// p(c_i | x) = exp(sum_j{w_j * log(L(x_j | c_i))}) / Z * p(c_i).
339+
//
340+
// Any feature whose weight < 1 has its significance dropped in class
341+
// selection, effectively we use the w_i'th root of the log-likelihood
342+
// which tends to 1 for all values if w_i is small enough. This can be
343+
// used to ignore features for which x is in the extreme tails of the
344+
// class conditional distribution.
345+
double featureWeight_{featureWeight.calculate()};
344346
for (std::size_t j = 0; j < logLikelihoods.size(); ++j) {
345-
p[j].first += weight * logLikelihoods[j];
347+
p[j].first += featureWeight_ * logLikelihoods[j];
346348
}
349+
minFeatureWeight = std::min(minFeatureWeight, featureWeight_);
347350
}
348351
}
349352

@@ -357,7 +360,7 @@ CNaiveBayes::TDoubleSizePrVec CNaiveBayes::classProbabilities(const TDouble1VecV
357360
pc.first /= Z;
358361
}
359362

360-
return p;
363+
return {std::move(p), minFeatureWeight};
361364
}
362365

363366
void CNaiveBayes::debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const {
@@ -372,7 +375,6 @@ std::size_t CNaiveBayes::memoryUsage() const {
372375
}
373376

374377
std::uint64_t CNaiveBayes::checksum(std::uint64_t seed) const {
375-
CChecksum::calculate(seed, m_MinMaxLogLikelihoodToUseFeature);
376378
CChecksum::calculate(seed, m_DecayRate);
377379
CChecksum::calculate(seed, m_Exemplar);
378380
return CChecksum::calculate(seed, m_ClassConditionalDensities);
@@ -394,7 +396,7 @@ std::string CNaiveBayes::print() const {
394396
bool CNaiveBayes::validate(const TDouble1VecVec& x) const {
395397
auto class_ = m_ClassConditionalDensities.begin();
396398
if (class_ != m_ClassConditionalDensities.end() &&
397-
class_->second.conditionalDensities().size() > 0 &&
399+
class_->second.conditionalDensities().empty() == false &&
398400
class_->second.conditionalDensities().size() != x.size()) {
399401
LOG_ERROR(<< "Unexpected feature vector: " << x);
400402
return false;
@@ -431,7 +433,7 @@ bool CNaiveBayes::CClass::acceptRestoreTraverser(const SDistributionRestoreParam
431433
void CNaiveBayes::CClass::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
432434
inserter.insertValue(COUNT_TAG, m_Count, core::CIEEE754::E_SinglePrecision);
433435
for (const auto& density : m_ConditionalDensities) {
434-
if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get())) {
436+
if (dynamic_cast<const CNaiveBayesFeatureDensityFromPrior*>(density.get()) != nullptr) {
435437
inserter.insertLevel(CONDITIONAL_DENSITY_FROM_PRIOR_TAG,
436438
[&density](auto& inserter_) {
437439
density->acceptPersistInserter(inserter_);

0 commit comments

Comments
 (0)