elastic
diff --git a/‎.buildkite/ml_pipeline/config.py
Lines changed: 12 additions & 1 deletion b/‎.buildkite/ml_pipeline/config.py
Lines changed: 12 additions & 1 deletion
diff --git a/‎.buildkite/pipeline.json.py
Lines changed: 3 additions & 0 deletions b/‎.buildkite/pipeline.json.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎.buildkite/pipelines/run_qa_tests.yml.sh
Lines changed: 25 additions & 0 deletions b/‎.buildkite/pipelines/run_qa_tests.yml.sh
Lines changed: 25 additions & 0 deletions
diff --git a/‎.buildkite/pull-requests.json
Lines changed: 1 addition & 1 deletion b/‎.buildkite/pull-requests.json
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/CHANGELOG.asciidoc
Lines changed: 2 additions & 0 deletions b/‎docs/CHANGELOG.asciidoc
Lines changed: 2 additions & 0 deletions
diff --git a/‎include/maths/common/CNaiveBayes.h
Lines changed: 60 additions & 14 deletions b/‎include/maths/common/CNaiveBayes.h
Lines changed: 60 additions & 14 deletions
diff --git a/‎jupyter/requirements.txt
Lines changed: 5 additions & 4 deletions b/‎jupyter/requirements.txt
Lines changed: 5 additions & 4 deletions
diff --git a/‎lib/core/CStateRestoreTraverser.cc
Lines changed: 1 addition & 2 deletions b/‎lib/core/CStateRestoreTraverser.cc
Lines changed: 1 addition & 2 deletions
@@ -15,11 +15,15 @@ class Config:
     build_windows: bool = False
     build_macos: bool = False
     build_linux: bool = False
+    run_qa_tests: bool = False
     action: str = "build"
 
     def parse_comment(self):
         if "GITHUB_PR_COMMENT_VAR_ACTION" in os.environ:
             self.action = os.environ["GITHUB_PR_COMMENT_VAR_ACTION"]
+            self.run_qa_tests = self.action == "run_qa_tests"
+            if self.run_qa_tests:
+                self.action = "build"
 
         if "GITHUB_PR_COMMENT_VAR_PLATFORM" in os.environ:
             csv_platform = os.environ["GITHUB_PR_COMMENT_VAR_PLATFORM"]
@@ -36,13 +40,14 @@ def parse_comment(self):
             self.build_linux = True
 
     def parse_label(self):
-        build_labels = ['ci:build-linux','ci:build-macos','ci:build-windows']
+        build_labels = ['ci:build-linux','ci:build-macos','ci:build-windows','ci:run-qa-tests']
         all_labels = [x.strip().lower() for x in os.environ["GITHUB_PR_LABELS"].split(",")]
         ci_labels = [label for label in all_labels if re.search("|".join(build_labels), label)]
         if not ci_labels:
             self.build_windows = True
             self.build_macos = True
             self.build_linux = True
+            self.run_qa_tests = False
         else:
             for label in ci_labels:
                 if "ci:build-windows" == label:
@@ -51,6 +56,11 @@ def parse_label(self):
                     self.build_macos = True
                 elif "ci:build-linux" == label:
                     self.build_linux = True
+                elif "ci:run-qa-tests" == label:
+                    self.build_windows = True
+                    self.build_macos = True
+                    self.build_linux = True
+                    self.run_qa_tests = True
 
     def parse(self):
         """Parse Github label or Github comment passed through buildkite-pr-bot."""
@@ -63,4 +73,5 @@ def parse(self):
             self.build_windows = True
             self.build_macos = True
             self.build_linux = True
+            self.run_qa_tests = False
 
@@ -50,6 +50,9 @@ def main():
         pipeline_steps.append(build_linux)
     pipeline_steps.append(pipeline_steps.generate_step("Upload ES tests runner pipeline",
                                                        ".buildkite/pipelines/run_es_tests.yml.sh"))
+    if config.run_qa_tests:
+        pipeline_steps.append(pipeline_steps.generate_step("Upload QA tests runner pipeline",
+                                                           ".buildkite/pipelines/run_qa_tests.yml.sh"))
     pipeline["env"] = env
     pipeline["steps"] = pipeline_steps
     print(json.dumps(pipeline, indent=2))
 
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License
+# 2.0 and the following additional limitation. Functionality enabled by the
+# files subject to the Elastic License 2.0 may only be used in production when
+# invoked by an Elasticsearch process with a license key installed that permits
+# use of machine learning features. You may not use this file except in
+# compliance with the Elastic License 2.0 and the foregoing additional
+# limitation.
+
+cat <<EOL
+steps:
+  - label: "Trigger Appex QA Tests :test_tube:"
+    command:
+      - echo 'Trigger QA Tests'
+      - 'buildkite-agent artifact download "build/*" . --step build_test_linux-x86_64-RelWithDebInfo'
+    depends_on: "build_test_linux-x86_64-RelWithDebInfo"
+  - wait
+  - trigger: appex-qa-stateful-custom-ml-c-plus-plus-build-testing
+    async: false
+    build:
+      message: "${BUILDKITE_MESSAGE}"
+      env:
+        QAF_TESTS_TO_RUN: "ml_cpp_pr"
+EOL
@@ -9,7 +9,7 @@
       "commit_status_context": "ml-cpp-ci",
       "build_on_commit": true,
       "build_on_comment": true,
-      "trigger_comment_regex": "^(?:(?:buildkite +)(?<action>build|debug) +(?: *on *(?<platform>(?:[ ,]*(?:windows|linux|mac(os)?))+))?)$",
+      "trigger_comment_regex": "^(?:(?:buildkite +)(?<action>build|debug|run_qa_tests) +(?: *on *(?<platform>(?:[ ,]*(?:windows|linux|mac(os)?))+))?)$",
       "always_trigger_comment_regex": "^(?:(?:buildkite\\W+)?(?:build|test)\\W+(?:this|it))",
       "skip_ci_labels": ["skip-ci", "jenkins-ci", ">test-mute", ">docs"],
       "skip_target_branches": ["6.8", "7.11", "7.12"],
 
@@ -33,6 +33,8 @@
 === Enhancements
 
 * Upgrade Boost libraries to version 1.83. (See {ml-pull}2560[#2560].)
+* Improve forecasting for time series with step changes. (See {ml-pull}2591[#2591],
+  issue: {ml-issue}2466[#2466].)
 
 === Bug Fixes
 
 
@@ -154,19 +154,43 @@ class MATHS_COMMON_EXPORT CNaiveBayesFeatureDensityFromPrior final
     TPriorPtr m_Prior;
 };
 
+//! \brief Enables using custom feature weights in class prediction.
+class CNaiveBayesFeatureWeight {
+public:
+    virtual ~CNaiveBayesFeatureWeight() = default;
+    virtual void add(std::size_t class_, double logLikelihood) = 0;
+    virtual double calculate() const = 0;
+};
+
 //! \brief Implements a Naive Bayes classifier.
 class MATHS_COMMON_EXPORT CNaiveBayes {
 public:
+    using TDoubleDoublePr = std::pair<double, double>;
     using TDoubleSizePr = std::pair<double, std::size_t>;
     using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
+    using TDoubleSizePrVecDoublePr = std::pair<TDoubleSizePrVec, double>;
     using TDouble1Vec = core::CSmallVector<double, 1>;
     using TDouble1VecVec = std::vector<TDouble1Vec>;
-    using TOptionalDouble = std::optional<double>;
+    using TFeatureWeightProvider = std::function<CNaiveBayesFeatureWeight&()>;
+
+private:
+    //! \brief All features have unit weight in class prediction.
+    class CUnitFeatureWeight : public CNaiveBayesFeatureWeight {
+    public:
+        void add(std::size_t, double) override {}
+        double calculate() const override { return 1.0; }
+    };
+
+    class CUnitFeatureWeightProvider {
+    public:
+        CUnitFeatureWeight& operator()() const { return m_UnitWeight; }
+
+    private:
+        mutable CUnitFeatureWeight m_UnitWeight;
+    };
 
 public:
-    explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
-                         double decayRate = 0.0,
-                         TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
+    explicit CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar, double decayRate = 0.0);
     CNaiveBayes(const CNaiveBayesFeatureDensity& exemplar,
                 const SDistributionRestoreParams& params,
                 core::CStateRestoreTraverser& traverser);
@@ -184,6 +208,9 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
     //! Check if any training data has been added.
     bool initialized() const;
 
+    //! Get the number of classes.
+    std::size_t numberClasses() const;
+
     //! This can be used to optionally seed the class counts
     //! with \p counts. These are added on to data class counts
     //! to compute the class posterior probabilities.
@@ -210,27 +237,53 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
     //!
     //! \param[in] n The number of class probabilities to estimate.
     //! \param[in] x The feature values.
+    //! \param[in] weightProvider Computes a feature weight from the class
+    //! conditional log-likelihood of the feature value. It should be in
+    //! the range [0,1]. The smaller the value the less impact the feature
+    //! has on class selection.
+    //! \return The class probabilities and the minimum feature weight.
     //! \note \p x size should be equal to the number of features.
     //! A missing feature is indicated by passing an empty vector
     //! for that feature.
-    TDoubleSizePrVec highestClassProbabilities(std::size_t n, const TDouble1VecVec& x) const;
+    TDoubleSizePrVecDoublePr highestClassProbabilities(
+        std::size_t n,
+        const TDouble1VecVec& x,
+        const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
 
     //! Get the probability of the class labeled \p label for \p x.
     //!
     //! \param[in] label The label of the class of interest.
     //! \param[in] x The feature values.
+    //! \param[in] weightProvider Computes a feature weight from the class
+    //! conditional log-likelihood of the feature value. It should be in
+    //! the range [0,1]. The smaller the value the less impact the feature
+    //! has on class selection.
+    //! \return The probability of the class labeled \p label and the minimum
+    //! feature weight.
     //! \note \p x size should be equal to the number of features.
     //! A missing feature is indicated by passing an empty vector
     //! for that feature.
-    double classProbability(std::size_t label, const TDouble1VecVec& x) const;
+    TDoubleDoublePr classProbability(std::size_t label,
+                                     const TDouble1VecVec& x,
+                                     const TFeatureWeightProvider& weightProvider =
+                                         CUnitFeatureWeightProvider{}) const;
 
     //! Get the probabilities of all the classes for \p x.
     //!
     //! \param[in] x The feature values.
+    //! \param[in] weightProvider Computes a feature weight from the class
+    //! conditional log-likelihood of the feature value. It should be in
+    //! the range [0,1]. The smaller the value the less impact the feature
+    //! has on class selection.
+    //! \return The class probabilities and the minimum feature weight.
     //! \note \p x size should be equal to the number of features.
     //! A missing feature is indicated by passing an empty vector
     //! for that feature.
-    TDoubleSizePrVec classProbabilities(const TDouble1VecVec& x) const;
+    TDoubleSizePrVecDoublePr
+    classProbabilities(const TDouble1VecVec& x,
+                       const TFeatureWeightProvider& weightProvider = CUnitFeatureWeightProvider{}) const;
 
     //! Debug the memory used by this object.
     void debugMemoryUsage(const core::CMemoryUsage::TMemoryUsagePtr& mem) const;
@@ -298,13 +351,6 @@ class MATHS_COMMON_EXPORT CNaiveBayes {
     bool validate(const TDouble1VecVec& x) const;
 
 private:
-    //! It is not always appropriate to use features with very low
-    //! probability in all classes to discriminate: the class choice
-    //! will be very sensitive to the underlying conditional density
-    //! model. This is a cutoff (for the minimum maximum class log
-    //! likelihood) in order to use a feature.
-    TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;
-
     //! Controls the rate at which data are aged out.
     double m_DecayRate;
 
 
@@ -11,13 +11,14 @@ jupyter             ==1.0.0
 libtmux             ==0.10.1
 matplotlib          ==3.3.4
 nbsmoke             ==0.5.0
-numpy               >=1.21.0
+numpy               >=1.22.2
 pandas              >=1.3
 pathlib2            ==2.3.5
 plotly              ==5.3.1
 sacred              ==0.8.2
-scikit-learn        ==0.24.1
+scikit-learn        ==1.3
 scipy               >=1.5.4
 seaborn             ==0.11.1
-tornado>=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
-jupyter-server>=2.7.2 # not directly required, pinned by Snyk to avoid a vulnerability
+tornado             >=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
+jupyter-server      >=2.7.2 # not directly required, pinned by Snyk to avoid a vulnerability
+pillow              >=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability
@@ -18,8 +18,7 @@ namespace core {
 CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) {
 }
 
-CStateRestoreTraverser::~CStateRestoreTraverser() {
-}
+CStateRestoreTraverser::~CStateRestoreTraverser() = default;
 
 bool CStateRestoreTraverser::haveBadState() const {
     return m_BadState;
Original file line number	Diff line number	Diff line change
`@@ -18,8 +18,7 @@ namespace core {`
`18`	`18`	`CStateRestoreTraverser::CStateRestoreTraverser() : m_BadState(false) {`
`19`	`19`	`}`
`20`	`20`
`21`		`-CStateRestoreTraverser::~CStateRestoreTraverser() {`
`22`		`-}`
	`21`	`+CStateRestoreTraverser::~CStateRestoreTraverser() = default;`
`23`	`22`
`24`	`23`	`bool CStateRestoreTraverser::haveBadState() const {`
`25`	`24`	`return m_BadState;`