[IR2Vec] Make Embedding a wrapper struct around std::vector<double>

* IR2Vec.h: replace the `Embedding` type alias with a struct that exposes
  size()/empty(), assert-checked operator[], iterators, getData(),
  operator+=, operator-=, scaleAndAdd() and approximatelyEquals(), and
  remove Embedder::addVectors / Embedder::addScaledVector.
* IR2Vec.cpp: add a llvm::json::fromJSON overload for Embedding, define the
  new Embedding members, and switch SymbolicEmbedder over to them.
* IR2VecTest.cpp: add EmbeddingTest cases for the new struct and port the
  existing vector-arithmetic tests to the member operators.

NOTE(review): this patch was restored from a copy whose line breaks and
template argument lists had been stripped. Hunk line counts were checked
against the @@ headers; the template arguments (std::vector<double>,
DenseMap<...>, cl::opt<float>, make_error<StringError>, static_cast<...>)
and leading whitespace were reconstructed from context -- confirm against
the upstream commit before applying.

diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h
index 9fd1b0ae8e248..8bf21b0e75d67 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -53,7 +53,63 @@ class raw_ostream;
 enum class IR2VecKind { Symbolic };
 
 namespace ir2vec {
-using Embedding = std::vector<double>;
+/// Embedding is a datatype that wraps std::vector<double>. It provides
+/// additional functionality for arithmetic and comparison operations.
+/// It is meant to be used *like* std::vector<double> but is more restrictive
+/// in the sense that it does not allow the user to change the size of the
+/// embedding vector. The dimension of the embedding is fixed at the time of
+/// construction of Embedding object. But the elements can be modified in-place.
+struct Embedding {
+private:
+  std::vector<double> Data;
+
+public:
+  Embedding() = default;
+  Embedding(const std::vector<double> &V) : Data(V) {}
+  Embedding(std::vector<double> &&V) : Data(std::move(V)) {}
+  Embedding(std::initializer_list<double> IL) : Data(IL) {}
+
+  explicit Embedding(size_t Size) : Data(Size) {}
+  Embedding(size_t Size, double InitialValue) : Data(Size, InitialValue) {}
+
+  size_t size() const { return Data.size(); }
+  bool empty() const { return Data.empty(); }
+
+  double &operator[](size_t Itr) {
+    assert(Itr < Data.size() && "Index out of bounds");
+    return Data[Itr];
+  }
+
+  const double &operator[](size_t Itr) const {
+    assert(Itr < Data.size() && "Index out of bounds");
+    return Data[Itr];
+  }
+
+  using iterator = typename std::vector<double>::iterator;
+  using const_iterator = typename std::vector<double>::const_iterator;
+
+  iterator begin() { return Data.begin(); }
+  iterator end() { return Data.end(); }
+  const_iterator begin() const { return Data.begin(); }
+  const_iterator end() const { return Data.end(); }
+  const_iterator cbegin() const { return Data.cbegin(); }
+  const_iterator cend() const { return Data.cend(); }
+
+  const std::vector<double> &getData() const { return Data; }
+
+  /// Arithmetic operators
+  Embedding &operator+=(const Embedding &RHS);
+  Embedding &operator-=(const Embedding &RHS);
+
+  /// Adds Src Embedding scaled by Factor with the called Embedding.
+  /// Called_Embedding += Src * Factor
+  Embedding &scaleAndAdd(const Embedding &Src, float Factor);
+
+  /// Returns true if the embedding is approximately equal to the RHS embedding
+  /// within the specified tolerance.
+  bool approximatelyEquals(const Embedding &RHS, double Tolerance = 1e-6) const;
+};
+
 using InstEmbeddingsMap = DenseMap<const Instruction *, Embedding>;
 using BBEmbeddingsMap = DenseMap<const BasicBlock *, Embedding>;
 // FIXME: Current the keys are strings. This can be changed to
@@ -61,8 +117,8 @@ using BBEmbeddingsMap = DenseMap<const BasicBlock *, Embedding>;
 using Vocab = std::map<std::string, Embedding>;
 
 /// Embedder provides the interface to generate embeddings (vector
-/// representations) for instructions, basic blocks, and functions. The vector
-/// representations are generated using IR2Vec algorithms.
+/// representations) for instructions, basic blocks, and functions. The
+/// vector representations are generated using IR2Vec algorithms.
 ///
 /// The Embedder class is an abstract class and it is intended to be
 /// subclassed for different IR2Vec algorithms like Symbolic and Flow-aware.
@@ -99,13 +155,6 @@ class Embedder {
   /// zero vector.
   Embedding lookupVocab(const std::string &Key) const;
 
-  /// Adds two vectors: Dst += Src
-  static void addVectors(Embedding &Dst, const Embedding &Src);
-
-  /// Adds Src vector scaled by Factor to Dst vector: Dst += Src * Factor
-  static void addScaledVector(Embedding &Dst, const Embedding &Src,
-                              float Factor);
-
 public:
   virtual ~Embedder() = default;
 
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 490db5fdcdf99..25ce35d4ace37 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -55,6 +55,51 @@ static cl::opt<float> ArgWeight("ir2vec-arg-weight", cl::Optional,
 
 AnalysisKey IR2VecVocabAnalysis::Key;
 
+namespace llvm::json {
+inline bool fromJSON(const llvm::json::Value &E, Embedding &Out,
+                     llvm::json::Path P) {
+  std::vector<double> TempOut;
+  if (!llvm::json::fromJSON(E, TempOut, P))
+    return false;
+  Out = Embedding(std::move(TempOut));
+  return true;
+}
+} // namespace llvm::json
+
+// ==----------------------------------------------------------------------===//
+// Embedding
+//===----------------------------------------------------------------------===//
+
+Embedding &Embedding::operator+=(const Embedding &RHS) {
+  assert(this->size() == RHS.size() && "Vectors must have the same dimension");
+  std::transform(this->begin(), this->end(), RHS.begin(), this->begin(),
+                 std::plus<double>());
+  return *this;
+}
+
+Embedding &Embedding::operator-=(const Embedding &RHS) {
+  assert(this->size() == RHS.size() && "Vectors must have the same dimension");
+  std::transform(this->begin(), this->end(), RHS.begin(), this->begin(),
+                 std::minus<double>());
+  return *this;
+}
+
+Embedding &Embedding::scaleAndAdd(const Embedding &Src, float Factor) {
+  assert(this->size() == Src.size() && "Vectors must have the same dimension");
+  for (size_t Itr = 0; Itr < this->size(); ++Itr)
+    (*this)[Itr] += Src[Itr] * Factor;
+  return *this;
+}
+
+bool Embedding::approximatelyEquals(const Embedding &RHS,
+                                    double Tolerance) const {
+  assert(this->size() == RHS.size() && "Vectors must have the same dimension");
+  for (size_t Itr = 0; Itr < this->size(); ++Itr)
+    if (std::abs((*this)[Itr] - RHS[Itr]) > Tolerance)
+      return false;
+  return true;
+}
+
 // ==----------------------------------------------------------------------===//
 // Embedder and its subclasses
 //===----------------------------------------------------------------------===//
@@ -73,20 +118,6 @@ Embedder::create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary) {
   return make_error<StringError>("Unknown IR2VecKind", errc::invalid_argument);
 }
 
-void Embedder::addVectors(Embedding &Dst, const Embedding &Src) {
-  assert(Dst.size() == Src.size() && "Vectors must have the same dimension");
-  std::transform(Dst.begin(), Dst.end(), Src.begin(), Dst.begin(),
-                 std::plus<double>());
-}
-
-void Embedder::addScaledVector(Embedding &Dst, const Embedding &Src,
-                               float Factor) {
-  assert(Dst.size() == Src.size() && "Vectors must have the same dimension");
-  for (size_t i = 0; i < Dst.size(); ++i) {
-    Dst[i] += Src[i] * Factor;
-  }
-}
-
 // FIXME: Currently lookups are string based. Use numeric Keys
 // for efficiency
 Embedding Embedder::lookupVocab(const std::string &Key) const {
@@ -164,20 +195,20 @@ void SymbolicEmbedder::computeEmbeddings(const BasicBlock &BB) const {
     Embedding InstVector(Dimension, 0);
 
     const auto OpcVec = lookupVocab(I.getOpcodeName());
-    addScaledVector(InstVector, OpcVec, OpcWeight);
+    InstVector.scaleAndAdd(OpcVec, OpcWeight);
 
     // FIXME: Currently lookups are string based. Use numeric Keys
     // for efficiency.
     const auto Type = I.getType();
     const auto TypeVec = getTypeEmbedding(Type);
-    addScaledVector(InstVector, TypeVec, TypeWeight);
+    InstVector.scaleAndAdd(TypeVec, TypeWeight);
 
     for (const auto &Op : I.operands()) {
       const auto OperandVec = getOperandEmbedding(Op.get());
-      addScaledVector(InstVector, OperandVec, ArgWeight);
+      InstVector.scaleAndAdd(OperandVec, ArgWeight);
     }
     InstVecMap[&I] = InstVector;
-    addVectors(BBVector, InstVector);
+    BBVector += InstVector;
   }
   BBVecMap[&BB] = BBVector;
 }
@@ -187,7 +218,7 @@ void SymbolicEmbedder::computeEmbeddings() const {
     return;
   for (const auto &BB : F) {
     computeEmbeddings(BB);
-    addVectors(FuncVector, BBVecMap[&BB]);
+    FuncVector += BBVecMap[&BB];
   }
 }
 
diff --git a/llvm/unittests/Analysis/IR2VecTest.cpp b/llvm/unittests/Analysis/IR2VecTest.cpp
index 9e47b2cd8bedd..053b9f75e7a66 100644
--- a/llvm/unittests/Analysis/IR2VecTest.cpp
+++ b/llvm/unittests/Analysis/IR2VecTest.cpp
@@ -14,6 +14,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/JSON.h"
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -32,89 +33,209 @@ class TestableEmbedder : public Embedder {
   void computeEmbeddings() const override {}
   void computeEmbeddings(const BasicBlock &BB) const override {}
   using Embedder::lookupVocab;
-  static void addVectors(Embedding &Dst, const Embedding &Src) {
-    Embedder::addVectors(Dst, Src);
+};
+
+TEST(EmbeddingTest, ConstructorsAndAccessors) {
+  // Default constructor
+  {
+    Embedding E;
+    EXPECT_TRUE(E.empty());
+    EXPECT_EQ(E.size(), 0u);
   }
-  static void addScaledVector(Embedding &Dst, const Embedding &Src,
-                              float Factor) {
-    Embedder::addScaledVector(Dst, Src, Factor);
+
+  // Constructor with const std::vector<double>&
+  {
+    std::vector<double> Data = {1.0, 2.0, 3.0};
+    Embedding E(Data);
+    EXPECT_FALSE(E.empty());
+    ASSERT_THAT(E, SizeIs(3u));
+    EXPECT_THAT(E.getData(), ElementsAre(1.0, 2.0, 3.0));
+    EXPECT_EQ(E[0], 1.0);
+    EXPECT_EQ(E[1], 2.0);
+    EXPECT_EQ(E[2], 3.0);
   }
-};
 
-TEST(IR2VecTest, CreateSymbolicEmbedder) {
-  Vocab V = {{"foo", {1.0, 2.0}}};
+  // Constructor with std::vector<double>&&
+  {
+    Embedding E(std::vector<double>({4.0, 5.0}));
+    ASSERT_THAT(E, SizeIs(2u));
+    EXPECT_THAT(E.getData(), ElementsAre(4.0, 5.0));
+  }
 
-  LLVMContext Ctx;
-  Module M("M", Ctx);
-  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
-  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
+  // Constructor with std::initializer_list<double>
+  {
+    Embedding E({6.0, 7.0, 8.0, 9.0});
+    ASSERT_THAT(E, SizeIs(4u));
+    EXPECT_THAT(E.getData(), ElementsAre(6.0, 7.0, 8.0, 9.0));
+    EXPECT_EQ(E[0], 6.0);
+    E[0] = 6.5;
+    EXPECT_EQ(E[0], 6.5);
+  }
 
-  auto Result = Embedder::create(IR2VecKind::Symbolic, *F, V);
-  EXPECT_TRUE(static_cast<bool>(Result));
+  // Constructor with size_t
+  {
+    Embedding E(5);
+    ASSERT_THAT(E, SizeIs(5u));
+    EXPECT_THAT(E.getData(), ElementsAre(0.0, 0.0, 0.0, 0.0, 0.0));
+  }
 
-  auto *Emb = Result->get();
-  EXPECT_NE(Emb, nullptr);
-}
+  // Constructor with size_t and double
+  {
+    Embedding E(5, 1.5);
+    ASSERT_THAT(E, SizeIs(5u));
+    EXPECT_THAT(E.getData(), ElementsAre(1.5, 1.5, 1.5, 1.5, 1.5));
+  }
 
-TEST(IR2VecTest, CreateInvalidMode) {
-  Vocab V = {{"foo", {1.0, 2.0}}};
+  // Test iterators
+  {
+    Embedding E({6.5, 7.0, 8.0, 9.0});
+    std::vector<double> VecE;
+    for (double Val : E) {
+      VecE.push_back(Val);
+    }
+    EXPECT_THAT(VecE, ElementsAre(6.5, 7.0, 8.0, 9.0));
+
+    const Embedding CE = E;
+    std::vector<double> VecCE;
+    for (const double &Val : CE) {
+      VecCE.push_back(Val);
+    }
+    EXPECT_THAT(VecCE, ElementsAre(6.5, 7.0, 8.0, 9.0));
+
+    EXPECT_EQ(*E.begin(), 6.5);
+    EXPECT_EQ(*(E.end() - 1), 9.0);
+    EXPECT_EQ(*CE.cbegin(), 6.5);
+    EXPECT_EQ(*(CE.cend() - 1), 9.0);
+  }
+}
 
-  LLVMContext Ctx;
-  Module M("M", Ctx);
-  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
-  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
+TEST(EmbeddingTest, AddVectors) {
+  Embedding E1 = {1.0, 2.0, 3.0};
+  Embedding E2 = {0.5, 1.5, -1.0};
 
-  // static_cast an invalid int to IR2VecKind
-  auto Result = Embedder::create(static_cast<IR2VecKind>(-1), *F, V);
-  EXPECT_FALSE(static_cast<bool>(Result));
+  E1 += E2;
+  EXPECT_THAT(E1, ElementsAre(1.5, 3.5, 2.0));
 
-  std::string ErrMsg;
-  llvm::handleAllErrors(
-      Result.takeError(),
-      [&](const llvm::ErrorInfoBase &EIB) { ErrMsg = EIB.message(); });
-  EXPECT_NE(ErrMsg.find("Unknown IR2VecKind"), std::string::npos);
+  // Check that E2 is unchanged
+  EXPECT_THAT(E2, ElementsAre(0.5, 1.5, -1.0));
 }
 
-TEST(IR2VecTest, AddVectors) {
+TEST(EmbeddingTest, SubtractVectors) {
   Embedding E1 = {1.0, 2.0, 3.0};
   Embedding E2 = {0.5, 1.5, -1.0};
 
-  TestableEmbedder::addVectors(E1, E2);
-  EXPECT_THAT(E1, ElementsAre(1.5, 3.5, 2.0));
+  E1 -= E2;
+  EXPECT_THAT(E1, ElementsAre(0.5, 0.5, 4.0));
 
   // Check that E2 is unchanged
   EXPECT_THAT(E2, ElementsAre(0.5, 1.5, -1.0));
 }
 
-TEST(IR2VecTest, AddScaledVector) {
+TEST(EmbeddingTest, AddScaledVector) {
   Embedding E1 = {1.0, 2.0, 3.0};
   Embedding E2 = {2.0, 0.5, -1.0};
 
-  TestableEmbedder::addScaledVector(E1, E2, 0.5f);
+  E1.scaleAndAdd(E2, 0.5f);
   EXPECT_THAT(E1, ElementsAre(2.0, 2.25, 2.5));
 
   // Check that E2 is unchanged
   EXPECT_THAT(E2, ElementsAre(2.0, 0.5, -1.0));
 }
 
+TEST(EmbeddingTest, ApproximatelyEqual) {
+  Embedding E1 = {1.0, 2.0, 3.0};
+  Embedding E2 = {1.0000001, 2.0000001, 3.0000001};
+  EXPECT_TRUE(E1.approximatelyEquals(E2)); // Diff = 1e-7
+
+  Embedding E3 = {1.00002, 2.00002, 3.00002}; // Diff = 2e-5
+  EXPECT_FALSE(E1.approximatelyEquals(E3));
+  EXPECT_TRUE(E1.approximatelyEquals(E3, 3e-5));
+
+  Embedding E_clearly_within = {1.0000005, 2.0000005, 3.0000005}; // Diff = 5e-7
+  EXPECT_TRUE(E1.approximatelyEquals(E_clearly_within));
+
+  Embedding E_clearly_outside = {1.00001, 2.00001, 3.00001}; // Diff = 1e-5
+  EXPECT_FALSE(E1.approximatelyEquals(E_clearly_outside));
+
+  Embedding E4 = {1.0, 2.0, 3.5}; // Large diff
+  EXPECT_FALSE(E1.approximatelyEquals(E4, 0.01));
+
+  Embedding E5 = {1.0, 2.0, 3.0};
+  EXPECT_TRUE(E1.approximatelyEquals(E5, 0.0));
+  EXPECT_TRUE(E1.approximatelyEquals(E5));
+}
+
 #if GTEST_HAS_DEATH_TEST
 #ifndef NDEBUG
-TEST(IR2VecTest, MismatchedDimensionsAddVectors) {
+TEST(EmbeddingTest, AccessOutOfBounds) {
+  Embedding E = {1.0, 2.0, 3.0};
+  EXPECT_DEATH(E[3], "Index out of bounds");
+  EXPECT_DEATH(E[-1], "Index out of bounds");
+  EXPECT_DEATH(E[4] = 4.0, "Index out of bounds");
+}
+
+TEST(EmbeddingTest, MismatchedDimensionsAddVectors) {
   Embedding E1 = {1.0, 2.0};
   Embedding E2 = {1.0};
-  EXPECT_DEATH(TestableEmbedder::addVectors(E1, E2),
-               "Vectors must have the same dimension");
+  EXPECT_DEATH(E1 += E2, "Vectors must have the same dimension");
+}
+
+TEST(EmbeddingTest, MismatchedDimensionsSubtractVectors) {
+  Embedding E1 = {1.0, 2.0};
+  Embedding E2 = {1.0};
+  EXPECT_DEATH(E1 -= E2, "Vectors must have the same dimension");
 }
 
-TEST(IR2VecTest, MismatchedDimensionsAddScaledVector) {
+TEST(EmbeddingTest, MismatchedDimensionsAddScaledVector) {
   Embedding E1 = {1.0, 2.0};
   Embedding E2 = {1.0};
-  EXPECT_DEATH(TestableEmbedder::addScaledVector(E1, E2, 1.0f),
+  EXPECT_DEATH(E1.scaleAndAdd(E2, 1.0f),
+               "Vectors must have the same dimension");
+}
+
+TEST(EmbeddingTest, MismatchedDimensionsApproximatelyEqual) {
+  Embedding E1 = {1.0, 2.0};
+  Embedding E2 = {1.010};
+  EXPECT_DEATH(E1.approximatelyEquals(E2),
                "Vectors must have the same dimension");
 }
 #endif // NDEBUG
 #endif // GTEST_HAS_DEATH_TEST
 
+TEST(IR2VecTest, CreateSymbolicEmbedder) {
+  Vocab V = {{"foo", {1.0, 2.0}}};
+
+  LLVMContext Ctx;
+  Module M("M", Ctx);
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
+  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
+
+  auto Result = Embedder::create(IR2VecKind::Symbolic, *F, V);
+  EXPECT_TRUE(static_cast<bool>(Result));
+
+  auto *Emb = Result->get();
+  EXPECT_NE(Emb, nullptr);
+}
+
+TEST(IR2VecTest, CreateInvalidMode) {
+  Vocab V = {{"foo", {1.0, 2.0}}};
+
+  LLVMContext Ctx;
+  Module M("M", Ctx);
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
+  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
+
+  // static_cast an invalid int to IR2VecKind
+  auto Result = Embedder::create(static_cast<IR2VecKind>(-1), *F, V);
+  EXPECT_FALSE(static_cast<bool>(Result));
+
+  std::string ErrMsg;
+  llvm::handleAllErrors(
+      Result.takeError(),
+      [&](const llvm::ErrorInfoBase &EIB) { ErrMsg = EIB.message(); });
+  EXPECT_NE(ErrMsg.find("Unknown IR2VecKind"), std::string::npos);
+}
+
 TEST(IR2VecTest, LookupVocab) {
   Vocab V = {{"foo", {1.0, 2.0}}, {"bar", {3.0, 4.0}}};
   LLVMContext Ctx;
@@ -136,8 +257,9 @@ TEST(IR2VecTest, ZeroDimensionEmbedding) {
   Embedding E1;
   Embedding E2;
   // Should be no-op, but not crash
-  TestableEmbedder::addVectors(E1, E2);
-  TestableEmbedder::addScaledVector(E1, E2, 1.0f);
+  E1 += E2;
+  E1 -= E2;
+  E1.scaleAndAdd(E2, 1.0f);
   EXPECT_TRUE(E1.empty());
 }
 