Commit 88d577b
feat: add first setup screen for LLM & Embedding models (#314) (bump:minor)
* fix: utf-8 txt reader
* fix: revise vectorstore import and make it optional
* feat: add cohere chat model with tool call support
* fix: simplify citation pipeline
* fix: improve citation logic
* fix: improve decompose func call
* fix: revise question rewrite prompt
* fix: revise chat box default placeholder
* fix: add key from ktem to cohere rerank
* fix: conv name suggestion
* fix: ignore default key cohere rerank
* fix: improve test connection UI
* fix: reorder requirements
* feat: add first setup screen
* fix: update requirements
* fix: vectorstore tests
* fix: update cohere version
* fix: relax langchain core version
* fix: add demo mode
* fix: update flowsettings
* fix: typo
* fix: fix bool env passing
1 parent 0bdb9a3 commit 88d577b

27 files changed, +644 -141 lines changed

flowsettings.py

Lines changed: 15 additions & 1 deletion
@@ -24,9 +24,13 @@
 except Exception:
     KH_APP_VERSION = "local"

+KH_ENABLE_FIRST_SETUP = True
+KH_DEMO_MODE = config("KH_DEMO_MODE", default=False, cast=bool)
+
 # App can be ran from anywhere and it's not trivial to decide where to store app data.
 # So let's use the same directory as the flowsetting.py file.
 KH_APP_DATA_DIR = this_dir / "ktem_app_data"
+KH_APP_DATA_EXISTS = KH_APP_DATA_DIR.exists()
 KH_APP_DATA_DIR.mkdir(parents=True, exist_ok=True)

 # User data directory
@@ -59,7 +63,9 @@
 KH_DOC_DIR = this_dir / "docs"

 KH_MODE = "dev"
-KH_FEATURE_USER_MANAGEMENT = True
+KH_FEATURE_USER_MANAGEMENT = config(
+    "KH_FEATURE_USER_MANAGEMENT", default=True, cast=bool
+)
 KH_USER_CAN_SEE_PUBLIC = None
 KH_FEATURE_USER_MANAGEMENT_ADMIN = str(
     config("KH_FEATURE_USER_MANAGEMENT_ADMIN", default="admin")
@@ -202,6 +208,14 @@
     },
     "default": False,
 }
+KH_LLMS["cohere"] = {
+    "spec": {
+        "__type__": "kotaemon.llms.chats.LCCohereChat",
+        "model_name": "command-r-plus-08-2024",
+        "api_key": "your-key",
+    },
+    "default": False,
+}

 # additional embeddings configurations
 KH_EMBEDDINGS["cohere"] = {
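Note on the `cast=bool` arguments above (this is the "fix: fix bool env passing" item from the commit message): environment variables arrive as strings, and any non-empty string is truthy in Python. A minimal sketch, assuming python-decouple's `config` (which `flowsettings.py` already uses):

```python
import os

from decouple import config

os.environ["KH_DEMO_MODE"] = "false"

naive = config("KH_DEMO_MODE", default=False)              # the string "false"
proper = config("KH_DEMO_MODE", default=False, cast=bool)  # the bool False

print(bool(naive), proper)  # True False -- without the cast, demo mode turns on
```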

libs/kotaemon/kotaemon/embeddings/langchain_based.py

Lines changed: 1 addition & 1 deletion
@@ -183,7 +183,7 @@ def __init__(

     def _get_lc_class(self):
         try:
-            from langchain_community.embeddings import CohereEmbeddings
+            from langchain_cohere import CohereEmbeddings
         except ImportError:
             from langchain.embeddings import CohereEmbeddings

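The preferred import now comes from the standalone `langchain-cohere` package (an observation, not stated in the diff: LangChain has been splitting provider integrations out of `langchain_community` into dedicated packages). The same fallback chain, shown standalone with a placeholder key and model:

```python
try:
    from langchain_cohere import CohereEmbeddings  # pip install langchain-cohere
except ImportError:
    from langchain.embeddings import CohereEmbeddings  # legacy fallback

# "your-key" and the model name are placeholders, not values from this commit
emb = CohereEmbeddings(cohere_api_key="your-key", model="embed-english-v3.0")
print(type(emb).__module__)  # shows which package was resolved
```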
Lines changed: 31 additions & 68 deletions
@@ -1,4 +1,4 @@
-from typing import Iterator, List
+from typing import List

 from pydantic import BaseModel, Field

@@ -7,53 +7,14 @@
 from kotaemon.llms import BaseLLM


-class FactWithEvidence(BaseModel):
-    """Class representing a single statement.
+class CiteEvidence(BaseModel):
+    """List of evidences (maximum 5) to support the answer."""

-    Each fact has a body and a list of sources.
-    If there are multiple facts make sure to break them apart
-    such that each one only uses a set of sources that are relevant to it.
-    """
-
-    fact: str = Field(..., description="Body of the sentence, as part of a response")
-    substring_quote: List[str] = Field(
+    evidences: List[str] = Field(
         ...,
         description=(
             "Each source should be a direct quote from the context, "
-            "as a substring of the original content"
-        ),
-    )
-
-    def _get_span(self, quote: str, context: str, errs: int = 100) -> Iterator[str]:
-        import regex
-
-        minor = quote
-        major = context
-
-        errs_ = 0
-        s = regex.search(f"({minor}){{e<={errs_}}}", major)
-        while s is None and errs_ <= errs:
-            errs_ += 1
-            s = regex.search(f"({minor}){{e<={errs_}}}", major)
-
-        if s is not None:
-            yield from s.spans()
-
-    def get_spans(self, context: str) -> Iterator[str]:
-        for quote in self.substring_quote:
-            yield from self._get_span(quote, context)
-
-
-class QuestionAnswer(BaseModel):
-    """A question and its answer as a list of facts each one should have a source.
-    each sentence contains a body and a list of sources."""
-
-    question: str = Field(..., description="Question that was asked")
-    answer: List[FactWithEvidence] = Field(
-        ...,
-        description=(
-            "Body of the answer, each fact should be "
-            "its separate object with a body and a list of sources"
+            "as a substring of the original content (max 15 words)."
         ),
     )

@@ -68,15 +29,16 @@ def run(self, context: str, question: str):
         return self.invoke(context, question)

     def prepare_llm(self, context: str, question: str):
-        schema = QuestionAnswer.schema()
+        schema = CiteEvidence.schema()
         function = {
             "name": schema["title"],
             "description": schema["description"],
             "parameters": schema,
         }
         llm_kwargs = {
             "tools": [{"type": "function", "function": function}],
-            "tool_choice": "auto",
+            "tool_choice": "required",
+            "tools_pydantic": [CiteEvidence],
         }
         messages = [
             SystemMessage(
@@ -85,7 +47,12 @@ def prepare_llm(self, context: str, question: str):
                     "questions with correct and exact citations."
                 )
             ),
-            HumanMessage(content="Answer question using the following context"),
+            HumanMessage(
+                content=(
+                    "Answer question using the following context. "
+                    "Use the provided function CiteEvidence() to cite your sources."
+                )
+            ),
             HumanMessage(content=context),
             HumanMessage(content=f"Question: {question}"),
             HumanMessage(
@@ -103,33 +70,29 @@ def invoke(self, context: str, question: str):
             print("CitationPipeline: invoking LLM")
             llm_output = self.get_from_path("llm").invoke(messages, **llm_kwargs)
             print("CitationPipeline: finish invoking LLM")
-            if not llm_output.messages or not llm_output.additional_kwargs.get(
-                "tool_calls"
-            ):
+            if not llm_output.additional_kwargs.get("tool_calls"):
                 return None
-            function_output = llm_output.additional_kwargs["tool_calls"][0]["function"][
-                "arguments"
-            ]
-            output = QuestionAnswer.parse_raw(function_output)
-        except Exception as e:
-            print(e)
-            return None

-        return output
+            first_func = llm_output.additional_kwargs["tool_calls"][0]

-    async def ainvoke(self, context: str, question: str):
-        messages, llm_kwargs = self.prepare_llm(context, question)
+            if "function" in first_func:
+                # openai and cohere format
+                function_output = first_func["function"]["arguments"]
+            else:
+                # anthropic format
+                function_output = first_func["args"]

-        try:
-            print("CitationPipeline: async invoking LLM")
-            llm_output = await self.get_from_path("llm").ainvoke(messages, **llm_kwargs)
-            print("CitationPipeline: finish async invoking LLM")
-            function_output = llm_output.additional_kwargs["tool_calls"][0]["function"][
-                "arguments"
-            ]
-            output = QuestionAnswer.parse_raw(function_output)
+            print("CitationPipeline:", function_output)
+
+            if isinstance(function_output, str):
+                output = CiteEvidence.parse_raw(function_output)
+            else:
+                output = CiteEvidence.parse_obj(function_output)
         except Exception as e:
             print(e)
             return None

         return output
+
+    async def ainvoke(self, context: str, question: str):
+        raise NotImplementedError()
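The rewritten `invoke()` branches on two tool-call payload shapes. A self-contained sketch of that normalization, using fabricated payloads (only the dict shapes are taken from the diff; real provider responses carry more fields):

```python
from typing import List

from pydantic import BaseModel, Field


class CiteEvidence(BaseModel):
    """List of evidences (maximum 5) to support the answer."""

    evidences: List[str] = Field(...)


# OpenAI/Cohere style: arguments arrive as a JSON string
openai_style = {"function": {"arguments": '{"evidences": ["a direct quote"]}'}}
# Anthropic style: args arrive as an already-parsed dict
anthropic_style = {"args": {"evidences": ["a direct quote"]}}

for call in (openai_style, anthropic_style):
    payload = call["function"]["arguments"] if "function" in call else call["args"]
    evidence = (
        CiteEvidence.parse_raw(payload)       # JSON-string path
        if isinstance(payload, str)
        else CiteEvidence.parse_obj(payload)  # dict path
    )
    print(evidence.evidences)  # ['a direct quote'] in both cases
```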

libs/kotaemon/kotaemon/indices/rankings/cohere.py

Lines changed: 19 additions & 2 deletions
@@ -10,6 +10,7 @@
 class CohereReranking(BaseReranking):
     model_name: str = "rerank-multilingual-v2.0"
     cohere_api_key: str = config("COHERE_API_KEY", "")
+    use_key_from_ktem: bool = False

     def run(self, documents: list[Document], query: str) -> list[Document]:
         """Use Cohere Reranker model to re-order documents
@@ -18,9 +19,25 @@ def run(self, documents: list[Document], query: str) -> list[Document]:
             import cohere
         except ImportError:
             raise ImportError(
-                "Please install Cohere " "`pip install cohere` to use Cohere Reranking"
+                "Please install Cohere `pip install cohere` to use Cohere Reranking"
             )

+        # try to get COHERE_API_KEY from embeddings
+        if not self.cohere_api_key and self.use_key_from_ktem:
+            try:
+                from ktem.embeddings.manager import (
+                    embedding_models_manager as embeddings,
+                )
+
+                cohere_model = embeddings.get("cohere")
+                ktem_cohere_api_key = cohere_model._kwargs.get(  # type: ignore
+                    "cohere_api_key"
+                )
+                if ktem_cohere_api_key != "your-key":
+                    self.cohere_api_key = ktem_cohere_api_key
+            except Exception as e:
+                print("Cannot get Cohere API key from `ktem`", e)
+
         if not self.cohere_api_key:
             print("Cohere API key not found. Skipping reranking.")
             return documents
@@ -35,7 +52,7 @@ def run(self, documents: list[Document], query: str) -> list[Document]:
         response = cohere_client.rerank(
             model=self.model_name, query=query, documents=_docs
         )
-        print("Cohere score", [r.relevance_score for r in response.results])
+        # print("Cohere score", [r.relevance_score for r in response.results])
         for r in response.results:
             doc = documents[r.index]
             doc.metadata["cohere_reranking_score"] = r.relevance_score
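A hedged usage sketch of the new key fallback (the import paths `kotaemon.base.Document` and `kotaemon.indices.rankings.CohereReranking` are assumptions based on the file location; the documents and query are illustrative):

```python
from kotaemon.base import Document
from kotaemon.indices.rankings import CohereReranking

reranker = CohereReranking(
    model_name="rerank-multilingual-v2.0",
    use_key_from_ktem=True,  # borrow the key from ktem's "cohere" embeddings entry
)
docs = [
    Document(text="Bananas are rich in potassium."),
    Document(text="Paris is the capital of France."),
]
ranked = reranker.run(docs, query="What is the capital of France?")
for doc in ranked:
    # each document comes back annotated with its relevance score
    print(doc.metadata.get("cohere_reranking_score"), doc.text)
```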

libs/kotaemon/kotaemon/llms/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,7 @@
     LCAnthropicChat,
     LCAzureChatOpenAI,
     LCChatOpenAI,
+    LCCohereChat,
     LCGeminiChat,
     LlamaCppChat,
 )
@@ -31,6 +32,7 @@
     "ChatOpenAI",
     "LCAnthropicChat",
     "LCGeminiChat",
+    "LCCohereChat",
     "LCAzureChatOpenAI",
     "LCChatOpenAI",
     "LlamaCppChat",

libs/kotaemon/kotaemon/llms/chats/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,7 @@
     LCAzureChatOpenAI,
     LCChatMixin,
     LCChatOpenAI,
+    LCCohereChat,
     LCGeminiChat,
 )
 from .llamacpp import LlamaCppChat
@@ -18,6 +19,7 @@
     "ChatOpenAI",
     "LCAnthropicChat",
     "LCGeminiChat",
+    "LCCohereChat",
     "LCChatOpenAI",
     "LCAzureChatOpenAI",
     "LCChatMixin",

libs/kotaemon/kotaemon/llms/chats/langchain_based.py

Lines changed: 63 additions & 5 deletions
@@ -18,6 +18,9 @@ def _get_lc_class(self):
             "Please return the relevant Langchain class in in _get_lc_class"
         )

+    def _get_tool_call_kwargs(self):
+        return {}
+
     def __init__(self, stream: bool = False, **params):
         self._lc_class = self._get_lc_class()
         self._obj = self._lc_class(**params)
@@ -56,9 +59,7 @@ def prepare_response(self, pred):
             total_tokens = pred.llm_output["token_usage"]["total_tokens"]
             prompt_tokens = pred.llm_output["token_usage"]["prompt_tokens"]
         except Exception:
-            logger.warning(
-                f"Cannot get token usage from LLM output for {self._lc_class.__name__}"
-            )
+            pass

         return LLMInterface(
             text=all_text[0] if len(all_text) > 0 else "",
@@ -83,8 +84,30 @@ def invoke(
             LLMInterface: generated response
         """
         input_ = self.prepare_message(messages)
-        pred = self._obj.generate(messages=[input_], **kwargs)
-        return self.prepare_response(pred)
+
+        if "tools_pydantic" in kwargs:
+            tools = kwargs.pop(
+                "tools_pydantic",
+            )
+            lc_tool_call = self._obj.bind_tools(tools)
+            pred = lc_tool_call.invoke(
+                input_,
+                **self._get_tool_call_kwargs(),
+            )
+            if pred.tool_calls:
+                tool_calls = pred.tool_calls
+            else:
+                tool_calls = pred.additional_kwargs.get("tool_calls", [])
+
+            output = LLMInterface(
+                content="",
+                additional_kwargs={"tool_calls": tool_calls},
+            )
+        else:
+            pred = self._obj.generate(messages=[input_], **kwargs)
+            output = self.prepare_response(pred)
+
+        return output

     async def ainvoke(
         self, messages: str | BaseMessage | list[BaseMessage], **kwargs
@@ -235,6 +258,9 @@ class LCAnthropicChat(LCChatMixin, ChatLLM):  # type: ignore
         required=True,
     )

+    def _get_tool_call_kwargs(self):
+        return {"tool_choice": {"type": "any"}}
+
     def __init__(
         self,
         api_key: str | None = None,
@@ -291,3 +317,35 @@ def _get_lc_class(self):
             raise ImportError("Please install langchain-google-genai")

         return ChatGoogleGenerativeAI
+
+
+class LCCohereChat(LCChatMixin, ChatLLM):  # type: ignore
+    api_key: str = Param(
+        help="API key (https://dashboard.cohere.com/api-keys)", required=True
+    )
+    model_name: str = Param(
+        help=("Model name to use (https://dashboard.cohere.com/playground/chat)"),
+        required=True,
+    )
+
+    def __init__(
+        self,
+        api_key: str | None = None,
+        model_name: str | None = None,
+        temperature: float = 0.7,
+        **params,
+    ):
+        super().__init__(
+            cohere_api_key=api_key,
+            model_name=model_name,
+            temperature=temperature,
+            **params,
+        )
+
+    def _get_lc_class(self):
+        try:
+            from langchain_cohere import ChatCohere
+        except ImportError:
+            raise ImportError("Please install langchain-cohere")
+
+        return ChatCohere
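A sketch of the new `tools_pydantic` path end to end (assumes a valid Cohere API key; the schema is a stand-in for the `CiteEvidence` model from the citation pipeline above):

```python
from typing import List

from pydantic import BaseModel, Field

from kotaemon.llms import LCCohereChat


class CiteEvidence(BaseModel):
    """List of evidences (maximum 5) to support the answer."""

    evidences: List[str] = Field(...)


llm = LCCohereChat(model_name="command-r-plus-08-2024", api_key="your-key")

# Passing tools_pydantic routes invoke() through bind_tools() rather than
# generate(), and the tool calls come back in additional_kwargs:
output = llm.invoke(
    "Cite evidence for: the Eiffel Tower is in Paris.",
    tools_pydantic=[CiteEvidence],
)
print(output.additional_kwargs["tool_calls"])
```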

libs/kotaemon/kotaemon/llms/chats/openai.py

Lines changed: 6 additions & 0 deletions
@@ -292,6 +292,9 @@ def prepare_client(self, async_version: bool = False):

     def openai_response(self, client, **kwargs):
         """Get the openai response"""
+        if "tools_pydantic" in kwargs:
+            kwargs.pop("tools_pydantic")
+
         params_ = {
             "model": self.model,
             "temperature": self.temperature,
@@ -360,6 +363,9 @@ def prepare_client(self, async_version: bool = False):

     def openai_response(self, client, **kwargs):
         """Get the openai response"""
+        if "tools_pydantic" in kwargs:
+            kwargs.pop("tools_pydantic")
+
         params_ = {
             "model": self.azure_deployment,
             "temperature": self.temperature,
