Skip to content

Commit

Permalink
Resolve review comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
SonglinLyu committed Jan 16, 2025
1 parent 43c81ad commit e78c030
Show file tree
Hide file tree
Showing 11 changed files with 153 additions and 164 deletions.
3 changes: 2 additions & 1 deletion .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,15 @@ KNOWLEDGE_GRAPH_EXTRACT_SEARCH_TOP_SIZE=5
KNOWLEDGE_GRAPH_EXTRACT_SEARCH_RECALL_SCORE=0.3
KNOWLEDGE_GRAPH_SIMILARITY_SEARCH_TOP_SIZE=5
KNOWLEDGE_GRAPH_SIMILARITY_SEARCH_RECALL_SCORE=0.7
KNOWLEDGE_GRAPH_TEXT_SEARCH_TOP_SIZE=5
KNOWLEDGE_GRAPH_COMMUNITY_SEARCH_TOP_SIZE=20
KNOWLEDGE_GRAPH_COMMUNITY_SEARCH_RECALL_SCORE=0.0

GRAPH_COMMUNITY_SUMMARY_ENABLED=True # enable the graph community summary
TRIPLET_GRAPH_ENABLED=True # enable the graph search for triplets
DOCUMENT_GRAPH_ENABLED=True # enable the graph search for documents and chunks
SIMILARITY_SEARCH_ENABLED=True # enable the similarity search for entities and chunks
TEXT2GQL_SEARCH_ENABLED=False # enable the text2gql search for entities and relations.
TEXT_SEARCH_ENABLED=False # enable the text search for entities and relations.

KNOWLEDGE_GRAPH_CHUNK_SEARCH_TOP_SIZE=5 # the top size of knowledge graph search for chunks
KNOWLEDGE_GRAPH_EXTRACTION_BATCH_SIZE=20 # the batch size of triplet extraction from the text
Expand Down
2 changes: 1 addition & 1 deletion dbgpt/rag/transformer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,5 +75,5 @@ class TranslatorBase(TransformerBase, ABC):
"""Translator base class."""

@abstractmethod
async def translate(self, text: str) -> Dict:
    """Translate the given text into a structured dict result.

    Concrete translators implement this to run their LLM pipeline on
    ``text`` and return the parsed response.
    """
8 changes: 4 additions & 4 deletions dbgpt/rag/transformer/intent_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import logging
import re
from typing import Dict, List, Optional
from typing import Dict, List

from dbgpt.core import BaseMessage, HumanPromptTemplate, LLMClient
from dbgpt.rag.transformer.llm_translator import LLMTranslator
Expand Down Expand Up @@ -69,10 +69,10 @@ def __init__(self, llm_client: LLMClient, model_name: str):
super().__init__(llm_client, model_name, INTENT_INTERPRET_PT)

def _format_messages(self, text: str, history: str = None) -> List[BaseMessage]:
# interprete intent with single prompt only.
# interprete intention with single prompt only.
template = HumanPromptTemplate.from_template(self._prompt_template)

messages = (
messages: List[BaseMessage] = (
template.format_messages(text=text, history=history)
if history is not None
else template.format_messages(text=text)
Expand All @@ -86,7 +86,7 @@ def truncate(self):
def drop(self):
"""Do nothing by default."""

def _parse_response(self, text: str, limit: Optional[int] = None) -> Dict:
def _parse_response(self, text: str) -> Dict:
"""Parse llm response."""
"""
The returned diction should contain the following content.
Expand Down
6 changes: 4 additions & 2 deletions dbgpt/rag/transformer/llm_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ async def _extract(
self, text: str, history: str = None, limit: Optional[int] = None
) -> List:
"""Inner extract by LLM."""
# Validate the optional result limit up front, before any LLM work.
# BUG FIX: the original constructed ValueError without raising it,
# making the check a silent no-op.
if limit and limit < 1:
    raise ValueError("optional argument limit >= 1")

template = HumanPromptTemplate.from_template(self._prompt_template)

messages = (
Expand All @@ -80,8 +84,6 @@ async def _extract(
logger.error(f"request llm failed ({code}) {reason}")
return []

if limit and limit < 1:
ValueError("optional argument limit >= 1")
return self._parse_response(response.text, limit)

def truncate(self):
Expand Down
16 changes: 6 additions & 10 deletions dbgpt/rag/transformer/llm_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
from abc import ABC, abstractmethod
from typing import Dict, List, Optional
from typing import Dict, List

from dbgpt.core import BaseMessage, LLMClient, ModelMessage, ModelRequest
from dbgpt.rag.transformer.base import TranslatorBase
Expand All @@ -19,14 +19,12 @@ def __init__(self, llm_client: LLMClient, model_name: str, prompt_template: str)
self._model_name = model_name
self._prompt_template = prompt_template

async def translate(self, text: str) -> Dict:
    """Translate *text* by formatting it into LLM messages and parsing the reply."""
    prompt_messages = self._format_messages(text)
    return await self._translate(prompt_messages)

async def _translate(
self, messages: List[BaseMessage], limit: Optional[int] = None
) -> Dict:
async def _translate(self, messages: List[BaseMessage]) -> Dict:
"""Inner translate by LLM."""
# use default model if needed
if not self._model_name:
Expand All @@ -46,9 +44,7 @@ async def _translate(
logger.error(f"request llm failed ({code}) {reason}")
return {}

if limit and limit < 1:
ValueError("optional argument limit >= 1")
return self._parse_response(response.text, limit)
return self._parse_response(response.text)

def truncate(self):
"""Do nothing by default."""
Expand All @@ -61,5 +57,5 @@ def _format_messages(self, text: str, history: str = None) -> List[BaseMessage]:
"""Parse llm response."""

@abstractmethod
def _parse_response(self, text: str, limit: Optional[int] = None) -> Dict:
def _parse_response(self, text: str) -> Dict:
"""Parse llm response."""
119 changes: 0 additions & 119 deletions dbgpt/rag/transformer/text2cypher.py

This file was deleted.

111 changes: 109 additions & 2 deletions dbgpt/rag/transformer/text2gql.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,117 @@
"""Text2GQL class."""
import json
import logging
import re
from typing import Dict, List

from dbgpt.rag.transformer.base import TranslatorBase
from dbgpt.core import BaseMessage, HumanPromptTemplate, LLMClient
from dbgpt.rag.transformer.llm_translator import LLMTranslator

# Few-shot prompt that translates an interpreted intention (question,
# category, entities, relations) into a Cypher query over the given
# knowledge-graph schema. Fixes vs. previous version: "seperately" typo,
# "has a entity" grammar, and the last example's query now uses "DBGPT"
# so it is consistent with that example's entities list.
TEXT_TO_GQL_PT = (
    "A question written in graph query language style is provided below. "
    "The category of this question, "
    "entities and relations that might be used in the cypher query are also provided. "
    "Given the question, translate the question into a cypher query that "
    "can be executed on the given knowledge graph. "
    "Make sure the syntax of the translated cypher query is correct.\n"
    "To help query generation, the schema of the knowledge graph is:\n"
    "{schema}\n"
    "---------------------\n"
    "Example:\n"
    "Question: Query the entity named TuGraph then return the entity.\n"
    "Category: Single Entity Search\n"
    'entities: ["TuGraph"]\n'
    "relations: []\n"
    'Query:\nMatch (n) WHERE n.id="TuGraph" RETURN n\n'
    "Question: Query all one hop paths between the entity named Alex "
    "and the entity named TuGraph, then return them.\n"
    "Category: One Hop Entity Search\n"
    'entities: ["Alex", "TuGraph"]\n'
    "relations: []\n"
    'Query:\nMATCH p=(n)-[r]-(m) WHERE n.id="Alex" AND m.id="TuGraph" RETURN p \n'
    "Question: Query all one hop paths that has an entity named TuGraph "
    "and a relation named commit, then return them.\n"
    "Category: One Hop Relation Search\n"
    'entities: ["TuGraph"]\n'
    'relations: ["commit"]\n'
    'Query:\nMATCH p=(n)-[r]-(m) WHERE n.id="TuGraph" AND r.id="commit" RETURN p \n'
    "Question: Query all entities that have a two hop path between them "
    "and the entity named Bob, "
    "both entities should have a work for relation with the middle entity.\n"
    "Category: Two Hop Entity Search\n"
    'entities: ["Bob"]\n'
    'relations: ["work for"]\n'
    'Query:\nMATCH p=(n)-[r1]-(m)-[r2]-(l) WHERE n.id="Bob" '
    'AND r1.id="work for" AND r2.id="work for" RETURN p \n'
    "Question: Introduce TuGraph and DBGPT separately.\n"
    "Category: Freestyle Question\n"
    'entities: ["TuGraph", "DBGPT"]\n'
    "relations: []\n"
    "Query:\nMATCH p=(n)-[r:relation*2]-(m) "
    'WHERE n.id IN ["TuGraph", "DBGPT"] RETURN p\n'
    "---------------------\n"
    "Question: {question}\n"
    "Category: {category}\n"
    "entities: {entities}\n"
    "relations: {relations}\n"
    "Query:\n"
)

logger = logging.getLogger(__name__)


class Text2GQL(LLMTranslator):
    """Translate an interpreted intention (JSON text) into a Cypher query."""

    def __init__(self, llm_client: LLMClient, model_name: str):
        """Initialize the Text2GQL translator with the text-to-GQL prompt."""
        super().__init__(llm_client, model_name, TEXT_TO_GQL_PT)

    def _format_messages(self, text: str, history: str = None) -> List[BaseMessage]:
        # Translate intention to gql with single prompt only.
        # `text` is a JSON-encoded intention; presumably produced by the
        # intent interpreter upstream — keys used below must be present.
        intention = json.loads(text)
        prompt_args = {
            "schema": intention["schema"],
            "question": intention["rewritten_question"],
            "category": intention["category"],
            "entities": intention["entities"],
            "relations": intention["relations"],
        }
        # Only pass history through when the caller supplied one.
        if history is not None:
            prompt_args["history"] = history

        template = HumanPromptTemplate.from_template(self._prompt_template)
        return template.format_messages(**prompt_args)

def _parse_response(self, text: str) -> Dict:
"""Parse llm response."""
translation = {}
query = ""

code_block_pattern = re.compile(r"```cypher(.*?)```", re.S)

result = re.findall(code_block_pattern, text)
if result:
query = result[0]
else:
query = text

translation["query"] = query.strip()

return translation
6 changes: 3 additions & 3 deletions dbgpt/storage/graph_store/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ class GraphStoreConfig(BaseModel):
default=False,
description="Enable similarity search or not.",
)
enable_text2gql_search: bool = Field(
enable_text_search: bool = Field(
default=False,
description="Enable text2gql search or not.",
description="Enable text search or not.",
)


Expand All @@ -46,7 +46,7 @@ def __init__(self, config: GraphStoreConfig):
self._conn = None
self.enable_summary = config.enable_summary
self.enable_similarity_search = config.enable_similarity_search
self.enable_text2gql_search = config.enable_text2gql_search
self.enable_text_search = config.enable_text_search

@abstractmethod
def get_config(self) -> GraphStoreConfig:
Expand Down
Loading

0 comments on commit e78c030

Please sign in to comment.