Add SummarizeCode tool (#20)

Key changes:
* Add the SummarizeCode tool.
* Impose a limit of 20,000 characters on tool outputs.
* Implement security improvements: authentication for all endpoints.
* Fix some bugs.
* Add tests (78.21% total coverage, +4.14%).

Other:
* Allow a single file, not only a directory, as the start path in print_all_files_in_path.py.
* Fix JSON configs: change 'role' to 'name'.
* Update developer instructions.
* Update .gitignore.
bonk1t · Jan 24, 2024 · 19f89dc · 19f89dc
1 parent d262dba
commit 19f89dc
Show file tree

Hide file tree

Showing 32 changed files with 562 additions and 149 deletions.
diff --git a/.gitignore b/.gitignore
@@ -167,3 +167,4 @@ settings.json
 
 # UI folder
 nalgonda/ui/*
+nalgonda/data/agency_data/*
diff --git a/nalgonda/custom_tools/__init__.py b/nalgonda/custom_tools/__init__.py
@@ -3,18 +3,20 @@
 
 from nalgonda.custom_tools.build_directory_tree import BuildDirectoryTree
 from nalgonda.custom_tools.generate_proposal import GenerateProposal
-from nalgonda.custom_tools.print_all_files_in_directory import PrintAllFilesInDirectory
+from nalgonda.custom_tools.print_all_files_in_path import PrintAllFilesInPath
 from nalgonda.custom_tools.save_lead_to_airtable import SaveLeadToAirtable
 from nalgonda.custom_tools.search_web import SearchWeb
+from nalgonda.custom_tools.summarize_code import SummarizeCode
 from nalgonda.custom_tools.write_and_save_program import WriteAndSaveProgram
 
 TOOL_MAPPING = {
     "CodeInterpreter": CodeInterpreter,
     "Retrieval": Retrieval,
     "BuildDirectoryTree": BuildDirectoryTree,
     "GenerateProposal": GenerateProposal,
-    "PrintAllFilesInDirectory": PrintAllFilesInDirectory,
+    "PrintAllFilesInPath": PrintAllFilesInPath,
     "SaveLeadToAirtable": SaveLeadToAirtable,
     "SearchWeb": SearchWeb,
+    "SummarizeCode": SummarizeCode,
     "WriteAndSaveProgram": WriteAndSaveProgram,
 }
diff --git a/nalgonda/custom_tools/build_directory_tree.py b/nalgonda/custom_tools/build_directory_tree.py
@@ -42,6 +42,9 @@ def recurse(directory: Path, level: int = 0) -> None:
                     tree_str += f"{sub_indent}{path.name}\n"
 
         recurse(start_path)
+
+        if len(tree_str) > 20000:
+            tree_str = tree_str[:20000] + "\n\n... (truncated output, please use a smaller directory or apply a filter)"
         return tree_str
 
 

diff --git a/nalgonda/custom_tools/generate_proposal.py b/nalgonda/custom_tools/generate_proposal.py
@@ -2,6 +2,7 @@
 from pydantic import Field
 
 from nalgonda.custom_tools.utils import get_chat_completion
+from nalgonda.settings import settings
 
 USER_PROMPT_PREFIX = "Please draft a proposal for the following project brief: \n"
 SYSTEM_MESSAGE = """\
@@ -20,5 +21,7 @@ class GenerateProposal(BaseTool):
 
     def run(self) -> str:
         user_prompt = f"{USER_PROMPT_PREFIX}{self.project_brief}"
-        response = get_chat_completion(user_prompt=user_prompt, system_message=SYSTEM_MESSAGE, temperature=0.6)
+        response = get_chat_completion(
+            user_prompt=user_prompt, system_message=SYSTEM_MESSAGE, temperature=0.6, model=settings.gpt_model
+        )
         return response
diff --git a/...tom_tools/print_all_files_in_directory.py → ...a/custom_tools/print_all_files_in_path.py b/...tom_tools/print_all_files_in_directory.py → ...a/custom_tools/print_all_files_in_path.py
@@ -6,29 +6,35 @@
 from nalgonda.custom_tools.utils import check_directory_traversal
 
 
-class PrintAllFilesInDirectory(BaseTool):
-    """Print the contents of all files in a start_directory recursively.
+class PrintAllFilesInPath(BaseTool):
+    """Print the contents of all files in a start_path recursively.
+    The parameters are: start_path, file_extensions.
     Directory traversal is not allowed (you cannot read /* or ../*).
     """
 
-    start_directory: Path = Field(
+    start_path: Path = Field(
         default_factory=Path.cwd,
-        description="Directory to search for Python files, by default the current working directory.",
+        description="The starting path to search for files, defaults to the current working directory. "
+        "Can be a filename or a directory.",
     )
     file_extensions: set[str] = Field(
         default_factory=set,
         description="Set of file extensions to include in the tree. If empty, all files will be included. "
         "Examples are {'.py', '.txt', '.md'}.",
     )
 
-    _validate_start_directory = field_validator("start_directory", mode="after")(check_directory_traversal)
+    _validate_start_path = field_validator("start_path", mode="after")(check_directory_traversal)
 
     def run(self) -> str:
         """
-        Recursively searches for files within `start_directory` and compiles their contents into a single string.
+        Recursively searches for files within `start_path` and compiles their contents into a single string.
         """
         output = []
-        start_path = self.start_directory.resolve()
+        start_path = self.start_path.resolve()
+
+        # if start_path is a file, just read it
+        if start_path.is_file():
+            return f"{str(start_path)}:\n```\n{self.read_file(start_path)}\n```\n"
 
         for path in start_path.rglob("*"):
             # ignore files in hidden directories
@@ -37,7 +43,13 @@ def run(self) -> str:
             if path.is_file() and (not self.file_extensions or path.suffix in self.file_extensions):
                 output.append(f"{str(path)}:\n```\n{self.read_file(path)}\n```\n")
 
-        return "\n".join(output)
+        output_str = "\n".join(output)
+
+        if len(output_str) > 20000:
+            output_str = (
+                output_str[:20000] + "\n\n... (truncated output, please use a smaller directory or apply a filter)"
+            )
+        return output_str
 
     @staticmethod
     def read_file(file_path: Path):
@@ -50,8 +62,8 @@ def read_file(file_path: Path):
 
 if __name__ == "__main__":
     print(
-        PrintAllFilesInDirectory(
-            start_directory=".",
+        PrintAllFilesInPath(
+            start_path=".",
             file_extensions={".py", ".json", ".yaml", ".yml", ".md", ".txt", ".tsx", ".ts", ".js", ".jsx", ".html"},
         ).run()
     )
diff --git a/nalgonda/custom_tools/search_web.py b/nalgonda/custom_tools/search_web.py
@@ -8,7 +8,7 @@ class SearchWeb(BaseTool):
 
     phrase: str = Field(
         ...,
-        description="The search phrase you want to use. " "Optimize the search phrase for an internet search engine.",
+        description="The search phrase you want to use. Optimize the search phrase for an internet search engine.",
     )
     max_results: int = Field(default=10, description="The maximum number of search results to return, default is 10.")
 

diff --git a/nalgonda/custom_tools/summarize_code.py b/nalgonda/custom_tools/summarize_code.py
@@ -0,0 +1,70 @@
+from pathlib import Path
+
+from agency_swarm import BaseTool
+from pydantic import Field
+
+from nalgonda.custom_tools import PrintAllFilesInPath
+from nalgonda.custom_tools.utils import get_chat_completion
+from nalgonda.settings import settings
+
+USER_PROMPT_PREFIX = "Summarize the code of each file below.\n\n"
+SYSTEM_MESSAGE = """\
+Your main job is to handle programming code from SEVERAL FILES. \
+Each file's content is shown within triple backticks and has a FILE PATH as a title. \
+It's vital to KEEP the FILE PATHS.
+Here's what to do:
+1. ALWAYS KEEP the FILE PATHS for each file.
+2. Start each file with a short SUMMARY of its content. Mention important points but don't repeat details found later.
+3. KEEP important elements like non-trivial imports, function details, type hints, and key constants. \
+Don't change these.
+4. In functions or class methods, replace long code with a short SUMMARY in the docstrings, keeping the main logic.
+5. Shorten and combine docstrings and comments into the function or method descriptions.
+6. For classes, provide a brief SUMMARY in the docstrings, explaining the class's purpose and main logic.
+7. Cut down long strings to keep things brief.
+8. If there's a comment about "truncated output" at the end, KEEP it.
+
+Your task is to create a concise version of the code, strictly keeping the FILE PATHS and structure, \
+without extra comments or explanations. Focus on clarity and avoiding repeated information within each file.\
+"""
+
+
+class SummarizeCode(BaseTool):
+    """Summarize code using GPT-3. The tool uses the `PrintAllFilesInPath` tool to get the code to summarize.
+    The parameters are: start_path, file_extensions.
+    Directory traversal is not allowed (you cannot read /* or ../*).
+    """
+
+    start_path: Path = Field(
+        default_factory=Path.cwd,
+        description="The starting path to search for files, defaults to the current working directory. "
+        "Can be a filename or a directory.",
+    )
+    file_extensions: set[str] = Field(
+        default_factory=set,
+        description="Set of file extensions to include in the tree. If empty, all files will be included. "
+        "Examples are {'.py', '.txt', '.md'}.",
+    )
+
+    def run(self) -> str:
+        full_code = PrintAllFilesInPath(
+            start_path=self.start_path,
+            file_extensions=self.file_extensions,
+        ).run()
+        user_prompt = f"{USER_PROMPT_PREFIX}{full_code}"
+
+        output = get_chat_completion(
+            user_prompt=user_prompt, system_message=SYSTEM_MESSAGE, temperature=0.0, model=settings.gpt_cheap_model
+        )
+
+        if len(output) > 20000:
+            output = output[:20000] + "\n\n... (truncated output, please use a smaller directory or apply a filter)"
+        return output
+
+
+if __name__ == "__main__":
+    print(
+        SummarizeCode(
+            start_path=".",
+            file_extensions={".py"},
+        ).run()
+    )
diff --git a/nalgonda/custom_tools/utils.py b/nalgonda/custom_tools/utils.py
@@ -2,15 +2,13 @@
 
 from agency_swarm.util import get_openai_client
 
-from nalgonda.settings import settings
 
-
-def get_chat_completion(user_prompt: str, system_message: str, **kwargs) -> str:
+def get_chat_completion(user_prompt: str, system_message: str, model: str, **kwargs) -> str:
     """Generate a chat completion based on a prompt and a system message.
     This function is a wrapper around the OpenAI API."""
     client = get_openai_client()
     completion = client.chat.completions.create(
-        model=settings.gpt_model,
+        model=model,
         messages=[
             {"role": "system", "content": system_message},
             {"role": "user", "content": user_prompt},

diff --git a/nalgonda/custom_tools/write_and_save_program.py b/nalgonda/custom_tools/write_and_save_program.py
@@ -45,8 +45,7 @@ def run(self):
 
 
 class WriteAndSaveProgram(BaseTool):
-    """Set of files that represent a complete and correct program/application.
-    This environment has access to all standard Python packages and the internet."""
+    """Set of files that represent a complete and correct program/application"""
 
     chain_of_thought: str = Field(
         ..., description="Think step by step to determine the correct actions that are needed to implement the program."

diff --git a/nalgonda/data/default_configs/agent/default_config.json b/nalgonda/data/default_configs/agent/default_config.json
@@ -1,5 +1,5 @@
 {
-  "role": "LeadAndRequirementsGatherer",
+  "name": "LeadAndRequirementsGatherer",
   "description": "Specialized in lead capture and software development requirement gathering, this agent will interact with users, guiding them through the initial stages of understanding our AI solutions and collecting relevant information for further engagement.",
   "instructions": "# Instructions for Virtual Assistant: \nLead Capture and Requirement Gathering Specialist\n\n- Engage with website visitors by introducing them to AI in Hand's services, emphasizing our custom AI automation and the transformative impact it can have on their business operations.\n- Explain that AI in Hand specializes in bespoke AI solutions, primarily offering 3 groups of solutions: \n1. Virtual AI Assistants: Custom-designed to reflect a brand's voice and ethos; integrated with CRMs for seamless customer interactions; knowledge base customization for a truly personalized service.\n2. Custom AI Agents: Tailor-made agents for task automation, including data processing, forecasting, and reporting; driving efficiency and accuracy in day-to-day operations.\n3. API-Driven Custom Tools: Enhance each solution with our expertise in creating custom tools using APIs, ensuring seamless integration and functionality tailored to specific needs. Explain how these services can be tailored to their unique business needs.\n- Inquire if the visitor is interested in specifying their business requirements for a custom AI solution, offering to guide them through the process.\n- Begin with the Initial Interaction stage, asking the visitor to describe the type of AI solution they are interested in and how it might serve their business.\n- Proceed to the Requirement Gathering stage, asking targeted questions to collect comprehensive details about their AI needs, ensuring to ask one question at a time for clarity.\n- Once sufficient information is collected, transition to the Lead Capture stage, politely asking for the visitor's preferred name and email address to ensure our team can follow up effectively.\n- Assure the visitor that their requirements and contact details will be securely saved to our CRM system, and that a member of our team will reach out to them to discuss their custom AI solution further.\n- Throughout the interaction, maintain a professional and 
helpful demeanor, using the information about AI in Hand's services and solutions to answer any questions and provide a personalized experience. \nIMPORTANT: ALWAYS be concise and respond with shorter messages.",
   "files_folder": null,

diff --git a/nalgonda/data/default_configs/agent/default_config_ceo.json b/nalgonda/data/default_configs/agent/default_config_ceo.json
@@ -1,5 +1,5 @@
 {
-  "role": "CEO",
+  "name": "CEO",
   "description": "Responsible for client communication, task planning and management.",
   "instructions": "# Instructions for CEO Agent\n\n- Send the proposal to the user before beginning task execution.\n- Assign tasks to agents based on their expertise and capabilities.\n- Clearly outline the goals and expected outcomes for each task.\n- Provide essential context and background for successful task completion.\n- Keep in constant communication with agents throughout task execution.\n- Review completed tasks to ensure they meet the objectives.\n- Report the outcomes to the user.\n- Pass on any user feedback to the agents. Note: All conversations with agents are private. Information must be relayed directly by you, as cross-referencing or referencing 'above' is not possible in these separate, private conversations.",
   "files_folder": null,

diff --git a/nalgonda/data/default_configs/agent/default_config_developer.json b/nalgonda/data/default_configs/agent/default_config_developer.json
@@ -1,11 +1,12 @@
 {
-  "role": "Developer",
+  "name": "Developer",
   "description": "Responsible for running and executing Python Programs. Can also save programs to files, and search the web for information.",
-  "instructions": "# Instructions for AI Developer Agent\n\n- Write clean and efficient Python code.\n- Ensure correct imports according to the program structure.\n- Check your code to validate functionality and errors, before reporting back to the user.\n- Always update all relevant files after each change, don't bother the user with details or code diff.\n- Before starting to work, make sure you are familiar with the codebase. Use BuildDirectoryTree to get the directory structure. Then use PrintAllFilesInDirectory tool to print all files in a particular directory.\n- ALWAYS try to minimize the number of files printed. ALWAYS use BuildDirectoryTree tool before PrintAllFilesInDirectory to find the most relevant directory.",
+  "instructions": "# Instructions for AI Developer Agent\n\n- Write clean and efficient Python code.\n- Ensure correct imports according to the program structure.\n- ALWAYS update all relevant files after each change; don't bother the user with details or code diffs.\n- Before starting to work, MAKE SURE you are familiar with the codebase. You MUST USE the BuildDirectoryTree tool to get the directory structure. Then you MUST use the SummarizeCode tool to get an overview of the code (prefer low-level directories or individual files). Finally, use the PrintAllFilesInPath tool to access the full code of the files you absolutely need (only when writing tests, when using as dependency, when debugging).\n- ALWAYS USE the BuildDirectoryTree tool BEFORE SummarizeCode or PrintAllFilesInPath to find the most relevant directory or file.\n- ALWAYS USE the SummarizeCode tool BEFORE PrintAllFilesInPath to gain a better overview of the code.\n- When writing tests, ALWAYS use EXISTING testing infrastructure as much as possible: mocks, utility functions / classes / fixtures / conftest objects.\n- Use the WriteAndSaveProgram tool when coding. It allows you to plan your work and save the code to files.",
   "files_folder": null,
   "tools": [
     "BuildDirectoryTree",
-    "PrintAllFilesInDirectory",
+    "PrintAllFilesInPath",
+    "SummarizeCode",
     "WriteAndSaveProgram"
   ]
 }
diff --git a/nalgonda/data/default_configs/agent/default_config_va.json b/nalgonda/data/default_configs/agent/default_config_va.json
@@ -1,5 +1,5 @@
 {
-  "role": "Virtual Assistant",
+  "name": "Virtual Assistant",
   "description": "Responsible for drafting emails, doing research and writing proposals. Can also search the web for information.",
   "instructions": "### Instructions for Virtual Assistant\n\nYour role is to assist users in executing tasks like below. \nIf the task is outside of your capabilities, please report back to the user.\n\n#### 1. Drafting Emails\n   - **Understand Context and Tone**: Familiarize yourself with the context of each email. \n   Maintain a professional and courteous tone.\n   - **Accuracy and Clarity**: Ensure that the information is accurate and presented clearly. \n   Avoid jargon unless it's appropriate for the recipient.\n\n#### 2. Generating Proposals\n   - **Gather Requirements**: Collect all necessary information about the project, \n   including client needs, objectives, and any specific requests.\n\n#### 3. Conducting Research\n   - **Understand the Objective**: Clarify the purpose and objectives of the research to focus on relevant information.\n   - **Summarize Findings**: Provide clear, concise summaries of the research findings, \n   highlighting key points and how they relate to the project or inquiry.\n   - **Cite Sources**: Properly cite all sources to maintain integrity and avoid plagiarism.",
   "files_folder": null,

diff --git a/nalgonda/dependencies/auth.py b/nalgonda/dependencies/auth.py
@@ -3,6 +3,7 @@
 from fastapi import Depends, HTTPException, status
 from fastapi.security import OAuth2PasswordBearer
 from jose import JWTError, jwt
+from starlette.status import HTTP_400_BAD_REQUEST, HTTP_403_FORBIDDEN
 
 from nalgonda.models.auth import TokenData, UserInDB
 from nalgonda.persistence.user_repository import UserRepository
@@ -14,7 +15,7 @@
 def get_user(username: str) -> UserInDB | None:
     user = UserRepository().get_user_by_id(username)
     if user:
-        return UserInDB(**user, username=username)
+        return UserInDB(**user)
 
 
 async def get_current_user(token: Annotated[str, Depends(oauth2_scheme)]) -> UserInDB:
@@ -41,13 +42,13 @@ async def get_current_active_user(
     current_user: Annotated[UserInDB, Depends(get_current_user)],
 ) -> UserInDB:
     if current_user.disabled:
-        raise HTTPException(status_code=400, detail="Inactive user")
+        raise HTTPException(status_code=HTTP_400_BAD_REQUEST, detail="Inactive user")
     return current_user
 
 
 async def get_current_superuser(
     current_user: Annotated[UserInDB, Depends(get_current_active_user)],
 ) -> UserInDB:
     if not current_user.is_superuser:
-        raise HTTPException(status_code=403, detail="The user doesn't have enough privileges")
+        raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="The user doesn't have enough privileges")
     return current_user
diff --git a/nalgonda/persistence/agent_config_firestore_storage.py b/nalgonda/persistence/agent_config_firestore_storage.py
@@ -23,10 +23,10 @@ def save(self, agent_config: AgentConfig) -> str:
         """Save the agent configuration to the firestore.
         If the agent_id is not set, it will create a new document and set the agent_id.
         Returns the agent_id."""
-        document_data = agent_config.model_dump()
         if agent_config.agent_id is None:
             # Create a new document and set the agent_id
-            document_reference = self.collection.add(document_data)[0]
+            document_reference = self.collection.add(agent_config.model_dump())[0]
             agent_config.agent_id = document_reference.id
-        self.collection.document(agent_config.agent_id).set(document_data)
+
+        self.collection.document(agent_config.agent_id).set(agent_config.model_dump())
         return agent_config.agent_id
diff --git a/nalgonda/persistence/tool_config_firestore_storage.py b/nalgonda/persistence/tool_config_firestore_storage.py
@@ -19,16 +19,11 @@ def load_by_tool_id(self, tool_id: str) -> ToolConfig | None:
             return None
         return ToolConfig.model_validate(document_snapshot.to_dict())
 
-    def save(self, tool_config: ToolConfig, approved: bool = False) -> tuple[str, int]:
-        # Increment version and set as not approved for each new save
-        tool_config.version += 1
-        tool_config.approved = approved
-
-        document_data = tool_config.model_dump()
+    def save(self, tool_config: ToolConfig) -> tuple[str, int]:
         if tool_config.tool_id is None:
             # Create a new document and set the tool_id
-            document_reference = self.collection.add(document_data)[0]
+            document_reference = self.collection.add(tool_config.model_dump())[0]
             tool_config.tool_id = document_reference.id
-        self.collection.document(tool_config.tool_id).set(document_data)
+        self.collection.document(tool_config.tool_id).set(tool_config.model_dump())
 
         return tool_config.tool_id, tool_config.version
Original file line number	Diff line number	Diff line change
Expand Up		@@ -167,3 +167,4 @@ settings.json

		# UI folder
		nalgonda/ui/*
		nalgonda/data/agency_data/*