Skip to content

Commit

Permalink
Add SummarizeCode tool (#20)
Browse files Browse the repository at this point in the history
Key changes:
* Add the SummarizeCode tool.
* Impose a limit of 20,000 characters on tool outputs.
* Implement security improvements: authentication for all endpoints.
* Fix some bugs.
* Add tests (78.21% total coverage (+4.14%))

Other:
* Allow for files in print_all_files_in_path.py.
* Fix JSON configs: change 'role' to 'name'.
* Update developer instructions.
* Update .gitignore.
  • Loading branch information
bonk1t authored Jan 24, 2024
1 parent d262dba commit 19f89dc
Show file tree
Hide file tree
Showing 32 changed files with 562 additions and 149 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,4 @@ settings.json

# UI folder
nalgonda/ui/*
nalgonda/data/agency_data/*
6 changes: 4 additions & 2 deletions nalgonda/custom_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@

from nalgonda.custom_tools.build_directory_tree import BuildDirectoryTree
from nalgonda.custom_tools.generate_proposal import GenerateProposal
from nalgonda.custom_tools.print_all_files_in_directory import PrintAllFilesInDirectory
from nalgonda.custom_tools.print_all_files_in_path import PrintAllFilesInPath
from nalgonda.custom_tools.save_lead_to_airtable import SaveLeadToAirtable
from nalgonda.custom_tools.search_web import SearchWeb
from nalgonda.custom_tools.summarize_code import SummarizeCode
from nalgonda.custom_tools.write_and_save_program import WriteAndSaveProgram

TOOL_MAPPING = {
"CodeInterpreter": CodeInterpreter,
"Retrieval": Retrieval,
"BuildDirectoryTree": BuildDirectoryTree,
"GenerateProposal": GenerateProposal,
"PrintAllFilesInDirectory": PrintAllFilesInDirectory,
"PrintAllFilesInPath": PrintAllFilesInPath,
"SaveLeadToAirtable": SaveLeadToAirtable,
"SearchWeb": SearchWeb,
"SummarizeCode": SummarizeCode,
"WriteAndSaveProgram": WriteAndSaveProgram,
}
3 changes: 3 additions & 0 deletions nalgonda/custom_tools/build_directory_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def recurse(directory: Path, level: int = 0) -> None:
tree_str += f"{sub_indent}{path.name}\n"

recurse(start_path)

if len(tree_str) > 20000:
tree_str = tree_str[:20000] + "\n\n... (truncated output, please use a smaller directory or apply a filter)"
return tree_str


Expand Down
5 changes: 4 additions & 1 deletion nalgonda/custom_tools/generate_proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pydantic import Field

from nalgonda.custom_tools.utils import get_chat_completion
from nalgonda.settings import settings

USER_PROMPT_PREFIX = "Please draft a proposal for the following project brief: \n"
SYSTEM_MESSAGE = """\
Expand All @@ -20,5 +21,7 @@ class GenerateProposal(BaseTool):

def run(self) -> str:
user_prompt = f"{USER_PROMPT_PREFIX}{self.project_brief}"
response = get_chat_completion(user_prompt=user_prompt, system_message=SYSTEM_MESSAGE, temperature=0.6)
response = get_chat_completion(
user_prompt=user_prompt, system_message=SYSTEM_MESSAGE, temperature=0.6, model=settings.gpt_model
)
return response
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,35 @@
from nalgonda.custom_tools.utils import check_directory_traversal


class PrintAllFilesInDirectory(BaseTool):
"""Print the contents of all files in a start_directory recursively.
class PrintAllFilesInPath(BaseTool):
"""Print the contents of all files in a start_path recursively.
The parameters are: start_path, file_extensions.
Directory traversal is not allowed (you cannot read /* or ../*).
"""

start_directory: Path = Field(
start_path: Path = Field(
default_factory=Path.cwd,
description="Directory to search for Python files, by default the current working directory.",
description="The starting path to search for files, defaults to the current working directory. "
"Can be a filename or a directory.",
)
file_extensions: set[str] = Field(
default_factory=set,
description="Set of file extensions to include in the tree. If empty, all files will be included. "
"Examples are {'.py', '.txt', '.md'}.",
)

_validate_start_directory = field_validator("start_directory", mode="after")(check_directory_traversal)
_validate_start_path = field_validator("start_path", mode="after")(check_directory_traversal)

def run(self) -> str:
"""
Recursively searches for files within `start_directory` and compiles their contents into a single string.
Recursively searches for files within `start_path` and compiles their contents into a single string.
"""
output = []
start_path = self.start_directory.resolve()
start_path = self.start_path.resolve()

# if start_path is a file, just read it
if start_path.is_file():
return f"{str(start_path)}:\n```\n{self.read_file(start_path)}\n```\n"

for path in start_path.rglob("*"):
# ignore files in hidden directories
Expand All @@ -37,7 +43,13 @@ def run(self) -> str:
if path.is_file() and (not self.file_extensions or path.suffix in self.file_extensions):
output.append(f"{str(path)}:\n```\n{self.read_file(path)}\n```\n")

return "\n".join(output)
output_str = "\n".join(output)

if len(output_str) > 20000:
output_str = (
output_str[:20000] + "\n\n... (truncated output, please use a smaller directory or apply a filter)"
)
return output_str

@staticmethod
def read_file(file_path: Path):
Expand All @@ -50,8 +62,8 @@ def read_file(file_path: Path):

if __name__ == "__main__":
print(
PrintAllFilesInDirectory(
start_directory=".",
PrintAllFilesInPath(
start_path=".",
file_extensions={".py", ".json", ".yaml", ".yml", ".md", ".txt", ".tsx", ".ts", ".js", ".jsx", ".html"},
).run()
)
2 changes: 1 addition & 1 deletion nalgonda/custom_tools/search_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class SearchWeb(BaseTool):

phrase: str = Field(
...,
description="The search phrase you want to use. " "Optimize the search phrase for an internet search engine.",
description="The search phrase you want to use. Optimize the search phrase for an internet search engine.",
)
max_results: int = Field(default=10, description="The maximum number of search results to return, default is 10.")

Expand Down
70 changes: 70 additions & 0 deletions nalgonda/custom_tools/summarize_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from pathlib import Path

from agency_swarm import BaseTool
from pydantic import Field

from nalgonda.custom_tools import PrintAllFilesInPath
from nalgonda.custom_tools.utils import get_chat_completion
from nalgonda.settings import settings

# Prefix placed in front of the concatenated file dump to form the user prompt.
USER_PROMPT_PREFIX = "Summarize the code of each file below.\n\n"
# System prompt for the summarization request. A trailing backslash inside the
# triple-quoted string joins that line with the next one without inserting a
# newline, so only the lines WITHOUT a trailing backslash end in a line break.
SYSTEM_MESSAGE = """\
Your main job is to handle programming code from SEVERAL FILES. \
Each file's content is shown within triple backticks and has a FILE PATH as a title. \
It's vital to KEEP the FILE PATHS.
Here's what to do:
1. ALWAYS KEEP the FILE PATHS for each file.
2. Start each file with a short SUMMARY of its content. Mention important points but don't repeat details found later.
3. KEEP important elements like non-trivial imports, function details, type hints, and key constants. \
Don't change these.
4. In functions or class methods, replace long code with a short SUMMARY in the docstrings, keeping the main logic.
5. Shorten and combine docstrings and comments into the function or method descriptions.
6. For classes, provide a brief SUMMARY in the docstrings, explaining the class's purpose and main logic.
7. Cut down long strings to keep things brief.
8. If there's a comment about "truncated output" at the end, KEEP it.
Your task is to create a concise version of the code, strictly keeping the FILE PATHS and structure, \
without extra comments or explanations. Focus on clarity and avoiding repeated information within each file.\
"""


class SummarizeCode(BaseTool):
    """Summarize code using GPT-3. The tool uses the `PrintAllFilesInPath` tool to get the code to summarize.
    The parameters are: start_path, file_extensions.
    Directory traversal is not allowed (you cannot read /* or ../*).
    """

    # NOTE(review): the class docstring and the field descriptions are kept
    # verbatim; they presumably feed the tool schema shown to the model -- confirm.

    # Where to look for files; forwarded unchanged to PrintAllFilesInPath.
    start_path: Path = Field(
        default_factory=Path.cwd,
        description=(
            "The starting path to search for files, defaults to the current working directory. "
            "Can be a filename or a directory."
        ),
    )
    # Optional suffix filter; forwarded unchanged to PrintAllFilesInPath.
    file_extensions: set[str] = Field(
        default_factory=set,
        description=(
            "Set of file extensions to include in the tree. If empty, all files will be included. "
            "Examples are {'.py', '.txt', '.md'}."
        ),
    )

    def run(self) -> str:
        # Step 1: dump the selected files via the file-printing tool.
        file_printer = PrintAllFilesInPath(
            start_path=self.start_path,
            file_extensions=self.file_extensions,
        )
        source_dump = file_printer.run()

        # Step 2: ask the cheap model for a condensed version of that dump.
        summary = get_chat_completion(
            user_prompt=f"{USER_PROMPT_PREFIX}{source_dump}",
            system_message=SYSTEM_MESSAGE,
            temperature=0.0,
            model=settings.gpt_cheap_model,
        )

        # Step 3: cap the reply length and flag the cut when it happens.
        max_chars = 20000
        if len(summary) <= max_chars:
            return summary
        return summary[:max_chars] + "\n\n... (truncated output, please use a smaller directory or apply a filter)"


if __name__ == "__main__":
    # Manual smoke run: summarize the Python files found from the current directory.
    demo_tool = SummarizeCode(start_path=".", file_extensions={".py"})
    print(demo_tool.run())
6 changes: 2 additions & 4 deletions nalgonda/custom_tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@

from agency_swarm.util import get_openai_client

from nalgonda.settings import settings


def get_chat_completion(user_prompt: str, system_message: str, **kwargs) -> str:
def get_chat_completion(user_prompt: str, system_message: str, model: str, **kwargs) -> str:
"""Generate a chat completion based on a prompt and a system message.
This function is a wrapper around the OpenAI API."""
client = get_openai_client()
completion = client.chat.completions.create(
model=settings.gpt_model,
model=model,
messages=[
{"role": "system", "content": system_message},
{"role": "user", "content": user_prompt},
Expand Down
3 changes: 1 addition & 2 deletions nalgonda/custom_tools/write_and_save_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ def run(self):


class WriteAndSaveProgram(BaseTool):
"""Set of files that represent a complete and correct program/application.
This environment has access to all standard Python packages and the internet."""
"""Set of files that represent a complete and correct program/application"""

chain_of_thought: str = Field(
..., description="Think step by step to determine the correct actions that are needed to implement the program."
Expand Down
2 changes: 1 addition & 1 deletion nalgonda/data/default_configs/agent/default_config.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"role": "LeadAndRequirementsGatherer",
"name": "LeadAndRequirementsGatherer",
"description": "Specialized in lead capture and software development requirement gathering, this agent will interact with users, guiding them through the initial stages of understanding our AI solutions and collecting relevant information for further engagement.",
"instructions": "# Instructions for Virtual Assistant: \nLead Capture and Requirement Gathering Specialist\n\n- Engage with website visitors by introducing them to AI in Hand's services, emphasizing our custom AI automation and the transformative impact it can have on their business operations.\n- Explain that AI in Hand specializes in bespoke AI solutions, primarily offering 3 groups of solutions: \n1. Virtual AI Assistants: Custom-designed to reflect a brand's voice and ethos; integrated with CRMs for seamless customer interactions; knowledge base customization for a truly personalized service.\n2. Custom AI Agents: Tailor-made agents for task automation, including data processing, forecasting, and reporting; driving efficiency and accuracy in day-to-day operations.\n3. API-Driven Custom Tools: Enhance each solution with our expertise in creating custom tools using APIs, ensuring seamless integration and functionality tailored to specific needs. Explain how these services can be tailored to their unique business needs.\n- Inquire if the visitor is interested in specifying their business requirements for a custom AI solution, offering to guide them through the process.\n- Begin with the Initial Interaction stage, asking the visitor to describe the type of AI solution they are interested in and how it might serve their business.\n- Proceed to the Requirement Gathering stage, asking targeted questions to collect comprehensive details about their AI needs, ensuring to ask one question at a time for clarity.\n- Once sufficient information is collected, transition to the Lead Capture stage, politely asking for the visitor's preferred name and email address to ensure our team can follow up effectively.\n- Assure the visitor that their requirements and contact details will be securely saved to our CRM system, and that a member of our team will reach out to them to discuss their custom AI solution further.\n- Throughout the interaction, maintain a professional and helpful 
demeanor, using the information about AI in Hand's services and solutions to answer any questions and provide a personalized experience. \nIMPORTANT: ALWAYS be concise and respond with shorter messages.",
"files_folder": null,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"role": "CEO",
"name": "CEO",
"description": "Responsible for client communication, task planning and management.",
"instructions": "# Instructions for CEO Agent\n\n- Send the proposal to the user before beginning task execution.\n- Assign tasks to agents based on their expertise and capabilities.\n- Clearly outline the goals and expected outcomes for each task.\n- Provide essential context and background for successful task completion.\n- Keep in constant communication with agents throughout task execution.\n- Review completed tasks to ensure they meet the objectives.\n- Report the outcomes to the user.\n- Pass on any user feedback to the agents. Note: All conversations with agents are private. Information must be relayed directly by you, as cross-referencing or referencing 'above' is not possible in these separate, private conversations.",
"files_folder": null,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
{
"role": "Developer",
"name": "Developer",
"description": "Responsible for running and executing Python Programs. Can also save programs to files, and search the web for information.",
"instructions": "# Instructions for AI Developer Agent\n\n- Write clean and efficient Python code.\n- Ensure correct imports according to the program structure.\n- Check your code to validate functionality and errors, before reporting back to the user.\n- Always update all relevant files after each change, don't bother the user with details or code diff.\n- Before starting to work, make sure you are familiar with the codebase. Use BuildDirectoryTree to get the directory structure. Then use PrintAllFilesInDirectory tool to print all files in a particular directory.\n- ALWAYS try to minimize the number of files printed. ALWAYS use BuildDirectoryTree tool before PrintAllFilesInDirectory to find the most relevant directory.",
"instructions": "# Instructions for AI Developer Agent\n\n- Write clean and efficient Python code.\n- Ensure correct imports according to the program structure.\n- ALWAYS update all relevant files after each change; don't bother the user with details or code diffs.\n- Before starting to work, MAKE SURE you are familiar with the codebase. You MUST USE the BuildDirectoryTree tool to get the directory structure. Then you MUST use the SummarizeCode tool to get an overview of the code (prefer low-level directories or individual files). Finally, use the PrintAllFilesInPath tool to access the full code of the files you absolutely need (only when writing tests, when using as dependency, when debugging).\n- ALWAYS USE the BuildDirectoryTree tool BEFORE SummarizeCode or PrintAllFilesInPath to find the most relevant directory or file.\n- ALWAYS USE the SummarizeCode tool BEFORE PrintAllFilesInPath to gain a better overview of the code.\n- When writing tests, ALWAYS use EXISTING testing infrastructure as much as possible: mocks, utility functions / classes / fixtures / conftest objects.\n- Use the WriteAndSaveProgram tool when coding. It allows you to plan your work and save the code to files.",
"files_folder": null,
"tools": [
"BuildDirectoryTree",
"PrintAllFilesInDirectory",
"PrintAllFilesInPath",
"SummarizeCode",
"WriteAndSaveProgram"
]
}
2 changes: 1 addition & 1 deletion nalgonda/data/default_configs/agent/default_config_va.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"role": "Virtual Assistant",
"name": "Virtual Assistant",
"description": "Responsible for drafting emails, doing research and writing proposals. Can also search the web for information.",
"instructions": "### Instructions for Virtual Assistant\n\nYour role is to assist users in executing tasks like below. \nIf the task is outside of your capabilities, please report back to the user.\n\n#### 1. Drafting Emails\n - **Understand Context and Tone**: Familiarize yourself with the context of each email. \n Maintain a professional and courteous tone.\n - **Accuracy and Clarity**: Ensure that the information is accurate and presented clearly. \n Avoid jargon unless it's appropriate for the recipient.\n\n#### 2. Generating Proposals\n - **Gather Requirements**: Collect all necessary information about the project, \n including client needs, objectives, and any specific requests.\n\n#### 3. Conducting Research\n - **Understand the Objective**: Clarify the purpose and objectives of the research to focus on relevant information.\n - **Summarize Findings**: Provide clear, concise summaries of the research findings, \n highlighting key points and how they relate to the project or inquiry.\n - **Cite Sources**: Properly cite all sources to maintain integrity and avoid plagiarism.",
"files_folder": null,
Expand Down
7 changes: 4 additions & 3 deletions nalgonda/dependencies/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from fastapi import Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
from starlette.status import HTTP_400_BAD_REQUEST, HTTP_403_FORBIDDEN

from nalgonda.models.auth import TokenData, UserInDB
from nalgonda.persistence.user_repository import UserRepository
Expand All @@ -14,7 +15,7 @@
def get_user(username: str) -> UserInDB | None:
user = UserRepository().get_user_by_id(username)
if user:
return UserInDB(**user, username=username)
return UserInDB(**user)


async def get_current_user(token: Annotated[str, Depends(oauth2_scheme)]) -> UserInDB:
Expand All @@ -41,13 +42,13 @@ async def get_current_active_user(
current_user: Annotated[UserInDB, Depends(get_current_user)],
) -> UserInDB:
if current_user.disabled:
raise HTTPException(status_code=400, detail="Inactive user")
raise HTTPException(status_code=HTTP_400_BAD_REQUEST, detail="Inactive user")
return current_user


async def get_current_superuser(
current_user: Annotated[UserInDB, Depends(get_current_active_user)],
) -> UserInDB:
if not current_user.is_superuser:
raise HTTPException(status_code=403, detail="The user doesn't have enough privileges")
raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="The user doesn't have enough privileges")
return current_user
6 changes: 3 additions & 3 deletions nalgonda/persistence/agent_config_firestore_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ def save(self, agent_config: AgentConfig) -> str:
"""Save the agent configuration to the firestore.
If the agent_id is not set, it will create a new document and set the agent_id.
Returns the agent_id."""
document_data = agent_config.model_dump()
if agent_config.agent_id is None:
# Create a new document and set the agent_id
document_reference = self.collection.add(document_data)[0]
document_reference = self.collection.add(agent_config.model_dump())[0]
agent_config.agent_id = document_reference.id
self.collection.document(agent_config.agent_id).set(document_data)

self.collection.document(agent_config.agent_id).set(agent_config.model_dump())
return agent_config.agent_id
11 changes: 3 additions & 8 deletions nalgonda/persistence/tool_config_firestore_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,11 @@ def load_by_tool_id(self, tool_id: str) -> ToolConfig | None:
return None
return ToolConfig.model_validate(document_snapshot.to_dict())

def save(self, tool_config: ToolConfig, approved: bool = False) -> tuple[str, int]:
# Increment version and set as not approved for each new save
tool_config.version += 1
tool_config.approved = approved

document_data = tool_config.model_dump()
def save(self, tool_config: ToolConfig) -> tuple[str, int]:
if tool_config.tool_id is None:
# Create a new document and set the tool_id
document_reference = self.collection.add(document_data)[0]
document_reference = self.collection.add(tool_config.model_dump())[0]
tool_config.tool_id = document_reference.id
self.collection.document(tool_config.tool_id).set(document_data)
self.collection.document(tool_config.tool_id).set(tool_config.model_dump())

return tool_config.tool_id, tool_config.version
Loading

0 comments on commit 19f89dc

Please sign in to comment.