Skip to content

Commit

Permalink
feat: add document analysis and JSON formatting
Browse files Browse the repository at this point in the history
- Implement `llm.py` module with functions to:
  - Initialize Cody agent and server
  - Perform document analysis on provided files
  - Structure analysis output into JSON format
- Update `main.py` to:
  - Use asyncio for running analysis
  - Collect full paths of documentation files
  - Save analysis output to files in output directory
- Add .env and .codyarchitect to .gitignore
  • Loading branch information
PriNova committed May 2, 2024
1 parent d1c5ab8 commit 0a8d4e4
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 10 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/.venv
**/__pycache__
/logs
.env
/.codyarchitect
5 changes: 0 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,3 @@ repos:
rev: 24.4.0
hooks:
- id: black

- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
155 changes: 155 additions & 0 deletions llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import os

from codypy import AgentSpecs, CodyAgent, CodyServer, Models, append_paths, log_message
from dotenv import load_dotenv

# Pull settings from a .env file (if any) into the environment, then read the
# two values this module needs. Either constant is None when its variable is
# not set.
load_dotenv()
SRC_ACCESS_TOKEN = os.environ.get("SRC_ACCESS_TOKEN")
BINARY_PATH = os.environ.get("BINARY_PATH")

# First prompt sent by document_analysis(): together with the attached
# documentation files it asks the model for a prose summary organised into
# eight numbered sections. Leading/trailing whitespace is removed by .strip().
prompt_analysis = """
Analyze the provided documentation and extract the most relevant information to give a concise overview of the software project. Include the following details in your summary:
1. Project Description:
- Briefly describe the purpose and main functionality of the project.
- Highlight the key features or unique aspects of the project.
2. Architecture Overview:
- Provide a high-level overview of the project's architecture.
- Mention the main components, modules, or layers of the system.
- Describe how these components interact with each other.
3. Dependencies and Requirements:
- List the major dependencies, libraries, or frameworks used in the project.
- Specify any specific versions or compatibility requirements.
4. Setup and Configuration:
- Summarize the steps required to set up and configure the project.
- Include any necessary environment variables, configuration files, or database setup.
5. Usage Instructions:
- Provide a brief explanation of how to use the project or run the application.
- Include any command-line arguments, API endpoints, or user interface interactions.
6. Contribution Guidelines:
- Summarize the guidelines for contributing to the project.
- Mention any coding conventions, branch naming, or pull request processes.
7. Testing and Deployment:
- Briefly explain how to run tests for the project.
- Provide an overview of the deployment process or any specific deployment considerations.
8. Additional Resources:
- List any additional resources, such as API documentation, examples, or troubleshooting guides.
- Provide links to these resources if available.
Please generate a concise summary that covers these key points based on the provided documentation. The summary should be clear, well-structured, and easy to understand for developers who are new to the project.
""".strip()

# Second prompt sent by document_analysis(), in the same chat and without any
# context files: it asks the model to restate the previously extracted
# information as a raw JSON object with a fixed set of keys.
# NOTE(review): the key list below includes "requirements" and
# "configuration_instructions", but the example object omits both — confirm
# whether the example should be extended to match.
structured_prompt = """Please structure the extracted information from the documentation into a JSON format using the following guidelines:
1. Create a JSON object with the following keys:
- "project_description"
- "architecture_overview"
- "dependencies"
- "requirements"
- "setup_instructions"
- "configuration_instructions"
- "usage_instructions"
- "contribution_guidelines"
- "testing_instructions"
- "deployment_instructions"
- "additional_resources"
2. For each key, provide the corresponding information extracted from the documentation very briefly.
3. If any information is missing, couldn't be extracted or is not known, set the value of the corresponding key to "UNKNOWN".
4. Ensure that the JSON object is well-formatted, with proper indentation and syntax.
5. If there are any code snippets or examples in the extracted information, format them as strings within the JSON object.
6. Use clear and concise language in the JSON values, avoiding any ambiguity or redundancy.
7. If there are multiple points or steps for a particular key (e.g., setup instructions), represent them as an array of strings.
Here's an example of the desired JSON format:
{
"project_description": "A powerful tool for analyzing codebases.",
"architecture_overview": "The project follows a modular architecture with three main components: parser, analyzer, and reporter.",
"dependencies": [
"Python 3.8+",
"OpenAI API",
"ChromaDB"
],
"setup_instructions": [
"Clone the repository",
"Install dependencies using pip",
"Set up the required environment variables"
],
"usage_instructions": "Run the main script with the codebase directory as an argument.",
"contribution_guidelines": "UNKNOWN",
"testing_instructions": "Run the test suite using the command `pytest tests/`.",
"deployment_instructions": "UNKNOWN",
"additional_resources": [
"API documentation: https://example.com/api-docs",
"Troubleshooting guide: https://example.com/troubleshooting"
]
}
Please generate the JSON object based on the extracted information, following the provided guidelines and example format as raw string. Do not enclose the JSON object in triple backticks.
""".strip()


async def init_llm(workspace_path: str) -> "tuple[CodyServer, CodyAgent]":
    """Start the Cody server and initialize an agent for a workspace.

    Args:
        workspace_path: Path used both as the agent's workspace root URI and
            as the ``codebase`` entry of the extension configuration.

    Returns:
        A ``(cody_server, cody_agent)`` pair. The server is returned alongside
        the agent so the caller can later pass it to ``cleanup_llm``.
    """
    cody_server: CodyServer = await CodyServer.init(
        binary_path=BINARY_PATH, version="0.0.5b", is_debugging=False
    )
    agent_specs = AgentSpecs(
        workspaceRootUri=workspace_path,
        extensionConfiguration={
            "accessToken": SRC_ACCESS_TOKEN,
            "codebase": workspace_path,
            "customConfiguration": {},
        },
    )
    log_message("CodyArchitect", "Initializing Cody Agent")
    cody_agent: CodyAgent = await cody_server.initialize_agent(
        agent_specs=agent_specs, is_debugging=False
    )
    return cody_server, cody_agent


async def new_chat(cody_agent: CodyAgent, model: Models = Models.Claude3Sonnet):
    """Open a fresh chat on the agent and select the model it should use.

    Args:
        cody_agent: Agent on which the chat is created.
        model: Model to select for the chat; defaults to Claude 3 Sonnet.
    """
    # Order matters: the chat is created first, then the model is set.
    await cody_agent.new_chat(is_debugging=False)
    await cody_agent.set_model(model=model, is_debugging=False)


async def document_analysis(documents: list, cody_agent: CodyAgent):
    """Run the two-step documentation analysis in the agent's current chat.

    The first message sends ``prompt_analysis`` with the documents attached as
    context files; the second sends ``structured_prompt`` with no context
    files.

    Args:
        documents: Paths of the documentation files to attach as context.
        cody_agent: Agent used to send both chat messages.

    Returns:
        A ``(analysis, json_format)`` pair holding the first element of each
        chat response, in the order the messages were sent.
    """
    # Options that are identical for both chat turns.
    shared_options = {
        "enhanced_context": False,
        "show_context_files": False,
        "is_debugging": False,
    }

    # Turn 1: free-form analysis of the attached documents.
    attached_files = append_paths(*documents)
    summary_text, _ = await cody_agent.chat(
        message=prompt_analysis,
        context_files=attached_files,
        **shared_options,
    )

    # Turn 2: JSON restructuring, sent without any context files.
    structured_text, _ = await cody_agent.chat(
        message=structured_prompt,
        context_files=None,
        **shared_options,
    )

    return summary_text, structured_text


async def cleanup_llm(cody_server: CodyServer):
    """Await the server's own cleanup routine.

    Args:
        cody_server: Server instance previously returned by ``init_llm``.
    """
    shutdown = cody_server.cleanup_server()
    await shutdown
31 changes: 26 additions & 5 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import argparse
import asyncio
import json
import os

from codypy import log_message, setup_logger
from pathspec import PathSpec

from llm import cleanup_llm, document_analysis, init_llm, new_chat


def validate_codebase_dir(codebase_dir):
if not os.path.exists(codebase_dir):
Expand Down Expand Up @@ -41,12 +45,14 @@ def collect_documentation_files(codebase_dir):
continue

if file.endswith(".md") or file.endswith(".txt"):
documentation_files.append(file_path)
# Get the full absolute path
full_path = os.path.abspath(file_path)
documentation_files.append(full_path)

return documentation_files


def main(codebase_dir=None, output_dir=None):
async def main(codebase_dir=None, output_dir=None):
setup_logger("CodyArchitect", "logs")
if codebase_dir is None:
# Create a command-line interface (CLI) for the program
Expand Down Expand Up @@ -76,9 +82,24 @@ def main(codebase_dir=None, output_dir=None):
os.makedirs(output_dir)

documentation_files = collect_documentation_files(codebase_dir)
log_message("Main: ", f"{documentation_files}")

cody_server, cody_agent = await init_llm(codebase_dir)
await new_chat(cody_agent=cody_agent)
analysis, analysis_formatted = await document_analysis(
documentation_files, cody_agent
)
with open(os.path.join(output_dir, "analysis.txt"), "w") as f:
f.write(analysis)

with open(os.path.join(output_dir, "analysis_formatted.json"), "w") as f:
json.dump(analysis_formatted, f, indent=2)

print(f"{analysis}\n")
print("--- JSON ---")
print(f"{analysis_formatted}\n")
await cleanup_llm(cody_server)


if __name__ == "__main__":
codebase_dir = "."
main(codebase_dir)
codebase_dir = "." # "/home/prinova/CodyProjects/cody"
asyncio.run(main(codebase_dir, ".codyarchitect"))

0 comments on commit 0a8d4e4

Please sign in to comment.