Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat/#22] Langchain를 활용하여 프롬프트 진행 #27

Merged
merged 6 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions gitlio/app/domain/portfolio/portfolio_router.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from fastapi import APIRouter, Depends, Response, status, HTTPException
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session

from . import portfolio_crud, portfolio_schema
from ...database import get_db

# Router for the portfolio endpoints; every route is mounted under /api/portfolios
# and tagged "portfolio".
router = APIRouter(
    prefix="/api/portfolios",
    tags=["portfolio"],
)

@router.post("/")
Expand Down
8 changes: 7 additions & 1 deletion gitlio/app/domain/repository/repository_crud.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from sqlalchemy import and_

from .repository_schema import *
from ... import models
from ...models import Repository
Expand All @@ -14,7 +16,11 @@ def create_repository(db: Session, repository: RepositoryCreateRequest):


def get_repository_list(db: Session):
    """Return all ``Repository`` rows as a list."""
    return db.query(Repository).all()


def get_repository(repository_id: int, db: Session):
    """Look up a single repository by its id.

    Args:
        repository_id: Value to match against ``Repository.repository_id``.
        db: Active SQLAlchemy session.

    Returns:
        The first matching ``Repository`` row, or ``None`` when nothing matches.
    """
    # A single condition needs no and_() wrapper.
    return db.query(Repository).filter(Repository.repository_id == repository_id).first()
139 changes: 109 additions & 30 deletions gitlio/app/domain/repository/repository_router.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import base64
import re

from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate
from langchain_openai.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from bs4 import BeautifulSoup
from fastapi import APIRouter, Depends, HTTPException
from fastapi.encoders import jsonable_encoder
Expand All @@ -10,23 +14,58 @@
from . import repository_schema, repository_crud
from ...database import get_db
import requests
from typing import List
from typing import List, Dict
from urllib.parse import urlparse
from dotenv import load_dotenv, find_dotenv
import os

load_dotenv(find_dotenv())

# Router for the repository endpoints; every route is mounted under
# /api/repositories and tagged "repository".
router = APIRouter(
    prefix="/api/repositories",
    tags=["repository"],
)

# GitHub Personal Access Token
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
# NOTE(review): despite its name, this holds the value of OPENAI_API_KEY.
AZURE_OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

# Headers for the GitHub API
github_headers = {
    'Authorization': f'Bearer {GITHUB_TOKEN}',
}

# Headers for the Azure OpenAI API
azure_openai_headers = {
    'Authorization': f'Bearer {AZURE_OPENAI_KEY}',
}

# Create the LLM model instance
llm = ChatOpenAI(
    temperature=0.5,  # creativity (0.0 ~ 2.0)
    max_tokens=2048,  # maximum number of tokens
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    model_name='gpt-4',  # model name
)

template = """
Your task is to write contribution phrases for a developer's portfolio based on the {input} data. Write in a way that effectively appeals your capabilities as a developer to companies.
Focus on commit messages that start with 'feat' or 'chore', and emphasize the key dependencies and libraries in the package file.
You should not explicitly mention any packages, but rather describe what you have implemented with those packages.
The contributions should not be repetitive. They should be written with the nuance 'Implemented B (technology stack) for A (reason for use), resulting in C.' The format does not have to be strict, but A should be emphasized.
The content must be written in Korean, within about 5 lines, and formatted as bullet points. Repetition penalty:1
"""

# Prompt template. from_template() derives the input variables (here "input")
# from the placeholders in the template text, so none are passed explicitly.
prompt = PromptTemplate.from_template(template)

# Chain that renders the prompt and sends it to the LLM
llm_chain = LLMChain(prompt=prompt, llm=llm)


@router.get("/list")
def repository_list(db: Session = Depends(get_db)):
Expand All @@ -40,7 +79,7 @@ def get_commit(org: str, repo: str, username: str):
page = 1
while True:
commit_url = f"https://api.github.com/repos/{org}/{repo}/commits?author={username}&page={page}&per_page=100"
response = requests.get(commit_url, headers=headers)
response = requests.get(commit_url, headers=github_headers)

if response.status_code == 200:
commits = response.json()
Expand All @@ -56,7 +95,7 @@ def get_commit(org: str, repo: str, username: str):
# 패키지 파일 찾기
def find_package_file(org: str, repo: str, path: str, target_files: list):
package_url = f"https://api.github.com/repos/{org}/{repo}/contents/{path}"
response = requests.get(package_url, headers=headers)
response = requests.get(package_url, headers=github_headers)

if response.status_code == 200:
items = response.json()
Expand All @@ -75,15 +114,14 @@ def find_package_file(org: str, repo: str, path: str, target_files: list):
# 패키지 파일 내용 반환
def get_package_contents(org: str, repo: str, path: str):
    """Fetch one file from a GitHub repository and return its text.

    Args:
        org: Owner segment of the repository URL.
        repo: Repository name.
        path: Path of the file inside the repository.

    Returns:
        The ``content`` field of the GitHub response, base64-decoded and then
        decoded as UTF-8.

    Raises:
        HTTPException: Carrying the GitHub status code when the response is
            anything other than 200.
    """
    package_url = f"https://api.github.com/repos/{org}/{repo}/contents/{path}"
    response = requests.get(package_url, headers=github_headers)

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Failed to fetch package file")

    item = response.json()
    return base64.b64decode(item['content']).decode('utf-8')


# readme 이미지 반환
Expand All @@ -94,7 +132,7 @@ def get_readme_images(org: str, repo: str):
]

for url in urls:
response = requests.get(url, headers=headers)
response = requests.get(url, headers=github_headers)
if response.status_code == 200:
readme_data = response.json()
readme_content_encoded = readme_data['content']
Expand All @@ -119,11 +157,10 @@ def get_readme_images(org: str, repo: str):
return []


# user-data 조회
@router.post("/user-data", response_model=List[repository_schema.RepositoryUserData], status_code=201)
def get_user_data(request: repository_schema.RepositoryCreateRequest, db: Session = Depends(get_db)) -> JSONResponse:
org_repositories = {}
# user-data 저장 (깃허브 파싱 후 저장)
def save_user_data(request, db):
personal_repositories = []
org_repositories = {}
errors = []
for repo_url in request.repository_url:
# GitHub repository URL 파싱
Expand All @@ -140,16 +177,18 @@ def get_user_data(request: repository_schema.RepositoryCreateRequest, db: Sessio
commit_messages = get_commit(org, repo, username)

# 패키지 파일 내용
package_files = ['requirements.txt', 'Pipfile', 'setup.py', 'build.gradle', 'pom.xml', 'package.json', 'go.mod']
package_files = ['requirements.txt', 'Pipfile', 'setup.py', 'build.gradle', 'pom.xml', 'package.json',
'go.mod']
package_path = find_package_file(org, repo, "", package_files)
package_contents = get_package_contents(org, repo, package_path)
if package_path:
package_contents = get_package_contents(org, repo, package_path)

# README 이미지
readme_images = get_readme_images(org, repo)

# 매핑
user_data = repository_schema.RepositoryUserData(
repository_url=f"https://github.com/{org}/{repo}",
repository_url=repo_url,
commit_list=commit_messages,
package_contents=package_contents,
readme_images=readme_images
Expand All @@ -158,15 +197,10 @@ def get_user_data(request: repository_schema.RepositoryCreateRequest, db: Sessio
repository_data = {
"user_id": request.user_id,
"repository_url": repo_url,
"main_image": "default_image.png",
"user_data": user_data.dict(),
"gpt_result": []
"user_data": user_data.dict()
}
db_repository = repository_crud.create_repository(db, repository_data)

if not db_repository:
raise HTTPException(status_code=500, detail="Failed to save repository")

# org 명으로 프로젝트 구분
if org == username:
# 개인 프로젝트로 처리
Expand All @@ -176,18 +210,63 @@ def get_user_data(request: repository_schema.RepositoryCreateRequest, db: Sessio
org_repositories[org] = []
org_repositories[org].append(user_data)

if not db_repository:
raise HTTPException(status_code=500, detail="Failed to save repository")

except Exception as e:
errors.append({"url": repo_url, "message": str(e)})

results = {
"success": {
"organizations": jsonable_encoder(org_repositories),
"personal": jsonable_encoder(personal_repositories)
},
"errors": errors
}
return org_repositories, personal_repositories, errors


# GPT 프롬프트
def run_gpt_prompts(org_repositories: Dict[str, List], personal_repositories: List) -> List:
    """Generate portfolio contribution phrases with the LLM chain.

    One prompt is run per organization (covering all of its projects) and one
    per personal repository.

    Args:
        org_repositories: Mapping of organization name to its project entries.
        personal_repositories: Project entries handled one by one.

    Returns:
        A list of JSON-encoded ``RepositoryGPTResponse`` items. Personal
        projects carry the organization label ``"personal"``.
    """
    gpt_response = []

    # Projects grouped by organization: one prompt per organization
    for org, projects in org_repositories.items():
        # dict.fromkeys removes duplicate images while keeping first-seen
        # order, so the response is deterministic (a set is not).
        readme_images = list(dict.fromkeys(
            image for project in projects for image in project.readme_images
        ))

        # Encode the projects the same way the personal branch does, so the
        # prompt receives plain JSON-compatible data in both cases.
        project_gpt_result = llm_chain.run(input=jsonable_encoder(projects))

        gpt_result = repository_schema.RepositoryGPTResponse(
            organization=org,
            gpt_response=_split_response_lines(project_gpt_result),
            readme_images=readme_images
        )
        gpt_response.append(jsonable_encoder(gpt_result))

    # Personal repositories: one prompt per project
    for project in personal_repositories:
        project_gpt_result = llm_chain.run(input=jsonable_encoder(project))

        gpt_result = repository_schema.RepositoryGPTResponse(
            organization="personal",
            gpt_response=_split_response_lines(project_gpt_result),
            readme_images=project.readme_images
        )
        gpt_response.append(jsonable_encoder(gpt_result))

    return gpt_response


def _split_response_lines(text: str) -> List[str]:
    """Split LLM output into lines, dropping lines that are empty or whitespace."""
    return [line for line in text.split('\n') if line.strip()]


@router.post("/chat-gpt", response_model=List[repository_schema.RepositoryGPTResponse], status_code=201)
def get_chat_gpt(request: repository_schema.RepositoryCreateRequest, db: Session = Depends(get_db)) -> JSONResponse:
    """Save the requested repositories and return GPT-generated phrases for them.

    Returns:
        A 201 ``JSONResponse`` whose body has ``message``, ``data`` (the GPT
        results) and ``errors`` (the error list returned by ``save_user_data``).
    """
    # NOTE(review): the body is a dict wrapper, not the bare list that
    # response_model declares — confirm which shape the API docs should show.

    # Parse the GitHub repositories and store them
    org_repositories, personal_repositories, errors = save_user_data(request, db)

    # Run the GPT prompts over the parsed data
    gpt_results = run_gpt_prompts(org_repositories, personal_repositories)

    return JSONResponse(content={
        "message": "repository 저장 성공",
        "data": jsonable_encoder(gpt_results),
        "errors": errors
    }, status_code=201)
13 changes: 6 additions & 7 deletions gitlio/app/domain/repository/repository_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@
class RepositoryModel(BaseModel):
    """Pydantic schema carrying a repository's id, URL and user data."""
    repository_id: int
    repository_url: str
    user_data: List[str] = []
    # Fields currently disabled:
    # main_image: str
    # gpt_result: List[str] = []


class RepositoryGPTResponse(BaseModel):
    """GPT result for one group of projects."""
    organization: str  # group label
    gpt_response: List[str] = []  # generated text, one entry per line
    readme_images: List[str] = []  # README images for the group


class RepositoryCreateRequest(BaseModel):
Expand Down
3 changes: 2 additions & 1 deletion gitlio/app/domain/user/user_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from ...database import get_db

# Router for the user endpoints; every route is mounted under /api/users
# and tagged "user".
router = APIRouter(
    prefix="/api/users",
    tags=["user"],
)

@router.post("/", response_model=user_schema.UserCreateResponse, status_code=201)
Expand Down
4 changes: 2 additions & 2 deletions gitlio/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ class Repository(Base):
repository_id = Column('repositoryId', Integer, primary_key=True, index=True, autoincrement=True)
user_id = Column('userId', Integer, ForeignKey('user.userId'))
repository_url = Column('repositoryUrl', String(255))
main_image = Column('mainImage', String(255))
user_data = Column('userData', JSON)
gpt_result = Column('gptResult', JSON)
# main_image = Column('mainImage', String(255))
# gpt_result = Column('gptResult', JSON)

# Establishing the relationship to User
user = relationship("User", back_populates="repositories")
4 changes: 2 additions & 2 deletions gitlio/app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .domain.repository import repository_router
from .domain.user import user_router
from .domain.portfolio import portfolio_router

Base.metadata.create_all(bind=engine) # FastAPI 실행시 필요한 테이블 모두 생성


Expand Down Expand Up @@ -50,10 +51,9 @@ async def update_profile(response: Response, clerk_id: str, profile_picture: Upl
return {"message": "Failed to upload profile picture"}
return {"message": "Profile picture uploaded successfully"}


app.include_router(portfolio_router.router)
app.include_router(repository_router.router)
app.include_router(user_router.router)
app.include_router(portfolio_router.router)

add_routes(
app,
Expand Down