diff --git a/code/LLM/aug.py b/code/LLM/aug.py
deleted file mode 100644
index 3696572..0000000
--- a/code/LLM/aug.py
+++ /dev/null
@@ -1,126 +0,0 @@
-import csv
-import re
-
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-
-MODEL_NAME = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
-
-# 모델과 토크나이저 로드
-model_name = MODEL_NAME
-model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-# GPU 사용 가능 시 모델을 GPU로 이동
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = model.to(device)
-
-
-def generate_article(
-    prompt,
-    prompt_end_word,
-    max_new_tokens=200,
-    num_return_sequences=1,
-    no_repeat_ngram_size=2,
-    temperature=0.7,
-):
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=max_new_tokens,
-        num_return_sequences=num_return_sequences,
-        no_repeat_ngram_size=no_repeat_ngram_size,
-        temperature=temperature,
-    )
-
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return generated_text.split(prompt_end_word)[1].strip()
-
-
-def generate_new_title(
-    prompt,
-    max_new_tokens=20,
-    num_return_sequences=1,
-    no_repeat_ngram_size=2,
-    temperature=1.5,
-):
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=max_new_tokens,
-        num_return_sequences=num_return_sequences,
-        no_repeat_ngram_size=no_repeat_ngram_size,
-        temperature=temperature,
-    )
-
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return generated_text.split("새로운 제목:")[-1].strip()
-
-
-def save_to_csv(data, filename):
-    with open(filename, "w", newline="", encoding="utf-8") as file:
-        writer = csv.writer(file, quoting=csv.QUOTE_MINIMAL)
-        writer.writerow(["ID", "text", "target"])
-        for i, (text, target) in enumerate(data):
-            cleaned_text = re.sub(r'^"|"$', "", text)
-            cleaned_text = re.sub(r"\s*이\s*제목은.*$", "", cleaned_text)
-            cleaned_text = " ".join(cleaned_text.split())
-            writer.writerow([f"ynat-v1_train_{i:05d}", cleaned_text, target])
-
-
-def process_title(title, target):
-    prompt_title_to_article = f"""다음 기사 제목에 대한 내용을 작성해주세요.
-    주제는 똑같지만 기사 내용은 창의적이어도 좋습니다.
-    : {title}
-
-    기사 내용:"""
-    prompt_title_to_article_end_word = "기사 내용:"
-    article = generate_article(prompt_title_to_article, prompt_title_to_article_end_word)
-
-    prompt_article_to_title = f"""다음 기사 내용을 바탕으로 새로운 제목을 생성해주세요.
-    절대 기사 내용에 들어간 단어는 쓰지 마세요.:
-    \n\n{article}\n\n새로운 제목:"""
-    new_title = generate_new_title(prompt_article_to_title)
-    return article, new_title, target
-
-
-def read_csv(filename):
-    with open(filename, "r", encoding="utf-8") as file:
-        reader = csv.reader(file)
-        next(reader)  # 헤더 스킵
-        return [(row[1], row[2]) for row in reader]  # (text, target) 튜플 리스트 반환
-
-
-def process_csv(input_filename, output_filename, max_rows=None):
-    data = read_csv(input_filename)
-
-    # 텍스트 길이에 따라 정렬
-    data.sort(key=lambda x: len(x[0]), reverse=True)
-
-    if max_rows:
-        data = data[:max_rows]
-
-    processed_data = []
-    for i, (title, target) in enumerate(data):
-        article, new_title, _ = process_title(title, target)
-
-        print(f"\n원래 제목: {title}")
-        print(f"생성된 기사 내용:\n{article}")
-        print(f"새로운 제목: {new_title}")
-        print("-" * 50)
-
-        processed_data.append((new_title, target))
-
-    save_to_csv(processed_data, output_filename)
-    print(f"처리 완료. 결과가 '{output_filename}' 파일에 저장되었습니다.")
-
-
-if __name__ == "__main__":
-    # 사용 예시
-    input_csv = "../data/aug_input.csv"  # 입력 CSV 파일명
-    output_csv = "../data/aug_output.csv"  # 출력 CSV 파일명
-    max_rows = 600  # 처리할 최대 행 수 (None으로 설정하면 모든 행 처리)
-    process_csv(input_csv, output_csv, max_rows)
diff --git a/code/LLM/aug_syn.py b/code/LLM/aug_syn.py
deleted file mode 100644
index e33b76d..0000000
--- a/code/LLM/aug_syn.py
+++ /dev/null
@@ -1,106 +0,0 @@
-import csv
-import random
-import re
-from collections import defaultdict
-
-import torch
-from prompt import get_prompt_synonyms
-from tqdm import tqdm  # Import tqdm for progress bar
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-
-MODEL_NAME = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
-PROMPT_VERSION = 1
-
-# 모델과 토크나이저 로드
-model_name = MODEL_NAME
-model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-# GPU 사용 가능 시 모델을 GPU로 이동
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = model.to(device)
-
-
-def get_synonyms(word, model, tokenizer, device, max_length=100, num_return_sequences=1):
-    prompt = get_prompt_synonyms(PROMPT_VERSION, word)
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-    with torch.no_grad():
-        outputs = model.generate(**inputs, max_length=max_length, num_return_sequences=num_return_sequences)
-
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    synonyms = re.findall(r":\s*(.*?)(?:\n|$)", response)
-    if synonyms:
-        return [syn.strip() for syn in synonyms[0].split(",") if syn.strip()]
-    return []
-
-
-def augment_text(text, model, tokenizer, device, random_ratio=0.3):
-    words = text.split()
-    augmented_words = []
-    for word in words:
-        if random.random() < random_ratio:  # 30% 확률로 단어를 동의어로 대체
-            synonyms = get_synonyms(word, model, tokenizer, device)
-            if synonyms:
-                augmented_words.append(random.choice(synonyms))
-            else:
-                augmented_words.append(word)
-        else:
-            augmented_words.append(word)
-    return " ".join(augmented_words)
-
-
-def main(
-    input_file="../data/aug_syn_input.csv",
-    output_file="../data/aug_syn_output.csv",
-    min_row=3,
-    samples_per_class=10,
-):
-    """
-    데이터 증강을 수행하는 메인 함수
-
-    Args:
-        input_file (str): 입력 CSV 파일의 경로.
-        output_file (str): 증강된 데이터를 저장할 CSV 파일의 경로.
-        min_row (int): 데이터로 사용할 최소 행의 개수. 이보다 적은 행을 가진 데이터는 건너뜀.
-        samples_per_class (int): 각 타겟 클래스당 증강할 샘플의 수.
-
-    Returns:
-        None: 결과를 output_file에 저장
-    """
-
-    # 타겟 클래스별 데이터 저장을 위한 딕셔너리 초기화
-    data_by_target = defaultdict(list)
-
-    with open(input_file, "r", encoding="utf-8") as infile:
-        reader = csv.reader(infile)
-        header = next(reader)  # 헤더 읽기
-
-        for row in reader:
-            if len(row) < min_row:  # 데이터가 부족한 경우 건너뛰기
-                continue
-            id, text, target = row
-            data_by_target[target].append((id, text))
-
-    # CSV 파일에 증강된 데이터 쓰기
-    with open(output_file, "w", newline="", encoding="utf-8") as outfile:
-        writer = csv.writer(outfile)
-        writer.writerow(header)  # 헤더 쓰기
-
-        for target, samples in data_by_target.items():
-            # 각 타겟 클래스에서 샘플을 무작위로 선택하고 증강 수행
-            selected_samples = random.sample(samples, min(samples_per_class, len(samples)))
-
-            for id, text in tqdm(selected_samples, desc=f"Processing Target {target}"):
-                writer.writerow([id, text, target])  # 원본 데이터 쓰기
-
-                # 증강된 데이터 생성 및 쓰기
-                augmented_text = augment_text(text, model, tokenizer, device)
-                writer.writerow([f"{id}_aug", augmented_text, target])
-
-    print(f"데이터 증강이 완료되었습니다. 결과가 '{output_file}' 파일에 저장되었습니다.")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/code/LLM/augmentation.py b/code/LLM/augmentation.py
new file mode 100644
index 0000000..03764c3
--- /dev/null
+++ b/code/LLM/augmentation.py
@@ -0,0 +1,153 @@
+import argparse
+import csv
+import re
+
+from .model import init_model
+from .prompt import get_prompt_article_to_title, get_prompt_title_to_article
+
+
+def generate_article(prompt, prompt_end_word, model, tokenizer, device, args):  # Generate article text for `prompt`.
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=args.article_max_tokens,
+        num_return_sequences=args.article_num_sequences,
+        no_repeat_ngram_size=args.article_no_repeat_ngram,
+        temperature=args.article_temperature,
+    )
+
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)  # only the first sequence is decoded
+    return generated_text.split(prompt_end_word)[1].strip()  # segment after the first end word (up to the next one); IndexError if absent
+
+
+def generate_new_title(prompt, model, tokenizer, device, args):  # Generate a replacement title from `prompt`.
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=args.title_max_tokens,
+        num_return_sequences=args.title_num_sequences,
+        no_repeat_ngram_size=args.title_no_repeat_ngram,
+        temperature=args.title_temperature,
+    )
+
+    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)  # only the first sequence is decoded
+    return generated_text.split("새로운 제목:")[-1].strip()  # text after the last marker; whole text if the marker is absent
+
+
+def save_to_csv(data, filename):  # Write (text, target) pairs to `filename` as CSV under the header ID,text,target.
+    with open(filename, "w", newline="", encoding="utf-8") as file:
+        writer = csv.writer(file, quoting=csv.QUOTE_MINIMAL)
+        writer.writerow(["ID", "text", "target"])
+        for i, (text, target) in enumerate(data):
+            cleaned_text = re.sub(r'^"|"$', "", text)  # drop a leading and a trailing double quote
+            cleaned_text = re.sub(r"\s*이\s*제목은.*$", "", cleaned_text)  # cut a trailing "이 제목은 ..." remark ('.' stops at newlines)
+            cleaned_text = " ".join(cleaned_text.split())  # collapse every whitespace run into one space
+            writer.writerow([f"ynat-v1_train_{i:05d}", cleaned_text, target])  # ID is the zero-padded row index
+
+
+def process_title(title, target, model, tokenizer, device, args):  # title -> generated article -> regenerated title
+    prompt_title_to_article, prompt_title_to_article_end_word = get_prompt_title_to_article(title)
+    article = generate_article(
+        prompt_title_to_article,
+        prompt_title_to_article_end_word,
+        model,
+        tokenizer,
+        device,
+        args,
+    )
+
+    prompt_article_to_title = get_prompt_article_to_title(article)  # the generated article is the input of the second prompt
+    new_title = generate_new_title(prompt_article_to_title, model, tokenizer, device, args)
+    return article, new_title, target  # `target` is passed through unchanged
+
+
+def read_csv(filename):  # Load the input CSV as a list of (text, target) pairs.
+    with open(filename, "r", encoding="utf-8") as file:
+        reader = csv.reader(file)
+        next(reader)  # skip the header row
+        return [(row[1], row[2]) for row in reader]  # columns 1 and 2 become the (text, target) tuples
+
+
+def process_csv(input_filename, output_filename, model, tokenizer, device, args):  # Regenerate titles for the input CSV rows.
+    data = read_csv(input_filename)
+    data.sort(key=lambda x: len(x[0]), reverse=True)  # longest titles first
+
+    if args.max_rows:  # 0 or None means "no row limit"
+        data = data[: args.max_rows]
+
+    processed_data = []
+    for title, target in data:  # the row index was never used, so enumerate() is dropped
+        article, new_title, _ = process_title(title, target, model, tokenizer, device, args)
+
+        print(f"\n원래 제목: {title}")
+        print(f"생성된 기사 내용:\n{article}")
+        print(f"새로운 제목: {new_title}")
+        print("-" * 50)
+
+        processed_data.append((new_title, target))  # only the new title and its label are kept
+
+    save_to_csv(processed_data, output_filename)  # written once, after every row has been processed
+    print(f"처리 완료. 결과가 '{output_filename}' 파일에 저장되었습니다.")
+
+
+if __name__ == "__main__":
+    # Command-line interface
+    parser = argparse.ArgumentParser()
+    # General options: model, input/output paths, row limit
+    parser.add_argument(
+        "--model-name",
+        type=str,
+        default="LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
+        help="사용할 모델 이름",
+    )
+    parser.add_argument(
+        "--input",
+        type=str,
+        default="../data/aug_input.csv",
+        help="입력 CSV 파일 경로",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="../data/aug_output.csv",
+        help="출력 CSV 파일 경로",
+    )
+    parser.add_argument(
+        "--max-rows",
+        type=int,
+        default=600,
+        help="처리할 최대 행 수. 0이면 모든 행 처리",
+    )
+
+    # Options read by generate_article
+    parser.add_argument("--article-max-tokens", type=int, default=200, help="기사 생성시 최대 토큰 수")
+    parser.add_argument("--article-num-sequences", type=int, default=1, help="기사 생성시 시퀀스 수")
+    parser.add_argument(
+        "--article-no-repeat-ngram",
+        type=int,
+        default=2,
+        help="기사 생성시 반복하지 않을 n-gram 크기",
+    )
+    parser.add_argument("--article-temperature", type=float, default=0.7, help="기사 생성시 샘플링 온도")
+
+    # Options read by generate_new_title
+    parser.add_argument("--title-max-tokens", type=int, default=20, help="제목 생성시 최대 토큰 수")
+    parser.add_argument("--title-num-sequences", type=int, default=1, help="제목 생성시 시퀀스 수")
+    parser.add_argument(
+        "--title-no-repeat-ngram",
+        type=int,
+        default=2,
+        help="제목 생성시 반복하지 않을 n-gram 크기",
+    )
+    parser.add_argument("--title-temperature", type=float, default=1.5, help="제목 생성시 샘플링 온도")
+
+    args = parser.parse_args()
+
+    # Normalise "--max-rows 0" to None on args itself, because process_csv reads args.max_rows
+    args.max_rows = None if args.max_rows == 0 else args.max_rows
+
+    # Load the model, then run the augmentation over the input CSV
+    model, tokenizer, device = init_model(args.model_name)
+    process_csv(args.input, args.output, model, tokenizer, device, args)
diff --git a/code/LLM/augmentation_synonyms.py b/code/LLM/augmentation_synonyms.py
new file mode 100644
index 0000000..d52b628
--- /dev/null
+++ b/code/LLM/augmentation_synonyms.py
@@ -0,0 +1,115 @@
+import argparse
+from collections import defaultdict
+import csv
+import random
+import re
+
+import torch
+from tqdm import tqdm
+
+from .model import init_model
+from .prompt import get_prompt_synonyms
+
+
+def get_synonyms(word, model, tokenizer, device, args):  # Ask the model for synonyms of `word`; returns a list of strings.
+    prompt = get_prompt_synonyms(word, args.prompt_version)  # builder signature is (word, version=1); the arguments were swapped
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_length=args.max_length,
+            num_return_sequences=args.num_return_sequences,
+        )
+
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)  # only the first sequence is decoded
+    synonyms = re.findall(r":\s*(.*?)(?:\n|$)", response)  # text after each ':' up to the end of its line
+    if synonyms:  # NOTE(review): if the decoded text echoes the prompt, match 0 is the prompt's own ": <word>" - confirm
+        return [syn.strip() for syn in synonyms[0].split(",") if syn.strip()]  # comma-separated, blanks dropped
+    return []
+
+
+def augment_text(text, model, tokenizer, device, args):  # Randomly replace whitespace-separated words with model synonyms.
+    words = text.split()
+    augmented_words = []
+    for word in words:
+        if random.random() < args.random_ratio:  # each word is picked independently with probability random_ratio
+            synonyms = get_synonyms(word, model, tokenizer, device, args)
+            if synonyms:
+                augmented_words.append(random.choice(synonyms))
+            else:
+                augmented_words.append(word)  # no synonym found: keep the original word
+        else:
+            augmented_words.append(word)
+    return " ".join(augmented_words)  # words are re-joined with single spaces
+
+
+if __name__ == "__main__":
+    # Command-line interface
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model-name",
+        type=str,
+        default="LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
+        help="사용할 모델 이름",
+    )
+    parser.add_argument(
+        "--input",
+        type=str,
+        default="../data/aug_syn_input.csv",
+        help="입력 CSV 파일 경로",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="../data/aug_syn_output.csv",
+        help="출력 CSV 파일 경로",
+    )
+    parser.add_argument("--min-row", type=int, default=3, help="처리할 최소 행 개수")
+    parser.add_argument(
+        "--samples-per-class",
+        type=int,
+        default=10,
+        help="클래스당 증강할 샘플 수",
+    )
+    parser.add_argument("--prompt-version", type=int, default=1, help="사용할 프롬프트 버전")
+    # Options read by get_synonyms and augment_text
+    parser.add_argument("--max-length", type=int, default=100, help="생성할 최대 토큰 길이")
+    parser.add_argument("--num-return-sequences", type=int, default=1, help="생성할 시퀀스 수")
+    parser.add_argument("--random-ratio", type=float, default=0.3, help="동의어 대체 확률")
+
+    args = parser.parse_args()
+
+    # Load the model, tokenizer and device
+    model, tokenizer, device = init_model(args.model_name)
+
+    # Rows grouped by target class: target -> [(sample_id, text), ...]
+    data_by_target = defaultdict(list)
+
+    with open(args.input, "r", encoding="utf-8") as infile:
+        reader = csv.reader(infile)
+        header = next(reader)  # keep the header so it can be copied to the output
+
+        for row in reader:
+            if len(row) < args.min_row:  # skip rows with fewer than min_row columns
+                continue
+            sample_id, text, target = row  # renamed from `id`, which shadowed the builtin
+            data_by_target[target].append((sample_id, text))
+
+    # Write the originals and their augmented variants to the output CSV
+    with open(args.output, "w", newline="", encoding="utf-8") as outfile:
+        writer = csv.writer(outfile)
+        writer.writerow(header)  # same header as the input file
+
+        for target, samples in data_by_target.items():
+            # Randomly pick up to samples_per_class rows of this class to augment
+            selected_samples = random.sample(samples, min(args.samples_per_class, len(samples)))
+
+            for sample_id, text in tqdm(selected_samples, desc=f"Processing Target {target}"):
+                writer.writerow([sample_id, text, target])  # original row
+
+                # Augmented row, written under "<id>_aug"
+                augmented_text = augment_text(text, model, tokenizer, device, args)
+                writer.writerow([f"{sample_id}_aug", augmented_text, target])
+
+    print(f"데이터 증강이 완료되었습니다. 결과가 '{args.output}' 파일에 저장되었습니다.")
diff --git a/code/LLM/denoise.py b/code/LLM/denoise.py
index 7e0d594..e32eaf4 100644
--- a/code/LLM/denoise.py
+++ b/code/LLM/denoise.py
@@ -2,51 +2,32 @@
 denoise.py 를 실행후 processing.py로 후처리 해주세요
 """
 
+import argparse
 import csv
 import logging
 import re
 
 import pandas as pd
 import torch
-from prompt import get_prompt_denoise
 from tqdm import tqdm
-from transformers import AutoModelForCausalLM, AutoTokenizer
 
+from .model import init_model
+from .prompt import get_prompt_denoise
 
-MODEL_NAME = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
-PROMPT_VERSION = 1
 
-
-pd.set_option("display.max_rows", None)
-
-# 로깅 설정
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-
-# 모델 및 토크나이저 로드
-logging.info("모델 및 토크나이저 로딩 중...")
-model_name = MODEL_NAME
-model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-# GPU 사용 가능 시 모델을 GPU로 이동
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = model.to(device)
-logging.info(f"모델을 {device}로 이동했습니다.")
-
-
-def restore_headline(noisy_headline):
-    prompt = get_prompt_denoise(PROMPT_VERSION, noisy_headline)
+def restore_headline(noisy_headline, prompt_version, model, tokenizer, device, args):
+    prompt = get_prompt_denoise(prompt_version, noisy_headline)
     inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).to(device)
 
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=15,
-            num_return_sequences=1,
-            no_repeat_ngram_size=2,
-            top_k=50,
-            top_p=0.95,
-            temperature=0.7,
+            max_new_tokens=args.max_new_tokens,
+            num_return_sequences=args.num_return_sequences,
+            no_repeat_ngram_size=args.no_repeat_ngram_size,
+            top_k=args.top_k,
+            top_p=args.top_p,
+            temperature=args.temperature,
         )
 
     restored_headline = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -59,54 +40,116 @@ def restore_headline(noisy_headline):
     restored_headline = re.sub(r"^답변:\s*", "", restored_headline)
 
     # 필요에 따라 추가적인 처리...
-
     return restored_headline.strip()
 
 
-# CSV 파일 읽기, 데이터 타입 지정
-input_file = "../data/21_relabeling_2800.csv"
-logging.info(f"CSV 파일 '{input_file}' 읽는 중...")
-df = pd.read_csv(
-    input_file,
-    quoting=csv.QUOTE_ALL,
-    dtype={"ID": str, "text": str, "target": int, "is_noise": int},
-)
-logging.info(f"총 {len(df)} 개의 행을 읽었습니다.")
-
-# is_noise가 1인 항목 필터링
-noisy_df = df[df["is_noise"] == 1]
-logging.info(f"노이즈가 있는 {len(noisy_df)} 개의 행을 찾았습니다.")
-
-# 초반 100개 항목만 선택
-noisy_df_subset = noisy_df.head(1602)
-
-# 디노이징 수행
-logging.info("디노이징 작업 시작...")
-tqdm.pandas()
-noisy_df_subset["denoised_text"] = noisy_df_subset["text"].progress_apply(restore_headline)
-logging.info("디노이징 작업 완료.")
-
-# 결과 DataFrame 생성 (is_noise 열은 포함하지 않음)
-result_df = noisy_df_subset[["ID", "denoised_text", "target"]]
-result_df = result_df.rename(columns={"denoised_text": "text"})
-
-output_file = "../data/semi-final3.csv"
-logging.info(f"결과를 '{output_file}'에 저장 중...")
-result_df.to_csv(output_file, index=False, encoding="utf-8")
-
-# is_noise가 0인 항목 필터링
-non_noisy_df = df[df["is_noise"] == 0]
-logging.info(f"is_noise가 0인 {len(non_noisy_df)} 개의 행을 찾았습니다.")
-
-# 두 DataFrame 합치기
-final_df = pd.concat([result_df, non_noisy_df[["ID", "text", "target"]]], ignore_index=True)
-
-# 데이터프레임 섞기
-final_df = final_df.sample(frac=1).reset_index(drop=True)
-
-# 결과를 CSV 파일로 저장 (is_noise 열은 포함하지 않음)
-output_file = "../data/final3.csv"
-logging.info(f"결과를 '{output_file}'에 저장 중...")
-final_df.to_csv(output_file, index=False, encoding="utf-8")
-
-logging.info(f"디노이징 완료. 결과가 '{output_file}' 파일에 저장되었습니다.")
+if __name__ == "__main__":
+    # Command-line interface
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--prompt-version", type=int, default=1, help="프롬프트 버전")
+    parser.add_argument(
+        "--model-name",
+        type=str,
+        default="LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
+        help="사용할 모델 이름",
+    )
+
+    # Input and output locations
+    parser.add_argument(
+        "--input",
+        type=str,
+        default="../data/21_relabeling_2800.csv",
+        help="입력 CSV 파일 경로",
+    )
+    parser.add_argument(
+        "--semi-output",
+        type=str,
+        default="../data/semi-final3.csv",
+        help="중간 출력 CSV 파일 경로",
+    )
+    parser.add_argument(
+        "--final-output",
+        type=str,
+        default="../data/final3.csv",
+        help="최종 출력 CSV 파일 경로",
+    )
+
+    # Generation options that restore_headline forwards to model.generate()
+    parser.add_argument("--max-new-tokens", type=int, default=15, help="")
+    parser.add_argument(
+        "--num-return-sequences",
+        type=int,
+        default=1,
+        help="",
+    )
+    parser.add_argument(
+        "--no-repeat-ngram-size",
+        type=int,
+        default=2,
+        help="",
+    )
+    parser.add_argument("--top-k", type=int, default=50, help="")
+    parser.add_argument("--top-p", type=float, default=0.95, help="")
+    parser.add_argument("--temperature", type=float, default=0.7, help="")
+
+    args = parser.parse_args()
+    prompt_version = args.prompt_version
+    input_file = args.input
+    semi_final_output_file = args.semi_output
+    final_output_file = args.final_output
+
+    # pandas display settings
+    pd.set_option("display.max_rows", None)
+
+    # Logging configuration
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+
+    # Read the CSV with explicit column dtypes
+    logging.info(f"CSV 파일 '{input_file}' 읽는 중...")
+    df = pd.read_csv(
+        input_file,
+        quoting=csv.QUOTE_ALL,
+        dtype={"ID": str, "text": str, "target": int, "is_noise": int},
+    )
+    logging.info(f"총 {len(df)} 개의 행을 읽었습니다.")
+
+    # Select the rows flagged as noisy (is_noise == 1)
+    noisy_df = df[df["is_noise"] == 1]
+    logging.info(f"노이즈가 있는 {len(noisy_df)} 개의 행을 찾았습니다.")
+
+    # Keep at most the first 1602 noisy rows; copy so the new column is not assigned on a slice of df
+    noisy_df_subset = noisy_df.head(1602).copy()
+
+    # Load the model named by --model-name (args.model_name was read before but never registered)
+    model, tokenizer, device = init_model(args.model_name)
+
+    # Run the denoising over every selected headline
+    logging.info("디노이징 작업 시작...")
+    tqdm.pandas()
+    noisy_df_subset["denoised_text"] = noisy_df_subset["text"].progress_apply(
+        lambda x: restore_headline(x, prompt_version, model, tokenizer, device, args)
+    )
+    logging.info("디노이징 작업 완료.")
+
+    # Build the result frame (the is_noise column is left out)
+    result_df = noisy_df_subset[["ID", "denoised_text", "target"]]
+    result_df = result_df.rename(columns={"denoised_text": "text"})
+
+    logging.info(f"결과를 '{semi_final_output_file}'에 저장 중...")
+    result_df.to_csv(semi_final_output_file, index=False, encoding="utf-8")
+
+    # Select the rows that were not flagged as noisy (is_noise == 0)
+    non_noisy_df = df[df["is_noise"] == 0]
+    logging.info(f"is_noise가 0인 {len(non_noisy_df)} 개의 행을 찾았습니다.")
+
+    # Concatenate the denoised rows with the untouched clean rows
+    final_df = pd.concat([result_df, non_noisy_df[["ID", "text", "target"]]], ignore_index=True)
+
+    # Shuffle the combined frame
+    final_df = final_df.sample(frac=1).reset_index(drop=True)
+
+    # Save the final CSV (the is_noise column is left out)
+    logging.info(f"결과를 '{final_output_file}'에 저장 중...")
+    final_df.to_csv(final_output_file, index=False, encoding="utf-8")
+
+    logging.info(f"디노이징 완료. 결과가 '{final_output_file}' 파일에 저장되었습니다.")
diff --git a/code/LLM/model.py b/code/LLM/model.py
new file mode 100644
index 0000000..5df4469
--- /dev/null
+++ b/code/LLM/model.py
@@ -0,0 +1,18 @@
+import logging
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+def init_model(model_name="LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"):
+    # Load the causal-LM checkpoint and its tokenizer, then move the model to the GPU when one is available.
+    logging.info("모델 및 토크나이저 로딩 중...")
+    # trust_remote_code=True is passed for the model only; the tokenizer is loaded with default settings.
+    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    # Use CUDA when torch reports it as available, otherwise stay on the CPU
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = model.to(device)
+    logging.info(f"모델을 {device}로 이동했습니다.")
+    return model, tokenizer, device  # callers unpack in this order
diff --git a/code/LLM/processing.py b/code/LLM/processing.py
index 8ba0516..f7371ad 100644
--- a/code/LLM/processing.py
+++ b/code/LLM/processing.py
@@ -1,19 +1,43 @@
+import argparse
+
 import pandas as pd
 
 
-# CSV 파일 읽기
-df = pd.read_csv("../data/final4.csv")
+def processing(
+    input_path="../data/processing_input.csv",
+    output_path="../data/processing_output.csv",
+):
+    # Read the input CSV
+    df = pd.read_csv(input_path)
+
+    # Helper that drops everything after the first "\n" in a text value
+    def clean_text(text):
+        if isinstance(text, str):  # only string values are processed
+            return text.split("\n")[0]  # keep the part before the first "\n"
+        return text  # non-string values are returned unchanged
 
+    # Apply clean_text to the text column
+    df["text"] = df["text"].apply(clean_text)
 
-# text 열에서 \n 이후의 내용을 제거하는 함수 정의
-def clean_text(text):
-    if isinstance(text, str):  # text가 문자열인 경우에만 처리
-        return text.split("\n")[0]  # \n 이전의 텍스트만 반환
-    return text  # 문자열이 아닌 경우 원래 값을 반환
+    # Save the result to the output CSV
+    df.to_csv(output_path, index=False, encoding="utf-8")
 
 
-# clean_text 함수를 text 열에 적용
-df["text"] = df["text"].apply(clean_text)
+if __name__ == "__main__":
+    # Parse the input and output CSV paths from the command line
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--input_path",
+        type=str,
+        default="../data/processing_input.csv",
+        help="입력 CSV 파일 경로",
+    )
+    parser.add_argument(
+        "--output_path",
+        type=str,
+        default="../data/processing_output.csv",
+        help="출력 CSV 파일 경로",
+    )
+    args = parser.parse_args()
 
-# 결과를 새로운 CSV 파일로 저장
-df.to_csv("../data/final5.csv", index=False, encoding="utf-8")
+    processing(args.input_path, args.output_path)  # both paths default to the same values as the function signature
diff --git a/code/LLM/prompt/__init__.py b/code/LLM/prompt/__init__.py
index 45a04e7..600f638 100644
--- a/code/LLM/prompt/__init__.py
+++ b/code/LLM/prompt/__init__.py
@@ -18,6 +18,12 @@
 >>> prompt = get_agument_prompt(1, "original text")
 """
 
-from .agument import get_agument_system_message, get_prompt_agument, get_prompt_synonyms
+from .agument import (
+    get_agument_system_message,
+    get_prompt_agument,
+    get_prompt_synonyms,
+    get_prompt_title_to_article,
+    get_prompt_article_to_title,
+)
 from .denoise import get_prompt_denoise, get_system_message_denoise
 from .relabel import get_prompt_relabel, get_system_messaget_relabel
diff --git a/code/LLM/prompt/agument.py b/code/LLM/prompt/agument.py
index fe3db4b..8507502 100644
--- a/code/LLM/prompt/agument.py
+++ b/code/LLM/prompt/agument.py
@@ -13,6 +13,33 @@ def get_prompt_synonyms(word, version=1):
         return prompt_synonyms_v1(word)
 
 
+def get_prompt_title_to_article(text, version=1):  # Select the title->article prompt builder for `version`.
+    if version == 1:
+        return prompt_title_to_article_v1(text)  # (prompt, end_word) tuple; any other version falls through to None
+
+
+def get_prompt_article_to_title(article, version=1):  # Select the article->title prompt builder for `version`.
+    if version == 1:
+        return prompt_article_to_title_v1(article)  # prompt string; any other version falls through to None
+
+
+def prompt_article_to_title_v1(article):  # Build the v1 prompt that asks for a new title for `article`.
+    prompt_article_to_title = f"""다음 기사 내용을 바탕으로 새로운 제목을 생성해주세요.
+    절대 기사 내용에 들어간 단어는 쓰지 마세요.:
+    \n\n{article}\n\n새로운 제목:"""
+    return prompt_article_to_title  # ends with "새로운 제목:"; continuation lines keep their 4-space source indent
+
+
+def prompt_title_to_article_v1(text):  # Build the v1 prompt that asks for an article body for the title `text`.
+    prompt = f"""다음 기사 제목에 대한 내용을 작성해주세요.
+    주제는 똑같지만 기사 내용은 창의적이어도 좋습니다.
+    : {text}
+
+    기사 내용:"""
+    end_word = "기사 내용:"  # same marker the prompt ends with
+    return prompt, end_word  # continuation lines of the prompt keep their 4-space source indent
+
+
 def prompt_synonyms_v1(word):
     prompt = f"다음 단어의 동의어를 쉼표로 구분하여 5개만 나열해주세요: {word}"
     return prompt
diff --git a/code/clean_lab/clean_lab.py b/code/clean_lab/class_relabel.py
similarity index 94%
rename from code/clean_lab/clean_lab.py
rename to code/clean_lab/class_relabel.py
index d834a25..dfb8544 100755
--- a/code/clean_lab/clean_lab.py
+++ b/code/clean_lab/class_relabel.py
@@ -1,13 +1,11 @@
 import os
 
-import numpy as np
-import pandas as pd
-import torch
-import wandb
-import yaml
 from cleanlab.filter import find_label_issues
 from main import BERTDataset, compute_metrics, data_setting, evaluating
+import numpy as np
+import pandas as pd
 from sklearn.model_selection import StratifiedKFold
+import torch
 from transformers import (
     AutoModelForSequenceClassification,
     AutoTokenizer,
@@ -15,7 +13,11 @@
     Trainer,
     TrainingArguments,
 )
-from utils import (
+import wandb
+import yaml
+
+from ..utils import (
+    HF_TEAM_NAME,
     check_dataset,
     config_print,
     get_parser,
@@ -87,9 +89,6 @@ def train_for_clean_labels(
         logging_strategy="epoch",
         evaluation_strategy="epoch",
         save_strategy="epoch",
-        # logging_steps=100,
-        # eval_steps=100,
-        # save_steps=100,
         save_total_limit=2,
         learning_rate=float(learning_rate),
         adam_beta1=0.9,
@@ -160,9 +159,16 @@ def train_for_clean_labels(
 
 
 if __name__ == "__main__":
-    # The current process just got forked, after parallelism has already been used.
-    # Disabling parallelism to avoid deadlocks...
-    # os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    # ArgumentParser 설정
+    parser = get_parser()
+    parser.add_argument(
+        "--model-name",
+        type=str,
+        default="vaiv/kobigbird-roberta-large",
+        help="사용할 모델 이름",
+    )
+
+    args = parser.parse_args()
 
     parser = get_parser()
     with open(os.path.join("../config", parser.config)) as f:
@@ -204,7 +210,7 @@ def train_for_clean_labels(
     load_env_file("../setup/.env")
     hf_config = CFG.get("huggingface", {})
     hf_token = os.getenv("HUGGINGFACE_TOKEN")
-    hf_organization = "paper-company"
+    hf_organization = HF_TEAM_NAME
 
     config_print(CFG)
 
@@ -219,7 +225,7 @@ def train_for_clean_labels(
 
     DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
-    model_name = "vaiv/kobigbird-roberta-large"  # "klue/bert-base"
+    model_name = args.model_name
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=7).to(DEVICE)
 
diff --git a/code/clean_lab/clean_lab2.py b/code/clean_lab/noise_relabel.py
similarity index 92%
rename from code/clean_lab/clean_lab2.py
rename to code/clean_lab/noise_relabel.py
index 13f9856..0900c21 100644
--- a/code/clean_lab/clean_lab2.py
+++ b/code/clean_lab/noise_relabel.py
@@ -1,12 +1,10 @@
 import os
 
+from cleanlab.filter import find_label_issues
+from main import BERTDataset, compute_metrics, data_setting
 import numpy as np
 import pandas as pd
 import torch
-import wandb
-import yaml
-from cleanlab.filter import find_label_issues
-from main import BERTDataset, compute_metrics, data_setting
 from transformers import (
     AutoModelForSequenceClassification,
     AutoTokenizer,
@@ -14,7 +12,11 @@
     Trainer,
     TrainingArguments,
 )
-from utils import (
+import wandb
+import yaml
+
+from ..utils import (
+    HF_TEAM_NAME,
     check_dataset,
     config_print,
     get_parser,
@@ -26,7 +28,7 @@
 
 
 """
-clean_lab.py와 다른 점은, is_noise==1이면 data_train으로, is_noise==0이면 eval_train으로 split합니다.
+clean_lab/class_relabel.py와 다른 점은, is_noise==1이면 data_train으로, is_noise==0이면 eval_train으로 split합니다.
 k-fold를 사용하지 않습니다.
 1. 훈련된 모델: 훈련이 완료된 모델이 지정된 output 경로에 저장됩니다.
 2. retrained_data.csv: 이 파일은 새롭게 라벨링된 훈련 데이터셋으로, 이후 모델 훈련 시 사용할 수 있습니다.
@@ -136,10 +138,16 @@ def train_for_clean_labels_modified(
 
 
 if __name__ == "__main__":
-    # The current process just got forked, after parallelism has already been used.
-    # Disabling parallelism to avoid deadlocks...
-    # os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    # Set up ArgumentParser. NOTE(review): get_parser() is read as parsed args (.config) below; verify.
+    parser = get_parser()
+    parser.add_argument(
+        "--model-name",
+        type=str,
+        default="klue/bert-base",
+        help="사용할 모델 이름",
+    )
 
+    args = parser.parse_args()
     parser = get_parser()
     with open(os.path.join("../config", parser.config)) as f:
         CFG = yaml.safe_load(f)
@@ -176,7 +184,7 @@ def train_for_clean_labels_modified(
     load_env_file("../setup/.env")
     hf_config = CFG.get("huggingface", {})
     hf_token = os.getenv("HUGGINGFACE_TOKEN")
-    hf_organization = "paper-company"
+    hf_organization = HF_TEAM_NAME
 
     config_print(CFG)
 
@@ -191,7 +199,7 @@ def train_for_clean_labels_modified(
 
     DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
-    model_name = "klue/bert-base"
+    model_name = args.model_name
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=7).to(DEVICE)
 
diff --git a/code/main.py b/code/main.py
index 8b63bed..72ec02d 100755
--- a/code/main.py
+++ b/code/main.py
@@ -2,12 +2,10 @@
 
 import numpy as np
 import pandas as pd
-import torch
-import wandb
-import yaml
 from sklearn.metrics import accuracy_score, f1_score
 from sklearn.model_selection import train_test_split
 from tabulate import tabulate
+import torch
 from torch.utils.data import Dataset
 from tqdm import tqdm
 from transformers import (
@@ -17,7 +15,11 @@
     Trainer,
     TrainingArguments,
 )
-from utils import (
+import wandb
+import yaml
+
+from utils import (
+    HF_TEAM_NAME,
     check_dataset,
     config_print,
     get_parser,
@@ -242,7 +244,7 @@ def evaluating(device, model, tokenizer, eval_batch_size, test_path, output_dir)
     load_env_file("../setup/.env")
     hf_config = CFG.get("huggingface", {})
     hf_token = os.getenv("HUGGINGFACE_TOKEN")
-    hf_organization = "paper-company"
+    hf_organization = HF_TEAM_NAME
 
     config_print(CFG)
 
@@ -257,6 +259,7 @@ def evaluating(device, model, tokenizer, eval_batch_size, test_path, output_dir)
 
     DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
 
+    # 모델명 절대 수정하지 말 것.
     model_name = "klue/bert-base"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=7).to(DEVICE)
diff --git a/code/sp_denoise.py b/code/sp_denoise.py
index dd954b0..bbbfa5b 100644
--- a/code/sp_denoise.py
+++ b/code/sp_denoise.py
@@ -13,17 +13,14 @@
 주의: 이 스크립트는 입력 CSV 파일에 'text'와 'is_noise' 열이 있다고 가정합니다.
 """
 
+import argparse
 import os
 
 import pandas as pd
 import sentencepiece as spm
 
 
-# 데이터 파일 경로
-data_file = "/content/2_base_2800_noise_detected.csv"
-
-
-def split_noise_data(data_file):
+def split_noise_data(data_file, noise_data_file="noise_data.csv", non_noise_file="non_noise_data.csv"):
     # CSV 파일 읽기
     df = pd.read_csv(data_file)
 
@@ -32,11 +29,11 @@ def split_noise_data(data_file):
     non_noise_data = df[df["is_noise"] == 0]
 
     # 분리된 데이터를 CSV 파일로 저장
-    noise_data.to_csv("noise_data.csv", index=False)
-    non_noise_data.to_csv("non_noise_data.csv", index=False)
+    noise_data.to_csv(noise_data_file, index=False)
+    non_noise_data.to_csv(non_noise_file, index=False)
 
     print("데이터가 성공적으로 분리되어 CSV 파일로 저장되었습니다.")
-    return "non_noise_data.csv", "noise_data.csv"
+    return non_noise_file, noise_data_file  # (non-noise, noise): the order the caller unpacks
 
 
 def train_sentencepiece_model(df, model_prefix, vocab_size=19931):
@@ -75,22 +72,44 @@ def process_dataframe(df, sp):
     return df
 
 
-# 메인 실행 코드
 if __name__ == "__main__":
-    # 노이즈 데이터 분리
-    non_noise_file, noise_file = split_noise_data(data_file)
+    # ArgumentParser 설정
+    parser = argparse.ArgumentParser()
+
+    # 인자 추가
+    parser.add_argument(
+        "--data_file",
+        type=str,
+        default="/content/2_base_2800_noise_detected.csv",
+        help="디노이징할 데이터 파일 경로",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="denoised_data.csv",
+        help="디노이징된 데이터를 저장할 출력 파일 경로",
+    )
+    parser.add_argument(
+        "--model-prefix",
+        type=str,
+        default="sentencepiece_model",
+        help="SentencePiece 모델 prefix",
+    )
 
-    # SentencePiece 모델 훈련을 위한 설정
-    model_prefix = "sentencepiece_model"
+    # 인자 파싱
+    args = parser.parse_args()
+
+    # 노이즈 데이터 분리
+    non_noise_file, noise_file = split_noise_data(args.data_file)
 
     # 노이즈가 없는 데이터 읽기
     df = pd.read_csv(non_noise_file)
 
     # SentencePiece 모델 훈련
-    train_sentencepiece_model(df, model_prefix)
+    train_sentencepiece_model(df, args.model_prefix)
 
     # 훈련된 SentencePiece 모델 로드
-    sp = load_sentencepiece_model(f"{model_prefix}.model")
+    sp = load_sentencepiece_model(f"{args.model_prefix}.model")
 
     # 노이즈가 있는 새로운 데이터 읽기
     noise_df = pd.read_csv(noise_file)
@@ -99,6 +118,6 @@ def process_dataframe(df, sp):
     denoised_df = process_dataframe(noise_df, sp)
 
     # 결과를 CSV 파일로 저장
-    denoised_df.to_csv("denoised_data.csv", index=False)
+    denoised_df.to_csv(args.output, index=False)
 
     print("모든 처리가 완료되었습니다.")
diff --git a/code/tokenized_denoise.py b/code/tokenized_denoise.py
index fed81cd..e96007d 100644
--- a/code/tokenized_denoise.py
+++ b/code/tokenized_denoise.py
@@ -1,9 +1,3 @@
-# 필요한 라이브러리 임포트
-import pandas as pd
-from tqdm import tqdm
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-
-
 """
 T5 모델을 사용하여 디노이징(노이즈 제거)하는 작업을 수행
 1. CSV 파일에서 텍스트 데이터 읽기
@@ -11,6 +5,12 @@
 3. 처리된 결과를 새로운 CSV 파일로 저장
 """
 
+import argparse
+
+import pandas as pd
+from tqdm import tqdm
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
 
 # 텍스트 배치를 디노이징하는 함수 정의
 def denoise(batch_texts, tokenizer, model):
@@ -23,10 +23,15 @@ def denoise(batch_texts, tokenizer, model):
 
 
 # 데이터를 처리하는 메인 함수 정의
-def process_data(input_file, output_file, batch_size=16):
+def process_data(
+    input_file,
+    output_file,
+    model_name="eenzeenee/t5-base-korean-summarization",
+    batch_size=16,
+):
     # 모델과 토크나이저 로드
-    tokenizer = AutoTokenizer.from_pretrained("eenzeenee/t5-base-korean-summarization")
-    model = AutoModelForSeq2SeqLM.from_pretrained("eenzeenee/t5-base-korean-summarization")
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
     # 입력 데이터 읽기
     data = pd.read_csv(input_file)
@@ -50,11 +55,28 @@ def process_data(input_file, output_file, batch_size=16):
     print(f"처리된 데이터가 {output_file}에 저장되었습니다.")
 
 
-# 스크립트가 직접 실행될 때 수행되는 코드
 if __name__ == "__main__":
-    # 입력 파일 경로 설정
-    input_file = "../data/3_d_2800_hanzi_dictionary.csv"
-    # 출력 파일 경로 설정
-    output_file = "../data/tokenized_denoised_data2.csv"
+    # 인자 처리
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model-name",
+        type=str,
+        default="eenzeenee/t5-base-korean-summarization",
+        help="사용할 모델 이름",
+    )
+    parser.add_argument(
+        "--input",
+        type=str,
+        default="../data/3_d_2800_hanzi_dictionary.csv",
+        help="입력 파일 경로",
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        default="../data/tokenized_denoised_data2.csv",
+        help="출력 파일 경로",
+    )
+    args = parser.parse_args()
+
     # 데이터 처리 함수 호출
-    process_data(input_file, output_file)
+    process_data(args.input, args.output, model_name=args.model_name)
diff --git a/code/utils/__init__.py b/code/utils/__init__.py
new file mode 100644
index 0000000..5924c2f
--- /dev/null
+++ b/code/utils/__init__.py
@@ -0,0 +1,22 @@
+"""
+프로젝트 전반에 사용하는 유틸리티 모듈입니다.
+
+## 주요 기능
+- upload_dataset_hf.py: 데이터셋을 허깅페이스에 업로드
+- gdrive_manager.py: 실험 및 추론 결과를 구글 드라이브로 자동 업로드
+- util.py: 인자 및 로깅 설정을 위한 함수 모음
+
+"""
+
+from .upload_dataset_hf import HF_TEAM_NAME
+from .utils import (
+    check_dataset,
+    config_print,
+    get_parser,
+    load_env_file,
+    make_json_report,
+    seed_fix,
+    set_debug_mode,
+    upload_report,
+    wandb_name,
+)
diff --git a/code/gdrive_manager.py b/code/utils/gdrive_manager.py
similarity index 100%
rename from code/gdrive_manager.py
rename to code/utils/gdrive_manager.py
index 5494c89..a649d1b 100755
--- a/code/gdrive_manager.py
+++ b/code/utils/gdrive_manager.py
@@ -2,12 +2,12 @@
 import io
 import os.path
 
-import yaml
 from google.auth.transport.requests import Request
 from google.oauth2.credentials import Credentials
 from google_auth_oauthlib.flow import InstalledAppFlow
 from googleapiclient.discovery import build
 from googleapiclient.http import MediaIoBaseUpload
+import yaml
 
 
 SCOPES = [
diff --git a/code/upload_dataset_hf.py b/code/utils/upload_dataset_hf.py
similarity index 86%
rename from code/upload_dataset_hf.py
rename to code/utils/upload_dataset_hf.py
index 5bfb830..ddd6914 100755
--- a/code/upload_dataset_hf.py
+++ b/code/utils/upload_dataset_hf.py
@@ -1,9 +1,14 @@
 import os
 
-import yaml
 from datasets import load_dataset
 from huggingface_hub import HfApi
 from main import get_parser, load_env_file
+import yaml
+
+
+# 절대 바꾸지 않을 값이므로 따로 config 처리X
+HF_TEAM_NAME = "paper-company"
+HF_PROJECT_NAME = "datacentric"
 
 
 def upload_train_file_to_hub(file_name, token, private=True):
@@ -19,7 +24,7 @@ def upload_train_file_to_hub(file_name, token, private=True):
     - None
     """
     api = HfApi()
-    repo_id = f"paper-company/datacentric-{file_name}"
+    repo_id = f"{HF_TEAM_NAME}/{HF_PROJECT_NAME}-{file_name}"
 
     # 리포지토리 존재 여부 확인
     try:
@@ -44,11 +49,11 @@ def upload_train_file_to_hub(file_name, token, private=True):
 
 if "__main__" == __name__:
     parser = get_parser()
-    with open(os.path.join("../config", parser.config)) as f:
+    with open(os.path.join("../../config", parser.config)) as f:
         CFG = yaml.safe_load(f)
 
     # 허깅페이스 API키 관리
-    load_env_file("../setup/.env")
+    load_env_file("../../setup/.env")
     hf_token = os.getenv("HUGGINGFACE_TOKEN")
 
     # 업로드할 파일명 지정
diff --git a/code/utils.py b/code/utils/utils.py
similarity index 86%
rename from code/utils.py
rename to code/utils/utils.py
index b38cfe7..816d327 100755
--- a/code/utils.py
+++ b/code/utils/utils.py
@@ -4,23 +4,15 @@
 import random
 import time
 
-import numpy as np
-import torch
 from datasets import load_dataset
 from dotenv import load_dotenv
-from gdrive_manager import GoogleDriveManager
-
+import numpy as np
+import torch
 
-DEBUG_MODE = False
+from .gdrive_manager import GoogleDriveManager
 
 
-# seed 고정
-def seed_fix(SEED=456):
-    random.seed(SEED)
-    np.random.seed(SEED)
-    torch.manual_seed(SEED)
-    torch.cuda.manual_seed(SEED)
-    torch.cuda.manual_seed_all(SEED)
+DEBUG_MODE = False
 
 
 def set_debug_mode(debug_mode):
@@ -115,6 +107,15 @@ def check_dataset(hf_organization, hf_token, train_file_name):
         debug_print("로컬파일을 로드합니다.")
 
 
+# seed 고정
+def seed_fix(SEED=456):
+    random.seed(SEED)
+    np.random.seed(SEED)
+    torch.manual_seed(SEED)
+    torch.cuda.manual_seed(SEED)
+    torch.cuda.manual_seed_all(SEED)
+
+
 def get_timestamp():
     return round(time.time())
 
@@ -123,33 +124,17 @@ def make_json_report(df):
     json_report = {}
 
     # 가정: 전체 데이터셋의 클래스별 샘플 수는 균등 할 것
-    # public 분포와 가정을 통한 private 분포 추정
-    total_per_class = 30000 // 7
-    public_percentages = [0.1719, 0.1367, 0.1018, 0.1627, 0.1469, 0.1499, 0.1301]
+    num_classes = 7
+    public_percentages = [1 / num_classes] * num_classes  # exact uniform share; rounded when reported
     public_samples = 15000
 
     # Public 데이터셋의 클래스별 샘플 수
     public_distribution = {
         "class_distribution": {str(i): int(p * public_samples) for i, p in enumerate(public_percentages)},
-        "num_classes": 7,
+        "num_classes": num_classes,
         "class_balance": {str(i): round(p, 4) for i, p in enumerate(public_percentages)},
     }
 
-    # Private 데이터셋의 클래스별 샘플 수
-    private_distribution = {
-        "class_distribution": {
-            str(i): total_per_class - public_distribution["class_distribution"][str(i)] for i in range(7)
-        },
-        "num_classes": 7,
-        "class_balance": {
-            str(i): round(
-                (total_per_class - public_distribution["class_distribution"][str(i)]) / 15000,
-                4,
-            )
-            for i in range(7)
-        },
-    }
-
     # 현재 데이터의 타겟 레이블 분포 분석
     target_counts = df["target"].value_counts().to_dict()
     sorted_target_counts = dict(sorted(target_counts.items()))
@@ -160,7 +145,6 @@ def make_json_report(df):
     }
 
     json_report["public_distribution"] = public_distribution
-    json_report["private_distribution"] = private_distribution
 
     # NumPy 타입을 처리하기 위한 커스텀 JSONEncoder
     class NumpyEncoder(json.JSONEncoder):
@@ -187,5 +171,5 @@ def upload_report(dataset_name, user_name, exp_name, result_df, result_json):
     _ = drive_manager.upload_dataframe(result_df, f"{exp_name}_{timestamp}_output.csv", folder_id)
     _ = drive_manager.upload_json_data(result_json, f"{exp_name}_{timestamp}_report.json", folder_id)
 
-    gdrive_url = f"https://drive.google.com/drive/folders/{folder_id}"
+    gdrive_url = f"https://drive.google.com/drive/folders/{folder_id}"  # URL, not a filesystem path
     print(f"구글 드라이브에 업로드 되었습니다: {gdrive_url}")
diff --git a/preprocess_minseo/1.BERT-based-relabeling/label_fix.py b/preprocess_minseo/1.BERT-based-relabeling/label_fix.py
index 15ff0d1..36c608c 100644
--- a/preprocess_minseo/1.BERT-based-relabeling/label_fix.py
+++ b/preprocess_minseo/1.BERT-based-relabeling/label_fix.py
@@ -1,10 +1,10 @@
 # 필요한 라이브러리 임포트
 import os
 
-import pandas as pd
-import torch
 from datasets import Dataset, DatasetDict
+import pandas as pd
 from sklearn.model_selection import train_test_split
+import torch
 from transformers import (
     AutoModelForSequenceClassification,
     AutoTokenizer,
diff --git a/pyproject.toml b/pyproject.toml
index 86bbd7b..c9bec2c 100755
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,11 +30,10 @@ select = ["C", "E", "F", "I", "W"]
 [tool.ruff.lint.isort]
 lines-after-imports = 2
 
-# import 섹션 사이에 빈 줄 추가
-lines-between-types = 1
-
 # 섹션 순서 설정
-section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
+section-order = ["standard-library", "third-party", "local-folder"]
+combine-as-imports = true
+force-sort-within-sections = true
 
 [tool.ruff.format]
 # Like Black, use double quotes for strings.
diff --git a/st_pages/app.py b/st_pages/app.py
index 7a97c6e..c5b6f75 100755
--- a/st_pages/app.py
+++ b/st_pages/app.py
@@ -1,9 +1,9 @@
 import cleanlab_noize_viz
+from data_loader import save_uploaded_file_to_session
 import data_overview
 import noise_viz
 import streamlit as st
 import tokenize_viz
-from data_loader import save_uploaded_file_to_session
 
 
 def select_page():
diff --git a/st_pages/cleanlab_noize_viz.py b/st_pages/cleanlab_noize_viz.py
index cf8f1a7..36c80bf 100755
--- a/st_pages/cleanlab_noize_viz.py
+++ b/st_pages/cleanlab_noize_viz.py
@@ -4,8 +4,8 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-import streamlit as st
 from sklearn.manifold import TSNE
+import streamlit as st
 
 
 # 모든 경고 무시
diff --git a/st_pages/tokenize_viz.py b/st_pages/tokenize_viz.py
index 3f752c4..f45247a 100644
--- a/st_pages/tokenize_viz.py
+++ b/st_pages/tokenize_viz.py
@@ -2,7 +2,7 @@
 from transformers import AutoTokenizer
 
 
-tokenizer = AutoTokenizer.from_pretrained("klue/roberta-base")
+tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")
 
 
 def show(df):