From 47490f43fec948b6d23626315288e4de29034e8e Mon Sep 17 00:00:00 2001
From: Matthew Mazzola
Date: Sun, 3 Mar 2024 18:37:31 +0000
Subject: [PATCH] Remove llava

---
 .vscode/launch.json             | 74 ------------------
 evaluation/generate_response.py | 14 +---
 models/llava.py                 | 131 --------------------------------
 3 files changed, 3 insertions(+), 216 deletions(-)
 delete mode 100644 models/llava.py

diff --git a/.vscode/launch.json b/.vscode/launch.json
index c14faa5..1b0d1bb 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -60,79 +60,5 @@
                 "LOGLEVEL": "DEBUG",
             },
         },
-        {
-            "name": "Python: Eval - MathVista - (LLaVA) evaluation.generate_response",
-            "type": "debugpy",
-            "request": "launch",
-            "module": "evaluation.generate_response",
-            "justMyCode": true,
-            "args": [
-                "--conv-mode",
-                "vicuna_v1",
-                "--max_num_problems",
-                "100",
-                "--output_dir",
-                "_results/eval/mathvista/llava/debug",
-                "--output_file",
-                "llava-v1.5-7b.json",
-                "--model_path",
-                "liuhaotian/llava-v1.5-7b",
-                // TODO: Find out why loading from folder during debug not work?
-                // "/mnt/mattmprojects/exp/projects/mattm-projectwillow/amlt-results/7293878818.78845-fe8da8ef-7f38-4b40-b2e1-3259c7bf7a3e/llava/checkpoints/llava-vicuna-7b-v1.5-finetune",
-                "--save_every",
-                "1",
-            ],
-            "envFile": "${workspaceFolder}/.env",
-            "env": {
-                "CUDA_VISIBLE_DEVICES": "0",
-                "LOGLEVEL": "DEBUG",
-            },
-        },
-        {
-            "name": "Python: Eval - MathVista - (LLaVA) evaluation.extract_answer",
-            "type": "debugpy",
-            "request": "launch",
-            "module": "evaluation.extract_answer",
-            "justMyCode": true,
-            "args": [
-                "--max_num_problems",
-                "100",
-                "--results_file_path",
-                "_results/eval/mathvista/llava/debug/llava-v1.5-7b.json",
-                // "${workspaceFolder}/_results/eval/mathvista/20240213_223832/llava-v1.5-7b.json",
-                // "${workspaceFolder}/_results/eval/mathvista/debug/output_gpt4_2shot_solution_use_caption_ocr.json",
-            ],
-            "envFile": "${workspaceFolder}/.env",
-            "env": {
-                "CUDA_VISIBLE_DEVICES": "0",
-                "LOGLEVEL": "DEBUG",
-            },
-        },
-        {
-            "name": "Python: Eval - MathVista - (LLaVA) evaluation.calculate_score",
-            "type": "debugpy",
-            "request": "launch",
-            "module": "evaluation.calculate_score",
-            "justMyCode": true,
-            "args": [
-                "--output_dir",
-                // All Empty Responses
-                // "_results/eval/mathvista/20240214_210722",
-                // Legitimate Responses
-                // "_results/eval/mathvista/20240214_220834",
-                "_results/eval/mathvista/20240215_204602",
-                "--output_file",
-                "llava-v1.5-7b.json",
-                // "llava-v1.5-7b_false_positives.json",
-                "--score_file",
-                "llava-v1.5-7b_metrics.json",
-                "--ignore_empty_extractions",
-            ],
-            "envFile": "${workspaceFolder}/.env",
-            "env": {
-                "CUDA_VISIBLE_DEVICES": "0",
-                "LOGLEVEL": "DEBUG",
-            },
-        },
     ]
 }
diff --git a/evaluation/generate_response.py b/evaluation/generate_response.py
index 732369c..299344c 100644
--- a/evaluation/generate_response.py
+++ b/evaluation/generate_response.py
@@ -146,19 +146,11 @@ def main():
     # If we were given a custom model path, load that model, otherwise use a remote service model
     if args.model_path:
-        from models import llava
+        # from models import llava
 
         logging.info(f"Loading model from {args.model_path}...")
 
-        model = llava.Llava_Model(
-            model_path=args.model_path,
-            model_base=args.model_base,
-            conv_mode=args.conv_mode,
-            temperature=args.temperature,
-            top_p=args.top_p,
-            num_beams=args.num_beams,
-            max_new_tokens=args.max_new_tokens,
-            seed_value=42,
-        )
+        # TODO: Add support for local models
+        raise NotImplementedError("Local models are not yet supported.")
     else:
         model_name = args.azure_openai_model if args.azure_openai_model else args.model
         logging.info(f"Loading {model_name}...")
diff --git a/models/llava.py b/models/llava.py
deleted file mode 100644
index b3f3b44..0000000
--- a/models/llava.py
+++ /dev/null
@@ -1,131 +0,0 @@
-import random
-import re
-from io import BytesIO
-
-import numpy as np
-import requests
-import torch
-from llava.constants import (
-    DEFAULT_IM_END_TOKEN,
-    DEFAULT_IM_START_TOKEN,
-    DEFAULT_IMAGE_TOKEN,
-    IMAGE_PLACEHOLDER,
-    IMAGE_TOKEN_INDEX,
-)
-from llava.conversation import SeparatorStyle, conv_templates
-from llava.mm_utils import KeywordsStoppingCriteria, get_model_name_from_path, process_images, tokenizer_image_token
-from llava.model.builder import load_pretrained_model
-from llava.utils import disable_torch_init
-from PIL import Image
-
-
-def image_parser(args):
-    out = args.image_file.split(args.sep)
-    return out
-
-
-def load_image(image_file):
-    if image_file.startswith("http") or image_file.startswith("https"):
-        response = requests.get(image_file)
-        image = Image.open(BytesIO(response.content)).convert("RGB")
-    else:
-        image = Image.open(image_file).convert("RGB")
-    return image
-
-
-def load_images(image_files):
-    return [load_image(image_file) for image_file in image_files]
-
-
-def set_seed(seed_value):
-    """
-    Set the seed for PyTorch (both CPU and CUDA), Python, and NumPy for reproducible results.
-
-    :param seed_value: An integer value to be used as the seed.
-    """
-    torch.manual_seed(seed_value)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed_value)
-        torch.cuda.manual_seed_all(seed_value)  # For multi-GPU setups
-    random.seed(seed_value)
-    np.random.seed(seed_value)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-
-
-class Llava_Model:
-
-    def __init__(self, model_path, model_base, conv_mode, temperature, top_p, num_beams, max_new_tokens, seed_value):
-        disable_torch_init()
-        set_seed(seed_value)
-        model_name = get_model_name_from_path(model_path)
-        tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name)
-
-        self.tokenizer = tokenizer
-        self.model = model
-        self.image_processor = image_processor
-        self.context_len = context_len
-        self.conv_mode = conv_mode
-        self.temperature = temperature
-        self.top_p = top_p
-        self.num_beams = num_beams
-        self.max_new_tokens = max_new_tokens
-
-    def get_response(self, user_prompt: str, decoded_image: Image.Image):
-        qs = user_prompt
-        image_token_se = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
-        if IMAGE_PLACEHOLDER in qs:
-            if self.model.config.mm_use_im_start_end:
-                qs = re.sub(IMAGE_PLACEHOLDER, image_token_se, qs)
-            else:
-                qs = re.sub(IMAGE_PLACEHOLDER, DEFAULT_IMAGE_TOKEN, qs)
-        else:
-            if self.model.config.mm_use_im_start_end:
-                qs = image_token_se + "\n" + qs
-            else:
-                qs = DEFAULT_IMAGE_TOKEN + "\n" + qs
-
-        conv = conv_templates[self.conv_mode].copy()
-        conv.append_message(conv.roles[0], qs)
-        conv.append_message(conv.roles[1], None)
-        prompt = conv.get_prompt()
-
-        images = [decoded_image.convert('RGB')]
-        images_tensor = process_images(images, self.image_processor, self.model.config).to(
-            self.model.device, dtype=torch.float16
-        )
-
-        input_ids = (
-            tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).cuda()
-        )
-
-        stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
-        keywords = [stop_str]
-        stopping_criteria = KeywordsStoppingCriteria(keywords, self.tokenizer, input_ids)
-
-        with torch.inference_mode():
-            output_ids = self.model.generate(
-                input_ids,
-                images=images_tensor,
-                do_sample=True if self.temperature > 0 else False,
-                temperature=self.temperature,
-                top_p=self.top_p,
-                num_beams=self.num_beams,
-                max_new_tokens=self.max_new_tokens,
-                use_cache=True,
-                stopping_criteria=[stopping_criteria],
-            )
-
-        input_token_len = input_ids.shape[1]
-        n_diff_input_output = (input_ids != output_ids[:, :input_token_len]).sum().item()
-        if n_diff_input_output > 0:
-            print(f"[Warning] {n_diff_input_output} output_ids are not the same as the input_ids")
-
-        outputs = self.tokenizer.batch_decode(output_ids[:, input_token_len:], skip_special_tokens=True)[0]
-        outputs = outputs.strip()
-
-        if outputs.endswith(stop_str):
-            outputs = outputs[: -len(stop_str)]
-            outputs = outputs.strip()
-
-        return outputs