240908 TEST completed: Mcp sync-up (main 2f54285) #89

Draft
wants to merge 57 commits into base: v4.1-internal
Changes from all commits
Commits
57 commits
fd39374
add ci (#67)
kphilpark Aug 28, 2024
d0df238
add ci (#69)
kphilpark Aug 28, 2024
f1a594d
fix error
Aug 28, 2024
e5769c8
test (#70)
kphilpark Aug 28, 2024
0019ec8
CI setup (#71)
kphilpark Aug 28, 2024
1bf4d5b
fix error
Aug 28, 2024
ee4668e
test
Aug 28, 2024
71b4ddf
test
Aug 28, 2024
2ceb54e
test
Aug 28, 2024
a660109
fix
Aug 28, 2024
dc23821
fix
Aug 28, 2024
d94bdd3
fix error
Aug 28, 2024
79a357f
add pip list info
Aug 28, 2024
9174055
fix tag info
Aug 28, 2024
2000b11
Remove comments and print branch tag info
Aug 28, 2024
54a41c7
show tag info
Aug 28, 2024
5d84a30
fix tag clone error
Aug 28, 2024
49def58
fix error
Aug 28, 2024
6342840
fix error
Aug 28, 2024
89c2085
add commit id info
Aug 28, 2024
bd1e4f4
Ci test (#73)
kphilpark Sep 3, 2024
9ecbc69
Update ci.yaml
kphilpark Sep 3, 2024
6c7c490
Update ci.yaml
kphilpark Sep 3, 2024
41370e3
Update ci.yaml
kphilpark Sep 3, 2024
8828d73
Update ci.yaml
kphilpark Sep 3, 2024
b9c9a72
Update ci.yaml
kphilpark Sep 3, 2024
24e5c86
Update ci.yaml
kphilpark Sep 3, 2024
86b8d88
Update ci.yaml
kphilpark Sep 3, 2024
76f4a72
Update ci_bert_int8.sh
kphilpark Sep 3, 2024
c222ea7
Update ci_gpt_j_int8.sh
kphilpark Sep 3, 2024
96f2885
Update ci_llama2-70b_int8.sh
kphilpark Sep 3, 2024
3dbdd80
Update set_ci_conda.sh
kphilpark Sep 3, 2024
980ee4c
Update set_ci_conda.sh
kphilpark Sep 3, 2024
f4cfd42
Update ci_gpt_j_int8.sh
kphilpark Sep 3, 2024
a1e6f2d
Update set_ci_conda.sh
kphilpark Sep 3, 2024
3399c05
Update ci_bert_int8.sh
kphilpark Sep 3, 2024
aa9d204
Update ci_gpt_j_int8.sh
kphilpark Sep 3, 2024
e5b1592
Update ci_llama2-70b_int8.sh
kphilpark Sep 3, 2024
754f973
Update set_ci_conda.sh
kphilpark Sep 3, 2024
3257afe
Update accuracy-squad.py
kphilpark Sep 3, 2024
577cf96
Update ci_bert_int8.sh
kphilpark Sep 3, 2024
d7255a0
Update ci_gpt_j_int8.sh
kphilpark Sep 3, 2024
73dc5ed
Update ci.yaml
kphilpark Sep 3, 2024
bad68c2
Update ci.yaml
kphilpark Sep 3, 2024
a31e519
Update ci.yaml
kphilpark Sep 3, 2024
4c07aca
Update ci.yaml
kphilpark Sep 3, 2024
ff427e5
Update ci_llama2-70b_int8.sh
kphilpark Sep 3, 2024
e5be931
Update ci_llama2-70b_int8.sh
kphilpark Sep 3, 2024
a820020
add fp8 ci test
jeongin-yun Sep 3, 2024
5e35e9c
for test
jeongin-yun Sep 3, 2024
dd04cc2
Update ci_bert_fp8.sh
kphilpark Sep 3, 2024
59ce868
set n_calib, n_val
jeongin-yun Sep 3, 2024
62d8e9c
Fix gpt-j n_data
jeongin-yun Sep 4, 2024
dbdf754
Update ci_bert_fp8.sh
kphilpark Sep 4, 2024
4c396a2
Debug based on mcp main 715e78bf
jeongin-yun Sep 4, 2024
b68aecf
Merge commit '822757d038bdb68c5a942cbeb159a56be1588cbc' into mcp-sync
kphilpark Sep 8, 2024
95aac8b
update args (sync with mcp main)
kphilpark Sep 8, 2024
52 changes: 0 additions & 52 deletions .github/workflows/auto_pr.yaml

This file was deleted.

3 changes: 3 additions & 0 deletions .github/workflows/ci.yaml
@@ -245,4 +245,7 @@ jobs:
repo: context.repo.repo,
body: body
})
<<<<<<< HEAD
=======

>>>>>>> 822757d038bdb68c5a942cbeb159a56be1588cbc
2 changes: 1 addition & 1 deletion ci_scripts/ci_bert_fp8.sh
@@ -96,7 +96,7 @@ python -m ci_file.qbert_forward_test --model_path=$MODEL_PATH \
--ref_path=$REF_PATH \
--res_path=$RES_PATH \
--config_dtype=$CONFIG_DTYPE\
--update_gen_list # argument for updating the reference answers
# --update_gen_list # argument for updating the reference answers


printf "\n============= STEP-3: Check the equivalence of f1 score between current mlperf submission <-> ref =============\n"
2 changes: 1 addition & 1 deletion ci_scripts/ci_bert_int8.sh
@@ -96,7 +96,7 @@ python -m ci_file.qbert_forward_test --model_path=$MODEL_PATH \
--ref_path=$REF_PATH \
--res_path=$RES_PATH \
--config_dtype=$CONFIG_DTYPE\
--update_gen_list # argument for updating the reference answers
# --update_gen_list # argument for updating the reference answers


printf "\n============= STEP-3: Check the equivalence of f1 score between current mlperf submission <-> ref =============\n"
2 changes: 1 addition & 1 deletion ci_scripts/ci_gpt_j_fp8.sh
@@ -88,7 +88,7 @@ python -m ci_file.qgpt_j_forward_test --model_path=$MODEL_PATH \
--ref_path=$REF_PATH\
--res_path=$RES_PATH\
--config_dtype=$CONFIG_DTYPE\
--update_gen_list # argument for updating the reference answers
# --update_gen_list # argument for updating the reference answers



2 changes: 1 addition & 1 deletion ci_scripts/ci_gpt_j_int8.sh
@@ -88,7 +88,7 @@ python -m ci_file.qgpt_j_forward_test --model_path=$MODEL_PATH \
--ref_path=$REF_PATH\
--res_path=$RES_PATH\
--config_dtype=$CONFIG_DTYPE\
--update_gen_list # argument for updating the reference answers
# --update_gen_list # argument for updating the reference answers



2 changes: 1 addition & 1 deletion ci_scripts/ci_llama2-70b_fp8.sh
@@ -96,7 +96,7 @@ python -m ci_file.qllama2_70b_forward_test --model_path=$CHECKPOINT_PATH \
--ref_path=$REF_PATH\
--res_path=$RES_PATH\
--config_dtype=$CONFIG_DTYPE\
--update_gen_list
# --update_gen_list


printf "\n============= End of Forward Test for Qllama2-70b =============\n"
2 changes: 1 addition & 1 deletion ci_scripts/ci_llama2-70b_int8.sh
@@ -96,7 +96,7 @@ python -m ci_file.qllama2_70b_forward_test --model_path=$CHECKPOINT_PATH \
--ref_path=$REF_PATH\
--res_path=$RES_PATH\
--config_dtype=$CONFIG_DTYPE\
--update_gen_list
# --update_gen_list


printf "\n============= End of Forward Test for Qllama2-70b =============\n"
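Across the six ci_scripts changes above, the edit is the same: the --update_gen_list flag passed to the forward-test modules is commented out, so CI compares against the stored reference answers instead of regenerating them. The sketch below shows how such a flag is typically consumed on the Python side; the argparse wiring is an assumption for illustration, and only the update_gen_list branch mirrors what is visible in the qgpt_j_forward_test.py diff further down.

import argparse
import json

def get_args():
    # Hypothetical parser: the real test scripts define many more options
    # (model_path, qconfig_path, res_path, n_data, ...).
    parser = argparse.ArgumentParser()
    parser.add_argument("--ref_path", type=str, required=True)
    parser.add_argument("--config_dtype", type=str, default="int8")
    parser.add_argument(
        "--update_gen_list",
        action="store_true",
        help="Regenerate the stored reference answers instead of comparing against them.",
    )
    return parser.parse_args()

def save_or_load_references(generated_data_list, args):
    update_ref_path = args.ref_path + f"/generated_data_list_{args.config_dtype}.json"
    if args.update_gen_list:
        # Refresh the stored references (the branch the CI scripts now skip).
        with open(update_ref_path, "w") as file:
            json.dump(generated_data_list, file, indent=4)
        return None
    # Normal CI path: load the stored references for comparison.
    with open(update_ref_path, "r") as file:
        return json.load(file)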
2 changes: 1 addition & 1 deletion language/bert/quantization/calibrate.py
@@ -121,7 +121,7 @@ def calibrate(model: GraphModule, qconfig, qparam_path, qformat_path, calib_data

model_compressor.calibrate(
model,
dataloader=calib_dataloader,
# dataloader=calib_dataloader,
**get_kwargs(model_compressor.calibrate, qconfig),
)

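The calibrate() diffs in this PR (bert here, gpt-j and llama2 below) all comment out the explicit dataloader=calib_dataloader argument while keeping **get_kwargs(model_compressor.calibrate, qconfig). A plausible reading is that get_kwargs filters qconfig down to the keyword arguments the callee accepts, so anything calibrate() still needs is expected to arrive through qconfig. The helper below is only a sketch of that filtering pattern, not the repository's actual get_kwargs implementation.

import inspect

def get_kwargs(func, config: dict) -> dict:
    # Keep only the keys of `config` that match `func`'s parameters.
    # Sketch only; the real get_kwargs in this repo may differ.
    params = inspect.signature(func).parameters
    return {name: value for name, value in config.items() if name in params}

# Illustrative usage with a stand-in calibrate function:
def calibrate_model(model, weight_dtype="int8", act_dtype="int8", dataloader=None):
    ...

qconfig = {"weight_dtype": "int8", "act_dtype": "int8", "unrelated_key": 1}
kwargs = get_kwargs(calibrate_model, qconfig)  # {"weight_dtype": "int8", "act_dtype": "int8"}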
16 changes: 8 additions & 8 deletions language/gpt-j/ci_file/qgpt_j_forward_test.py
@@ -192,8 +192,8 @@ def generate_compare_gen_token(
tokenizer = get_tokenizer()
# load reference generated tokens.
update_ref_path = ref_path + f"/generated_data_list_{config_dtype}.json"
# with open(update_ref_path, "r") as file:
# ref_data = json.load(file)
with open(update_ref_path, "r") as file:
ref_data = json.load(file)

results = []
result_flag = True
@@ -258,13 +258,13 @@ def generate_compare_gen_token(
generated_data_list.append(generated_data)
print(f"생성 토큰 문장 {idx}: {gen_sentence}")
# compare submission model's decoded_test with reference sentences.
# ref_sentence = ref_data[idx]["gen_text"]
# result_flag = check_diff(idx, ref_sentence, gen_sentence, results, result_flag)
ref_sentence = ref_data[idx]["gen_text"]
result_flag = check_diff(idx, ref_sentence, gen_sentence, results, result_flag)

# compare_results_path = res_path + f"/qgpt_j_compare_result_{config_dtype}.json"
# with open(compare_results_path, "w") as file:
# json.dump(results, file, indent=4)
# print(f"토큰 동치비교 결과가 저장되었습니다. dir: {compare_results_path}")
compare_results_path = res_path + f"/qgpt_j_compare_result_{config_dtype}.json"
with open(compare_results_path, "w") as file:
json.dump(results, file, indent=4)
print(f"토큰 동치비교 결과가 저장되었습니다. dir: {compare_results_path}")
if update_gen_list:
with open(update_ref_path, "w") as file:
json.dump(generated_data_list, file, indent=4)
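This hunk re-enables the reference comparison that had been commented out: the stored generated_data_list_{dtype}.json is loaded, each generated sentence is compared via check_diff(), and the per-sample results are written to qgpt_j_compare_result_{dtype}.json. check_diff's body is not shown in this diff; the sketch below is one hypothetical shape for it, inferred only from how it is called here (index, reference text, generated text, results accumulator, running flag).

def check_diff(idx, ref_sentence, gen_sentence, results, result_flag):
    # Hypothetical equivalence check matching the call site above.
    is_equal = ref_sentence == gen_sentence
    results.append(
        {
            "idx": idx,
            "equal": is_equal,
            "ref_text": ref_sentence,
            "gen_text": gen_sentence,
        }
    )
    # Once any sample differs, the overall flag stays False.
    return result_flag and is_equal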
6 changes: 1 addition & 5 deletions language/gpt-j/quantization/calibrate.py
@@ -134,7 +134,7 @@ def calibrate(model: GraphModule, qconfig, qparam_path, qformat_path, calib_data

model_compressor.calibrate(
model_for_calib,
dataloader=calib_dataloader,
# dataloader=calib_dataloader,
autoscale_calib_kwargs=autoscale_calib_cfg if run_autoscale else None,
model_type=model_type,
**get_kwargs(model_compressor.calibrate, qconfig),
@@ -160,7 +160,6 @@ def immigrate_qparams(model, golden_qparam_path, golden_qformat_path, quant_para
qparam_path = golden_qparam_path,
qlevel=2,
target_machine=qconfig["target_machine"],
delete_org_weight=True,
immigrate_qparams = True,
)

@@ -186,10 +185,7 @@ def immigrate_qparams(model, golden_qparam_path, golden_qformat_path, quant_para
torch.save(quant_models["decode"].state_dict(), qlv4_decode_out_path)
# model_compressor.save_graph_patterns(quant_models["prefill"], prefill_rblock_json_out_path)
# model_compressor.save_graph_patterns(quant_models["decode"], decode_rblock_json_out_path)





def get_args():
parser = argparse.ArgumentParser()
1 change: 0 additions & 1 deletion language/gpt-j/quantization/quantize.py
@@ -20,7 +20,6 @@ def _quantize(
model,
qformat_path=qformat_path,
qparam_path=qparam_path,
delete_org_weight=True,
decode_phase=quantized_prefill is not None,
quantized_prefill_model=quantized_prefill,
target_machine=TARGET_MACHINE,
3 changes: 1 addition & 2 deletions language/llama2-70b/quantization/calibrate.py
@@ -132,7 +132,7 @@ def calibrate(model, qconfig, qparam_path, qformat_path, calib_dataloader):

model_compressor.calibrate(
model,
dataloader=calib_dataloader,
# dataloader=calib_dataloader,
**get_kwargs(model_compressor.calibrate, qconfig),
model_type = model_type,
autoscale_calib_kwargs=autoscale_calib_kwargs,
@@ -163,7 +163,6 @@ def immigrate_qparams(model, golden_qparam_path, golden_qformat_path, quant_para
qparam_path = golden_qparam_path,
qlevel=2,
target_machine=qconfig["target_machine"],
delete_org_weight=True,
immigrate_qparams = True,
)

3 changes: 1 addition & 2 deletions language/llama2-70b/quantization/calibrate_llama3.py
@@ -143,7 +143,7 @@ def calibrate(model, qconfig, qparam_path, qformat_path, calib_dataloader):

model_compressor.calibrate(
model,
dataloader=calib_dataloader,
# dataloader=calib_dataloader,
**get_kwargs(model_compressor.calibrate, qconfig),
model_type = model_type,
autoscale_calib_kwargs=autoscale_calib_kwargs,
@@ -174,7 +174,6 @@ def immigrate_qparams(model, golden_qparam_path, golden_qformat_path, quant_para
qparam_path = golden_qparam_path,
qlevel=2,
target_machine=qconfig["target_machine"],
delete_org_weight=True,
immigrate_qparams = True,
)

1 change: 0 additions & 1 deletion language/llama2-70b/quantization/quantize.py
@@ -19,7 +19,6 @@ def _quantize(
model,
qformat_path=qformat_path,
qparam_path=qparam_path,
delete_org_weight=True,
decode_phase=quantized_prefill is not None,
quantized_prefill_model=quantized_prefill,
# https://github.com/furiosa-ai/inference/pull/29/files#diff-9b228ac2c8c424039f8ab41443631c4097f3c3abf73a05b3e327c51ed30d394dR65