From 9a0e155670c219bf6688aef898a2e47156bd109a Mon Sep 17 00:00:00 2001
From: James Gallagher <jamesg@jamesg.blog>
Date: Wed, 13 Dec 2023 15:38:42 +0000
Subject: [PATCH] Fix results commit action, show Pass/Fail instead of % scores

---
 .github/workflows/test.yml | 13 ++++++
 index.html                 | 70 +++++++++++++++--------------
 results/2023-11-29.json    | 10 +++++
 results/2023-12-12.json    | 90 ++++++++++++++++++++++++++++++++++++++
 results/2023-12-13.json    | 90 ++++++++++++++++++++++++++++++++++++++
 template.html              |  8 ++--
 6 files changed, 243 insertions(+), 38 deletions(-)
 create mode 100644 results/2023-11-29.json
 create mode 100644 results/2023-12-12.json
 create mode 100644 results/2023-12-13.json
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 4ef3e63..84b7d60 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -25,3 +25,16 @@ jobs:
         run: python web.py
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      # Commit the results generated by the previous step straight to the
+      # branch. Do not re-run actions/checkout before this: its default clean
+      # (git clean -ffdx plus a hard reset) would delete the new result files.
+      - name: Commit results
+        run: |
+          git config --local user.email "jamesg@jamesg.blog"
+          git config --local user.name "capjamesg"
+          git add results/
+          # git commit exits non-zero when nothing is staged, so guard it.
+          if ! git diff --cached --quiet; then
+            git commit -m "Update results"
+            git push
+          fi
diff --git a/index.html b/index.html
index f6b9dca..a9c6f8c 100644
--- a/index.html
+++ b/index.html
@@ -32,9 +32,6 @@
     </head>
     <body>
         <div class="graph_paper">
-            <a class="logo_link" href="https://roboflow.com/">
-                <img src="./assets/roboflow_full_logo_color.png" class="logo" alt="Roboflow Logo" />
-            </a>
             <header>
                 <h1>How's GPT-4 with Vision Doing?</h1>
                 <div class="header_text">
@@ -58,12 +55,12 @@ <h1>How's GPT-4 with Vision Doing?</h1>
                         <div class="feature_header" style="min-height: auto">
                             <div class="feature_header_text" style="gap: var(--spacing-sizing-4)">
                                 <h2>Response Time</h2>
-                                <p style="font-size: 16px; color: var(--gray-700)">Today, the average response time to receive results from our tests was <b>5.46s</b> per request.</p>
+                                <p style="font-size: 16px; color: var(--gray-700)">Today, the average response time to receive results from our tests was <b>5.79 seconds</b> per request.</p>
                                 <p class="subtitle">This number only accounts for requests made by this application.</p>
                             </div>
                             <div class="chart">
                                 <div class="chart_box chart_box_green">
-                                    <p>5.46 s</p>
+                                    <p>5.79 s</p>
                                 </div>
                             </div>
                         </div>
@@ -85,12 +82,12 @@ <h2>Counting</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
-                                            <p>0%</p>
+                                            <p>Fail</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>14.0%</b> of the time with an average score of <b>14.0%</b>.</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>14.0%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.008</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -128,12 +125,12 @@ <h2>Object Detection</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
-                                            <p>2.0%</p>
+                                            <p>Fail</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>0%</b> of the time with an average score of <b>10.0%</b>.</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -146,7 +143,7 @@ <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
                                         <img class="test_image" src="images/fruit.jpeg" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
-                                        <pre>{'x': 0.68, 'y': 0.25, 'width': 0.1, 'height': 0.25}</pre>
+                                        <pre>{'x': 0.3, 'y': 0.25, 'width': 0.18, 'height': 0.4}</pre>
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
                                         <pre class="test_method">We provide GPT-4V with an image with a known object. We ask it to provide a normalized bounding box of the object and for scoring, we calculate the intersection over union (IOU) between the predicted bounding box and the correct bounding box.</pre>
                                     </div>
@@ -163,12 +160,12 @@ <h2>Graph Understanding</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
-                                            <p>82.0%</p>
+                                            <p>Fail</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 6 tests, conducted daily, this test has passed <b>0%</b> of the time with an average score of <b>81.0%</b>.</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.01</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -183,10 +180,10 @@ <h3><span class="explainer_icon far fa-image"></span>Image</h3>
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
                                         <pre>```json
 {
-  "A": {"quantity": 8, "price": 5},
-  "B": {"quantity": 20, "price": 20},
-  "C": {"quantity": 28, "price": 30},
-  "D": {"quantity": 40, "price": 45}
+  "A": {"quantity": 15, "price": 15},
+  "B": {"quantity": 22, "price": 23},
+  "C": {"quantity": 30, "price": 33},
+  "D": {"quantity": 42, "price": 40}
 }
 ```</pre>
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
@@ -205,12 +202,12 @@ <h2>Color Recognition</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
-                                            <p>89.0%</p>
+                                            <p>Fail</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 3 tests, conducted daily, this test has passed <b>0%</b> of the time with an average score of <b>55.0%</b>.</p>
+                                    <p class="result_text">Of the last 4 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -246,12 +243,12 @@ <h2>Annotation Quality Assurance</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
-                                            <p>33.0%</p>
+                                            <p>Fail</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 3 tests, conducted daily, this test has passed <b>0%</b> of the time with an average score of <b>33.0%</b>.</p>
+                                    <p class="result_text">Of the last 4 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.015</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -285,13 +282,13 @@ <h2>Measurement Test</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
-                                            <p>0%</p>
+                                            <p>Fail</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 2 tests, conducted daily, this test has passed <b>0%</b> of the time with an average score of <b>36.0%</b>.</p>
-                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.011</p>
+                                    <p class="result_text">Of the last 3 tests, conducted daily, this test has passed <b>0%</b> of the time.</p>
+                                    <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
                                 </div>
                                 <div class="explainer_dropdown">
                                     <button type="button" class="dropdown dropdown_learn active">Learn about this test</button>
@@ -303,7 +300,12 @@ <h3><span class="explainer_icon far fa-comment-dots"></span>Prompt</h3>
                                         <h3><span class="explainer_icon far fa-image"></span>Image</h3>
                                         <img class="test_image" src="images/measurement.jpg" alt="Image of the input into GPT-4" />
                                         <h3><span class="explainer_icon far fa-sparkles"></span>Result</h3>
-                                        <pre>Failed to produce a valid JSON output: I'm sorry, but I can't provide measurements or identify specific details in images. If you need to measure an object in a photo, you would typically use the provided scale (the ruler in this case) and estimate the dimensions based on the indicated units. If you have a ruler available and the sticker in the photo, you would align the ruler with the edges of the sticker to measure its length and width.</pre>
+                                        <pre>```json
+{
+  "length": 3.0,
+  "width": 3.0
+}
+```</pre>
                                         <h3><span class="explainer_icon far fa-microscope"></span>Method</h3>
                                         <pre class="test_method">This test gives GPT-4 a image of a square sticker with a ruler on one side and asks it to provide a number for the length and width. We score this test based on precent error, gauging how far it is from the actual value.</pre>
                                     </div>
@@ -329,12 +331,12 @@ <h2>Zero Shot Classification</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_green">
-                                            <p>100%</p>
+                                            <p>Pass</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time with an average score of <b>100%</b>.</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.005</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -366,12 +368,12 @@ <h2>Document OCR</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_green">
-                                            <p>100%</p>
+                                            <p>Pass</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time with an average score of <b>100%</b>.</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -401,12 +403,12 @@ <h2>Handwriting OCR</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_green">
-                                            <p>100%</p>
+                                            <p>Pass</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time with an average score of <b>100%</b>.</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.009</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -436,12 +438,12 @@ <h2>Structured Data OCR</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_green">
-                                            <p>100.0%</p>
+                                            <p>Pass</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time with an average score of <b>100.0%</b>.</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.007</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -471,12 +473,12 @@ <h2>Math OCR</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_green">
-                                            <p>100.0%</p>
+                                            <p>Pass</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time with an average score of <b>100.0%</b>.</p>
+                                    <p class="result_text">Of the last 7 tests, conducted daily, this test has passed <b>100%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost $0.015</p>
                                 </div>
                                 <div class="explainer_dropdown">
diff --git a/results/2023-11-29.json b/results/2023-11-29.json
new file mode 100644
index 0000000..144eeaa
--- /dev/null
+++ b/results/2023-11-29.json
@@ -0,0 +1,10 @@
+{
+    "zero_shot_classification": {
+        "score": 1,
+        "success": true,
+        "price": 0.00481,
+        "pass_fail": "Pass",
+        "response_time": 2.05232572555542,
+        "result": "Toyota Camry"
+    }
+}
\ No newline at end of file
diff --git a/results/2023-12-12.json b/results/2023-12-12.json
new file mode 100644
index 0000000..a8b2f49
--- /dev/null
+++ b/results/2023-12-12.json
@@ -0,0 +1,90 @@
+{
+    "zero_shot_classification": {
+        "score": 1,
+        "success": true,
+        "price": 0.00481,
+        "pass_fail": "Pass",
+        "response_time": 5.844768047332764,
+        "result": "Toyota Camry"
+    },
+    "count_fruit": {
+        "score": 0,
+        "success": false,
+        "price": 0.007870000000000002,
+        "pass_fail": "Fail",
+        "response_time": 2.6285157203674316,
+        "result": "9"
+    },
+    "document_ocr": {
+        "score": 1,
+        "success": true,
+        "price": 0.00857,
+        "pass_fail": "Pass",
+        "response_time": 5.8653788566589355,
+        "result": "I was thinking earlier today that I have gone through, to use the lingo, eras of listening to each of Swift's Eras. Meta indeed. I started listening to Ms. Swift's music after hearing the Midnights album. A few weeks after hearing the album for the first time, I found myself playing various songs on repeat. I listened to the album in order multiple times."
+    },
+    "handwriting_ocr": {
+        "score": 1,
+        "success": true,
+        "price": 0.008730000000000002,
+        "pass_fail": "Pass",
+        "response_time": 6.2278218269348145,
+        "result": "The words of songs on the album have been echoing in my head all week. \"Fades into the grey of my day old tea.\""
+    },
+    "extraction_ocr": {
+        "score": 1.0,
+        "success": true,
+        "price": 0.00725,
+        "pass_fail": "Pass",
+        "response_time": 7.156796932220459,
+        "result": "[{'name': 'MARY THOMAS', 'time_per_day': 1, 'medication': 'ATENOLOL', 'dosage': 100, 'rx_number': '1234567-12345'}]"
+    },
+    "math_ocr": {
+        "score": 1.0,
+        "success": true,
+        "price": 0.01528,
+        "pass_fail": "Pass",
+        "response_time": 2.8542399406433105,
+        "result": "3x^2-6x+2"
+    },
+    "object_detection": {
+        "score": 0.017824216349108808,
+        "success": false,
+        "price": 0.009490000000000002,
+        "pass_fail": "Fail",
+        "response_time": 16.660337924957275,
+        "result": "{'x': 0.68, 'y': 0.25, 'width': 0.1, 'height': 0.25}"
+    },
+    "graph_understanding": {
+        "score": 0.8200000000000001,
+        "success": false,
+        "price": 0.01019,
+        "pass_fail": "Fail",
+        "response_time": 13.353348016738892,
+        "result": "```json\n{\n  \"A\": {\"quantity\": 8, \"price\": 5},\n  \"B\": {\"quantity\": 20, \"price\": 20},\n  \"C\": {\"quantity\": 28, \"price\": 30},\n  \"D\": {\"quantity\": 40, \"price\": 45}\n}\n```"
+    },
+    "color_recognition": {
+        "score": 0.8941176470588236,
+        "success": false,
+        "price": 0.008870000000000001,
+        "pass_fail": "Fail",
+        "response_time": 2.152423858642578,
+        "result": "```json\n{\n  \"R\": 128,\n  \"G\": 0,\n  \"B\": 128\n}\n```"
+    },
+    "annotation_qa": {
+        "score": 0.33333333333333337,
+        "success": false,
+        "price": 0.015300000000000001,
+        "pass_fail": "Fail",
+        "response_time": 2.893411159515381,
+        "result": "```json\n{\n  \"missing\": 1\n}\n```"
+    },
+    "measurement": {
+        "score": 0,
+        "success": false,
+        "price": 0.010539999999999999,
+        "pass_fail": "Fail",
+        "response_time": 15.343508005142212,
+        "result": "Failed to produce a valid JSON output: I'm sorry, but I can't provide measurements or identify specific details in images. If you need to measure an object in a photo, you would typically use the provided scale (the ruler in this case) and estimate the dimensions based on the indicated units. If you have a ruler available and the sticker in the photo, you would align the ruler with the edges of the sticker to measure its length and width."
+    }
+}
\ No newline at end of file
diff --git a/results/2023-12-13.json b/results/2023-12-13.json
new file mode 100644
index 0000000..3bdf275
--- /dev/null
+++ b/results/2023-12-13.json
@@ -0,0 +1,90 @@
+{
+    "zero_shot_classification": {
+        "score": 1,
+        "success": true,
+        "price": 0.00481,
+        "pass_fail": "Pass",
+        "response_time": 3.292368173599243,
+        "result": "Toyota Camry"
+    },
+    "count_fruit": {
+        "score": 0,
+        "success": false,
+        "price": 0.007870000000000002,
+        "pass_fail": "Fail",
+        "response_time": 3.0109987258911133,
+        "result": "9"
+    },
+    "document_ocr": {
+        "score": 1,
+        "success": true,
+        "price": 0.00857,
+        "pass_fail": "Pass",
+        "response_time": 3.5961689949035645,
+        "result": "I was thinking earlier today that I have gone through, to use the lingo, eras of listening to each of Swift's Eras. Meta indeed. I started listening to Ms. Swift's music after hearing the Midnights album. A few weeks after hearing the album for the first time, I found myself playing various songs on repeat. I listened to the album in order multiple times."
+    },
+    "handwriting_ocr": {
+        "score": 1,
+        "success": true,
+        "price": 0.008730000000000002,
+        "pass_fail": "Pass",
+        "response_time": 7.516494274139404,
+        "result": "The words of songs on the album have been echoing in my head all week. \"Fades into the grey of my day old tea.\""
+    },
+    "extraction_ocr": {
+        "score": 1.0,
+        "success": true,
+        "price": 0.00725,
+        "pass_fail": "Pass",
+        "response_time": 12.924509048461914,
+        "result": "[{'name': 'MARY THOMAS', 'time_per_day': 1, 'medication': 'ATENOLOL', 'dosage': 100, 'rx_number': '1234567-12345'}]"
+    },
+    "math_ocr": {
+        "score": 1.0,
+        "success": true,
+        "price": 0.01528,
+        "pass_fail": "Pass",
+        "response_time": 4.017405986785889,
+        "result": "3x^2-6x+2"
+    },
+    "object_detection": {
+        "score": 0.12845010615711253,
+        "success": false,
+        "price": 0.009490000000000002,
+        "pass_fail": "Fail",
+        "response_time": 3.2526347637176514,
+        "result": "{'x': 0.3, 'y': 0.25, 'width': 0.18, 'height': 0.4}"
+    },
+    "graph_understanding": {
+        "score": 0.86,
+        "success": false,
+        "price": 0.01019,
+        "pass_fail": "Fail",
+        "response_time": 5.212254285812378,
+        "result": "```json\n{\n  \"A\": {\"quantity\": 15, \"price\": 15},\n  \"B\": {\"quantity\": 22, \"price\": 23},\n  \"C\": {\"quantity\": 30, \"price\": 33},\n  \"D\": {\"quantity\": 42, \"price\": 40}\n}\n```"
+    },
+    "color_recognition": {
+        "score": 0.8941176470588236,
+        "success": false,
+        "price": 0.008870000000000001,
+        "pass_fail": "Fail",
+        "response_time": 18.75498390197754,
+        "result": "```json\n{\n  \"R\": 128,\n  \"G\": 0,\n  \"B\": 128\n}\n```"
+    },
+    "annotation_qa": {
+        "score": 0.33333333333333337,
+        "success": false,
+        "price": 0.015300000000000001,
+        "pass_fail": "Fail",
+        "response_time": 9.374576091766357,
+        "result": "```json\n{\n  \"missing\": 1\n}\n```"
+    },
+    "measurement": {
+        "score": 0.8571428571428572,
+        "success": false,
+        "price": 0.00877,
+        "pass_fail": "Fail",
+        "response_time": 5.342613935470581,
+        "result": "```json\n{\n  \"length\": 3.0,\n  \"width\": 3.0\n}\n```"
+    }
+}
\ No newline at end of file
diff --git a/template.html b/template.html
index 96fcd73..76fe33a 100644
--- a/template.html
+++ b/template.html
@@ -80,12 +80,12 @@ <h2>{{ test_data.name }}</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_red">
-                                            <p>{{ (current_results[test_id].score*100)|round(0) }}%</p>
+                                            <p>Fail</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last {{test_data["seven_day"]["score"]|length}} tests, conducted daily, this test has achieved 100% accuracy <b>{{ test_data["seven_day"]["success_percent"] }}%</b> of the time with an average score of <b>{{ test_data["seven_day"]["score_percent"] }}%</b>.</p>
+                                    <p class="result_text">Of the last {{test_data["seven_day"]["score"]|length}} tests, conducted daily, this test has passed <b>{{ test_data["seven_day"]["success_percent"] }}%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost ${{current_results[test_id].price|round(3)}}</p>
                                 </div>
                                 <div class="explainer_dropdown">
@@ -124,12 +124,12 @@ <h2>{{ test_data.name }}</h2>
                                     </div>
                                     <div class="chart">
                                         <div class="chart_box chart_box_green">
-                                            <p>{{ (current_results[test_id].score*100)|round(0) }}%</p>
+                                            <p>Pass</p>
                                         </div>
                                     </div>
                                 </div>
                                 <div class="request_summary">
-                                    <p class="result_text">Of the last {{test_data["seven_day"]["score"]|length}} tests, conducted daily, this test has passed <b>{{ test_data["seven_day"]["success_percent"] }}%</b> of the time with an average score of <b>{{ test_data["seven_day"]["score_percent"] }}%</b>.</p>
+                                    <p class="result_text">Of the last {{test_data["seven_day"]["score"]|length}} tests, conducted daily, this test has passed <b>{{ test_data["seven_day"]["success_percent"] }}%</b> of the time.</p>
                                     <p class="request_price"><i class="far fa-coins"></i>Today's request cost ${{current_results[test_id].price|round(3)}}</p>
                                 </div>
                                 <div class="explainer_dropdown">