-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathopenai_analyzer.py
728 lines (599 loc) · 29.3 KB
/
openai_analyzer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
import json
from dataclasses import dataclass
from typing import List, Dict, Tuple
from datetime import datetime
import html
import time
import csv
import os
import random
import tiktoken
import sys
from collections import defaultdict
from calendar import month_name
from openai import OpenAI
from openai import OpenAIError
from pydantic import BaseModel, Field
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import numpy as np
# ---------------------------------------------------------------------------
# Run-time switches
# ---------------------------------------------------------------------------
# When True, get_quality_rating fills in random scores instead of calling the
# OpenAI API.
DRY_RUN = False
# Upper bound on the number of conversations rated in a single run.
MAX_RATED_COUNT = 2000000

# ---------------------------------------------------------------------------
# File locations
# ---------------------------------------------------------------------------
INPUT_DIR = "input"  # Directory for input files
OUTPUT_DIR = "output"  # Directory for output files
CACHE_FILE = os.path.join(OUTPUT_DIR, 'ratings_cache.csv')

# ---------------------------------------------------------------------------
# Rating dimensions, each a (name, description) pair
# ---------------------------------------------------------------------------
# How demanding the user's prompt is.
USER_DIMENSIONS = [
    ("complexity", "The level of difficulty or intricacy of the user's query"),
    ("domain_specificity", "How specialized or niche the topic of the query is"),
    ("ambiguity", "How clear or unclear the user's request is"),
    ("abstraction_level", "Whether the query deals with concrete or abstract concepts"),
    ("contextual_requirements", "How much context is needed to fully understand and address the query"),
    ("linguistic_challenge", "The level of language proficiency required to understand and respond to the query"),
    ("cognitive_demand", "The level of cognitive processing required to address the query"),
]

# How good the assistant's response is.
ASSISTANT_DIMENSIONS = [
    ("overall", "Overall quality of the response"),
    ("relevance", "How well the response addresses the user's query or task"),
    ("accuracy", "Correctness of information provided"),
    ("coherence", "Logical flow and consistency of the response"),
    ("completeness", "How thoroughly the response addresses all aspects of the query"),
    ("clarity", "How easy the response is to understand"),
    ("conciseness", "Efficiency in conveying information without unnecessary verbosity"),
    ("helpfulness", "How useful the response is in solving the user's problem or answering their question"),
    ("safety", "Avoidance of harmful, unethical, or biased content"),
    ("creativity", "Novel or innovative aspects of the response"),
    ("language_quality", "Proper grammar, spelling, and language use"),
    ("task_completion", "How well the response accomplishes the requested task"),
    ("contextual_understanding", "How well the response considers the context of the conversation"),
    ("non_sycophancy", "Ability to provide honest, critical feedback when appropriate instead of always agreeing"),
]

# Every rated dimension in column order: assistant dimensions first with an
# "a_" prefix, then user dimensions with a "u_" prefix.
ALL_DIMENSIONS = (
    [("a_" + name, description) for name, description in ASSISTANT_DIMENSIONS]
    + [("u_" + name, description) for name, description in USER_DIMENSIONS]
)
class QualityRating(BaseModel):
    """Schema with one integer score per rated dimension.

    The ``a_*`` fields carry the same names as the entries of
    ASSISTANT_DIMENSIONS (with the ``a_`` prefix) and the ``u_*`` fields the
    same names as the entries of USER_DIMENSIONS (with the ``u_`` prefix).

    NOTE(review): nothing in the visible part of this file instantiates this
    model; confirm whether it is still needed.
    """
    # Scores describing the assistant's response.
    a_overall: int
    a_relevance: int
    a_accuracy: int
    a_coherence: int
    a_completeness: int
    a_clarity: int
    a_conciseness: int
    a_helpfulness: int
    a_safety: int
    a_creativity: int
    a_language_quality: int
    a_task_completion: int
    a_contextual_understanding: int
    a_non_sycophancy: int
    # Scores describing the difficulty of the user's prompt.
    u_complexity: int
    u_domain_specificity: int
    u_ambiguity: int
    u_abstraction_level: int
    u_contextual_requirements: int
    u_linguistic_challenge: int
    u_cognitive_demand: int
@dataclass
class Message:
    """One chat message kept from the export."""
    # Value of the exported message's 'id' field.
    id: str
    # Value of message['author']['role']; the code in this file tests for
    # 'user' and 'assistant'.
    author_role: str
    # Message text (content parts joined into a single string).
    content: str
@dataclass
class Conversation:
    """One exported conversation, reduced to the messages this tool rates."""
    # Value of the exported conversation's 'id' field.
    id: str
    # Conversation title from the export ('' when the key is absent).
    title: str
    # Creation time; passed to datetime.fromtimestamp() elsewhere in this file.
    create_time: float
    # Last-update time copied from the export's 'update_time' field.
    update_time: float
    # As built by ingest_conversations: [first user message, first assistant reply].
    messages: List[Message]
def ingest_conversations() -> Dict[str, Dict[Tuple[str, str], List[Conversation]]]:
    """Load the ChatGPT export found in the current working directory.

    Exactly one ``*.zip`` file must be present in the current directory; its
    ``conversations.json`` member is parsed. Conversations whose model slug
    neither contains "gpt" nor equals "unknown" (case-insensitive) are skipped.
    From every remaining conversation only the first user message and the
    first assistant message that follows it are kept; conversations lacking
    either one are dropped.

    Returns:
        A nested mapping ``model -> (about_user, about_model) -> conversations``
        where the tuple holds the custom-instruction texts found in the
        conversation's message metadata (empty strings when absent).

    Raises:
        SystemExit: with status 1 when zero or several zip files are found.
        KeyError: when the archive has no ``conversations.json`` member.
    """
    import zipfile  # not imported at module level, only needed here

    zip_files = [name for name in os.listdir('.') if name.endswith('.zip')]
    if len(zip_files) != 1:
        print("Error: There should be exactly one zip file in the current directory - the one with your ChatGPT Exported data.")
        print("Download your data from ChatGPT: (top right -> Settings -> Data Controls -> Export your data)")
        sys.exit(1)

    with zipfile.ZipFile(zip_files[0], 'r') as zip_ref:
        with zip_ref.open('conversations.json') as f:
            data = json.load(f)

    organized_data = {}
    conversations = data if isinstance(data, list) else [data]
    for conversation in conversations:
        model = conversation.get('default_model_slug', 'Unknown')
        if model is None:
            model = 'Unknown'
        if ("gpt" not in model.lower()) and model.lower() != "unknown":
            print(f"SKIPPING CONVERSATION WITH MODEL: {model.upper()} - NOT A GPT MODEL")
            continue

        first_user_message = None
        first_assistant_response = None
        about_model = ''
        about_user = ''
        # `or {}` / `or []` / `or ''` below: a key that is present with a JSON
        # null comes back from .get() as None, not as the supplied default.
        for msg in (conversation.get('mapping') or {}).values():
            message = msg.get('message')
            if not message:
                continue

            context = (message.get('metadata') or {}).get('user_context_message_data')
            if context:
                about_user = context.get('about_user_message') or ''
                about_model = context.get('about_model_message') or ''

            content = message.get('content')
            if not content:
                continue
            if isinstance(content, dict):
                # Skip image messages
                if content.get('content_type') == 'image_asset_pointer':
                    if not first_user_message:
                        # The conversation opens with an image: give up on it.
                        break
                    continue
                content_text = ''.join(str(part) for part in content.get('parts') or [])
            else:
                content_text = str(content)

            if not content_text.strip():  # Skip blank messages
                continue

            msg_obj = Message(
                id=message['id'],
                author_role=message['author']['role'],
                content=content_text,
            )
            if not first_user_message and msg_obj.author_role == 'user':
                first_user_message = msg_obj
            elif first_user_message and not first_assistant_response and msg_obj.author_role == 'assistant':
                first_assistant_response = msg_obj
                break  # We have what we need, stop processing messages

        if first_user_message and first_assistant_response:
            conv = Conversation(
                id=conversation['id'],
                title=conversation.get('title', ''),
                # Null timestamps become 0 so datetime.fromtimestamp() works later.
                create_time=conversation.get('create_time') or 0,
                update_time=conversation.get('update_time') or 0,
                messages=[first_user_message, first_assistant_response],
            )
            key = (about_user, about_model)
            organized_data.setdefault(model, {}).setdefault(key, []).append(conv)
    return organized_data
def load_ratings_cache(cache_file: str) -> Dict[Tuple[str, str], Dict[str, int]]:
    """Read previously stored ratings from the CSV cache.

    Args:
        cache_file: Path of the CSV file; a missing file yields an empty dict.

    Returns:
        ``{(timestamp, table_id): {column: int_value}}`` built from every
        column except ``timestamp``, ``table_id`` and ``model``. Values are
        parsed as floats and truncated to int.
    """
    if not os.path.exists(cache_file):
        return {}

    non_rating_columns = ('timestamp', 'table_id', 'model')
    cache = {}
    with open(cache_file, 'r', newline='', encoding='utf-8') as handle:
        for record in csv.DictReader(handle):
            cache_key = (record['timestamp'], record['table_id'])
            scores = {}
            for column, raw_value in record.items():
                if column in non_rating_columns:
                    continue
                scores[column] = int(float(raw_value))
            cache[cache_key] = scores
    return cache
def save_rating_to_cache(cache_file: str, timestamp: str, table_id: str, model: str, ratings: Dict[str, int]):
    """Append one rated conversation to the CSV cache.

    The header row is written whenever the file is missing or empty, and the
    parent directory is created on demand so the function also works before
    any other output has been produced.

    Args:
        cache_file: Path of the CSV cache.
        timestamp: Timestamp string identifying the conversation.
        table_id: Identifier of the model/instruction group.
        model: Model slug stored alongside the ratings.
        ratings: Must contain 'user_tokens', 'assistant_tokens' and every
            dimension name listed in ALL_DIMENSIONS.

    Raises:
        KeyError: when ``ratings`` lacks one of the required keys.
    """
    parent_dir = os.path.dirname(cache_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # An existing but empty file still needs its header row.
    needs_header = not os.path.exists(cache_file) or os.path.getsize(cache_file) == 0
    dimension_names = [dim[0] for dim in ALL_DIMENSIONS]

    with open(cache_file, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['timestamp', 'table_id', 'model', 'user_tokens', 'assistant_tokens'] + dimension_names)
        writer.writerow(
            [timestamp, table_id, model, ratings['user_tokens'], ratings['assistant_tokens']]
            + [ratings[name] for name in dimension_names]
        )
def num_tokens_from_string(string: str, model_name: str) -> int:
    """Count the tokens of ``string`` for the given model.

    Uses the tiktoken encoding registered for ``model_name``. When the model
    is 'Unknown', or tiktoken has no encoding mapped to the slug, the count
    falls back to the number of whitespace-separated words.

    Args:
        string: Text to measure.
        model_name: Model slug, or the literal 'Unknown'.

    Returns:
        The token count, or the word count when no tokenizer is available.
    """
    if model_name != 'Unknown':
        try:
            encoding = tiktoken.encoding_for_model(model_name)
        except KeyError:
            # tiktoken raises KeyError for slugs it cannot map to an encoding;
            # use the same fallback as for 'Unknown' instead of aborting.
            encoding = None
        if encoding is not None:
            # disallowed_special=() makes literal strings such as
            # "<|endoftext|>" count as ordinary text rather than raising.
            return len(encoding.encode(string, disallowed_special=()))
    return len(string.split())  # Simple word count as a fallback
def calculate_cost(tokens, price_per_million=0.25):
    """Return the dollar cost of ``tokens`` tokens.

    Args:
        tokens: Number of tokens billed.
        price_per_million: Price in dollars per one million tokens; the
            default keeps the previously hard-coded rate of $0.25.

    Returns:
        The cost in dollars as a float.
    """
    return (tokens / 1_000_000) * price_per_million


# Running total, in dollars, of the API calls made by get_quality_rating.
cumulative_cost = 0
def get_quality_rating(user_message: str, assistant_response: str, model_name: str, timestamp: str, table_id: str) -> Dict[str, int]:
    """Rate one prompt/response pair on every dimension in ALL_DIMENSIONS.

    In DRY_RUN mode the scores are random integers in [0, 100]; otherwise
    gpt-4o-mini is asked for them in JSON mode. Dimensions the model leaves
    out, or answers with a non-numeric value, are stored as 0. The cost of the
    call is added to the module-level ``cumulative_cost`` and the exchange
    plus its ratings is written to an HTML page.

    Args:
        user_message: Text of the user's prompt.
        assistant_response: Text of the assistant's reply.
        model_name: Slug of the model that produced the reply (used for
            token counting only).
        timestamp: Timestamp string used to name the HTML page.
        table_id: Group identifier used to name the HTML page.

    Returns:
        A dict with 'user_tokens', 'assistant_tokens' and one integer per
        dimension name.

    Raises:
        OpenAIError: re-raised after being printed when the API call fails.
    """
    global cumulative_cost
    ratings = {
        'user_tokens': num_tokens_from_string(user_message, model_name),
        'assistant_tokens': num_tokens_from_string(assistant_response, model_name),
    }
    dimension_names = [dim[0] for dim in ALL_DIMENSIONS]
    try:
        if DRY_RUN:
            ratings.update({name: random.randint(0, 100) for name in dimension_names})
        else:
            client = OpenAI()
            assistant_dims = ', '.join(f"{dim[0]} ({dim[1]})" for dim in ASSISTANT_DIMENSIONS)
            user_dims = ', '.join(f"{dim[0]} ({dim[1]})" for dim in USER_DIMENSIONS)
            # JSON mode is rejected by the API unless the messages mention
            # JSON, hence the explicit output instruction ending the system prompt.
            msgs = [
                {"role": "system", "content": """You are an AI quality rater.
Your task is to rate the difficulty of the user's prompt and the quality of an AI assistant's response.
Provide integer ratings from 0 to 100 for multiple dimensions,
where 0 is easy prompt/very poor response and 100 is difficult prompt / excellent response.
Respond with a single flat JSON object whose keys are the prefixed dimension names and whose values are the integer ratings."""},
                {"role": "user", "content": f"""User message: {user_message}
---
Assistant response: {assistant_response}
---
Please rate the following dimensions with an integer between 0 and 100.
For the assistant's response (prefix with 'a_'):
{assistant_dims}
For the user's prompt difficulty (prefix with 'u_'):
{user_dims}"""}
            ]
            completion = client.chat.completions.create(
                messages=msgs,
                model="gpt-4o-mini",
                response_format={"type": "json_object"},
            )
            parsed_ratings = json.loads(completion.choices[0].message.content)
            # Keep only known dimensions so a stray key cannot overwrite the
            # token counts, and tolerate values that are not plain integers.
            for key, value in parsed_ratings.items():
                if key not in dimension_names:
                    continue
                try:
                    ratings[key] = int(float(value))
                except (TypeError, ValueError):
                    print(f"Warning: Ignoring non-numeric rating for {key}: {value!r}")

            # Calculate and print cost
            api_tokens = completion.usage.total_tokens
            call_cost = calculate_cost(api_tokens)
            cumulative_cost += call_cost
            print(f"API call cost: ${call_cost:.6f}")
            print(f"Cumulative cost: ${cumulative_cost:.6f}")

        # Check if ratings now contains all of the keys in ALL_DIMENSIONS
        missing_dimensions = set(dimension_names) - set(ratings.keys())
        if missing_dimensions:
            print(f"Warning: Missing ratings for dimensions: {missing_dimensions}")
            for dim in missing_dimensions:
                ratings[dim] = 0  # Set a default value for missing dimensions

        # Save the full messages to an HTML file
        save_messages_to_html(user_message, assistant_response, timestamp, table_id, ratings)
    except OpenAIError as e:
        print(f"OpenAI API error: {str(e)}")
        raise
    return ratings
def generate_rating_filename(timestamp: str, table_id: str) -> str:
    """Return the path, relative to the output directory, of a ratings page.

    The result has the form ``ratings/<timestamp>_<table_id>.html``.
    """
    return "ratings/{}_{}.html".format(timestamp, table_id)
def save_messages_to_html(user_message: str, assistant_response: str, timestamp: str, table_id: str, ratings: Dict[str, int]):
    """Write one prompt/response pair and its ratings to an HTML page.

    The page is stored under ``OUTPUT_DIR`` at the path returned by
    ``generate_rating_filename``; the ``ratings`` sub-directory is created when
    missing. Message texts are HTML-escaped. Each dimension of ALL_DIMENSIONS
    gets a table row coloured by the module-level ``get_color``; dimensions
    absent from ``ratings`` are shown as "N/A".

    Args:
        user_message: Text of the user's prompt.
        assistant_response: Text of the assistant's reply.
        timestamp: Timestamp string shown in the title and used in the file name.
        table_id: Group identifier used in the file name.
        ratings: Mapping from dimension name to score.
    """
    os.makedirs(os.path.join(OUTPUT_DIR, 'ratings'), exist_ok=True)
    filename = os.path.join(OUTPUT_DIR, generate_rating_filename(timestamp, table_id))

    # Create table rows for ratings
    row_parts = []
    for dim, desc in ALL_DIMENSIONS:
        value = ratings.get(dim, "N/A")
        color = get_color(value)
        row_parts.append(f'<tr><td>{dim}</td><td>{desc}</td><td style="background-color: {color}">{value}</td></tr>')
    rating_rows = ''.join(row_parts)

    html_content = f"""
<html>
<head>
<title>Conversation - {timestamp}</title>
<style>
body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }}
.message {{ margin-bottom: 20px; padding: 10px; border-radius: 5px; }}
.user {{ background-color: #e6f3ff; }}
.assistant {{ background-color: #f0f0f0; }}
pre {{ white-space: pre-wrap; word-wrap: break-word; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
th {{ background-color: #f2f2f2; }}
</style>
</head>
<body>
<h1>Conversation - {timestamp}</h1>
<div class="message user">
<h2>User Message:</h2>
<pre>{html.escape(user_message)}</pre>
</div>
<div class="message assistant">
<h2>Assistant Response:</h2>
<pre>{html.escape(assistant_response)}</pre>
</div>
<div class="ratings">
<h2>Ratings:</h2>
<table>
<tr><th>Dimension</th><th>Description</th><th>Rating</th></tr>
{rating_rows}
</table>
</div>
</body>
</html>
"""
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html_content)
def shorten_string(s, length):
    """Return the first ``length`` characters of ``s`` plus a length marker.

    The result is ``<prefix>... [<len(s)>]``; the ellipsis and the total
    length are appended even when ``s`` is shorter than ``length``.
    """
    prefix = s[:length]
    total_length = len(s)
    return '{}... [{}]'.format(prefix, total_length)
def generate_index(organized_conversations, ratings_cache):
    """Write ``index.html`` listing every conversation group and its ratings.

    For each model and each (about_user, about_model) group a table is
    emitted with one row per conversation: a link to the per-conversation
    ratings page, shortened prompt and response, and the cached 'a_overall'
    and 'u_complexity' scores (or "Not rated yet"). Prompt and response
    snippets are HTML-escaped so markup inside a conversation cannot break the
    report.

    Args:
        organized_conversations: Mapping ``model -> (about_user, about_model)
            -> list of conversations`` as returned by ingest_conversations.
        ratings_cache: Mapping ``(timestamp, table_id) -> ratings dict``.
    """
    parts = ["""<html><head>
<style>body{font-family:Arial,sans-serif;} table{border-collapse:collapse;width:100%;} th,td{border:1px solid #ddd;padding:8px;text-align:left;} th{background-color:#f2f2f2;}</style>
</head>
<body>
<h1>ChatGPT Conversations</h1>
<p>
Does it seem like ChatGPT got worse over time? Now you can see for yourself.
<p>
This tool only looks at the first interaction in each conversation.
If the prompt included an image, the conversation is skipped.
Each conversation is rated - click on the link in the first column.
</p>
<p>
Conversations are organized by model and your user/system prompt.
</p>
<p>
See <a href="graphs.html">these plots with your results.</a>
</p>
<p>
See <a href="activities.html">activity summaries.</a>
</p>
<h1>Conversation Analysis Report</h1>
<h2>Index</h2>
<ul>"""]

    # Index of anchors, one per (model, instruction group).
    for model in organized_conversations:
        for about_id, _ in enumerate(organized_conversations[model], start=1):
            table_id = f"{model}_{about_id}"
            parts.append(f'<li><a href="#{table_id}">{table_id}</a></li>')
    parts.append('</ul>')

    for model, conversations in organized_conversations.items():
        for about_id, ((about_user, about_model), convs) in enumerate(conversations.items(), start=1):
            table_id = f"{model}_{about_id}"
            parts.append(f'<h2 id="{table_id}">Model: {table_id}</h2>')
            parts.append('<h3>About User:</h3>')
            parts.append(f'<p>{html.escape(about_user)}</p>')
            parts.append('<h3>About Model:</h3>')
            parts.append(f'<p>{html.escape(about_model)}</p>')
            parts.append('<table>')
            parts.append('<tr><th>Timestamp</th><th>Prompt</th><th>Response</th><th>Ratings</th></tr>')
            for conv in convs:
                if len(conv.messages) < 2:
                    continue
                user_message = conv.messages[0].content
                assistant_response = conv.messages[1].content
                timestamp = datetime.fromtimestamp(conv.create_time).strftime('%Y%m%d_%H%M%S')
                rating_file_url = generate_rating_filename(timestamp, table_id)
                parts.append('<tr>')
                parts.append(f'<td><a href="{rating_file_url}" target="_blank">{timestamp}</a></td>')
                # Conversation text is untrusted for HTML purposes: escape it.
                parts.append(f'<td>{html.escape(shorten_string(user_message, 40))}</td>')
                parts.append(f'<td>{html.escape(shorten_string(assistant_response, 40))}</td>')
                cached = ratings_cache.get((timestamp, table_id))
                if cached is not None:
                    parts.append('<td>')
                    for dim in ['a_overall', 'u_complexity']:
                        value = cached.get(dim, 'N/A')
                        color = get_color(value)
                        parts.append(f'<span style="background-color: {color}">{dim}: {value}</span><br>')
                    parts.append('</td>')
                else:
                    parts.append('<td>Not rated yet</td>')
                parts.append('</tr>')
            parts.append('</table>')
    parts.append('</body></html>')

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    path = os.path.join(OUTPUT_DIR, 'index.html')
    with open(path, 'w', encoding='utf-8') as f:
        f.write(''.join(parts))
    print(f"Report generated: {path}")
    return
def process_uncached_ratings(organized_conversations, ratings_cache, cache_file):
    """Rate every conversation that has no entry in ``ratings_cache`` yet.

    Each new rating is appended to ``cache_file`` and also stored in
    ``ratings_cache`` (the dict is updated in place), so callers that keep
    using the same dict see the fresh ratings. Processing stops once
    MAX_RATED_COUNT conversations have been rated in this call.

    Args:
        organized_conversations: Mapping ``model -> (about_user, about_model)
            -> list of conversations``.
        ratings_cache: Mapping ``(timestamp, table_id) -> ratings dict``;
            mutated with the ratings produced here.
        cache_file: Path of the CSV cache to append to.
    """
    global cumulative_cost
    skip_counter = 0
    processed_count = 0
    for model, conversations in organized_conversations.items():
        print(f"Processing model: {model}")
        for about_id, convs in enumerate(conversations.values(), start=1):
            print(f" Processing conversation group: {about_id}")
            table_id = f"{model}_{about_id}"
            for conv in convs:
                if len(conv.messages) < 2:
                    print(f" Skipping conversation due to insufficient messages")
                    continue
                user_message = conv.messages[0].content
                assistant_response = conv.messages[1].content
                assert conv.messages[0].author_role == 'user', "First message should be from user"
                assert conv.messages[1].author_role == 'assistant', "Second message should be from assistant"
                if not isinstance(user_message, str):
                    print(f" Skipping conversation: User message is not a string (type: {type(user_message)})")
                    continue
                if 'image_asset_pointer' in user_message:
                    skip_counter += 1
                    continue
                timestamp = datetime.fromtimestamp(conv.create_time).strftime('%Y%m%d_%H%M%S')
                cache_key = (timestamp, table_id)
                if cache_key not in ratings_cache:
                    print(f" Processing message: {table_id}")
                    ratings = get_quality_rating(user_message, assistant_response, model, timestamp, table_id)
                    save_rating_to_cache(cache_file, timestamp, table_id, model, ratings)
                    # Keep the in-memory cache in step with the file so a
                    # later index rebuild shows this rating.
                    ratings_cache[cache_key] = ratings
                    print(f" Ratings calculated and saved for {table_id}")
                    processed_count += 1
                if processed_count >= MAX_RATED_COUNT:
                    print(f"\nReached maximum number of rated conversations ({MAX_RATED_COUNT})")
                    print(f"Total cost: ${cumulative_cost:.6f}")
                    return
    print(f"\nProcessing complete. Total conversations processed: {processed_count}")
    print(f"Total conversations skipped due to image content: {skip_counter}")
    print(f"Total cost: ${cumulative_cost:.6f}")
def get_color(rating):
    """Map a rating to a CSS ``rgb(...)`` colour between red and green.

    A rating of 0 gives pure red and higher ratings shift towards green.
    Ratings outside 0-100 are clamped to that range so every colour component
    stays within 0-255.

    Args:
        rating: Anything ``int()`` accepts; other values (for example the
            string "N/A" or None) yield the grey fallback colour.

    Returns:
        A CSS colour string such as ``'rgb(255, 0, 0)'``.
    """
    try:
        rating_value = int(rating)
    except (ValueError, TypeError, OverflowError):
        return 'rgb(128, 128, 128)'  # Gray color as fallback
    rating_value = max(0, min(100, rating_value))
    green = int(rating_value * 2.55)
    return f'rgb({255 - green}, {green}, 0)'
def save_organized_conversations(organized_conversations, output_file):
    """Dump the organized conversations to ``output_file`` as indented JSON.

    Tuple dictionary keys are converted to their ``str()`` form because JSON
    keys must be strings. Dataclass instances are converted to dictionaries
    recursively, so nested objects (a conversation's messages) are written as
    JSON objects rather than as ``repr`` strings.

    Args:
        organized_conversations: Nested dict/list structure, possibly
            containing dataclass instances.
        output_file: Destination path, written as UTF-8.
    """
    import dataclasses

    def tuple_key_to_str(obj):
        """Return a JSON-friendly copy of ``obj``."""
        if isinstance(obj, dict):
            return {
                (str(key) if isinstance(key, tuple) else key): tuple_key_to_str(value)
                for key, value in obj.items()
            }
        if isinstance(obj, list):
            return [tuple_key_to_str(element) for element in obj]
        # is_dataclass() is also true for dataclass *types*; only instances
        # carry field values to serialize.
        if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
            return tuple_key_to_str(vars(obj))
        return obj

    serializable_data = tuple_key_to_str(organized_conversations)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(serializable_data, f, indent=2, default=str, ensure_ascii=False)
def create_multi_line_plot(df, x, y_list, title):
    """Build a line chart of rolling means for several columns.

    Rows are ordered by column ``x``; every name in ``y_list`` that exists in
    ``df`` becomes one trace showing its rolling mean (window of 500 rows,
    at least 1 observation). Names missing from ``df`` are ignored.

    Args:
        df: Source DataFrame.
        x: Name of the column used for ordering and as x values.
        y_list: Column names to plot.
        title: Figure title.

    Returns:
        The plotly Figure.
    """
    ordered = df.sort_values(by=x)
    figure = go.Figure()
    for column in y_list:
        if column not in df.columns:
            continue  # Only plot columns that exist
        smoothed = ordered[column].rolling(window=500, min_periods=1).mean()
        trace = go.Scatter(x=ordered[x], y=smoothed, mode='lines', name=column)
        figure.add_trace(trace)
    figure.update_layout(title=title, height=600, xaxis_title="Time", yaxis_title="Value")
    return figure
def create_section_plots(df, table_id):
    """Create all figures for one report section.

    Produces, in order: assistant metrics over time, user-prompt metrics over
    time, assistant metrics divided by the average prompt complexity, a raw
    and a normalized plot for each of 'avg_assistant_quality', 'a_creativity'
    and 'a_safety', and a correlation heatmap of all ``a_``/``u_`` columns.

    Rows whose average prompt complexity is 0 are treated as missing in the
    normalized plots; dividing by zero would produce infinities that distort
    the rolling means.

    Args:
        df: DataFrame holding 'timestamp', 'avg_assistant_quality',
            'avg_prompt_complexity' and the ``a_*``/``u_*`` rating columns.
        table_id: Label appended to every figure title.

    Returns:
        A list of plotly figures.
    """
    plots = []
    a_columns = [col for col in df.columns if col.startswith('a_')]
    u_columns = [col for col in df.columns if col.startswith('u_')]
    # Zero complexity cannot serve as a divisor; NaN leaves a gap instead.
    complexity = df['avg_prompt_complexity'].replace(0, np.nan)

    # Non-normalized multi-line plot for all a_ values
    plots.append(create_multi_line_plot(df, 'timestamp', a_columns, f'Assistant Metrics Over Time - {table_id}'))

    # Non-normalized multi-line plot for all u_ values
    plots.append(create_multi_line_plot(df, 'timestamp', u_columns, f'User Prompt Metrics Over Time - {table_id}'))

    # Normalized multi-line plot: every a_ value divided by average prompt complexity
    normalized_a_values = df[['timestamp'] + a_columns].copy()
    for col in a_columns:
        normalized_a_values[col] = normalized_a_values[col] / complexity
    plots.append(create_multi_line_plot(normalized_a_values, 'timestamp', a_columns, f'Normalized Assistant Metrics Over Time - {table_id}'))

    # Separate line plots for creativity, safety, and avg_assistant_quality (non-normalized and normalized)
    for metric in ['avg_assistant_quality', 'a_creativity', 'a_safety']:
        # Non-normalized plot
        plots.append(create_multi_line_plot(df, 'timestamp', [metric], f'{metric.capitalize()} Over Time - {table_id}'))
        # Normalized plot
        normalized_metric = df[metric] / complexity
        plots.append(create_multi_line_plot(pd.DataFrame({'timestamp': df['timestamp'], metric: normalized_metric}), 'timestamp', [metric], f'Normalized {metric.capitalize()} Over Time - {table_id}'))

    # Heatmap: Correlation between metrics
    corr_matrix = df[a_columns + u_columns].corr() if False else df[[col for col in df.columns if col.startswith('a_') or col.startswith('u_')]].corr()
    fig = px.imshow(corr_matrix, title=f'Correlation Heatmap - {table_id}')
    fig.update_layout(height=800, width=800)
    plots.append(fig)
    return plots
def generate_plots():
    """Render ``graphs.html`` in OUTPUT_DIR from the ratings cache CSV.

    Reads CACHE_FILE, derives the average assistant quality, the average
    prompt complexity and their ratio, then writes one section of figures for
    the whole data set followed by one section per ``table_id``. The file is
    written as UTF-8.
    """
    df = pd.read_csv(CACHE_FILE)
    df['timestamp'] = pd.to_datetime(df['timestamp'], format='%Y%m%d_%H%M%S')
    df['avg_assistant_quality'] = df[['a_overall', 'a_relevance', 'a_accuracy', 'a_coherence', 'a_completeness', 'a_clarity', 'a_conciseness', 'a_helpfulness', 'a_task_completion', 'a_contextual_understanding', 'a_non_sycophancy']].mean(axis=1)
    df['avg_prompt_complexity'] = df[['u_complexity', 'u_domain_specificity', 'u_ambiguity', 'u_abstraction_level', 'u_contextual_requirements', 'u_linguistic_challenge', 'u_cognitive_demand']].mean(axis=1)
    df['quality_to_complexity_ratio'] = df['avg_assistant_quality'] / df['avg_prompt_complexity']

    # Collected in a list named `page` so the imported `html` module is not shadowed.
    page = ["""
<html>
<head>
<title>AI Assistant Performance Analysis</title>
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
<style>
body { font-family: Arial, sans-serif; }
.index { margin-bottom: 20px; }
.index a { margin-right: 10px; }
</style>
</head>
<body>
<h1>AI Assistant Performance Analysis</h1>
<div class="index">
<h2>Index</h2>
"""]
    # One frame per table_id, in order of first appearance.
    sections = [(table_id, df[df['table_id'] == table_id]) for table_id in df['table_id'].unique()]

    # Generate index
    page.append(f'<a href="#all_data">All Data ({len(df)} data points)</a>')
    for table_id, df_table in sections:
        page.append(f'<a href="#{table_id}">{table_id} ({len(df_table)} data points)</a>')
    page.append("</div>")

    # All data section
    page.append(f'<h2 id="all_data">All Data ({len(df)} data points)</h2>')
    for plot in create_section_plots(df, "All Data"):
        page.append(plot.to_html(full_html=False, include_plotlyjs=False))

    # Sections for each table_id
    for table_id, df_table in sections:
        page.append(f'<h2 id="{table_id}">{table_id} ({len(df_table)} data points)</h2>')
        for plot in create_section_plots(df_table, table_id):
            page.append(plot.to_html(full_html=False, include_plotlyjs=False))
    page.append("</body></html>")

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with open(os.path.join(OUTPUT_DIR, 'graphs.html'), "w", encoding='utf-8') as f:
        f.write(''.join(page))
def generate_activity_summaries(organized_conversations, max_prompts_per_period=3):
    """Summarize the user's prompts per calendar month with gpt-4o-mini.

    The first user prompt of every conversation is grouped by the year and
    month of the conversation's creation time. For each month the first
    ``max_prompts_per_period`` prompts are sent to the model; the returned
    text is stored together with the total number of prompts of that month.
    A failing request does not abort the run: the error text is stored as
    that month's summary instead.

    Args:
        organized_conversations: Mapping ``model -> (about_user, about_model)
            -> list of conversations``.
        max_prompts_per_period: How many prompts of each month are sent to
            the model (default 3, the previously hard-coded value).

    Returns:
        A dict mapping ``'YYYY-MM'`` to the summary text.
    """
    activity_prompts = defaultdict(list)
    for conversations in organized_conversations.values():
        for convs in conversations.values():
            for conv in convs:
                if len(conv.messages) >= 1:
                    timestamp = datetime.fromtimestamp(conv.create_time)
                    period_key = f"{timestamp.year}-{timestamp.month:02d}"
                    activity_prompts[period_key].append(conv.messages[0].content)

    summaries = {}
    client = OpenAI()
    for period_key, all_prompts in activity_prompts.items():
        year, month = map(int, period_key.split('-'))
        period_name = f"{month_name[month]} {year}"
        # Remember the real total before sampling; the suffix below reports it.
        total_prompts = len(all_prompts)
        prompts = all_prompts[:max_prompts_per_period]
        prompt_text = "\n---\n".join([f"{i+1}. {prompt}" for i, prompt in enumerate(prompts)])
        print(f"Calling OpenAI API for {len(prompts)} prompts from {period_name}")
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are an AI assistant tasked with summarizing user activity based on their ChatGPT prompts."},
                    {"role": "user", "content": f"""Here are the user's ChatGPT prompts.
Respond with
(1) one sentence on what kind of questions the user asked, with up to 3 bullets for the main themes.
(2) one sentence on what the user was likely focused on during this period.
Use HTML formatting (inside a <p>) and bullet lists.
{prompt_text}"""}
                ]
            )
            summary = response.choices[0].message.content.strip()
            summaries[period_key] = summary + f" ({total_prompts} prompts in total)"
        except Exception as e:
            # Deliberately best-effort: one failed month must not stop the rest.
            print(f"Error generating summary for {period_name}: {str(e)}")
            summaries[period_key] = f"Error generating summary: {str(e)}"
    return summaries
def generate_activities_report(summaries):
    """Write ``activities.html`` with one block per month, newest first.

    Args:
        summaries: Mapping from ``'YYYY-MM'`` keys to summary text; the text
            is inserted into the page as-is.

    Returns:
        The path of the written report.
    """
    sections = ["""
<html>
<head>
<title>User Activities Summary</title>
<style>
body { font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }
h1 { color: #333; }
.activity-summary { margin-bottom: 20px; padding: 10px; border: 1px solid #ddd; border-radius: 5px; }
.period-title { color: #0066cc; }
</style>
</head>
<body>
<h1>
User Activities Summary</h1>
"""]
    # Keys are 'YYYY-MM', so reverse lexical order is newest month first.
    for period_key in sorted(summaries, reverse=True):
        summary = summaries[period_key]
        year_text, month_text = period_key.split('-')
        year, month = int(year_text), int(month_text)
        period_name = f"{month_name[month]} {year}"
        sections.append(f"""
<div class="activity-summary">
<h2 class="period-title">{period_name}</h2>
<p>{summary}</p>
</div>
""")
    sections.append("</body></html>")

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    path = os.path.join(OUTPUT_DIR, 'activities.html')
    with open(path, 'w', encoding='utf-8') as report_file:
        report_file.write(''.join(sections))
    print(f"Activities report generated: {path}")
    return path
# ---------------------------------------------------------------------------
# Script entry: ingest the export, rate what is not cached yet, write reports.
# ---------------------------------------------------------------------------
organized_conversations = ingest_conversations()
ratings_cache = load_ratings_cache(CACHE_FILE)
# First index: lists every conversation using only previously cached ratings.
generate_index(organized_conversations, ratings_cache)
# Save organized data as pretty-printed JSON
organized_data_output = os.path.join(OUTPUT_DIR, 'organized_conversations.json')
save_organized_conversations(organized_conversations, organized_data_output)
print(f"Organized data saved to: {organized_data_output}")
process_uncached_ratings(organized_conversations, ratings_cache, CACHE_FILE)
# Reload from disk before rebuilding the index: the dict loaded above predates
# the ratings just written, so reusing it would still show "Not rated yet".
ratings_cache = load_ratings_cache(CACHE_FILE)
generate_index(organized_conversations, ratings_cache)
# The plots are read from the cache file, which only exists once at least one
# conversation has been rated.
if os.path.exists(CACHE_FILE):
    generate_plots()
else:
    print(f"No ratings found at {CACHE_FILE}; skipping plots.")
# Generate activity summaries - WARNING - THESE CONTAIN PERSONAL DATA
print("Generating activity summaries...")
activity_summaries = generate_activity_summaries(organized_conversations)
activities_report_path = generate_activities_report(activity_summaries)
print("All the ratings are in output/ratings_cache.csv if you want to do your own analysis.")
print(f"Look at {CACHE_FILE} for results.")