Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better logging #205

Merged
merged 2 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion factgenie/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


def generate_example_index(app, campaign):
Expand Down
4 changes: 2 additions & 2 deletions factgenie/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
app.db["announcers"] = {}
app.wsgi_app = ProxyFix(app.wsgi_app, x_host=1)

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


# -----------------
Expand Down Expand Up @@ -228,7 +228,7 @@ def browse():

if dataset_id and split and example_idx:
display_example = {"dataset": dataset_id, "split": split, "example_idx": int(example_idx)}
logger.info(f"Browsing {display_example}")
logger.info(f"Serving permalink {dataset_id} / {split} / {example_idx}")
else:
display_example = None

Expand Down
80 changes: 63 additions & 17 deletions factgenie/bin/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,39 +279,85 @@ def run_llm_campaign(campaign_id: str):
)


def create_app(**kwargs):
import yaml
def setup_logging(config):
    """Configure and return the shared ``"factgenie"`` logger.

    Creates two timestamped log files under ``ROOT_DIR/logs`` — one for
    warnings/errors, one for info-level messages only — with ANSI colour
    codes stripped, and installs coloured console output via ``coloredlogs``.

    Args:
        config: Parsed main-config mapping; ``config["logging"]["level"]``
            (default ``"INFO"``) sets the logger level.

    Returns:
        The configured ``logging.Logger`` named ``"factgenie"``.
    """
    import logging
    import os
    import re
    from datetime import datetime

    import coloredlogs

    from factgenie import ROOT_DIR

    # Compiled once here (not per-record) — matches ANSI escape sequences so
    # the log *files* stay plain text while the console keeps its colours.
    ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")

    class PlainTextFormatter(logging.Formatter):
        """Formatter that strips ANSI colour codes from the rendered record."""

        def format(self, record):
            return ansi_escape.sub("", super().format(record))

    os.makedirs(f"{ROOT_DIR}/logs", exist_ok=True)
    datetime_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    # Shared log format for file and console output.
    log_format = "%(asctime)s - %(levelname)s - %(message)s"

    logger = logging.getLogger("factgenie")

    # Get logging level from config (falls back to INFO).
    logging_level = config.get("logging", {}).get("level", "INFO")
    logger.setLevel(logging_level)

    # Guard against duplicate handlers when setup_logging() is invoked more
    # than once (e.g. the app factory is called twice): re-adding the
    # FileHandlers would write every log line multiple times.
    if not logger.handlers:
        # File handler for warnings and errors.
        error_handler = logging.FileHandler(f"{ROOT_DIR}/logs/{datetime_str}_error.log")
        error_handler.setLevel(logging.WARNING)
        error_handler.setFormatter(PlainTextFormatter(log_format))

        # File handler for info messages only — the filter excludes every
        # other level so warnings/errors do not appear here as well.
        info_handler = logging.FileHandler(f"{ROOT_DIR}/logs/{datetime_str}_info.log")
        info_handler.setLevel(logging.INFO)
        info_handler.addFilter(lambda record: record.levelno == logging.INFO)
        info_handler.setFormatter(PlainTextFormatter(log_format))

        logger.addHandler(error_handler)
        logger.addHandler(info_handler)

    # Coloured console output; coloredlogs manages its own StreamHandler on
    # this logger, so no manual console handler is needed.
    coloredlogs.install(level=logging_level, logger=logger, fmt=log_format)

    return logger


def create_app(**kwargs):
import yaml
import os
import shutil
import logging
import factgenie.workflows as workflows
from apscheduler.schedulers.background import BackgroundScheduler
from datetime import datetime
from factgenie.utils import check_login
from factgenie import ROOT_DIR, MAIN_CONFIG_PATH, MAIN_CONFIG_TEMPLATE_PATH, CAMPAIGN_DIR, INPUT_DIR, OUTPUT_DIR

file_handler = logging.FileHandler("error.log")
file_handler.setLevel(logging.ERROR)

if not MAIN_CONFIG_PATH.exists():
print("Activating the default configuration.")
shutil.copy(MAIN_CONFIG_TEMPLATE_PATH, MAIN_CONFIG_PATH)

with open(MAIN_CONFIG_PATH) as f:
config = yaml.safe_load(f)

logging_level = config.get("logging", {}).get("level", "INFO")
logging.basicConfig(
format="%(levelname)s (%(filename)s:%(lineno)d) - %(message)s",
level=logging_level,
handlers=[file_handler, logging.StreamHandler()],
)
logger = logging.getLogger(__name__)
coloredlogs.install(
level=logging_level,
logger=logger,
fmt="%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s",
)
logger = setup_logging(config)

config["host_prefix"] = os.getenv("FACTGENIE_HOST_PREFIX", config["host_prefix"])
config["login"]["active"] = os.getenv("FACTGENIE_LOGIN_ACTIVE", config["login"]["active"])
config["login"]["lock_view_pages"] = os.getenv(
Expand Down
2 changes: 1 addition & 1 deletion factgenie/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from datetime import datetime
from factgenie import CAMPAIGN_DIR

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


class CampaignMode:
Expand Down
2 changes: 1 addition & 1 deletion factgenie/crowdsourcing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from factgenie import CAMPAIGN_DIR, PREVIEW_STUDY_ID, TEMPLATES_DIR
from factgenie.campaign import CampaignMode, ExampleStatus

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


def create_crowdsourcing_campaign(app, campaign_id, config, campaign_data):
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/basic.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

from factgenie.datasets.dataset import Dataset
from pathlib import Path
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from factgenie import INPUT_DIR, OUTPUT_DIR

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


def get_dataset_classes():
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/gsmarena.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import requests

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

from factgenie.datasets.basic import JSONDataset
from factgenie.datasets.quintd import QuintdDataset
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/hf_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from factgenie.datasets.dataset import Dataset

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


class HFDataset(Dataset):
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/ice_hockey.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

from datetime import datetime
from tinyhtml import h
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/logicnlg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
import requests

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")
from factgenie.datasets.hf_dataset import HFDataset
from tinyhtml import h

Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/openweather.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

import json
from factgenie.datasets.basic import JSONDataset
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/owid.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

import dateutil.parser
from factgenie.datasets.dataset import Dataset
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/propaganda_techniques.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from factgenie.utils import resumable_download
from factgenie.datasets.dataset import Dataset

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")

PTC_DATASET_ID = "propaganda-techniques"
PTC_CAMPAIGN_ID = "ptc"
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/quintd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from factgenie.utils import resumable_download
from pathlib import Path

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


class QuintdDataset(Dataset):
Expand Down
2 changes: 1 addition & 1 deletion factgenie/datasets/wikidata.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import logging

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")
from factgenie.datasets.basic import JSONDataset
from factgenie.datasets.quintd import QuintdDataset
from tinyhtml import h
Expand Down
19 changes: 17 additions & 2 deletions factgenie/llm_campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from factgenie import CAMPAIGN_DIR, OUTPUT_DIR, TEMPLATES_DIR

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


def create_llm_campaign(app, mode, campaign_id, config, campaign_data, datasets, overwrite=False):
Expand Down Expand Up @@ -149,7 +149,20 @@ def run_llm_campaign(app, mode, campaign_id, announcer, campaign, datasets, mode
campaign.update_metadata()

provider = campaign.metadata["config"].get("type", None)
logger.info(f"Starting LLM campaign {campaign_id} ({provider})")

logger.info(f"Starting LLM campaign \033[1m{campaign_id}\033[0m | {provider}")

logger.info(f"=" * 50)
logger.info(f"\033[1mModel\033[0m: {campaign.metadata['config']['model']}")
logger.info(f"\033[1mAPI URL\033[0m: {campaign.metadata['config'].get('api_url')}")
logger.info(f"\033[1mModel args\033[0m: {campaign.metadata['config'].get('model_args')}")
logger.info(f"\033[1mSystem message\033[0m: \"{campaign.metadata['config'].get('system_msg')}\"")
logger.info(f"\033[1mAnnotation span categories\033[0m:")

for cat in campaign.metadata["config"].get("annotation_span_categories", []):
logger.info(f"[{cat['name']}] {cat['description']}")

logger.info(f"=" * 50)

# regenerate output index
workflows.get_output_index(app, force_reload=True)
Expand Down Expand Up @@ -211,7 +224,9 @@ def run_llm_campaign(app, mode, campaign_id, announcer, campaign, datasets, mode
payload = {"campaign_id": campaign_id, "stats": stats, "type": "result", "response": response}

utils.announce(announcer, payload)
logger.info(f"-" * 50)
logger.info(f"{campaign_id}: {stats['finished']}/{stats['total']} examples")
logger.info(f"-" * 50)

# if all examples are finished, set the campaign status to finished
if len(db.status.unique()) == 1 and db.status.unique()[0] == ExampleStatus.FINISHED:
Expand Down
54 changes: 43 additions & 11 deletions factgenie/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
logging.getLogger("LiteLLM Proxy").setLevel(logging.ERROR)
logging.getLogger("LiteLLM Router").setLevel(logging.ERROR)

logger = logging.getLogger(__name__)
# disable requests logging
logging.getLogger("httpx").setLevel(logging.ERROR)

logger = logging.getLogger("factgenie")

DIR_PATH = os.path.dirname(__file__)
LLM_ANNOTATION_DIR = os.path.join(DIR_PATH, "annotations")
Expand Down Expand Up @@ -163,18 +166,42 @@ def parse_annotations(self, text, annotations_json):
annotations_obj = OutputAnnotations.model_validate_json(annotations_json)
annotations = annotations_obj.annotations
except ValidationError as e:
logger.error(f"Model response is not in the expected format: {e}\n\nResponse: {annotations_json=}")
logger.error("Parsing error: ", json.loads(e.json())[0]["msg"])
try:
logger.error(f"Model response does not follow the schema.")
parsed_json = json.loads(annotations_json)
logger.error(f"Model response: {parsed_json}")
except json.JSONDecodeError:
logger.error(f"Model response is not a valid JSON.")
logger.error(f"Model response: {annotations_json}")

return []

annotation_list = []
current_pos = 0

for annotation in annotations:
logger.info("Annotated text:")
logger.info(f"\033[34m{text}\033[0m")

logger.info(f"Received {len(annotations)} annotations.")

for i, annotation in enumerate(annotations):
annotated_span = annotation.text.lower()

if len(text) == 0:
logger.warning(f"❌ Span EMPTY.")
continue

# find the `start` index of the error in the text
start_pos = text.lower().find(annotation.text.lower(), current_pos)
start_pos = text.lower().find(annotated_span, current_pos)

if start_pos == -1:
logger.warning(f'Cannot find {annotation=} in text "{text[current_pos:]}"')
if text.lower().find(annotated_span) != -1:
# The annotation was found earlier in the text. That may be an accident, therefore we ignore it. The model should be instructed to order the annotation sequentially.
logger.warning(f'❌ Span OUT OF ORDER: "{annotated_span}"')
else:
# The annotation was not found in the text.
logger.warning(f'❌ Span NOT FOUND: "{annotated_span}"')
continue

annotation_d = annotation.model_dump()
Expand All @@ -185,6 +212,17 @@ def parse_annotations(self, text, annotations_json):

# Save the start position of the annotation
annotation_d["start"] = start_pos

try:
annotation_type_str = self.config["annotation_span_categories"][annotation_d["type"]]["name"]
except:
logger.error(f"Annotation type {annotation_d['type']} not found in the annotation_span_categories.")
continue

logger.info(
f'[\033[32m\033[1m{annotation_type_str}\033[0m] "\033[32m{annotation.text}\033[0m" ({start_pos}:{start_pos + len(annotation.text)})'
)

annotation_list.append(annotation_d)

overlap_allowed = self.config.get("annotation_overlap_allowed", False)
Expand Down Expand Up @@ -247,20 +285,14 @@ def annotate_example(self, data, text):
try:
prompt = self.prompt(data, text)

logger.debug(f"Calling LiteLLM API.")
logger.debug(f"Prompt: {prompt}")
logger.debug(f"Model: {model}")
logger.debug(f"Model args: {self.config.get('model_args', {})}")
logger.debug(f"API URL: {self._api_url()}")
logger.debug(f"System message: {self.config['system_msg']}")

response = self.get_model_response(prompt, model_service)

logger.debug(f"Prompt tokens: {response.usage.prompt_tokens}")
logger.debug(f"Response tokens: {response.usage.completion_tokens}")

annotation_str = response.choices[0].message.content
logger.info(annotation_str)

return {
"prompt": prompt,
Expand Down
2 changes: 1 addition & 1 deletion factgenie/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
MAIN_CONFIG_PATH,
)

logger = logging.getLogger(__name__)
logger = logging.getLogger("factgenie")


# https://maxhalford.github.io/blog/flask-sse-no-deps/
Expand Down
Loading