example_new.py

"""
Demonstration of the GazeTracking library.
Check the README.md for complete documentation.
"""

import cv2
import numpy as np
from gaze_tracking import GazeTracking

# Gaze tracker: it is handed every captured frame through refresh() and is
# then queried for the left/right pupil coordinates.
gaze = GazeTracking()
# Video capture opened on device index 0 (presumably the default webcam -
# confirm on machines with several cameras).
webcam = cv2.VideoCapture(0)

# Names of the calibration targets, in the order they are captured.
_TARGET_NAMES = ("robot_face", "pose1", "pose2", "own_items")

# Raw pupil samples gathered during calibration, addressed as
# gaze_data[target][pupil][axis] -> list of coordinates.
# The comprehension creates fresh, independent lists for every target/pupil.
gaze_data = {
    name: {
        pupil: {"x": [], "y": []}
        for pupil in ("left_pupil", "right_pupil")
    }
    for name in _TARGET_NAMES
}

# One reference vector per target; every entry stays None until the
# calibration results have been averaged.
fixations = dict.fromkeys(_TARGET_NAMES)

def calibration_loop(target: str, samples: int = 100) -> None:
    """Collect pupil coordinates for one calibration target.

    Reads ``samples`` frames from the module-level ``webcam``, passes each one
    to the module-level ``gaze`` tracker and appends every reported pupil
    position to ``gaze_data[target]``. Frames in which a pupil is not reported
    still count towards ``samples``; they simply contribute no coordinates for
    that pupil.

    Args:
        target: Key of ``gaze_data`` that receives the samples.
        samples: Number of frames to read. Defaults to 100.

    Raises:
        KeyError: If ``target`` is not a key of ``gaze_data``.
        RuntimeError: If the webcam does not deliver a frame.
    """
    recorded = gaze_data[target]

    # A plain counter is used on purpose: this module defines its own
    # ``range`` helper, which shadows the builtin of the same name.
    counter = 0
    while counter < samples:
        ok, frame = webcam.read()
        if not ok or frame is None:
            # Fail with a clear message instead of handing an invalid frame
            # to the tracker.
            raise RuntimeError(
                f"Could not read a frame from the webcam while calibrating {target!r}"
            )
        gaze.refresh(frame)

        detections = (
            ("left_pupil", gaze.pupil_left_coords()),
            ("right_pupil", gaze.pupil_right_coords()),
        )
        for side, coords in detections:
            # A falsy result means no coordinates were reported for this pupil.
            if coords:
                recorded[side]["x"].append(coords[0])
                recorded[side]["y"].append(coords[1])

        counter += 1

def mean(l):
    """Return the arithmetic mean of the numbers in ``l``.

    Args:
        l: Sized collection of numbers (anything supporting ``len`` and ``sum``).

    Returns:
        ``sum(l) / len(l)``.

    Raises:
        ZeroDivisionError: If ``l`` is empty. The message states that no
            samples were available, rather than a bare "division by zero".
    """
    count = len(l)
    if count == 0:
        # Same exception type an empty input has always produced, but with a
        # message that points at the real cause.
        raise ZeroDivisionError(
            "mean() of an empty sequence: no samples were collected"
        )
    return sum(l) / count

def range(l):
    """Describe the spread of ``l`` as ``"[min,max - diff: max-min]"``.

    NOTE: this name shadows the builtin ``range`` for the whole module, so the
    builtin is not available to code below this definition.

    Args:
        l: Non-empty collection of comparable numbers.

    Returns:
        A string holding the smallest value, the largest value and their
        difference.
    """
    lowest = min(l)
    highest = max(l)
    spread = highest - lowest
    return f"[{lowest},{highest} - diff: {spread}]"

def find_closest_fixation(target) -> str:
    """Return the name of the stored fixation nearest to ``target``.

    The distance to every vector in the module-level ``fixations`` mapping is
    measured with ``np.linalg.norm``; the key with the smallest distance wins
    (the first one in mapping order when several are tied).

    Args:
        target: Vector to classify; it must support subtraction with the
            stored fixation vectors.

    Returns:
        The key of ``fixations`` whose vector is closest to ``target``.
    """
    names = list(fixations)

    gaps = []
    for name in names:
        gaps.append(np.linalg.norm(target - fixations[name]))

    nearest = np.argmin(gaps)
    return names[nearest]


# Calibration phase: for every target, wait for the user to press ENTER and
# then record pupil samples for it. Each pair is (name shown in the prompt,
# key used in gaze_data).
for _prompt_name, _target_key in (
    ("Robot Face", "robot_face"),
    ("Pose1", "pose1"),
    ("Pose2", "pose2"),
    ("Own Items", "own_items"),
):
    input(f"Press ENTER to capture {_prompt_name} ...")
    calibration_loop(_target_key)

print("DONE ...")

# (gaze_data key, label printed in the report) for every calibrated target.
_REPORT_TARGETS = (
    ("robot_face", "Robot Face"),
    ("pose1", "Pose1"),
    ("pose2", "Pose2"),
    ("own_items", "own_items"),
)

# (pupil key, axis key, caption), listed in the component order of a fixation
# vector: left x, left y, right x, right y.
_REPORT_COMPONENTS = (
    ("left_pupil", "x", "Left Pupil X"),
    ("left_pupil", "y", "Left Pupil Y"),
    ("right_pupil", "x", "Right Pupil X"),
    ("right_pupil", "y", "Right Pupil Y"),
)

# For each target: print mean and spread of every component, then store the
# four means as that target's reference vector in ``fixations``.
for _position, (_key, _label) in enumerate(_REPORT_TARGETS, start=1):
    _centre = []
    for _pupil, _axis, _caption in _REPORT_COMPONENTS:
        _samples = gaze_data[_key][_pupil][_axis]
        _average = mean(_samples)
        # ``range`` here is this module's spread-formatting helper.
        print(f"[{_label}]", _caption, _average, range(_samples))
        _centre.append(_average)
    fixations[_key] = np.array(_centre)
    # Blank separator after every target; a larger one after the last.
    print("\n\n" if _position == len(_REPORT_TARGETS) else "\n")

input("Press ENTER to start recording ...")

# Caption drawn on the preview for each fixation name.
_FIXATION_LABELS = {
    "robot_face": "Robot Face",
    "pose1": "Pose 1",
    "pose2": "Pose 2",
    "own_items": "Own items",
}
_ESC_KEY = 27  # key code of the ESC key, used to leave the loop
_TEXT_COLOR = (147, 58, 31)

# Live phase: classify every frame against the calibrated fixations and show
# the annotated preview until ESC is pressed. The try/finally guarantees that
# the camera and the window are released even if a frame raises.
try:
    while True:
        # We get a new frame from the webcam
        ok, frame = webcam.read()
        if not ok or frame is None:
            # No frame was delivered (e.g. the camera went away): stop
            # instead of passing an invalid frame to the tracker.
            break

        # We send this frame to GazeTracking to analyze it
        gaze.refresh(frame)
        frame = gaze.annotated_frame()

        left_pupil = gaze.pupil_left_coords()
        right_pupil = gaze.pupil_right_coords()

        text = ""
        if left_pupil and right_pupil:
            # Same component order as the calibrated vectors:
            # left x, left y, right x, right y.
            sample = np.array(
                [left_pupil[0], left_pupil[1], right_pupil[0], right_pupil[1]]
            )
            text = _FIXATION_LABELS.get(find_closest_fixation(sample), "")

        if text:
            cv2.putText(frame, text, (90, 60), cv2.FONT_HERSHEY_DUPLEX, 1.6, _TEXT_COLOR, 2)
        cv2.putText(frame, "Left pupil:  " + str(left_pupil), (90, 130), cv2.FONT_HERSHEY_DUPLEX, 0.9, _TEXT_COLOR, 1)
        cv2.putText(frame, "Right pupil: " + str(right_pupil), (90, 165), cv2.FONT_HERSHEY_DUPLEX, 0.9, _TEXT_COLOR, 1)

        # The frame is shown and the keyboard polled on every iteration, even
        # when a pupil is missing, so the window keeps updating and ESC works.
        cv2.imshow("Gaze Tracking for Adaptive Gaze", frame)

        if cv2.waitKey(1) == _ESC_KEY:
            break
finally:
    webcam.release()
    cv2.destroyAllWindows()