Gaze Analysis

What are people in front of my object looking at?

Getting Started

Using the Python SDK:

# -*- coding: utf-8 -*-
import angus.client
from pprint import pprint

# Connect to the cloud and fetch version 1 of the gaze analysis service.
conn = angus.client.connect()
service = conn.services.get_service('gaze_analysis', version=1)

# Open the image through a context manager so the file handle is closed
# once the job is done, even if the request raises.
with open('./macgyver.jpg', 'rb') as image:
    job = service.process({'image': image})
    pprint(job.result)

Input

The API takes a stream of 2d still images as input, of format jpg or png, without constraints on resolution.

Note however that the bigger the resolution, the longer the API will take to process and give a result.

The function process() takes a dictionary as input formatted as follows:

{'image' : file}
  • image: a Python file object, as returned for example by open(), or a StringIO buffer.

Output

Events will be pushed to your client following that format:

{
  "input_size" : [480, 640],
  "nb_faces" : 1,
  "faces" : [
              {
                "roi" : [250, 142, 232, 232],
                "roi_confidence" : 0.89,
                "eye_left" : [123, 253],
                "eye_right" : [345, 253],
                "nose" : [200, 320],
                "head_yaw" : 0.03,
                "head_pitch"   : 0.23,
                "head_roll"  : 0.14,
                "gaze_yaw"    : 0.05,
                "gaze_pitch"  : 0.12
              }
            ]
}
  • input_size : width and height of the input image in pixels (to be used as reference for the roi output).
  • nb_faces : number of faces detected in the given image
  • roi : contains [pt.x, pt.y, width, height] where pt is the upper left point of the rectangle outlining the detected face.
  • roi_confidence : an estimate of the probability that a real face is indeed located at the given roi.
  • head_yaw, head_pitch, head_roll : head pose orientation in radians.
  • gaze_yaw, gaze_pitch : gaze (eyes) orientation in radians.
  • eye_left, eye_right, nose : the coordinates of the eyes and nose in the given image.

Code Sample

requirements: opencv2, opencv2 python bindings

This code sample retrieves the stream of a webcam and displays the result of the gaze_analysis service in a GUI.

# -*- coding: utf-8 -*-
import StringIO
from math import cos, sin
import cv2
import numpy as np
import angus.client

def _to_pixel(point):
    """Return the first two coordinates of *point* as an integer (x, y) tuple.

    cv2 drawing functions expect integer pixel coordinates, so the values
    read from the service result are cast explicitly.
    """
    return (int(point[0]), int(point[1]))


def _direction_vector(yaw, pitch, roll=0, length=150):
    """Return the (dx, dy) pixel offset used to draw an orientation line.

    yaw, pitch and roll are angles as reported by the service; the yaw is
    negated before being used, and the result is truncated to integers.
    length is the scale factor applied to the offset, in pixels.
    """
    psi = roll
    theta = -yaw
    phi = pitch
    dx = int(length * (sin(phi) * sin(psi) - cos(phi) * sin(theta) * cos(psi)))
    dy = int(-length * (sin(phi) * cos(psi) - cos(phi) * sin(theta) * sin(psi)))
    return dx, dy


def main(stream_index):
    """Grab frames from a video source and display head and gaze directions.

    Each frame is converted to grayscale, JPEG-encoded and sent to the
    'gaze_analysis' service. For every face in the result, one line is drawn
    from the nose (head orientation) and one from each eye (gaze orientation)
    on the original frame, which is then shown in a window named 'original'.
    The loop stops when the stream ends or when 'q' is pressed.

    stream_index is passed to cv2.VideoCapture: a camera index (e.g. 0) or
    the path of a video file.

    Exits the interpreter with status 1 if the stream cannot be opened.
    """
    # Open the requested source instead of always opening camera 0.
    camera = cv2.VideoCapture(stream_index)
    camera.set(cv2.cv.CV_CAP_PROP_FRAME_WIDTH, 640)
    camera.set(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT, 480)
    camera.set(cv2.cv.CV_CAP_PROP_FPS, 10)

    if not camera.isOpened():
        print("Cannot open stream of index {}".format(stream_index))
        raise SystemExit(1)

    print("Input stream is of resolution: {} x {}".format(
        camera.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH),
        camera.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)))

    conn = angus.client.connect()
    service = conn.services.get_service('gaze_analysis', 1)
    service.enable_session()

    try:
        while camera.isOpened():
            ret, frame = camera.read()
            if not ret:
                break

            ### angus.ai computer vision services require gray images right now.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            ret, buff = cv2.imencode(".jpg", gray, [cv2.IMWRITE_JPEG_QUALITY, 80])
            buff = StringIO.StringIO(np.array(buff).tostring())

            job = service.process({"image": buff})
            res = job.result

            for face in res['faces']:
                nose = _to_pixel(face['nose'])
                eyel = _to_pixel(face['eye_left'])
                eyer = _to_pixel(face['eye_right'])

                ### head orientation
                xvec, yvec = _direction_vector(face['head_yaw'],
                                               face['head_pitch'],
                                               face['head_roll'])
                cv2.line(frame, nose, (nose[0] + xvec, nose[1] + yvec),
                         (0, 140, 255), 3)

                ### gaze orientation (no roll component); the same offset
                ### is drawn from both eyes.
                xvec, yvec = _direction_vector(face['gaze_yaw'],
                                               face['gaze_pitch'])
                for eye in (eyel, eyer):
                    cv2.line(frame, eye, (eye[0] + xvec, eye[1] + yvec),
                             (0, 140, 0), 3)

            cv2.imshow('original', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        ### Disabling session on the server and releasing local resources,
        ### even when the processing loop raised.
        service.disable_session()

        camera.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    ### Index 0 is the usual default web cam; use another index if your
    ### setup exposes the camera differently.
    ### A video file can be read instead of the host computer cam with:
    ### main("/path/to/myvideo.avi")
    main(stream_index=0)
../../_images/screenshot_gazeanalysis.png