Initial check-in of the new FER+ labels and sample Python code.

This commit is contained in:
Emad Barsoum
2016-09-15 20:06:39 -07:00
parent 7287851738
commit eb57565c75
4 changed files with 36048 additions and 2 deletions

12
LICENSE.md Normal file
View File

@@ -0,0 +1,12 @@
FER+
Copyright (c) Microsoft Corporation
All rights reserved.
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the ""Software""), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -1,2 +1,18 @@
# FERPlus
This is new label for Emotion FER dataset, each image is tagged by 10 taggers, which provide better quality ground truth for still image emotion. As described in: https://arxiv.org/abs/1608.01041
# FER+
This is a new set of labels for the Emotion FER dataset. Each image is tagged by 10 taggers, which provides better-quality ground truth for still-image emotion than the original FER labels. Having 10 taggers for each image enables us to create an emotion probability distribution per face, so that we can learn a probability distribution or multi-label output instead of relying on conventional majority voting, as described in: https://arxiv.org/abs/1608.01041
The new label file is named fer2013new.csv. It contains the same number of rows as the original fer2013.csv label file, in the same order, so that you can infer which emotion tag belongs to which image; we cannot host the actual image content. The original FER labels are available here: https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data
We also provide simple parsing code in Python to demonstrate how to parse the new labels and how to convert them to a probability distribution (there are multiple ways to do this; we show one example). The parsing code is in src/ReadFERPlus.py
The format of the CSV file is as follows: Usage, neutral, happiness, surprise, sadness, anger, disgust, fear, contempt, unknown, NF. "Usage" is the same as in the original FER labels and differentiates between the training set, the public test set and the private test set. The other columns are the vote counts for each emotion, with the addition of unknown and NF (Not a Face).
# Citation
If you use the new FER labels, the sample code, or part of it in your research, please cite the following:
@inproceedings{BarsoumICMI2016,
title={Training Deep Networks for Facial Expression Recognition with Crowd-Sourced Label Distribution},
author={Barsoum, Emad and Zhang, Cha and Canton Ferrer, Cristian and Zhang, Zhengyou},
booktitle={ICMI},
year={2016}
}

35888
fer2013new.csv Normal file

File diff suppressed because it is too large Load Diff

130
src/ReadFERPlus.py Normal file
View File

@@ -0,0 +1,130 @@
#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
#
import sys
import csv
import argparse
import numpy as np
def main(fer_label_file):
    """
    Script entry point: parse the new FER emotion label file and print a
    per-emotion summary table for the train, validation and test splits.

    Each image is attributed to the emotion with the highest probability
    (majority voting).

    Parameters:
        fer_label_file: Path to the CSV label file.
    """
    header, train_data, val_data, test_data = load_labels(fer_label_file)

    # One column of counts per split, computed in train / val / test order.
    emotion_count = len(header)
    counts_per_split = [count_image_per_emotion(emotion_count, split)
                        for split in (train_data, val_data, test_data)]

    row_format = "{0}\t{1}\t{2}\t{3}"
    print(row_format.format("".ljust(10), "Train", "Val", "Test"))

    # Every sequence below has exactly `emotion_count` entries, so zipping
    # them walks the same rows as indexing 0..emotion_count-1.
    for name, train_n, val_n, test_n in zip(header, *counts_per_split):
        print(row_format.format(name.ljust(10), train_n, val_n, test_n))
def count_image_per_emotion(emotion_count, data):
    """
    Summary helper: tally how many images fall under each emotion.

    An image is counted for the emotion whose entry is the largest in its
    distribution (the first one wins when several are tied, as np.argmax
    returns the first maximum).

    Parameters:
        emotion_count: the number of emotions.
        data: the list of emotion for each image.
    Returns:
        A list of `emotion_count` integers, one image count per emotion.
    """
    tally = [0] * emotion_count
    for distribution in data:
        winner = np.argmax(distribution)
        tally[winner] = tally[winner] + 1
    return tally
def load_labels(fer_label_file):
    """
    Load and parse the label CSV file that contains the new FER labels.

    The first row is the header. The first column of every row is the usage
    tag, which selects the split the row belongs to; the remaining columns
    are the per-emotion vote counts, which are converted to a probability
    distribution with process_data().

    Parameters:
        fer_label_file: Path to the CSV label file.
    Returns:
        A tuple (header, train_data, val_data, test_data). `header` is the
        header row without its first column; each `*_data` item is a list
        holding one processed distribution per image of that split.
    Raises:
        ValueError: if a row's usage tag is not "Training", "PublicTest" or
            "PrivateTest", or if a vote count cannot be parsed as a float.
    """
    train_data = []
    val_data = []
    test_data = []

    with open(fer_label_file) as label_file:
        emotion_label_itr = iter(csv.reader(label_file))

        # First row is the header; drop the leading usage column.
        header = next(emotion_label_itr)[1:]

        # Split into train, validate and test set.
        for row in emotion_label_itr:
            # Build a real list: on Python 3 `map` returns a one-shot
            # iterator, which has no len() and cannot be indexed, both of
            # which process_data() relies on.
            emotion_raw = [float(vote) for vote in row[1:]]

            if row[0] == "Training":
                train_data.append(process_data(emotion_raw))
            elif row[0] == "PublicTest":
                val_data.append(process_data(emotion_raw))
            elif row[0] == "PrivateTest":
                test_data.append(process_data(emotion_raw))
            else:
                raise ValueError('Invalid usage')

    return header, train_data, val_data, test_data
def process_data(emotion_raw):
    """
    Takes the raw votes for each emotion and return the probability distribution.
    We ignore outliers and distribution that has one vote per emotion.

    Steps:
      1. Emotions with at most a single vote are zeroed (outlier removal).
      2. The emotions with the highest remaining vote counts are selected,
         one maximum value at a time (ties are taken together), until the
         selection covers at least 75% of the remaining votes or three
         emotions have been selected.
      3. If the selection covers no more than 50% of the remaining votes, or
         more than three emotions ended up selected, the example is forced
         to the "unknown" class (second-to-last entry).
      4. The selected vote counts are normalized to sum to 1.

    The input is never modified; any iterable of numbers is accepted.

    Parameters:
        emotion_raw: Array of vote count per emotion from the label file.
    Returns:
        A list of floats, same length as the input, summing to 1.
    """
    # Work on a private copy so the caller's data is left untouched and so
    # one-shot iterables (e.g. a Python 3 `map` object) are supported.
    votes = list(emotion_raw)
    size = len(votes)

    emotion_unknown = [0.0] * size
    emotion_unknown[-2] = 1.0

    # remove emotions with a single vote (outlier removal)
    single_vote = 1.0 + sys.float_info.epsilon
    votes = [0.0 if vote < single_vote else vote for vote in votes]

    sum_list = sum(votes)
    emotion = [0.0] * size

    sum_part = 0
    count = 0
    valid_emotion = True
    while sum_part < 0.75*sum_list and count < 3 and valid_emotion:
        maxval = max(votes)
        for i in range(size):
            if votes[i] == maxval:
                emotion[i] = maxval
                votes[i] = 0
                sum_part += emotion[i]
                count += 1
                # In the 10-column label layout, index 8 is "unknown" and
                # index 9 is "NF" (not a face).
                if i >= 8:  # unknown or non-face share same number of max votes
                    valid_emotion = False
                    if sum(emotion) > maxval:  # there have been other emotions ahead of unknown or non-face
                        emotion[i] = 0
                        count -= 1
                    break

    # less than 50% of the votes are integrated, or there are too many
    # emotions, we'd better discard this example
    if sum(emotion) <= 0.5*sum_list or count > 3:
        emotion = emotion_unknown  # force setting as unknown

    # The normalizer is loop-invariant, so compute it once.
    total = sum(emotion)
    return [float(value) / total for value in emotion]
if __name__ == "__main__":
    # Command-line entry point: the path of the label CSV is mandatory and
    # is handed straight to main().
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-f",
                            "--fer_label_file",
                            type=str,
                            help="FER 2013 update label file.",
                            required=True)
    cli_args = arg_parser.parse_args()
    main(cli_args.fer_label_file)