Initial check-in of the new FER+ labels and sample Python code.

2025-12-30 05:22:26 +00:00 · 2016-09-15 20:06:39 -07:00
parent 7287851738
commit eb57565c75
4 changed files with 36048 additions and 2 deletions
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -0,0 +1,12 @@
+FER+
+
+Copyright (c) Microsoft Corporation
+
+All rights reserved.
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -1,2 +1,18 @@
-# FERPlus
-This is new label for Emotion FER dataset, each image is tagged by 10 taggers, which provide better quality ground truth for still image emotion. As described in: https://arxiv.org/abs/1608.01041
+# FER+
+This is a new set of labels for the Emotion FER dataset. Each image is tagged by 10 taggers, which provides better-quality ground truth for still-image emotion than the original FER labels. Having 10 taggers for each image enables us to create an emotion probability distribution per face, so that we can learn a probability distribution or multi-label output instead of relying on conventional majority voting, as described in: https://arxiv.org/abs/1608.01041
+
+The new label file is named fer2013new.csv. It contains the same number of rows as the original fer2013.csv label file, in the same order, so that you can infer which emotion tags belong to which image (we can't host the actual image content). The original FER data is available here: https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data
+
+We also provide simple parsing code in Python to demonstrate how to parse the new labels and how to convert them to a probability distribution (there are multiple ways to do this; we show one example). The parsing code is in src/ReadFERPlus.py
+
+The format of the CSV file is as follows: Usage, neutral, happiness, surprise, sadness, anger, disgust, fear, contempt, unknown, NF. "Usage" is the same as in the original FER labels and differentiates between the training set, public test set and private test set. The other columns are the vote counts for each emotion, with the addition of unknown and NF (Not a Face).
+
+# Citation
+If you use the new FER labels or the sample code, or part of it, in your research, please cite the following:
+
+@inproceedings{BarsoumICMI2016,
+  title={Training Deep Networks for Facial Expression Recognition with Crowd-Sourced Label Distribution},
+  author={Barsoum, Emad and Zhang, Cha and Canton Ferrer, Cristian and Zhang, Zhengyou},
+  booktitle={ICMI},
+  year={2016}
+}
--- a/fer2013new.csv
+++ b/fer2013new.csv
--- a/src/ReadFERPlus.py
+++ b/src/ReadFERPlus.py
@@ -0,0 +1,130 @@
+#
+# Copyright (c) Microsoft. All rights reserved.
+# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+#
+
+import sys
+import csv
+import argparse
+import numpy as np
+
+def main(fer_label_file):
+    """
+    Script entry point: parse the new FER emotion label file and print, for each
+    emotion, how many images of every split have it as their most probable label.
+
+    Parameters:
+    fer_label_file: Path to the CSV label file.
+    """
+    header, train_data, val_data, test_data = load_labels(fer_label_file)
+
+    # One tally list per split; each image is counted under the emotion with the
+    # highest probability (majority voting).
+    emotion_count = len(header)
+    per_split_counts = [count_image_per_emotion(emotion_count, split)
+                        for split in (train_data, val_data, test_data)]
+
+    row_format = "{0}\t{1}\t{2}\t{3}"
+    print(row_format.format("".ljust(10), "Train", "Val", "Test"))
+
+    # zip(*...) walks the three tallies in lockstep with the emotion names.
+    for name, (train_n, val_n, test_n) in zip(header, zip(*per_split_counts)):
+        print(row_format.format(name.ljust(10), train_n, val_n, test_n))
+
+def count_image_per_emotion(emotion_count, data):
+    """
+    Summary helper: tally how many images have each emotion as their
+    highest-probability label.
+
+    Parameters:
+    emotion_count: the number of emotions (length of the returned list).
+    data: the list of per-image emotion probabilities.
+
+    Returns:
+    A list of emotion_count integers; entry k is the number of images whose
+    largest probability sits at index k (np.argmax picks the first on ties).
+    """
+    tally = [0] * emotion_count
+    top_emotions = (np.argmax(distribution) for distribution in data)
+    for top in top_emotions:
+        tally[top] += 1
+
+    return tally
+
+def load_labels(fer_label_file):
+    """
+    Load and parse the label CSV file that contains the new FER labels.
+
+    The first row is treated as the header. In every following row the first
+    column selects the split ("Training", "PublicTest" or "PrivateTest") and the
+    remaining columns are the vote counts handed to process_data(). Blank
+    lines are skipped.
+
+    Parameters:
+    fer_label_file: Path to the CSV label file.
+
+    Returns:
+    A tuple (header, train_data, val_data, test_data). header is the header row
+    without its first column; each *_data entry is a list holding one
+    process_data() result per row of that split.
+
+    Raises:
+    ValueError: if a row has an unrecognized usage value, or a vote count that
+    cannot be converted to float.
+    """
+    train_data = []
+    val_data   = []
+    test_data  = []
+    split_by_usage = {"Training": train_data,
+                      "PublicTest": val_data,
+                      "PrivateTest": test_data}
+
+    with open(fer_label_file) as label_file:
+        emotion_label_itr = csv.reader(label_file)
+
+        # First row is the header; drop its first column.
+        header = next(emotion_label_itr)[1:]
+
+        # Split into train, validate and test set.
+        for row in emotion_label_itr:
+            if not row:
+                # csv.reader yields an empty list for a blank line; skip it
+                # instead of failing on row[0].
+                continue
+
+            # Build a real list: on Python 3 map() returns a one-shot iterator,
+            # which supports neither len() nor indexing.
+            emotion_raw = [float(vote) for vote in row[1:]]
+            if row[0] not in split_by_usage:
+                raise ValueError('Invalid usage')
+            split_by_usage[row[0]].append(process_data(emotion_raw))
+
+    return header, train_data, val_data, test_data
+
+def process_data(emotion_raw):
+    """
+    Takes the raw votes for each emotion and returns the probability distribution.
+    Single votes are treated as outliers and dropped; a sample whose kept
+    emotions cover too few of the votes, or that has too many tied emotions,
+    is labelled as unknown.
+
+    Parameters:
+    emotion_raw: Vote count per emotion from the label file, as any iterable of
+                 numbers. The second-to-last entry is the "unknown" vote count
+                 and the last one the non-face count. The input is not modified.
+
+    Returns:
+    A list of floats of the same length that sums to 1.0.
+    """
+    # Work on a private copy: the caller's data stays untouched, and one-shot
+    # iterables (e.g. a Python 3 map object) are accepted as well.
+    votes = list(emotion_raw)
+    size = len(votes)
+    first_special = size - 2    # index of "unknown"; the non-face column follows it
+    emotion_unknown = [0.0] * size
+    emotion_unknown[-2] = 1.0
+
+    # remove emotions with a single vote (outlier removal)
+    for i in range(size):
+        if votes[i] < 1.0 + sys.float_info.epsilon:
+            votes[i] = 0.0
+
+    sum_list = sum(votes)
+    emotion = [0.0] * size
+
+    # Repeatedly move the current top vote-getter(s) from votes into emotion
+    # until they cover 75% of the remaining votes, three emotions were taken,
+    # or unknown / non-face shows up among the top entries.
+    sum_part = 0
+    count = 0
+    valid_emotion = True
+    while sum_part < 0.75*sum_list and count < 3 and valid_emotion:
+        maxval = max(votes)
+        for i in range(size):
+            if votes[i] == maxval:
+                emotion[i] = maxval
+                votes[i] = 0
+                sum_part += emotion[i]
+                count += 1
+                if i >= first_special:  # unknown or non-face share same number of max votes
+                    valid_emotion = False
+                    if sum(emotion) > maxval:   # there have been other emotions ahead of unknown or non-face
+                        emotion[i] = 0
+                        count -= 1
+                    break
+
+    # less than 50% of the votes are integrated, or there are too many emotions,
+    # we'd better discard this example
+    if sum(emotion) <= 0.5*sum_list or count > 3:
+        emotion = emotion_unknown   # force setting as unknown
+
+    kept_total = sum(emotion)
+    return [float(v)/kept_total for v in emotion]
+
+if __name__ == "__main__":
+    # Command-line entry point: the label file path is the only, mandatory, option.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("-f",
+                     "--fer_label_file",
+                     type=str,
+                     required=True,
+                     help="FER 2013 update label file.")
+
+    main(cli.parse_args().fer_label_file)