Files
MasterarbeitCode/evaluation/final_eval.py
2021-04-11 23:28:41 +02:00

288 lines
12 KiB
Python

import json
import statistics
import helpers.revise_substitutes as revise_subs
def eval_dataset(substitutes_dict):
    """Print summary statistics for a substitutes mapping.

    Args:
        substitutes_dict: dict mapping ingredient name -> list of substitute
            names (the list may be empty).

    Prints the total number of ingredients, how many have no substitutes at
    all ("nones"), and the average / median / largest / smallest
    substitute-list length.
    """
    all_lengths = [len(subs) for subs in substitutes_dict.values()]
    nones = sum(1 for length in all_lengths if length == 0)
    print("number of ingredients: " + str(len(substitutes_dict)))
    print("number of nones: " + str(nones))
    if not all_lengths:
        # Guard: an empty dict would otherwise raise ZeroDivisionError on the
        # average and StatisticsError / ValueError on median / max / min.
        print("no ingredients - skipping length statistics")
        return
    # Note: len(all_lengths) == number of ingredients, so this matches the
    # original "sum / number of keys" average exactly.
    print("average number of subs: " + str(sum(all_lengths) / len(all_lengths)))
    print("median number of subs: " + str(statistics.median(all_lengths)))
    print("largest number of subs: " + str(max(all_lengths)))
    print("smallest number of subs: " + str(min(all_lengths)))
def translate_engl_ground_truth(ground_truth, ger_transl):
    """Translate an English ground-truth mapping into German.

    Args:
        ground_truth: dict mapping English base ingredient -> list of English
            substitute names.
        ger_transl: dict mapping English name -> German name.

    Returns:
        dict mapping the German base ingredient to the German substitutes.
        Substitutes with no entry in ger_transl are silently dropped; base
        ingredients, however, must all be present in ger_transl (KeyError
        otherwise).
    """
    return {
        ger_transl[base]: [ger_transl[sub] for sub in subs if sub in ger_transl]
        for base, subs in ground_truth.items()
    }
def eval_ground_truth(substitutes_dict, ground_truth_dict):
    """Score predicted substitutes against a ground-truth mapping and print
    per-ingredient and aggregate precision/recall figures.

    Args:
        substitutes_dict: dict mapping ingredient -> list of predicted
            substitutes; must contain every key of ground_truth_dict.
        ground_truth_dict: dict mapping ingredient -> list of accepted
            (ground-truth) substitutes.

    NOTE(review): the hard-coded divisors below (40, 10, 30) appear to assume
    a ground truth of exactly 40 ingredients, 10 of which are in german_words
    — TODO confirm against the actual ground-truth files.
    """
    # Micro-averaged counters: summed over all ingredients.
    total_corr_int = 0
    total_corr_list = []
    total_incorr_int = 0
    total_incorr_list = []
    total_subs_ground_truth = 0
    # Sum of per-ingredient precisions (for a macro-averaged precision later).
    test_prec = 0
    # [best value seen so far, list of ingredients achieving it]
    highest_prec = [0,[]]
    highest_recall = [0,[]]
    # Separate counters for "typically German" vs. all other ingredients.
    other_corr = 0
    other_incorr = 0
    ger_corr = 0
    ger_incorr = 0
    ger_total = 0
    other_total = 0
    german_words = ["Spätzle", "Schwarzbrot", "Schupfnudeln", "Bratwürste_Nürnberger", "Vanillinzucker", "Bier", "Semmelknödel", "Rote_Bete", "Eisbeine", "Spargel_weiß"]
    for ingredient in ground_truth_dict:
        correct = 0
        incorrect = 0
        correct_list = []
        incorrect_list = []
        # print("\n" + ingredient + ": " + str(len(substitutes_dict[ingredient])))
        # Classify each prediction as correct (in the ground truth) or not.
        for sub in substitutes_dict[ingredient]:
            if sub in ground_truth_dict[ingredient]:
                # print(sub)
                correct += 1
                correct_list.append(sub)
            else:
                incorrect += 1
                incorrect_list.append(sub)
        total_corr_int += correct
        total_incorr_int += incorrect
        total_corr_list.append(correct)
        total_incorr_list.append(incorrect)
        total_subs_ground_truth += len(ground_truth_dict[ingredient])
        # Per-ingredient precision/recall only makes sense with >= 1 hit;
        # ingredients with zero correct predictions are skipped here but
        # still counted in the totals above.
        if correct > 0:
            curr_recall = correct/len(ground_truth_dict[ingredient])
            curr_prec = correct/(correct+incorrect)
            test_prec += curr_prec
            # Ties extend the winner list; a strictly better value resets it.
            # (The == check runs before >, so the very first best value is
            # not duplicated.)
            if curr_prec == highest_prec[0]:
                highest_prec[1].append(ingredient)
            if curr_prec > highest_prec[0]:
                highest_prec[0] = curr_prec
                highest_prec[1] = [ingredient]
            if curr_recall == highest_recall[0]:
                highest_recall[1].append(ingredient)
            if curr_recall > highest_recall[0]:
                highest_recall[0] = curr_recall
                highest_recall[1] = [ingredient]
            print(ingredient + ": " + str(curr_prec) + " ..... " + str(curr_recall))
        if ingredient in german_words:
            ger_corr += correct
            ger_incorr += incorrect
        else:
            other_corr += correct
            other_incorr += incorrect
        # Ad-hoc debug output for one specific ingredient.
        if ingredient == "Zucker":
            print("correct: " + str(correct_list) + ", incorrect: " + str(incorrect_list))
    # NOTE(review): indentation was reconstructed — these lines are placed
    # after the loop because the /10 and /30 divisors match 10 German + 30
    # other ground-truth ingredients; TODO confirm against the original file.
    ger_total = ger_corr + ger_incorr
    other_total = other_corr + other_incorr
    print("ger_total: " + str(ger_total/10))
    print("other_total: " + str(other_total/30))
    # print(correct)
    # NOTE(review): 'ingredient'/'correct_list' here are leftovers from the
    # last loop iteration — this looks like leftover debug output.
    print(ingredient + ": " + str(correct_list) + " / " + str(incorrect_list))
    # Micro-averaged precision over all predictions.
    print("precision: " + str(total_corr_int / (total_corr_int + total_incorr_int)))
    # Macro average — hard-coded 40 ingredients (see NOTE above).
    print("(average precision:) " + str(test_prec/40))
    print("recall: " + str(total_corr_int / total_subs_ground_truth))
    print("median number of correct subs (ground truth): " + str(statistics.median(total_corr_list)))
    print("average number of correct subs (ground truth): " + str(statistics.mean(total_corr_list)))
    # Count ingredients with at least 3 / with zero correct substitutes.
    at_least_3 = 0
    no_corr = 0
    for nr in total_corr_list:
        if nr >= 3:
            at_least_3 += 1
        if nr < 1:
            no_corr += 1
    print("ingredients with at least 3 correct substitutes: " + str(at_least_3))
    print("ingredients with no correct substitutes: " + str(no_corr))
    print("highest precision: " + str(highest_prec[1]) + ": " + str(highest_prec[0]))
    print("highest recall: " + str(highest_recall[1]) + ": " + str(highest_recall[0]))
    # print("german precision: " + str(ger_corr/(ger_corr + ger_incorr)))
    # print("german correct:" + str(ger_corr))
    # print("precision rest: " + str(other_corr/(other_corr + other_incorr)))
    # print("other correct: " + str(other_corr))
def get_ground_truth_substitutes(substitutes_dict, ground_truth_dict):
    """Restrict substitutes_dict to the ingredients listed in the ground truth.

    Args:
        substitutes_dict: dict mapping ingredient -> list of substitutes.
        ground_truth_dict: dict whose keys select which ingredients to keep.

    Returns:
        A new dict with only the ground-truth ingredients. Raises KeyError if
        a ground-truth ingredient is missing from substitutes_dict.
    """
    return {ingredient: substitutes_dict[ingredient] for ingredient in ground_truth_dict}
def main():
    """Driver for the final evaluation.

    Loads the German substitute predictions and ground truth, evaluates them
    raw and with two levels of synonym merging, then repeats the evaluation
    for the English FoodBERT results and re-scores the German models against
    a hand-maintained German ground truth.

    NOTE(review): all paths are relative to the current working directory —
    presumably this is run from the project root; verify before running.
    """
    substitutes_path = "final_Versions/models/vers3/eval/complete_substitute_pairs_50.json"
    with open(substitutes_path, "r") as whole_json_file:
        substitutes_dict = json.load(whole_json_file)
    ground_truth_path = "data/ground_truth.json"
    with open(ground_truth_path, "r") as whole_json_file:
        ground_truth_dict = json.load(whole_json_file)
    # --- German evaluation, raw predictions (no synonym merging) ---
    print("no synonyms at all:")
    print("entire dataset")
    eval_dataset(substitutes_dict)
    print("\nonly ground truth:")
    ground_truth_substitutes0 = get_ground_truth_substitutes(substitutes_dict, ground_truth_dict)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes0)
    eval_ground_truth(substitutes_dict, ground_truth_dict)
    print("======================================")
    # --- German evaluation, substitutes merged with their synonyms ---
    print("\nsynonyms of substitutes only: ")
    # NOTE(review): .copy() is shallow — the inner substitute lists are still
    # shared with substitutes_dict; assumes the revise_subs helpers build a
    # new dict rather than mutating the lists in place. TODO confirm.
    new_substitutes_dict1 = substitutes_dict.copy()
    new_substitutes_dict1 = revise_subs.combined_substitutes_dict(new_substitutes_dict1)
    print("entire dataset")
    eval_dataset(new_substitutes_dict1)
    print("\nonly ground truth:")
    ground_truth_substitutes1 = get_ground_truth_substitutes(new_substitutes_dict1, ground_truth_dict)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes1)
    eval_ground_truth(new_substitutes_dict1, ground_truth_dict)
    print("======================================")
    # --- German evaluation, synonyms merged on both sides ---
    print("\nsynonyms of everything: ")
    new_substitutes_dict2 = substitutes_dict.copy()
    new_substitutes_dict2 = revise_subs.combine_all_synonyms(new_substitutes_dict2)
    print("entire dataset")
    eval_dataset(new_substitutes_dict2)
    print("\nonly ground truth:")
    ground_truth_substitutes2 = get_ground_truth_substitutes(new_substitutes_dict2, ground_truth_dict)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes2)
    eval_ground_truth(new_substitutes_dict2, ground_truth_dict)
    print("======================================")
    print("======================================")
    print("English Evaluation")
    # NOTE(review): several of these path variables are never used below, and
    # ground_truth_path is reassigned but not re-read. Kept as-is.
    data_path = "data/"
    occurances_path = "mult_ingredients_nice.json"
    ground_truth_path = "ground_truth.json"
    engl_data_path = "evaluation/engl_data/"
    evaluation_path = "evaluation/"
    synonyms_path = "synonyms.json"
    # Hand-maintained German ground truth (10 base ingredients) used to
    # re-score the German models at the end of this function.
    german_ground_truth = {
        "Karotte": ["Pastinake", "Steckrübe", "Staudensellerie", "Kürbis", "Süßkartoffel", "Rettich", "Radieschen",
                    "Kartoffel", "Paprika_rot", "Butternusskürbis", "Petersilienwurzel", "Rübe"],
        "Kirsche": ["Aprikose", "Pflaume", "Nektarine", "Himbeeren", "Weintrauben", "Erdbeere", "Johannisbeeren",
                    "Brombeeren", "Beeren_gemischte", "Pfirsich", "Cranberries", "Cranberries_getrocknet", "Blaubeeren",
                    "Maraschino", "Beeren", "Trockenpflaumen"],
        "Huhn": ["Truthahn", "Kaninchen", "Austernpilze", "Kalbfleisch", "Fisch", "Tofu", "Rindfleisch", "Tofu_fester",
                 "Schweinefleisch", "Seitan", "Ente", "Lamm", "Pilze", "Shrimps", "Wachtel", "Gans", "Wildfleisch"],
        "Petersilie": ["Kerbel", "Koriander", "Estragon", "Basilikum", "Oregano", "Liebstöckel", "Dill",
                       "Koriandergrün", "Rosmarin", "Kapern", "Thymian", "Schnittlauch", "Minze",
                       "Basilikum_getrockneter", "Oregano_getrocknet", "Thymian_getrocknet"],
        "Schokolade": ["Nutella", "Kakaopulver_Instant", "Zucker", "Marmelade", "Marshmallow", "Kakao", "Süßigkeiten",
                       "Erdnussbutter"],
        "Frühstücksspeck": ["Pancetta", "Schinken_Prosciutto", "Speck", "Schinken_rohen", "Parmaschinken", "Schinken",
                            "Salami", "Chorizo", "Wurst_Krakauer", "Schweineschwarte", "Schinkenwürfel", "Croûtons",
                            "Speckwürfel", "Kochschinken", "Corned_Beef", "Wurst_Mortadella"],
        "Grünkohl": ["Spinat", "Chinakohl", "Lauch", "Endiviensalat", "Mangold", "Wirsing", "Kohl", "Blumenkohl",
                     "Brunnenkresse", "Rucola", "Blattspinat", "Kopfsalat", "Römersalat", "Babyspinat"],
        "Zucker": ["Honig", "Stevia", "Süßstoff", "Stevia_flüssig", "Süßstoff_flüssigen", "Reissirup", "Ahornsirup",
                   "Kondensmilch_gezuckerte", "Agavendicksaft", "Schokolade", "Vanille", "Melasse", "Zuckerrübensirup",
                   "Sirup"],
        "Brie": ["Camembert", "Gorgonzola", "Schmelzkäse", "Cheddarkäse", "Ziegenkäse", "Doppelrahmfrischkäse",
                 "Blauschimmelkäse", "Roquefort", "Gouda", "Käse_Fontina", "Käse_Provolone", "Feta_Käse",
                 "Scheiblettenkäse"],
        "Truthahn": ["Huhn", "Kaninchen", "Ente", "Kochschinken", "Fasan", "Gans", "Rindfleisch", "Lammfleisch",
                     "Schweinefleisch", "Roastbeef", "Kalbfleisch", "Geflügelfleisch", "Hähnchenfilet", "Hühnerkeule",
                     "Wachtel", "schweinekotelett", "Wildfleisch"]
    }
    with open(engl_data_path + "translation.json", "r") as whole_json_file:
        ger_transl = json.load(whole_json_file)
    with open(engl_data_path + "substitute_pairs_foodbert_text.json", "r") as whole_json_file:
        engl_list = json.load(whole_json_file)
    with open(engl_data_path + "revised_engl_ground_truth.json", "r") as whole_json_file:
        engl_ground_truth = json.load(whole_json_file)
    # Group the English (base, substitute) pair list into base -> [substitutes].
    engl_dict = {}
    for foo in engl_list:
        if foo[0] in engl_dict.keys():
            engl_dict[foo[0]].append(foo[1])
        else:
            engl_dict[foo[0]] = [foo[1]]
    # translate english ground truth to german for comparison
    # any ingredients that aren't in the german dataset are removed
    # translated_ground_truth = translate_engl_ground_truth(engl_ground_truth, ger_transl)
    print("Eval English results")
    print("entire dataset")
    eval_dataset(engl_dict)
    orig_engl_dict = engl_dict.copy()
    # print("turkey results: " + str(orig_engl_dict["turkey"]))
    print("\nonly ground truth:")
    ground_truth_substitutes_engl = get_ground_truth_substitutes(orig_engl_dict, engl_ground_truth)
    # print(ground_truth_substitutes)
    eval_dataset(ground_truth_substitutes_engl)
    eval_ground_truth(ground_truth_substitutes_engl, engl_ground_truth)
    # --- English evaluation with synonym merging ---
    print("\n\nEval method 1:")
    engl_dict1 = engl_dict.copy()
    engl_dict1 = revise_subs.engl_combined_substitutes_dict(engl_dict1)
    print("entire dataset")
    eval_dataset(engl_dict1)
    print("\nonly ground truth:")
    ground_truth_substitutes_engl = get_ground_truth_substitutes(engl_dict1, engl_ground_truth)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes_engl)
    eval_ground_truth(ground_truth_substitutes_engl, engl_ground_truth)
    # --- Re-score the German models on the hand-maintained ground truth ---
    print("\nReevaluate German Data: ")
    eval_ground_truth(substitutes_dict, german_ground_truth)
    print("\nwith syn1")
    eval_ground_truth(new_substitutes_dict1, german_ground_truth)
    # print("Truthahn results 1: " + str(new_substitutes_dict1["Truthahn"]))
    print("\nwith syn2")
    eval_ground_truth(new_substitutes_dict2, german_ground_truth)
    # print("Truthahn results 2: " + str(new_substitutes_dict2["Truthahn"]))
    #
    # engl_substitutes_dict = get_ground_truth_substitutes(engl_dict1, german_ground_truth)
    #
    # engl_new_substitutes_dict1 = new_substitutes_dict1.copy()
    # engl_new_substitutes_dict2 = new_substitutes_dict2.copy()
# Guard the entry point so importing this module (e.g. from other evaluation
# scripts or tests) does not trigger the full evaluation run with its file
# I/O and heavy printing.
if __name__ == "__main__":
    main()