Files
MasterarbeitCode/evaluation/final_eval.py
2021-04-11 23:28:41 +02:00

288 lines
12 KiB
Python

import json
import statistics
import helpers.revise_substitutes as revise_subs
def eval_dataset(substitutes_dict):
    """Print summary statistics for a substitutes mapping.

    Args:
        substitutes_dict: dict mapping ingredient name -> list of substitute
            names (the list may be empty).

    Prints the total number of ingredients, how many have no substitutes at
    all ("nones"), and the average / median / largest / smallest
    substitute-list length.
    """
    all_lengths = [len(subs) for subs in substitutes_dict.values()]
    nones = sum(1 for length in all_lengths if length == 0)
    print("number of ingredients: " + str(len(substitutes_dict)))
    print("number of nones: " + str(nones))
    if not all_lengths:
        # Guard: an empty dict would otherwise raise ZeroDivisionError on the
        # average and StatisticsError / ValueError on median / max / min.
        print("no ingredients - skipping length statistics")
        return
    # Note: len(all_lengths) == number of ingredients, so this matches the
    # original "sum / number of keys" average exactly.
    print("average number of subs: " + str(sum(all_lengths) / len(all_lengths)))
    print("median number of subs: " + str(statistics.median(all_lengths)))
    print("largest number of subs: " + str(max(all_lengths)))
    print("smallest number of subs: " + str(min(all_lengths)))
def translate_engl_ground_truth(ground_truth, ger_transl):
    """Translate an English ground-truth mapping into German.

    Args:
        ground_truth: dict mapping English base ingredient -> list of English
            substitute names.
        ger_transl: dict mapping English name -> German name.

    Returns:
        dict mapping the German base ingredient to the German substitutes.
        Substitutes with no entry in ger_transl are silently dropped; base
        ingredients, however, must all be present in ger_transl (KeyError
        otherwise).
    """
    return {
        ger_transl[base]: [ger_transl[sub] for sub in subs if sub in ger_transl]
        for base, subs in ground_truth.items()
    }
def eval_ground_truth(substitutes_dict, ground_truth_dict):
    """Score predicted substitutes against a ground-truth mapping and print
    per-ingredient and aggregate precision/recall figures.

    Args:
        substitutes_dict: dict mapping ingredient -> list of predicted
            substitutes; must contain every key of ground_truth_dict.
        ground_truth_dict: dict mapping ingredient -> list of accepted
            (ground-truth) substitutes.

    NOTE(review): the hard-coded divisors below (40, 10, 30) appear to assume
    a ground truth of exactly 40 ingredients, 10 of which are in german_words
    — TODO confirm against the actual ground-truth files.
    """
    # Micro-averaged counters: summed over all ingredients.
    total_corr_int = 0
    total_corr_list = []
    total_incorr_int = 0
    total_incorr_list = []
    total_subs_ground_truth = 0
    # Sum of per-ingredient precisions (for a macro-averaged precision later).
    test_prec = 0
    # [best value seen so far, list of ingredients achieving it]
    highest_prec = [0,[]]
    highest_recall = [0,[]]
    # Separate counters for "typically German" vs. all other ingredients.
    other_corr = 0
    other_incorr = 0
    ger_corr = 0
    ger_incorr = 0
    ger_total = 0
    other_total = 0
    german_words = ["Spätzle", "Schwarzbrot", "Schupfnudeln", "Bratwürste_Nürnberger", "Vanillinzucker", "Bier", "Semmelknödel", "Rote_Bete", "Eisbeine", "Spargel_weiß"]
    for ingredient in ground_truth_dict:
        correct = 0
        incorrect = 0
        correct_list = []
        incorrect_list = []
        # print("\n" + ingredient + ": " + str(len(substitutes_dict[ingredient])))
        # Classify each prediction as correct (in the ground truth) or not.
        for sub in substitutes_dict[ingredient]:
            if sub in ground_truth_dict[ingredient]:
                # print(sub)
                correct += 1
                correct_list.append(sub)
            else:
                incorrect += 1
                incorrect_list.append(sub)
        total_corr_int += correct
        total_incorr_int += incorrect
        total_corr_list.append(correct)
        total_incorr_list.append(incorrect)
        total_subs_ground_truth += len(ground_truth_dict[ingredient])
        # Per-ingredient precision/recall only makes sense with >= 1 hit;
        # ingredients with zero correct predictions are skipped here but
        # still counted in the totals above.
        if correct > 0:
            curr_recall = correct/len(ground_truth_dict[ingredient])
            curr_prec = correct/(correct+incorrect)
            test_prec += curr_prec
            # Ties extend the winner list; a strictly better value resets it.
            # (The == check runs before >, so the very first best value is
            # not duplicated.)
            if curr_prec == highest_prec[0]:
                highest_prec[1].append(ingredient)
            if curr_prec > highest_prec[0]:
                highest_prec[0] = curr_prec
                highest_prec[1] = [ingredient]
            if curr_recall == highest_recall[0]:
                highest_recall[1].append(ingredient)
            if curr_recall > highest_recall[0]:
                highest_recall[0] = curr_recall
                highest_recall[1] = [ingredient]
            print(ingredient + ": " + str(curr_prec) + " ..... " + str(curr_recall))
        if ingredient in german_words:
            ger_corr += correct
            ger_incorr += incorrect
        else:
            other_corr += correct
            other_incorr += incorrect
        # Ad-hoc debug output for one specific ingredient.
        if ingredient == "Zucker":
            print("correct: " + str(correct_list) + ", incorrect: " + str(incorrect_list))
    # NOTE(review): indentation was reconstructed — these lines are placed
    # after the loop because the /10 and /30 divisors match 10 German + 30
    # other ground-truth ingredients; TODO confirm against the original file.
    ger_total = ger_corr + ger_incorr
    other_total = other_corr + other_incorr
    print("ger_total: " + str(ger_total/10))
    print("other_total: " + str(other_total/30))
    # print(correct)
    # NOTE(review): 'ingredient'/'correct_list' here are leftovers from the
    # last loop iteration — this looks like leftover debug output.
    print(ingredient + ": " + str(correct_list) + " / " + str(incorrect_list))
    # Micro-averaged precision over all predictions.
    print("precision: " + str(total_corr_int / (total_corr_int + total_incorr_int)))
    # Macro average — hard-coded 40 ingredients (see NOTE above).
    print("(average precision:) " + str(test_prec/40))
    print("recall: " + str(total_corr_int / total_subs_ground_truth))
    print("median number of correct subs (ground truth): " + str(statistics.median(total_corr_list)))
    print("average number of correct subs (ground truth): " + str(statistics.mean(total_corr_list)))
    # Count ingredients with at least 3 / with zero correct substitutes.
    at_least_3 = 0
    no_corr = 0
    for nr in total_corr_list:
        if nr >= 3:
            at_least_3 += 1
        if nr < 1:
            no_corr += 1
    print("ingredients with at least 3 correct substitutes: " + str(at_least_3))
    print("ingredients with no correct substitutes: " + str(no_corr))
    print("highest precision: " + str(highest_prec[1]) + ": " + str(highest_prec[0]))
    print("highest recall: " + str(highest_recall[1]) + ": " + str(highest_recall[0]))
    # print("german precision: " + str(ger_corr/(ger_corr + ger_incorr)))
    # print("german correct:" + str(ger_corr))
    # print("precision rest: " + str(other_corr/(other_corr + other_incorr)))
    # print("other correct: " + str(other_corr))
def get_ground_truth_substitutes(substitutes_dict, ground_truth_dict):
    """Restrict substitutes_dict to the ingredients listed in the ground truth.

    Args:
        substitutes_dict: dict mapping ingredient -> list of substitutes.
        ground_truth_dict: dict whose keys select which ingredients to keep.

    Returns:
        A new dict with only the ground-truth ingredients. Raises KeyError if
        a ground-truth ingredient is missing from substitutes_dict.
    """
    return {ingredient: substitutes_dict[ingredient] for ingredient in ground_truth_dict}
def main():
    """Driver for the final evaluation.

    Loads the German substitute predictions and ground truth, evaluates them
    raw and with two levels of synonym merging, then repeats the evaluation
    for the English FoodBERT results and re-scores the German models against
    a hand-maintained German ground truth.

    NOTE(review): all paths are relative to the current working directory —
    presumably this is run from the project root; verify before running.
    """
    substitutes_path = "final_Versions/models/vers3/eval/complete_substitute_pairs_50.json"
    with open(substitutes_path, "r") as whole_json_file:
        substitutes_dict = json.load(whole_json_file)
    ground_truth_path = "data/ground_truth.json"
    with open(ground_truth_path, "r") as whole_json_file:
        ground_truth_dict = json.load(whole_json_file)
    # --- German evaluation, raw predictions (no synonym merging) ---
    print("no synonyms at all:")
    print("entire dataset")
    eval_dataset(substitutes_dict)
    print("\nonly ground truth:")
    ground_truth_substitutes0 = get_ground_truth_substitutes(substitutes_dict, ground_truth_dict)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes0)
    eval_ground_truth(substitutes_dict, ground_truth_dict)
    print("======================================")
    # --- German evaluation, substitutes merged with their synonyms ---
    print("\nsynonyms of substitutes only: ")
    # NOTE(review): .copy() is shallow — the inner substitute lists are still
    # shared with substitutes_dict; assumes the revise_subs helpers build a
    # new dict rather than mutating the lists in place. TODO confirm.
    new_substitutes_dict1 = substitutes_dict.copy()
    new_substitutes_dict1 = revise_subs.combined_substitutes_dict(new_substitutes_dict1)
    print("entire dataset")
    eval_dataset(new_substitutes_dict1)
    print("\nonly ground truth:")
    ground_truth_substitutes1 = get_ground_truth_substitutes(new_substitutes_dict1, ground_truth_dict)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes1)
    eval_ground_truth(new_substitutes_dict1, ground_truth_dict)
    print("======================================")
    # --- German evaluation, synonyms merged on both sides ---
    print("\nsynonyms of everything: ")
    new_substitutes_dict2 = substitutes_dict.copy()
    new_substitutes_dict2 = revise_subs.combine_all_synonyms(new_substitutes_dict2)
    print("entire dataset")
    eval_dataset(new_substitutes_dict2)
    print("\nonly ground truth:")
    ground_truth_substitutes2 = get_ground_truth_substitutes(new_substitutes_dict2, ground_truth_dict)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes2)
    eval_ground_truth(new_substitutes_dict2, ground_truth_dict)
    print("======================================")
    print("======================================")
    print("English Evaluation")
    # NOTE(review): several of these path variables are never used below, and
    # ground_truth_path is reassigned but not re-read. Kept as-is.
    data_path = "data/"
    occurances_path = "mult_ingredients_nice.json"
    ground_truth_path = "ground_truth.json"
    engl_data_path = "evaluation/engl_data/"
    evaluation_path = "evaluation/"
    synonyms_path = "synonyms.json"
    # Hand-maintained German ground truth (10 base ingredients) used to
    # re-score the German models at the end of this function.
    german_ground_truth = {
        "Karotte": ["Pastinake", "Steckrübe", "Staudensellerie", "Kürbis", "Süßkartoffel", "Rettich", "Radieschen",
                    "Kartoffel", "Paprika_rot", "Butternusskürbis", "Petersilienwurzel", "Rübe"],
        "Kirsche": ["Aprikose", "Pflaume", "Nektarine", "Himbeeren", "Weintrauben", "Erdbeere", "Johannisbeeren",
                    "Brombeeren", "Beeren_gemischte", "Pfirsich", "Cranberries", "Cranberries_getrocknet", "Blaubeeren",
                    "Maraschino", "Beeren", "Trockenpflaumen"],
        "Huhn": ["Truthahn", "Kaninchen", "Austernpilze", "Kalbfleisch", "Fisch", "Tofu", "Rindfleisch", "Tofu_fester",
                 "Schweinefleisch", "Seitan", "Ente", "Lamm", "Pilze", "Shrimps", "Wachtel", "Gans", "Wildfleisch"],
        "Petersilie": ["Kerbel", "Koriander", "Estragon", "Basilikum", "Oregano", "Liebstöckel", "Dill",
                       "Koriandergrün", "Rosmarin", "Kapern", "Thymian", "Schnittlauch", "Minze",
                       "Basilikum_getrockneter", "Oregano_getrocknet", "Thymian_getrocknet"],
        "Schokolade": ["Nutella", "Kakaopulver_Instant", "Zucker", "Marmelade", "Marshmallow", "Kakao", "Süßigkeiten",
                       "Erdnussbutter"],
        "Frühstücksspeck": ["Pancetta", "Schinken_Prosciutto", "Speck", "Schinken_rohen", "Parmaschinken", "Schinken",
                            "Salami", "Chorizo", "Wurst_Krakauer", "Schweineschwarte", "Schinkenwürfel", "Croûtons",
                            "Speckwürfel", "Kochschinken", "Corned_Beef", "Wurst_Mortadella"],
        "Grünkohl": ["Spinat", "Chinakohl", "Lauch", "Endiviensalat", "Mangold", "Wirsing", "Kohl", "Blumenkohl",
                     "Brunnenkresse", "Rucola", "Blattspinat", "Kopfsalat", "Römersalat", "Babyspinat"],
        "Zucker": ["Honig", "Stevia", "Süßstoff", "Stevia_flüssig", "Süßstoff_flüssigen", "Reissirup", "Ahornsirup",
                   "Kondensmilch_gezuckerte", "Agavendicksaft", "Schokolade", "Vanille", "Melasse", "Zuckerrübensirup",
                   "Sirup"],
        "Brie": ["Camembert", "Gorgonzola", "Schmelzkäse", "Cheddarkäse", "Ziegenkäse", "Doppelrahmfrischkäse",
                 "Blauschimmelkäse", "Roquefort", "Gouda", "Käse_Fontina", "Käse_Provolone", "Feta_Käse",
                 "Scheiblettenkäse"],
        "Truthahn": ["Huhn", "Kaninchen", "Ente", "Kochschinken", "Fasan", "Gans", "Rindfleisch", "Lammfleisch",
                     "Schweinefleisch", "Roastbeef", "Kalbfleisch", "Geflügelfleisch", "Hähnchenfilet", "Hühnerkeule",
                     "Wachtel", "schweinekotelett", "Wildfleisch"]
    }
    with open(engl_data_path + "translation.json", "r") as whole_json_file:
        ger_transl = json.load(whole_json_file)
    with open(engl_data_path + "substitute_pairs_foodbert_text.json", "r") as whole_json_file:
        engl_list = json.load(whole_json_file)
    with open(engl_data_path + "revised_engl_ground_truth.json", "r") as whole_json_file:
        engl_ground_truth = json.load(whole_json_file)
    # Group the English (base, substitute) pair list into base -> [substitutes].
    engl_dict = {}
    for foo in engl_list:
        if foo[0] in engl_dict.keys():
            engl_dict[foo[0]].append(foo[1])
        else:
            engl_dict[foo[0]] = [foo[1]]
    # translate english ground truth to german for comparison
    # any ingredients that aren't in the german dataset are removed
    # translated_ground_truth = translate_engl_ground_truth(engl_ground_truth, ger_transl)
    print("Eval English results")
    print("entire dataset")
    eval_dataset(engl_dict)
    orig_engl_dict = engl_dict.copy()
    # print("turkey results: " + str(orig_engl_dict["turkey"]))
    print("\nonly ground truth:")
    ground_truth_substitutes_engl = get_ground_truth_substitutes(orig_engl_dict, engl_ground_truth)
    # print(ground_truth_substitutes)
    eval_dataset(ground_truth_substitutes_engl)
    eval_ground_truth(ground_truth_substitutes_engl, engl_ground_truth)
    # --- English evaluation with synonym merging ---
    print("\n\nEval method 1:")
    engl_dict1 = engl_dict.copy()
    engl_dict1 = revise_subs.engl_combined_substitutes_dict(engl_dict1)
    print("entire dataset")
    eval_dataset(engl_dict1)
    print("\nonly ground truth:")
    ground_truth_substitutes_engl = get_ground_truth_substitutes(engl_dict1, engl_ground_truth)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes_engl)
    eval_ground_truth(ground_truth_substitutes_engl, engl_ground_truth)
    # --- Re-score the German models on the hand-maintained ground truth ---
    print("\nReevaluate German Data: ")
    eval_ground_truth(substitutes_dict, german_ground_truth)
    print("\nwith syn1")
    eval_ground_truth(new_substitutes_dict1, german_ground_truth)
    # print("Truthahn results 1: " + str(new_substitutes_dict1["Truthahn"]))
    print("\nwith syn2")
    eval_ground_truth(new_substitutes_dict2, german_ground_truth)
    # print("Truthahn results 2: " + str(new_substitutes_dict2["Truthahn"]))
    #
    # engl_substitutes_dict = get_ground_truth_substitutes(engl_dict1, german_ground_truth)
    #
    # engl_new_substitutes_dict1 = new_substitutes_dict1.copy()
    # engl_new_substitutes_dict2 = new_substitutes_dict2.copy()
# Guard the entry point so importing this module (e.g. from other evaluation
# scripts or tests) does not trigger the full evaluation run with its file
# I/O and heavy printing.
if __name__ == "__main__":
    main()