"""Evaluate ingredient-substitute predictions against ground-truth data (German and English)."""
import json
|
|
import statistics
|
|
import helpers.revise_substitutes as revise_subs
|
|
|
|
|
|
def eval_dataset(substitutes_dict):
    """Print summary statistics for a substitutes mapping.

    Args:
        substitutes_dict: dict mapping ingredient name -> list of substitute names.

    Prints the number of ingredients, how many have no substitutes at all
    ("nones"), and the average / median / largest / smallest substitute-list
    length. Output format is unchanged from the original implementation.
    """
    if not substitutes_dict:
        # Guard the degenerate case: the division and min/max/median below
        # would all raise on an empty mapping.
        print("number of ingredients: 0")
        return

    # One length entry per ingredient; "nones" counts ingredients with no substitutes.
    all_lengths = [len(subs) for subs in substitutes_dict.values()]
    nones = sum(1 for length in all_lengths if length == 0)

    print("number of ingredients: " + str(len(substitutes_dict)))
    print("number of nones: " + str(nones))
    print("average number of subs: " + str(sum(all_lengths) / len(substitutes_dict)))
    print("median number of subs: " + str(statistics.median(all_lengths)))
    print("largest number of subs: " + str(max(all_lengths)))
    print("smallest number of subs: " + str(min(all_lengths)))
|
|
|
|
|
|
def translate_engl_ground_truth(ground_truth, ger_transl):
    """Translate an English ground-truth mapping into German names.

    Args:
        ground_truth: dict mapping English base ingredient -> list of English substitutes.
        ger_transl: dict mapping English ingredient name -> German ingredient name.

    Returns:
        dict mapping translated base ingredient -> list of translated substitutes.

    Entries without a translation are dropped: substitutes missing from
    ger_transl are skipped (as before), and — bug fix — base ingredients
    missing from ger_transl are now skipped as well instead of raising
    KeyError, matching the caller's stated intent of removing ingredients
    that are not in the German dataset.
    """
    new_ground_truth = {}
    for base_ingr, substitutes in ground_truth.items():
        if base_ingr not in ger_transl:
            # No German name for the base ingredient -> drop the whole entry.
            continue
        new_ground_truth[ger_transl[base_ingr]] = [
            ger_transl[subst] for subst in substitutes if subst in ger_transl
        ]
    return new_ground_truth
|
|
|
|
|
|
def eval_ground_truth(substitutes_dict, ground_truth_dict):
    """Compare predicted substitutes against a ground-truth mapping and print metrics.

    Args:
        substitutes_dict: dict mapping ingredient -> list of predicted substitutes.
            Must contain an entry for every key of ground_truth_dict (KeyError otherwise).
        ground_truth_dict: dict mapping ingredient -> list of accepted substitutes.

    Prints per-ingredient precision/recall, micro-averaged precision and
    recall, a macro-averaged precision, distribution statistics of correct
    predictions, and the ingredient(s) with the highest precision/recall.
    """
    # Micro-averaged counters over all ingredients.
    total_corr_int = 0
    total_corr_list = []          # correct-prediction count per ingredient
    total_incorr_int = 0
    total_incorr_list = []        # incorrect-prediction count per ingredient
    total_subs_ground_truth = 0   # total ground-truth substitutes (recall denominator)
    test_prec = 0                 # running sum of per-ingredient precision (macro average)

    # Each tracker is [best value, list of ingredients achieving it].
    highest_prec = [0,[]]
    highest_recall = [0,[]]

    # Counters split into "typically German" ingredients vs. the rest.
    other_corr = 0
    other_incorr = 0
    ger_corr = 0
    ger_incorr = 0
    ger_total = 0
    other_total = 0

    # Hand-picked German-cuisine ingredients used for the ger/other split below.
    german_words = ["Spätzle", "Schwarzbrot", "Schupfnudeln", "Bratwürste_Nürnberger", "Vanillinzucker", "Bier", "Semmelknödel", "Rote_Bete", "Eisbeine", "Spargel_weiß"]

    for ingredient in ground_truth_dict:
        correct = 0
        incorrect = 0
        correct_list = []
        incorrect_list = []

        # print("\n" + ingredient + ": " + str(len(substitutes_dict[ingredient])))
        # Classify every predicted substitute as a hit or a miss w.r.t. the ground truth.
        for sub in substitutes_dict[ingredient]:
            if sub in ground_truth_dict[ingredient]:
                # print(sub)
                correct += 1
                correct_list.append(sub)
            else:
                incorrect += 1
                incorrect_list.append(sub)

        total_corr_int += correct
        total_incorr_int += incorrect
        total_corr_list.append(correct)
        total_incorr_list.append(incorrect)
        total_subs_ground_truth += len(ground_truth_dict[ingredient])

        if correct > 0:
            # Per-ingredient precision/recall; only computed when at least one hit,
            # so test_prec never accumulates 0-precision ingredients.
            curr_recall = correct/len(ground_truth_dict[ingredient])
            curr_prec = correct/(correct+incorrect)
            test_prec += curr_prec
            # The == check must precede the > check: on a tie we append, on a new
            # maximum the list is replaced (swapping the order would duplicate entries).
            if curr_prec == highest_prec[0]:
                highest_prec[1].append(ingredient)
            if curr_prec > highest_prec[0]:
                highest_prec[0] = curr_prec
                highest_prec[1] = [ingredient]
            if curr_recall == highest_recall[0]:
                highest_recall[1].append(ingredient)
            if curr_recall > highest_recall[0]:
                highest_recall[0] = curr_recall
                highest_recall[1] = [ingredient]
            print(ingredient + ": " + str(curr_prec) + " ..... " + str(curr_recall))

        # Accumulate into the German vs. other buckets.
        if ingredient in german_words:
            ger_corr += correct
            ger_incorr += incorrect
        else:
            other_corr += correct
            other_incorr += incorrect

        if ingredient == "Zucker":
            # Debug output for one hand-inspected ingredient.
            print("correct: " + str(correct_list) + ", incorrect: " + str(incorrect_list))

    ger_total = ger_corr + ger_incorr
    other_total = other_corr + other_incorr

    # NOTE(review): the divisors 10 and 30 look like hard-coded counts of German
    # vs. other ingredients — confirm they match the actual ground-truth split,
    # especially when this is called with the English ground truth.
    print("ger_total: " + str(ger_total/10))
    print("other_total: " + str(other_total/30))

    # print(correct)
    # NOTE(review): 'ingredient', 'correct_list' and 'incorrect_list' here hold the
    # values of the LAST loop iteration only — verify this output is intentional.
    print(ingredient + ": " + str(correct_list) + " / " + str(incorrect_list))

    # Micro-averaged precision over all predictions combined.
    print("precision: " + str(total_corr_int / (total_corr_int + total_incorr_int)))
    # NOTE(review): macro average assumes exactly 40 ground-truth ingredients (hard-coded).
    print("(average precision:) " + str(test_prec/40))
    print("recall: " + str(total_corr_int / total_subs_ground_truth))
    print("median number of correct subs (ground truth): " + str(statistics.median(total_corr_list)))
    print("average number of correct subs (ground truth): " + str(statistics.mean(total_corr_list)))

    # Count ingredients reaching at least 3 correct substitutes, and those with none.
    at_least_3 = 0
    no_corr = 0
    for nr in total_corr_list:
        if nr >= 3:
            at_least_3 += 1
        if nr < 1:
            no_corr += 1
    print("ingredients with at least 3 correct substitutes: " + str(at_least_3))
    print("ingredients with no correct substitutes: " + str(no_corr))
    print("highest precision: " + str(highest_prec[1]) + ": " + str(highest_prec[0]))
    print("highest recall: " + str(highest_recall[1]) + ": " + str(highest_recall[0]))

    # print("german precision: " + str(ger_corr/(ger_corr + ger_incorr)))
    # print("german correct:" + str(ger_corr))
    # print("precision rest: " + str(other_corr/(other_corr + other_incorr)))
    # print("other correct: " + str(other_corr))
|
|
|
|
|
|
def get_ground_truth_substitutes(substitutes_dict, ground_truth_dict):
    """Restrict a substitutes mapping to the ingredients present in the ground truth.

    Args:
        substitutes_dict: dict mapping ingredient -> list of predicted substitutes.
        ground_truth_dict: dict whose keys are the ground-truth ingredients.

    Returns:
        dict with one entry per ground-truth ingredient. Ingredients missing
        from substitutes_dict now map to an empty list instead of raising
        KeyError (robustness fix for datasets that do not cover every
        ground-truth ingredient).
    """
    return {
        ingredient: substitutes_dict.get(ingredient, [])
        for ingredient in ground_truth_dict
    }
|
|
|
|
|
|
|
|
def main():
    """Run the full evaluation pipeline.

    Loads the German substitute predictions and ground truth, evaluates them
    with no synonym merging, with substitute-synonym merging, and with full
    synonym merging; then repeats the evaluation for the English FoodBERT
    data and finally re-evaluates the German data against a hand-written
    German ground truth.
    """
    # --- German data: predictions and ground truth ---
    substitutes_path = "final_Versions/models/vers3/eval/complete_substitute_pairs_50.json"
    with open(substitutes_path, "r") as whole_json_file:
        substitutes_dict = json.load(whole_json_file)

    ground_truth_path = "data/ground_truth.json"
    with open(ground_truth_path, "r") as whole_json_file:
        ground_truth_dict = json.load(whole_json_file)

    # --- Evaluation 1: raw predictions, no synonym handling ---
    print("no synonyms at all:")
    print("entire dataset")
    eval_dataset(substitutes_dict)
    print("\nonly ground truth:")
    ground_truth_substitutes0 = get_ground_truth_substitutes(substitutes_dict, ground_truth_dict)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes0)
    eval_ground_truth(substitutes_dict, ground_truth_dict)

    print("======================================")

    # --- Evaluation 2: merge synonyms among the substitutes only ---
    print("\nsynonyms of substitutes only: ")
    new_substitutes_dict1 = substitutes_dict.copy()
    new_substitutes_dict1 = revise_subs.combined_substitutes_dict(new_substitutes_dict1)
    print("entire dataset")
    eval_dataset(new_substitutes_dict1)
    print("\nonly ground truth:")
    ground_truth_substitutes1 = get_ground_truth_substitutes(new_substitutes_dict1, ground_truth_dict)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes1)
    eval_ground_truth(new_substitutes_dict1, ground_truth_dict)

    print("======================================")

    # --- Evaluation 3: merge synonyms everywhere ---
    print("\nsynonyms of everything: ")
    new_substitutes_dict2 = substitutes_dict.copy()
    new_substitutes_dict2 = revise_subs.combine_all_synonyms(new_substitutes_dict2)
    print("entire dataset")
    eval_dataset(new_substitutes_dict2)
    print("\nonly ground truth:")
    ground_truth_substitutes2 = get_ground_truth_substitutes(new_substitutes_dict2, ground_truth_dict)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes2)
    eval_ground_truth(new_substitutes_dict2, ground_truth_dict)

    print("======================================")
    print("======================================")

    # --- English evaluation (FoodBERT substitute pairs) ---
    print("English Evaluation")

    # NOTE(review): data_path, occurances_path, evaluation_path and
    # synonyms_path are never used in this function — presumably leftover
    # configuration; confirm before removing.
    data_path = "data/"
    occurances_path = "mult_ingredients_nice.json"
    ground_truth_path = "ground_truth.json"
    engl_data_path = "evaluation/engl_data/"

    evaluation_path = "evaluation/"
    synonyms_path = "synonyms.json"

    # Hand-written German ground truth used for the re-evaluation at the end.
    german_ground_truth = {
        "Karotte": ["Pastinake", "Steckrübe", "Staudensellerie", "Kürbis", "Süßkartoffel", "Rettich", "Radieschen",
                    "Kartoffel", "Paprika_rot", "Butternusskürbis", "Petersilienwurzel", "Rübe"],
        "Kirsche": ["Aprikose", "Pflaume", "Nektarine", "Himbeeren", "Weintrauben", "Erdbeere", "Johannisbeeren",
                    "Brombeeren", "Beeren_gemischte", "Pfirsich", "Cranberries", "Cranberries_getrocknet", "Blaubeeren",
                    "Maraschino", "Beeren", "Trockenpflaumen"],
        "Huhn": ["Truthahn", "Kaninchen", "Austernpilze", "Kalbfleisch", "Fisch", "Tofu", "Rindfleisch", "Tofu_fester",
                 "Schweinefleisch", "Seitan", "Ente", "Lamm", "Pilze", "Shrimps", "Wachtel", "Gans", "Wildfleisch"],
        "Petersilie": ["Kerbel", "Koriander", "Estragon", "Basilikum", "Oregano", "Liebstöckel", "Dill",
                       "Koriandergrün", "Rosmarin", "Kapern", "Thymian", "Schnittlauch", "Minze",
                       "Basilikum_getrockneter", "Oregano_getrocknet", "Thymian_getrocknet"],
        "Schokolade": ["Nutella", "Kakaopulver_Instant", "Zucker", "Marmelade", "Marshmallow", "Kakao", "Süßigkeiten",
                       "Erdnussbutter"],
        "Frühstücksspeck": ["Pancetta", "Schinken_Prosciutto", "Speck", "Schinken_rohen", "Parmaschinken", "Schinken",
                            "Salami", "Chorizo", "Wurst_Krakauer", "Schweineschwarte", "Schinkenwürfel", "Croûtons",
                            "Speckwürfel", "Kochschinken", "Corned_Beef", "Wurst_Mortadella"],
        "Grünkohl": ["Spinat", "Chinakohl", "Lauch", "Endiviensalat", "Mangold", "Wirsing", "Kohl", "Blumenkohl",
                     "Brunnenkresse", "Rucola", "Blattspinat", "Kopfsalat", "Römersalat", "Babyspinat"],
        "Zucker": ["Honig", "Stevia", "Süßstoff", "Stevia_flüssig", "Süßstoff_flüssigen", "Reissirup", "Ahornsirup",
                   "Kondensmilch_gezuckerte", "Agavendicksaft", "Schokolade", "Vanille", "Melasse", "Zuckerrübensirup",
                   "Sirup"],
        "Brie": ["Camembert", "Gorgonzola", "Schmelzkäse", "Cheddarkäse", "Ziegenkäse", "Doppelrahmfrischkäse",
                 "Blauschimmelkäse", "Roquefort", "Gouda", "Käse_Fontina", "Käse_Provolone", "Feta_Käse",
                 "Scheiblettenkäse"],
        "Truthahn": ["Huhn", "Kaninchen", "Ente", "Kochschinken", "Fasan", "Gans", "Rindfleisch", "Lammfleisch",
                     "Schweinefleisch", "Roastbeef", "Kalbfleisch", "Geflügelfleisch", "Hähnchenfilet", "Hühnerkeule",
                     "Wachtel", "schweinekotelett", "Wildfleisch"]
    }

    # English->German translation table and English predictions/ground truth.
    with open(engl_data_path + "translation.json", "r") as whole_json_file:
        ger_transl = json.load(whole_json_file)

    with open(engl_data_path + "substitute_pairs_foodbert_text.json", "r") as whole_json_file:
        engl_list = json.load(whole_json_file)

    with open(engl_data_path + "revised_engl_ground_truth.json", "r") as whole_json_file:
        engl_ground_truth = json.load(whole_json_file)

    # The English data comes as a list of [ingredient, substitute] pairs;
    # group it into ingredient -> list-of-substitutes form.
    engl_dict = {}
    for foo in engl_list:
        if foo[0] in engl_dict.keys():
            engl_dict[foo[0]].append(foo[1])
        else:
            engl_dict[foo[0]] = [foo[1]]

    # translate english ground truth to german for comparison
    # any ingredients that aren't in the german dataset are removed
    # translated_ground_truth = translate_engl_ground_truth(engl_ground_truth, ger_transl)

    print("Eval English results")
    print("entire dataset")
    eval_dataset(engl_dict)
    orig_engl_dict = engl_dict.copy()
    # print("turkey results: " + str(orig_engl_dict["turkey"]))

    print("\nonly ground truth:")
    ground_truth_substitutes_engl = get_ground_truth_substitutes(orig_engl_dict, engl_ground_truth)
    # print(ground_truth_substitutes)
    eval_dataset(ground_truth_substitutes_engl)
    eval_ground_truth(ground_truth_substitutes_engl, engl_ground_truth)

    # English evaluation with substitute-synonym merging.
    print("\n\nEval method 1:")
    engl_dict1 = engl_dict.copy()
    engl_dict1 = revise_subs.engl_combined_substitutes_dict(engl_dict1)
    print("entire dataset")
    eval_dataset(engl_dict1)
    print("\nonly ground truth:")
    ground_truth_substitutes_engl = get_ground_truth_substitutes(engl_dict1, engl_ground_truth)
    # print(ground_truth_substitutes["Truthahn"])
    eval_dataset(ground_truth_substitutes_engl)
    eval_ground_truth(ground_truth_substitutes_engl, engl_ground_truth)

    # --- Re-evaluate the German predictions against the hand-written ground truth ---
    print("\nReevaluate German Data: ")
    eval_ground_truth(substitutes_dict, german_ground_truth)
    print("\nwith syn1")
    eval_ground_truth(new_substitutes_dict1, german_ground_truth)
    # print("Truthahn results 1: " + str(new_substitutes_dict1["Truthahn"]))
    print("\nwith syn2")
    eval_ground_truth(new_substitutes_dict2, german_ground_truth)
    # print("Truthahn results 2: " + str(new_substitutes_dict2["Truthahn"]))

    #
    # engl_substitutes_dict = get_ground_truth_substitutes(engl_dict1, german_ground_truth)
    #
    # engl_new_substitutes_dict1 = new_substitutes_dict1.copy()
    # engl_new_substitutes_dict2 = new_substitutes_dict2.copy()
|
|
|
|
|
|
main() |