"""Evaluate predicted ingredient substitutes against ground-truth data.

Prints dataset statistics and precision/recall numbers for several variants
of the substitutes dictionary (raw, with substitute synonyms merged, with
all synonyms merged), for both the German and the English data.

NOTE(review): the original file's indentation was lost; the statement
nesting below (especially inside eval_ground_truth's loop) was
reconstructed from context — confirm against the original layout.
"""

import json
import statistics


def eval_dataset(substitutes_dict):
    """Print size statistics for a mapping ingredient -> list of substitutes.

    Reports the number of ingredients, how many have no substitutes at all,
    and the average/median/max/min substitute-list length.
    """
    if not substitutes_dict:
        # The original raised ZeroDivisionError / StatisticsError here.
        print("number of ingredients: 0")
        return
    all_lengths = [len(subs) for subs in substitutes_dict.values()]
    nones = all_lengths.count(0)
    print(f"number of ingredients: {len(substitutes_dict)}")
    print(f"number of nones: {nones}")
    print(f"average number of subs: {sum(all_lengths) / len(substitutes_dict)}")
    print(f"median number of subs: {statistics.median(all_lengths)}")
    print(f"largest number of subs: {max(all_lengths)}")
    print(f"smallest number of subs: {min(all_lengths)}")


def translate_engl_ground_truth(ground_truth, ger_transl):
    """Translate an English ground-truth mapping into German.

    Substitutes without an entry in ger_transl are silently dropped.
    Raises KeyError if a base ingredient itself has no translation
    (unchanged from the original behaviour).
    """
    new_ground_truth = {}
    for base_ingr, substitutes in ground_truth.items():
        new_ground_truth[ger_transl[base_ingr]] = [
            ger_transl[subst] for subst in substitutes if subst in ger_transl
        ]
    return new_ground_truth


def eval_ground_truth(substitutes_dict, ground_truth_dict):
    """Score predictions against the ground truth and print statistics.

    Prints per-ingredient precision/recall, running totals split into
    German-specific vs. other ingredients, and overall precision, recall,
    and correctness counts.
    """
    if not ground_truth_dict:
        # Guard: statistics.median([]) below would raise otherwise.
        print("no ground truth ingredients to evaluate")
        return

    total_corr_int = 0
    total_corr_list = []          # per-ingredient count of correct substitutes
    total_incorr_int = 0
    total_subs_ground_truth = 0   # denominator for recall
    test_prec = 0                 # sum of per-ingredient precisions
    highest_prec = [0, []]        # [best precision, ingredients achieving it]
    highest_recall = [0, []]
    other_corr = 0
    other_incorr = 0
    ger_corr = 0
    ger_incorr = 0
    # Ingredients considered specific to German cuisine.
    german_words = ["Spätzle", "Schwarzbrot", "Schupfnudeln",
                    "Bratwürste_Nürnberger", "Vanillinzucker", "Bier",
                    "Semmelknödel", "Rote_Bete", "Eisbeine", "Spargel_weiß"]
    # Group sizes were hard-coded as 10 / 30 / 40 in the original; derive
    # them from the data so other ground-truth sets also work.
    num_german = sum(1 for ingr in ground_truth_dict if ingr in german_words)
    num_other = len(ground_truth_dict) - num_german

    for ingredient in ground_truth_dict:
        correct = 0
        incorrect = 0
        correct_list = []
        incorrect_list = []
        # .get: an ingredient without predictions counts as zero substitutes
        # (the original raised KeyError).
        for sub in substitutes_dict.get(ingredient, []):
            if sub in ground_truth_dict[ingredient]:
                correct += 1
                correct_list.append(sub)
            else:
                incorrect += 1
                incorrect_list.append(sub)
        total_corr_int += correct
        total_incorr_int += incorrect
        total_corr_list.append(correct)
        total_subs_ground_truth += len(ground_truth_dict[ingredient])

        if correct > 0:
            curr_recall = correct / len(ground_truth_dict[ingredient])
            curr_prec = correct / (correct + incorrect)
            test_prec += curr_prec
            # Ties extend the winner list; a strictly better value resets it.
            if curr_prec == highest_prec[0]:
                highest_prec[1].append(ingredient)
            if curr_prec > highest_prec[0]:
                highest_prec[0] = curr_prec
                highest_prec[1] = [ingredient]
            if curr_recall == highest_recall[0]:
                highest_recall[1].append(ingredient)
            if curr_recall > highest_recall[0]:
                highest_recall[0] = curr_recall
                highest_recall[1] = [ingredient]
            print(ingredient + ": " + str(curr_prec) + " ..... " + str(curr_recall))

        if ingredient in german_words:
            ger_corr += correct
            ger_incorr += incorrect
        else:
            other_corr += correct
            other_incorr += incorrect
        if ingredient == "Zucker":
            print("correct: " + str(correct_list) + ", incorrect: " + str(incorrect_list))
        # NOTE(review): running per-group totals, printed every iteration in
        # the reconstructed layout — confirm. Normalised by group size
        # (was hard-coded /10 and /30).
        ger_total = ger_corr + ger_incorr
        other_total = other_corr + other_incorr
        print("ger_total: " + str(ger_total / num_german if num_german else 0.0))
        print("other_total: " + str(other_total / num_other if num_other else 0.0))
        print(ingredient + ": " + str(correct_list) + " / " + str(incorrect_list))

    total_preds = total_corr_int + total_incorr_int
    print("precision: " + str(total_corr_int / total_preds if total_preds else 0.0))
    # Averaged over all ground-truth ingredients (was hard-coded /40).
    print("(average precision:) " + str(test_prec / len(ground_truth_dict)))
    print("recall: " + str(total_corr_int / total_subs_ground_truth
                           if total_subs_ground_truth else 0.0))
    print("median number of correct subs (ground truth): "
          + str(statistics.median(total_corr_list)))
    print("average number of correct subs (ground truth): "
          + str(statistics.mean(total_corr_list)))
    at_least_3 = sum(1 for nr in total_corr_list if nr >= 3)
    no_corr = sum(1 for nr in total_corr_list if nr < 1)
    print("ingredients with at least 3 correct substitutes: " + str(at_least_3))
    print("ingredients with no correct substitutes: " + str(no_corr))
    print("highest precision: " + str(highest_prec[1]) + ": " + str(highest_prec[0]))
    print("highest recall: " + str(highest_recall[1]) + ": " + str(highest_recall[0]))


def get_ground_truth_substitutes(substitutes_dict, ground_truth_dict):
    """Restrict substitutes_dict to the ingredients in the ground truth.

    Ingredients missing from substitutes_dict map to an empty list
    (the original raised KeyError).
    """
    return {ingredient: substitutes_dict.get(ingredient, [])
            for ingredient in ground_truth_dict}


def main():
    """Run the full German + English evaluation pipeline."""
    # Deferred import: only main() needs the project-specific synonym
    # helpers, so the evaluation functions above stay importable without them.
    import helpers.revise_substitutes as revise_subs

    substitutes_path = "final_Versions/models/vers3/eval/complete_substitute_pairs_50.json"
    with open(substitutes_path, "r") as whole_json_file:
        substitutes_dict = json.load(whole_json_file)
    ground_truth_path = "data/ground_truth.json"
    with open(ground_truth_path, "r") as whole_json_file:
        ground_truth_dict = json.load(whole_json_file)

    print("no synonyms at all:")
    print("entire dataset")
    eval_dataset(substitutes_dict)
    print("\nonly ground truth:")
    ground_truth_substitutes0 = get_ground_truth_substitutes(substitutes_dict, ground_truth_dict)
    eval_dataset(ground_truth_substitutes0)
    eval_ground_truth(substitutes_dict, ground_truth_dict)
    print("======================================")

    print("\nsynonyms of substitutes only: ")
    new_substitutes_dict1 = substitutes_dict.copy()
    new_substitutes_dict1 = revise_subs.combined_substitutes_dict(new_substitutes_dict1)
    print("entire dataset")
    eval_dataset(new_substitutes_dict1)
    print("\nonly ground truth:")
    ground_truth_substitutes1 = get_ground_truth_substitutes(new_substitutes_dict1, ground_truth_dict)
    eval_dataset(ground_truth_substitutes1)
    eval_ground_truth(new_substitutes_dict1, ground_truth_dict)
    print("======================================")

    print("\nsynonyms of everything: ")
    new_substitutes_dict2 = substitutes_dict.copy()
    new_substitutes_dict2 = revise_subs.combine_all_synonyms(new_substitutes_dict2)
    print("entire dataset")
    eval_dataset(new_substitutes_dict2)
    print("\nonly ground truth:")
    ground_truth_substitutes2 = get_ground_truth_substitutes(new_substitutes_dict2, ground_truth_dict)
    eval_dataset(ground_truth_substitutes2)
    eval_ground_truth(new_substitutes_dict2, ground_truth_dict)
    print("======================================")
    print("======================================")

    print("English Evaluation")
    engl_data_path = "evaluation/engl_data/"
    # Hand-curated German ground truth used to re-evaluate the German models
    # on the same ingredients covered by the English data.
    german_ground_truth = {
        "Karotte": ["Pastinake", "Steckrübe", "Staudensellerie", "Kürbis",
                    "Süßkartoffel", "Rettich", "Radieschen", "Kartoffel",
                    "Paprika_rot", "Butternusskürbis", "Petersilienwurzel",
                    "Rübe"],
        "Kirsche": ["Aprikose", "Pflaume", "Nektarine", "Himbeeren",
                    "Weintrauben", "Erdbeere", "Johannisbeeren", "Brombeeren",
                    "Beeren_gemischte", "Pfirsich", "Cranberries",
                    "Cranberries_getrocknet", "Blaubeeren", "Maraschino",
                    "Beeren", "Trockenpflaumen"],
        "Huhn": ["Truthahn", "Kaninchen", "Austernpilze", "Kalbfleisch",
                 "Fisch", "Tofu", "Rindfleisch", "Tofu_fester",
                 "Schweinefleisch", "Seitan", "Ente", "Lamm", "Pilze",
                 "Shrimps", "Wachtel", "Gans", "Wildfleisch"],
        "Petersilie": ["Kerbel", "Koriander", "Estragon", "Basilikum",
                       "Oregano", "Liebstöckel", "Dill", "Koriandergrün",
                       "Rosmarin", "Kapern", "Thymian", "Schnittlauch",
                       "Minze", "Basilikum_getrockneter", "Oregano_getrocknet",
                       "Thymian_getrocknet"],
        "Schokolade": ["Nutella", "Kakaopulver_Instant", "Zucker", "Marmelade",
                       "Marshmallow", "Kakao", "Süßigkeiten", "Erdnussbutter"],
        "Frühstücksspeck": ["Pancetta", "Schinken_Prosciutto", "Speck",
                            "Schinken_rohen", "Parmaschinken", "Schinken",
                            "Salami", "Chorizo", "Wurst_Krakauer",
                            "Schweineschwarte", "Schinkenwürfel", "Croûtons",
                            "Speckwürfel", "Kochschinken", "Corned_Beef",
                            "Wurst_Mortadella"],
        "Grünkohl": ["Spinat", "Chinakohl", "Lauch", "Endiviensalat",
                     "Mangold", "Wirsing", "Kohl", "Blumenkohl",
                     "Brunnenkresse", "Rucola", "Blattspinat", "Kopfsalat",
                     "Römersalat", "Babyspinat"],
        "Zucker": ["Honig", "Stevia", "Süßstoff", "Stevia_flüssig",
                   "Süßstoff_flüssigen", "Reissirup", "Ahornsirup",
                   "Kondensmilch_gezuckerte", "Agavendicksaft", "Schokolade",
                   "Vanille", "Melasse", "Zuckerrübensirup", "Sirup"],
        "Brie": ["Camembert", "Gorgonzola", "Schmelzkäse", "Cheddarkäse",
                 "Ziegenkäse", "Doppelrahmfrischkäse", "Blauschimmelkäse",
                 "Roquefort", "Gouda", "Käse_Fontina", "Käse_Provolone",
                 "Feta_Käse", "Scheiblettenkäse"],
        "Truthahn": ["Huhn", "Kaninchen", "Ente", "Kochschinken", "Fasan",
                     "Gans", "Rindfleisch", "Lammfleisch", "Schweinefleisch",
                     "Roastbeef", "Kalbfleisch", "Geflügelfleisch",
                     "Hähnchenfilet", "Hühnerkeule", "Wachtel",
                     "schweinekotelett", "Wildfleisch"]
    }

    with open(engl_data_path + "translation.json", "r") as whole_json_file:
        ger_transl = json.load(whole_json_file)
    with open(engl_data_path + "substitute_pairs_foodbert_text.json", "r") as whole_json_file:
        engl_list = json.load(whole_json_file)
    with open(engl_data_path + "revised_engl_ground_truth.json", "r") as whole_json_file:
        engl_ground_truth = json.load(whole_json_file)
    # ger_transl is loaded for the (currently disabled) translation step below.
    _ = ger_transl

    # Group the (ingredient, substitute) pairs into a dict of lists.
    engl_dict = {}
    for pair in engl_list:
        engl_dict.setdefault(pair[0], []).append(pair[1])

    # translate english ground truth to german for comparison
    # any ingredients that aren't in the german dataset are removed
    # translated_ground_truth = translate_engl_ground_truth(engl_ground_truth, ger_transl)
    print("Eval English results")
    print("entire dataset")
    eval_dataset(engl_dict)
    orig_engl_dict = engl_dict.copy()
    print("\nonly ground truth:")
    ground_truth_substitutes_engl = get_ground_truth_substitutes(orig_engl_dict, engl_ground_truth)
    eval_dataset(ground_truth_substitutes_engl)
    eval_ground_truth(ground_truth_substitutes_engl, engl_ground_truth)

    print("\n\nEval method 1:")
    engl_dict1 = engl_dict.copy()
    engl_dict1 = revise_subs.engl_combined_substitutes_dict(engl_dict1)
    print("entire dataset")
    eval_dataset(engl_dict1)
    print("\nonly ground truth:")
    ground_truth_substitutes_engl = get_ground_truth_substitutes(engl_dict1, engl_ground_truth)
    eval_dataset(ground_truth_substitutes_engl)
    eval_ground_truth(ground_truth_substitutes_engl, engl_ground_truth)

    print("\nReevaluate German Data: ")
    eval_ground_truth(substitutes_dict, german_ground_truth)
    print("\nwith syn1")
    eval_ground_truth(new_substitutes_dict1, german_ground_truth)
    print("\nwith syn2")
    eval_ground_truth(new_substitutes_dict2, german_ground_truth)


if __name__ == "__main__":
    main()