import json
import statistics


def _load_json(path):
    """Return the parsed contents of the JSON file at *path*."""
    # JSON text is UTF-8; relying on the platform default encoding can
    # mis-decode non-ASCII keys such as "Spätzle" and break later lookups.
    with open(path, "r", encoding="utf-8") as json_file:
        return json.load(json_file)


def main():
    """Print occurrence totals and summary statistics for the ground truth.

    Reads three JSON files relative to the current working directory:

    * ``data/ground_truth.json`` -- maps each base entry to a sized
      collection (presumably its substitutes; only ``len()`` is used here).
    * ``data/mult_ingredients_nice.json`` -- maps an entry name to its
      occurrence value (presumably an integer count -- confirm against the
      data; values are only combined with ``+`` and printed).
    * ``data/synonyms.json`` -- maps a base entry to an iterable of synonym
      names whose occurrence values are added to the base entry's value.

    For every base entry one ``"<base> occurrences: <total>"`` line is
    printed, followed by mean, median, sample standard deviation, minimum
    and maximum of the per-entry collection sizes.

    Raises:
        OSError: if one of the JSON files cannot be opened.
        json.JSONDecodeError: if a file does not contain valid JSON.
        KeyError: if a base entry or one of its synonyms has no occurrence
            value in the ingredients file.
    """
    ground_truth_path = "data/ground_truth.json"
    # Alternative data set:
    # ground_truth_path = "evaluation/engl_data/engl_ground_truth.json"
    ground_truth_dict = _load_json(ground_truth_path)

    ingredients_path = "data/mult_ingredients_nice.json"
    # Alternative data set:
    # ingredients_path = "data/cleaned_steps_occurrance.json"
    ingredients_occurrences = _load_json(ingredients_path)

    synonyms_path = "data/synonyms.json"
    synonyms_dict = _load_json(synonyms_path)

    # A set gives O(1) membership tests inside the loop below.
    german_words = {
        "Spätzle",
        "Schwarzbrot",
        "Schupfnudeln",
        "Bratwürste_Nürnberger",
        "Vanillinzucker",
        "Bier",
        "Semmelknödel",
        "Rote_Bete",
        "Eisbeine",
        "Spargel_weiß",
    }
    # Only consumed by the optional debug output at the end of the function.
    german_total = 0
    other_total = 0

    all_counts = []
    for base, substitutes in ground_truth_dict.items():
        # Optional debug output:
        # print(base + " substitutes: " + str(len(substitutes)))
        all_counts.append(len(substitutes))

        curr_occurrences = ingredients_occurrences[base]
        for syn in synonyms_dict.get(base, ()):
            # Plain `+` instead of `+=`: if the stored values are mutable
            # (e.g. lists), `+=` would modify the loaded data in place and
            # leak into later lookups of the same key.
            curr_occurrences = curr_occurrences + ingredients_occurrences[syn]
        print(f"{base} occurrences: {curr_occurrences}")

        if base in german_words:
            german_total += len(substitutes)
        else:
            other_total += len(substitutes)

    if not all_counts:
        # statistics.mean/median and min/max all fail on empty input.
        print("No ground-truth entries found.")
        return

    print(f"Average: {statistics.mean(all_counts)}")
    print(f"Median: {statistics.median(all_counts)}")
    if len(all_counts) > 1:
        print(f"Standard deviation: {statistics.stdev(all_counts)}")
    else:
        # The sample standard deviation is undefined for a single value;
        # statistics.stdev would raise StatisticsError here.
        print("Standard deviation: n/a (needs at least two entries)")
    print(f"Min: {min(all_counts)}")
    print(f"Max: {max(all_counts)}")

    # Optional debug output:
    # print("german total: " + str(german_total))
    # print("other total: " + str(other_total))


if __name__ == "__main__":
    main()