Files
MasterarbeitCode/evaluation/ground_truth_stats.py
2021-04-11 23:28:41 +02:00

48 lines
1.9 KiB
Python

import json
import statistics
# Base ingredients that the evaluation treats as German-specific. They are only
# used to split the substitute totals into "german" and "other" on request.
_GERMAN_WORDS = frozenset({
    "Spätzle",
    "Schwarzbrot",
    "Schupfnudeln",
    "Bratwürste_Nürnberger",
    "Vanillinzucker",
    "Bier",
    "Semmelknödel",
    "Rote_Bete",
    "Eisbeine",
    "Spargel_weiß",
})


def _load_json(path):
    """Read and parse the UTF-8 encoded JSON file at *path*."""
    # JSON text is UTF-8 by specification. Relying on the platform default
    # encoding would garble keys such as "Spätzle" on non-UTF-8 locales.
    with open(path, "r", encoding="utf-8") as json_file:
        return json.load(json_file)


def main(
    ground_truth_path: str = "data/ground_truth.json",
    ingredients_path: str = "data/mult_ingredients_nice.json",
    synonyms_path: str = "data/synonyms.json",
    *,
    print_language_totals: bool = False,
) -> None:
    """Print occurrence counts and substitute-count statistics for the ground truth.

    For every base ingredient in the ground truth, one line with its occurrence
    value (its own value plus the values of all of its synonyms) is printed.
    Afterwards the mean, median, sample standard deviation, minimum and maximum
    of the number of substitutes per base ingredient are printed.

    Args:
        ground_truth_path: JSON file mapping each base ingredient to a sized
            collection of substitutes (only its length is used). The English
            data set lives at "evaluation/engl_data/engl_ground_truth.json".
        ingredients_path: JSON file mapping ingredient names to occurrence
            values (presumably integer counts; they are only added and
            printed). "data/cleaned_steps_occurrance.json" is an alternative
            source.
        synonyms_path: JSON file mapping a base ingredient to an iterable of
            synonym names. Base ingredients without synonyms may be absent.
        print_language_totals: When true, additionally print the total number
            of substitutes for the German-specific base ingredients and for
            all remaining ones.

    Raises:
        KeyError: If a base ingredient or one of its synonyms is missing from
            the occurrence data.
        OSError: If one of the files cannot be opened.
        json.JSONDecodeError: If one of the files is not valid JSON.
    """
    ground_truth = _load_json(ground_truth_path)
    ingredient_occurrences = _load_json(ingredients_path)
    synonyms = _load_json(synonyms_path)

    substitute_counts = []
    german_total = 0
    other_total = 0
    for base, substitutes in ground_truth.items():
        substitute_count = len(substitutes)
        substitute_counts.append(substitute_count)

        # Build a new value instead of using "+=" so that the loaded
        # occurrence data is never modified in place.
        occurrences = ingredient_occurrences[base]
        for synonym in synonyms.get(base, ()):
            occurrences = occurrences + ingredient_occurrences[synonym]
        print(f"{base} occurrences: {occurrences}")

        if base in _GERMAN_WORDS:
            german_total += substitute_count
        else:
            other_total += substitute_count

    if not substitute_counts:
        # mean/median/min/max are undefined without data; report it instead
        # of failing with a StatisticsError.
        print("No ground truth entries found.")
        return

    print(f"Average: {statistics.mean(substitute_counts)}")
    print(f"Median: {statistics.median(substitute_counts)}")
    if len(substitute_counts) > 1:
        print(f"Standard deviation: {statistics.stdev(substitute_counts)}")
    else:
        # The sample standard deviation needs at least two data points.
        print("Standard deviation: undefined (fewer than two entries)")
    print(f"Min: {min(substitute_counts)}")
    print(f"Max: {max(substitute_counts)}")

    if print_language_totals:
        print(f"german total: {german_total}")
        print(f"other total: {other_total}")
# Run the report only when this file is executed as a script, so that
# importing the module has no side effects (no file reads, no output).
if __name__ == "__main__":
    main()