import json
import statistics


def main():
    """Print per-ingredient occurrence counts and substitute-count statistics.

    Reads three JSON files from ./data:
      - ground_truth.json: maps each base ingredient to its list of substitutes
      - mult_ingredients_nice.json: maps ingredient names to occurrence counts
      - synonyms.json: maps a base ingredient to a list of synonym names

    For every base ingredient it prints the total occurrence count (base plus
    synonyms), then summary statistics over the number of substitutes.
    """
    ground_truth_path = "data/ground_truth.json"
    # ground_truth_path = "evaluation/engl_data/engl_ground_truth.json"
    with open(ground_truth_path, "r") as ground_truth_file:
        ground_truth_dict = json.load(ground_truth_file)

    ingredients_path = "data/mult_ingredients_nice.json"
    # ingredients_path = "data/cleaned_steps_occurrance.json"
    with open(ingredients_path, "r") as ingredients_file:
        ingredients_occurrences = json.load(ingredients_file)

    synonyms_path = "data/synonyms.json"
    with open(synonyms_path, "r") as synonyms_file:
        synonyms_dict = json.load(synonyms_file)

    # Ingredients treated as German-specific for the german/other split below.
    german_words = ["Spätzle", "Schwarzbrot", "Schupfnudeln", "Bratwürste_Nürnberger", "Vanillinzucker", "Bier", "Semmelknödel", "Rote_Bete", "Eisbeine", "Spargel_weiß"]

    german_total = 0
    other_total = 0

    all_counts = []          # number of substitutes per base ingredient
    occurrence_count = []    # total occurrences (base + synonyms) per base ingredient
    for base, substitutes in ground_truth_dict.items():
        # print(base + " substitutes: " + str(len(substitutes)))
        all_counts.append(len(substitutes))

        # Total occurrences = the base ingredient itself plus all of its synonyms.
        # NOTE(review): a base (or synonym) missing from ingredients_occurrences
        # raises KeyError, same as the original — presumably the data guarantees
        # coverage; verify against the data files.
        curr_occurrences = ingredients_occurrences[base]
        for syn in synonyms_dict.get(base, []):
            curr_occurrences += ingredients_occurrences[syn]
        occurrence_count.append(curr_occurrences)
        print(f"{base} occurrences: {curr_occurrences}")

        if base in german_words:
            german_total += len(substitutes)
        else:
            other_total += len(substitutes)

    # Summary statistics over the substitute counts.
    # statistics.stdev requires at least two data points, as before.
    print(f"Average: {statistics.mean(all_counts)}")
    print(f"Median: {statistics.median(all_counts)}")
    print(f"Standard deviation: {statistics.stdev(all_counts)}")
    print(f"Min: {min(all_counts)}")
    print(f"Max: {max(all_counts)}")

    # print("german total: " + str(german_total))
    # print("other total: " + str(other_total))
|
|
|
|
|
|
# Run only when executed as a script, not on import.
if __name__ == "__main__":
    main()