initial commit of project
This commit is contained in:
48
evaluation/ground_truth_stats.py
Normal file
48
evaluation/ground_truth_stats.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import json
|
||||
import statistics
|
||||
|
||||
|
||||
def _load_json(path):
    """Return the parsed contents of the JSON file at *path*."""
    # Explicit UTF-8: ingredient names such as "Spätzle" are non-ASCII, and the
    # platform default encoding is not UTF-8 everywhere.
    with open(path, "r", encoding="utf-8") as json_file:
        return json.load(json_file)


def main():
    """Print substitute-count statistics for the ground truth data.

    Reads three JSON files (paths are relative to the current working
    directory):

    * the ground truth, mapping each base ingredient to its substitutes
      (only ``len()`` of each value is used here),
    * the ingredient occurrences, indexed by ingredient name,
    * the synonyms, mapping an ingredient to an iterable of synonym names.

    For every base ingredient the occurrences of the ingredient and of all
    its synonyms are added up and printed. Afterwards the mean, median,
    standard deviation, minimum and maximum of the number of substitutes per
    base ingredient are printed.

    Raises:
        OSError: if one of the input files cannot be opened.
        KeyError: if a base ingredient or one of its synonyms has no entry
            in the occurrences file.
    """
    ground_truth_path = "data/ground_truth.json"
    # Alternative data set:
    # ground_truth_path = "evaluation/engl_data/engl_ground_truth.json"
    ground_truth_dict = _load_json(ground_truth_path)

    ingredients_path = "data/mult_ingredients_nice.json"
    # Alternative occurrences file:
    # ingredients_path = "data/cleaned_steps_occurrance.json"
    ingredients_occurrences = _load_json(ingredients_path)

    synonyms_path = "data/synonyms.json"
    synonyms_dict = _load_json(synonyms_path)

    german_words = ["Spätzle", "Schwarzbrot", "Schupfnudeln", "Bratwürste_Nürnberger", "Vanillinzucker", "Bier", "Semmelknödel", "Rote_Bete", "Eisbeine", "Spargel_weiß"]
    # Substitute totals split by membership in german_words; they are only
    # reported by the optional debug prints at the end of this function.
    german_total = 0
    other_total = 0

    all_counts = []
    for base, substitutes in ground_truth_dict.items():
        substitute_count = len(substitutes)
        all_counts.append(substitute_count)

        # Occurrences of the base ingredient plus those of all its synonyms.
        # Rebinding (instead of "+=") leaves the loaded data untouched even
        # if the stored values should turn out to be mutable.
        curr_occurrences = ingredients_occurrences[base]
        for syn in synonyms_dict.get(base, ()):
            curr_occurrences = curr_occurrences + ingredients_occurrences[syn]
        print(base + " occurrences: " + str(curr_occurrences))

        if base in german_words:
            german_total += substitute_count
        else:
            other_total += substitute_count

    if not all_counts:
        # mean/median/min/max are undefined without any data.
        print("No ground truth entries found.")
        return

    print("Average: " + str(statistics.mean(all_counts)))
    print("Median: " + str(statistics.median(all_counts)))
    if len(all_counts) >= 2:
        print("Standard deviation: " + str(statistics.stdev(all_counts)))
    else:
        # The sample standard deviation needs at least two data points.
        print("Standard deviation: n/a")
    print("Min: " + str(min(all_counts)))
    print("Max: " + str(max(all_counts)))

    # Optional debug output:
    # print("german total: " + str(german_total))
    # print("other total: " + str(other_total))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the report only when executed as a script, so that importing this
    # module does not trigger file I/O and printing.
    main()
|
||||
Reference in New Issue
Block a user