Files
MasterarbeitCode/evaluation/helpers/revise_substitutes.py
2021-04-11 23:28:41 +02:00

166 lines
7.2 KiB
Python

import json
# Paths (relative to the current working directory) of the synonym table and
# the ground truth used by the non-English helpers below.
synonyms_path = "data/synonyms.json"
ground_truth_path = "data/ground_truth.json"

# Names that combined_substitutes_dict() and get_reversed_syn_dict_no_cat()
# test with ``in category_subs``; presumably broad ingredient categories
# rather than concrete ingredients -- TODO confirm with the data owner.
category_subs = [
    "Paprika", "Apfel", "Gouda", "Huhn", "Gans", "Kaninchen", "Kalbfleisch",
    "Schweinefleisch", "Ente", "Lamm", "Pute", "Wildfleisch", "Rindfleisch",
    "Speck", "Fisch", "Kohl", "Blattsalat", "Schokolade", "Kuvertüre",
    "Kuchenglasur", "Honig", "Sirup", "Joghurt", "Essig", "Traubensaft",
    "Truthahn", "Wein",
]

# Loaded at import time; passed to combined_substitutes_dict() at the end of
# this module.  The encoding is explicit because JSON text is UTF-8 and the
# platform default encoding is not guaranteed to be.
with open("final_Versions/models/vers3/eval/complete_substitute_pairs_50.json", encoding="utf-8") as f:
    sub_dict = json.load(f)
def engl_combined_substitutes_dict(found_substitutes_dict):
    """Clean up found substitutes using the English synonym table.

    For every ingredient the substitutes are processed as follows:

    * a substitute equal to the ingredient is dropped;
    * a substitute listed as a synonym of the ingredient is dropped;
    * a substitute that is a synonym of exactly one other ingredient is
      replaced by that ingredient;
    * a substitute that is a synonym of several ingredients is reported on
      stdout and dropped;
    * any other substitute is kept unchanged.

    Args:
        found_substitutes_dict: Mapping of ingredient name to an iterable of
            substitute names.

    Returns:
        A new dict mapping each ingredient to a list of unique substitutes,
        in the order in which they were first encountered.

    Raises:
        OSError: If ``evaluation/engl_data/engl_synonyms.json`` cannot be
            opened (paths are relative to the working directory).
    """
    with open("evaluation/engl_data/engl_synonyms.json", "r", encoding="utf-8") as synonyms_file:
        synonyms_dict = json.load(synonyms_file)
    reversed_synonyms_dict = get_reversed_syn_dict(is_engl=True)

    new_found_sub_dict = {}
    for ingredient, found_subs in found_substitutes_dict.items():
        own_synonyms = synonyms_dict.get(ingredient, ())
        # A dict is used as an insertion-ordered set so the result does not
        # depend on string hash randomization.
        kept = {}
        for sub in found_subs:
            # Skip the ingredient itself and its own synonyms.
            if sub == ingredient or sub in own_synonyms:
                continue
            bases = reversed_synonyms_dict.get(sub)
            if bases is None:
                # Not a synonym of anything: keep the name as found.
                kept[sub] = None
            elif len(bases) == 1:
                # Unambiguous synonym: record its base ingredient instead.
                kept[bases[0]] = None
            else:
                # Ambiguous synonym: report it and leave it out.
                print(sub + " is in " + str(bases))
        new_found_sub_dict[ingredient] = list(kept)
    return new_found_sub_dict
# merges substitutes with their synonyms, replaces synonyms with base synonym
def combined_substitutes_dict(found_substitutes_dict):
    """Merge substitutes with their synonyms, mapping synonyms to a base name.

    For every ingredient, substitutes equal to the ingredient or listed among
    its synonyms in the file at ``synonyms_path`` are dropped.  A remaining
    substitute that is itself a synonym is replaced according to
    ``_resolve_substitute``; substitutes that are not synonyms of anything
    are kept unchanged.

    Args:
        found_substitutes_dict: Mapping of ingredient name to an iterable of
            substitute names.

    Returns:
        A new dict mapping each ingredient to a list of unique substitutes,
        in the order in which they were first encountered.

    Raises:
        OSError: If the synonym file cannot be opened.
    """
    with open(synonyms_path, "r", encoding="utf-8") as synonyms_file:
        synonyms_dict = json.load(synonyms_file)
    reversed_synonyms_dict = get_reversed_syn_dict()

    new_found_sub_dict = {}
    for ingredient, found_subs in found_substitutes_dict.items():
        own_synonyms = synonyms_dict.get(ingredient, ())
        # A dict is used as an insertion-ordered set so the result does not
        # depend on string hash randomization.
        kept = {}
        for sub in found_subs:
            # Skip the ingredient itself and its own synonyms.
            if sub == ingredient or sub in own_synonyms:
                continue
            bases = reversed_synonyms_dict.get(sub)
            if bases is None:
                resolved = sub
            else:
                resolved = _resolve_substitute(sub, ingredient, bases)
            if resolved is not None:
                kept[resolved] = None
        new_found_sub_dict[ingredient] = list(kept)
    return new_found_sub_dict


def _resolve_substitute(sub, ingredient, bases):
    """Return the name to record for synonym ``sub``, or None if ambiguous.

    ``bases`` is the list of ingredients that have ``sub`` as a synonym.  A
    base contained in ``category_subs`` is only chosen when ``ingredient`` is
    itself in ``category_subs``.  Ambiguous cases are printed to stdout.
    """
    ingredient_is_category = ingredient in category_subs

    if len(bases) == 1:
        base = bases[0]
        if base not in category_subs or ingredient_is_category:
            return base
        # The only base is a category name but the ingredient is not one:
        # keep the more specific name that was found.
        return sub

    if len(bases) == 2:
        first_is_category = bases[0] in category_subs
        second_is_category = bases[1] in category_subs
        if first_is_category == second_is_category:
            # Both or neither base is a category name: nothing to choose by.
            print(bases)
            return None
        if first_is_category:
            category_base, plain_base = bases[0], bases[1]
        else:
            category_base, plain_base = bases[1], bases[0]
        return category_base if ingredient_is_category else plain_base

    # More than two bases: report and skip.
    print(sub + " is in " + str(bases))
    return None
# combine substitutes found for an ingredient and its synonyms
# also combine synonyms in substitutes
def combine_all_synonyms(found_substitutes_dict):
    """Merge the substitutes of synonymous ingredients, then clean them up.

    Every ingredient that appears as a key of the mapping returned by
    ``get_reversed_syn_dict_no_cat()`` has its substitutes merged into the
    entry of the first ingredient listed for it there; all other ingredients
    keep their own entry.  The merged mapping is then passed through
    ``combined_substitutes_dict``.

    Args:
        found_substitutes_dict: Mapping of ingredient name to an iterable of
            substitute names.

    Returns:
        The result of ``combined_substitutes_dict`` on the merged mapping.
    """
    reversed_synonyms_dict = get_reversed_syn_dict_no_cat()

    # Create the entries of all non-synonym ingredients first so that they
    # keep their input order in the merged mapping.
    merged = {
        ingredient: {}
        for ingredient in found_substitutes_dict
        if ingredient not in reversed_synonyms_dict
    }

    for ingredient, found_subs in found_substitutes_dict.items():
        bases = reversed_synonyms_dict.get(ingredient)
        target = ingredient if bases is None else bases[0]
        # setdefault: the base ingredient may have no entry of its own in the
        # input; indexing it directly would raise KeyError.
        merged.setdefault(target, {}).update(dict.fromkeys(found_subs))

    merged_lists = {ingredient: list(subs) for ingredient, subs in merged.items()}
    return combined_substitutes_dict(merged_lists)
def get_reversed_syn_dict(is_engl=False):
    """Build the inverse of a synonym table.

    The synonym file maps an ingredient to a list of its synonyms; the result
    maps each synonym to the list of ingredients that list it, in file order.

    Args:
        is_engl: If true, read ``evaluation/engl_data/engl_synonyms.json``;
            otherwise read the file at ``synonyms_path``.

    Returns:
        dict mapping synonym -> list of ingredients having that synonym.

    Raises:
        OSError: If the selected synonym file cannot be opened.
    """
    if is_engl:
        path = "evaluation/engl_data/engl_synonyms.json"
    else:
        path = synonyms_path
    # JSON text is UTF-8; do not rely on the platform default encoding.
    with open(path, "r", encoding="utf-8") as synonyms_file:
        synonyms_dict = json.load(synonyms_file)

    reversed_synonyms_dict = {}
    for ingredient, synonyms in synonyms_dict.items():
        for syn in synonyms:
            reversed_synonyms_dict.setdefault(syn, []).append(ingredient)
    return reversed_synonyms_dict
def get_reversed_syn_dict_no_cat():
    """Build the inverse synonym table, ignoring ``category_subs`` entries.

    Reads the file at ``synonyms_path`` (ingredient -> list of synonyms) and
    returns a mapping of each synonym to the list of ingredients that list
    it.  Ingredients contained in ``category_subs`` are skipped, so their
    synonyms are not attributed to them.

    Returns:
        dict mapping synonym -> list of ingredients having that synonym.

    Raises:
        OSError: If the synonym file cannot be opened.
    """
    # JSON text is UTF-8; do not rely on the platform default encoding.
    with open(synonyms_path, "r", encoding="utf-8") as synonyms_file:
        synonyms_dict = json.load(synonyms_file)

    reversed_synonyms_dict = {}
    for ingredient, synonyms in synonyms_dict.items():
        if ingredient in category_subs:
            continue
        for syn in synonyms:
            reversed_synonyms_dict.setdefault(syn, []).append(ingredient)
    return reversed_synonyms_dict
if __name__ == "__main__":
    # Run the clean-up only when executed as a script, so that importing the
    # helpers does not trigger a full pass over sub_dict.  The return value is
    # discarded; the visible output is whatever combined_substitutes_dict()
    # prints for substitutes it cannot resolve.
    combined_substitutes_dict(sub_dict)