# File listing metadata (not part of the program): 166 lines, 7.2 KiB, Python
import json
|
|
# Paths of the synonym and ground-truth JSON files of the default data set.
synonyms_path = "data/synonyms.json"
ground_truth_path = "data/ground_truth.json"

# Ingredient names that get special handling when synonyms are resolved
# (see combined_substitutes_dict and get_reversed_syn_dict_no_cat).
category_subs = [
    "Paprika", "Apfel", "Gouda", "Huhn", "Gans", "Kaninchen", "Kalbfleisch",
    "Schweinefleisch", "Ente", "Lamm", "Pute", "Wildfleisch", "Rindfleisch",
    "Speck", "Fisch", "Kohl", "Blattsalat", "Schokolade", "Kuvertüre",
    "Kuchenglasur", "Honig", "Sirup", "Joghurt", "Essig", "Traubensaft",
    "Truthahn", "Wein",
]

# Substitute pairs to be processed, loaded once at import time.
# The encoding is given explicitly so that non-ASCII ingredient names decode
# the same way on every platform instead of depending on the locale default.
with open("final_Versions/models/vers3/eval/complete_substitute_pairs_50.json", encoding="utf-8") as f:
    sub_dict = json.load(f)
|
|
|
|
|
|
def engl_combined_substitutes_dict(found_substitutes_dict):
    """Normalize found substitutes using the English synonym data.

    For every ingredient the substitutes are filtered and mapped as follows:

    * a substitute equal to the ingredient itself is dropped,
    * a substitute listed as a synonym of the ingredient is dropped,
    * a substitute that is a synonym of exactly one base ingredient is
      replaced by that base ingredient,
    * a substitute that is a synonym of several base ingredients is dropped
      and reported on stdout,
    * any other substitute is kept unchanged.

    Args:
        found_substitutes_dict: mapping of ingredient -> iterable of
            substitute names.

    Returns:
        dict mapping every ingredient of the input to a duplicate-free list
        of normalized substitutes, in order of first occurrence.
    """
    with open("evaluation/engl_data/engl_synonyms.json", "r", encoding="utf-8") as synonyms_file:
        synonyms_dict = json.load(synonyms_file)

    reversed_synonyms_dict = get_reversed_syn_dict(is_engl=True)

    new_found_sub_dict = {}
    for ingredient, found_subs in found_substitutes_dict.items():
        own_synonyms = synonyms_dict.get(ingredient, ())
        # A dict is used as an insertion-ordered set so the resulting list
        # order is reproducible between runs.
        current_subs = {}
        for sub in found_subs:
            # Drop the ingredient itself and its own synonyms.
            if sub == ingredient or sub in own_synonyms:
                continue

            bases = reversed_synonyms_dict.get(sub)
            if bases is None:
                # Not a synonym of anything: keep as is.
                current_subs[sub] = None
            elif len(bases) == 1:
                # Replace the synonym by its single base ingredient.
                current_subs[bases[0]] = None
            else:
                # Ambiguous synonym: report it and leave it out.
                print(f"{sub} is in {bases}")

        new_found_sub_dict[ingredient] = list(current_subs)
    return new_found_sub_dict
|
|
|
|
|
|
# merges substitutes with their synonyms, replaces synonyms with base synonym
def combined_substitutes_dict(found_substitutes_dict):
    """Normalize found substitutes using the synonym file at ``synonyms_path``.

    For every ingredient each substitute is handled as follows:

    * equal to the ingredient, or listed as one of its synonyms: dropped;
    * not a synonym of anything: kept unchanged;
    * synonym of exactly one base ingredient: replaced by that base, unless
      the base is in ``category_subs`` while the ingredient is not, in which
      case the substitute itself is kept;
    * synonym of exactly two base ingredients of which exactly one is in
      ``category_subs``: replaced by the category base when the ingredient
      is itself in ``category_subs``, otherwise by the non-category base;
      if both or neither base is a category, the pair is printed and the
      substitute is dropped;
    * synonym of any other number of base ingredients: reported on stdout
      and dropped.

    Args:
        found_substitutes_dict: mapping of ingredient -> iterable of
            substitute names.

    Returns:
        dict mapping every ingredient of the input to a duplicate-free list
        of normalized substitutes, in order of first occurrence.
    """
    with open(synonyms_path, "r", encoding="utf-8") as synonyms_file:
        synonyms_dict = json.load(synonyms_file)

    reversed_synonyms_dict = get_reversed_syn_dict()
    # Set copy for O(1) membership tests inside the loops.
    categories = set(category_subs)

    new_found_sub_dict = {}
    for ingredient, found_subs in found_substitutes_dict.items():
        own_synonyms = synonyms_dict.get(ingredient, ())
        ingredient_is_category = ingredient in categories
        # A dict is used as an insertion-ordered set so the resulting list
        # order is reproducible between runs.
        current_subs = {}

        for sub in found_subs:
            # Drop the ingredient itself and its own synonyms.
            if sub == ingredient or sub in own_synonyms:
                continue

            bases = reversed_synonyms_dict.get(sub)
            if bases is None:
                # Not a synonym of anything: keep as is.
                current_subs[sub] = None
                continue

            if len(bases) == 1:
                base = bases[0]
                # A category base is only used for category ingredients;
                # otherwise the more specific substitute name is kept.
                if base not in categories or ingredient_is_category:
                    current_subs[base] = None
                else:
                    current_subs[sub] = None
            elif len(bases) == 2:
                first_is_category = bases[0] in categories
                second_is_category = bases[1] in categories
                if first_is_category == second_is_category:
                    # Both or neither base is a category: cannot decide.
                    print(bases)
                    continue
                if first_is_category:
                    category_base, plain_base = bases[0], bases[1]
                else:
                    category_base, plain_base = bases[1], bases[0]
                chosen = category_base if ingredient_is_category else plain_base
                current_subs[chosen] = None
            else:
                # Synonym of too many base ingredients: report and drop.
                print(f"{sub} is in {bases}")

        new_found_sub_dict[ingredient] = list(current_subs)
    return new_found_sub_dict
|
|
|
|
|
|
# combine substitutes found for an ingredient and its synonyms
# also combine synonyms in substitutes
def combine_all_synonyms(found_substitutes_dict):
    """Merge substitutes of synonymous ingredients, then normalize them.

    Ingredients that are synonyms according to
    ``get_reversed_syn_dict_no_cat()`` contribute their substitutes to the
    first base ingredient they are listed under; all other ingredients keep
    their own entry. The merged mapping is then passed through
    ``combined_substitutes_dict``.

    Args:
        found_substitutes_dict: mapping of ingredient -> iterable of
            substitute names.

    Returns:
        The result of ``combined_substitutes_dict`` applied to the merged
        mapping.
    """
    reversed_synonyms_dict = get_reversed_syn_dict_no_cat()

    # Dicts are used as insertion-ordered sets so the merged lists have a
    # reproducible order.
    merged_subs = {}

    # First pass: every non-synonym ingredient gets an entry, even if no
    # substitutes end up in it.
    for ingredient in found_substitutes_dict:
        if ingredient not in reversed_synonyms_dict:
            merged_subs[ingredient] = {}

    # Second pass: add the substitutes to the ingredient itself or, for a
    # synonym, to its first base ingredient.
    for ingredient, found_subs in found_substitutes_dict.items():
        bases = reversed_synonyms_dict.get(ingredient)
        target = ingredient if bases is None else bases[0]
        # setdefault: the base ingredient may have no entry from the first
        # pass (it is absent from the input or is itself a synonym); a plain
        # lookup would raise KeyError here.
        merged_subs.setdefault(target, {}).update(dict.fromkeys(found_subs))

    merged_lists = {ingredient: list(subs) for ingredient, subs in merged_subs.items()}
    return combined_substitutes_dict(merged_lists)
|
|
|
|
|
|
|
|
def get_reversed_syn_dict(is_engl=False):
    """Build a synonym -> base-ingredients lookup from a synonyms JSON file.

    The synonyms file maps each base ingredient to a list of its synonyms;
    this function inverts that mapping.

    Args:
        is_engl: when true, read "evaluation/engl_data/engl_synonyms.json";
            otherwise read the file at the module-level ``synonyms_path``.

    Returns:
        dict mapping every synonym to the list of base ingredients it is
        listed under, in the order they appear in the file.
    """
    path = "evaluation/engl_data/engl_synonyms.json" if is_engl else synonyms_path
    # Explicit encoding so non-ASCII ingredient names decode identically on
    # every platform instead of depending on the locale default.
    with open(path, "r", encoding="utf-8") as synonyms_file:
        synonyms_dict = json.load(synonyms_file)

    reversed_synonyms_dict = {}
    for ingredient, synonyms in synonyms_dict.items():
        for syn in synonyms:
            reversed_synonyms_dict.setdefault(syn, []).append(ingredient)

    return reversed_synonyms_dict
|
|
|
|
def get_reversed_syn_dict_no_cat():
    """Build a synonym -> base-ingredients lookup, skipping category bases.

    Reads the synonyms JSON file at the module-level ``synonyms_path`` (base
    ingredient -> list of synonyms) and inverts it. Base ingredients listed
    in ``category_subs`` are ignored, so their synonyms do not map back to
    them.

    Returns:
        dict mapping every synonym to the list of non-category base
        ingredients it is listed under, in the order they appear in the file.
    """
    # Explicit encoding so non-ASCII ingredient names decode identically on
    # every platform instead of depending on the locale default.
    with open(synonyms_path, "r", encoding="utf-8") as synonyms_file:
        synonyms_dict = json.load(synonyms_file)

    # Set copy for O(1) membership tests.
    categories = set(category_subs)

    reversed_synonyms_dict = {}
    for ingredient, synonyms in synonyms_dict.items():
        if ingredient in categories:
            continue
        for syn in synonyms:
            reversed_synonyms_dict.setdefault(syn, []).append(ingredient)

    return reversed_synonyms_dict
|
|
|
|
|
|
# Runs the normalization on the loaded substitute pairs at import time.
# The returned dict is discarded, so only the diagnostic prints emitted by
# combined_substitutes_dict are observable.
combined_substitutes_dict(sub_dict)