Files
MasterarbeitCode/clean_dataset/dataset_helpers.py
2021-04-11 23:28:41 +02:00

1340 lines
83 KiB
Python

import json
import re
import spacy
import gc
from clean_dataset.lists import regex_list_ingr
# Load the spaCy pipeline once, at import time, so it is shared by the whole module.
# NOTE(review): 'de_core_news_lg' is presumably spaCy's large German model and must
# be installed separately — confirm against the project setup instructions.
nlp = spacy.load('de_core_news_lg')
# Relative directory names (with trailing slash). Their consumers are outside this
# view — presumably the dataset input/output locations; verify against callers.
data_path = "data/"
home_path = "prep_dataset/"
def clean_ice(curr_ingredient, or_set):
    """Map an ice ingredient description onto a canonical label.

    The text is scanned (case-sensitive substring checks) for markers of
    crushed ice and of ice cubes.

    Args:
        curr_ingredient: The ingredient text to normalise.
        or_set: Truthy when the caller already knows the ingredient was
            given as an alternative ("oder"); treated like an "oder" marker.

    Returns:
        "Crushed Ice" when only crushed ice is mentioned, "Eiswürfel" when
        only cubes are mentioned, a combination when both are mentioned, and
        the unchanged input when neither is mentioned.
    """
    is_crushed = any(
        marker in curr_ingredient
        for marker in ("crush", "Crush", "gestoßen", "gebrochen")
    )
    has_cubes = any(marker in curr_ingredient for marker in ("würfel", "Würfel"))

    # Zero or one kind of ice detected: no conjunction handling is needed.
    if not (is_crushed and has_cubes):
        if is_crushed:
            return "Crushed Ice"
        if has_cubes:
            return "Eisw\u00fcrfel"
        return curr_ingredient

    # Both kinds detected: decide how they are combined. "und"-style markers
    # take precedence over "oder"-style markers.
    if any(token in curr_ingredient for token in ("und", "+", "Und", "UND")):
        joiner = " und"
    elif any(token in curr_ingredient for token in ("oder", "Oder", "ODER", "/")) or or_set:
        joiner = " oder"
    else:
        joiner = ""

    # For alternatives only the cubes are kept.
    if joiner == " oder":
        return "Eisw\u00fcrfel"
    return "Crushed Ice" + joiner + " Eisw\u00fcrfel"
def clean_citrus(curr_ingredient, or_set):
    """Normalise a citrus ingredient description to canonical ingredient names.

    The text is scanned with case-sensitive substring checks for citrus
    fruits (Grapefruit, Mandarine, Zitrone, Orange, Limette) and for fruit
    products (zest, juice, pulp, lemonade). Fruit and product are combined
    into German compounds such as "Zitronensaft" or "Grapefruitabrieb".

    Args:
        curr_ingredient: The ingredient text to normalise.
        or_set: Truthy when the caller already knows the ingredient was
            given as an alternative ("oder"); treated like an "oder" marker.

    Returns:
        The normalised ingredient string with runs of whitespace collapsed
        to a single space. When no citrus fruit is detected the input is
        returned with only the whitespace collapsed.
    """
    org_ingr = curr_ingredient

    def mentions(*needles):
        # True when at least one of the given substrings occurs in the text.
        return any(needle in curr_ingredient for needle in needles)

    # "gras" excludes lemongrass ("Zitronengras"/"Limettengras").
    no_grass = "gras" not in curr_ingredient

    existing_citrus = []
    if mentions("Grapefruit", "grapefruit"):
        existing_citrus.append('Grapefruit')
    if mentions("Apfelsine", "Mandarine", "Clementine"):
        existing_citrus.append('Mandarine')
    if mentions("Zitrone", "zitrone") and no_grass:
        existing_citrus.append('Zitrone')
    if "Orange" in curr_ingredient:
        existing_citrus.append('Orange')
    if mentions("Limette", "limette", "Limone", "limone") and no_grass:
        existing_citrus.append('Limette')

    if not existing_citrus:
        return re.sub(r'(\s)+', ' ', org_ingr)

    # Product suffixes are stored WITHOUT a linking letter; the linking "n"
    # is chosen per fruit below.
    existing_type = []
    if mentions("schale", "Schale", "Abrieb", "abrieb", "gerieben", "reibe",
                "Zeste", "zeste", "Abger."):
        existing_type.append("abrieb")
    if mentions("presst", "pressen", "Saft", "saft"):
        existing_type.append("saft")
    if mentions("Fleisch", "fleisch"):
        existing_type.append("fruchtfleisch")
    if mentions("Limonade", "limonade", "Limo ", "limo "):
        existing_type.append("limonade")

    is_alternative = mentions("oder", "Oder", "ODER", "/") or bool(or_set)

    if existing_type:
        # Fruit products: build every fruit/product compound. All fruit names
        # ending in "e" take a linking "n" ("Zitrone" + "n" + "saft");
        # "Grapefruit" takes none. Deciding this per compound also fixes
        # results such as "Grapefruitnabrieb" or joined lists, which the
        # former whole-string comparison against "Grapefruitnsaft" missed.
        compounds = [
            fruit + ("" if fruit == "Grapefruit" else "n") + product
            for fruit in existing_citrus
            for product in existing_type
        ]
        # For alternatives only the first compound is kept.
        result = compounds[0] if is_alternative else " und ".join(compounds)
    elif len(existing_citrus) > 1:
        # Several whole fruits, no product: list the fruit names.
        result = (" oder " if is_alternative else " und ").join(existing_citrus)
    else:
        # A single whole fruit. Pattern 404 is defined in clean_dataset.lists;
        # presumably it strips "(n)" plural markers — verify there.
        stripped = regex_list_ingr[404].sub('', org_ingr)
        # Collapse to the bare fruit name only when the name is directly
        # followed by further letters or a hyphen; otherwise keep the
        # original text.
        if re.search(existing_citrus[0] + r'[A-Za-zÄäÖöÜüß\-]+', stripped):
            result = existing_citrus[0]
        else:
            result = org_ingr

    return re.sub(r'(\s)+', ' ', result)
def clean_pepper(curr_ingredient):
    """Strip descriptive qualifiers from a pepper ingredient string.

    Applies the pre-compiled patterns ``regex_list_ingr[377]`` through
    ``regex_list_ingr[388]`` in ascending order, replacing every match with
    a single space.

    Args:
        curr_ingredient: The ingredient text to clean.

    Returns:
        The text after all twelve substitutions. Whitespace is not collapsed
        here.
    """
    # The patterns are defined in clean_dataset.lists. According to the
    # inline notes that accompanied the original calls they correspond to:
    #   377: (K|k)örner                       378: (B|b)eeren
    #   379: getrocknet(\w){0,2}(\s|,|\)|$)   380: bunt
    #   381: grün                             382: schwarz
    #   383: weiß                             384: rosa
    #   385: rot                              386: (^|\s|,|\()gem\.(\s|,|\)|$)
    #   387: gemischt(\w){0,2}(\s|,|\)|$)
    #   388: (^|\s|,|\()a\.(\s)?d\.(\s)?M\.(\s|,|\)|$)
    # NOTE(review): verify the mapping against clean_dataset.lists.
    for pattern_index in range(377, 389):
        curr_ingredient = regex_list_ingr[pattern_index].sub(' ', curr_ingredient)
    return curr_ingredient
def clean_ingredient(ingredient):
# remove line breaks
# curr_ingredient = re.sub(r'\n', ' ', ingredient)
curr_ingredient = regex_list_ingr[0].sub(' ', ingredient)
orig_ingredient = curr_ingredient
if ("Mehl" in curr_ingredient or "mehl" in curr_ingredient) and "Typ" in curr_ingredient:
# delete amount
# curr_ingredient = re.sub(r'^[\s]*[\d]+[\s]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[1].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'Type', 'Typ', curr_ingredient)
curr_ingredient = regex_list_ingr[394].sub('Typ', curr_ingredient)
else:
# delete amount
# curr_ingredient = re.sub(r'[\s]*[\d]+[\s]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[2].sub(' ', curr_ingredient)
# remove leading whitespaces
# curr_ingredient = re.sub(r'^[\s]*', '', curr_ingredient)
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
or_set = False
if "oder" in curr_ingredient or "Oder" in curr_ingredient or "ODER" in curr_ingredient or "/" in curr_ingredient:
or_set = True
# curr_ingredient = re.sub(r'[\s(,]*(und/)?[oO]der.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[4].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'[\s(,]*(und/)?ODER.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[5].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'[\s(,]*wahlweise.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[6].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'[\s(,]*ersatzweise.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[376].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'[\s(,]*alternativ.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[7].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'[\s(,]*bzw\..*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[8].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r',(\s)?evtl(\.)?.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[359].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove 1/2
# curr_ingredient = re.sub(r'[\s]*[\d]*(\u00bd)[\s]*', '', curr_ingredient)
curr_ingredient = regex_list_ingr[9].sub('', curr_ingredient)
# remove 1/4
# curr_ingredient = re.sub(r'[\s]*[\d]*(\u00bc)[\s]*', '', curr_ingredient)
curr_ingredient = regex_list_ingr[10].sub('', curr_ingredient)
# remove 3/4
# curr_ingredient = re.sub(r'[\s]*[\d]*(\u00be)[\s]*', '', curr_ingredient)
curr_ingredient = regex_list_ingr[11].sub('', curr_ingredient)
# remove 1/3
# curr_ingredient = re.sub(r'[\s]*[\d]*(\u2153)[\s]*', '', curr_ingredient)
curr_ingredient = regex_list_ingr[12].sub('', curr_ingredient)
# remove 2/3
# curr_ingredient = re.sub(r'[\s]*[\d]*(\u2154)[\s]*', '', curr_ingredient)
curr_ingredient = regex_list_ingr[13].sub('', curr_ingredient)
# remove 1/8
# curr_ingredient = re.sub(r'[\s]*[\d]*(\u215b)[\s]*', '', curr_ingredient)
curr_ingredient = regex_list_ingr[14].sub('', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# curr_ingredient = re.sub(r'(?P<fett>(\w)*)%(\sFett)?\si\.(\s)?Tr\.', r"\g<fett>", curr_ingredient)
curr_ingredient = regex_list_ingr[19].sub(r"\g<fett>", curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove :
# curr_ingredient = re.sub(r':', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[20].sub(' ', curr_ingredient)
# remove !
# curr_ingredient = re.sub(r'!', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[21].sub(' ', curr_ingredient)
# remove ca. and mind.
# curr_ingredient = re.sub(r'ca\.', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[22].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|,|\()+mind\.', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[374].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|,|\()+mindestens', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[375].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove sizes
# curr_ingredient = re.sub(r'(^|\s|\(|,)Gr\u00f6\u00dfe(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[23].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gr\u00f6\u00dfe(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[24].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Gr\u00f6sse(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[25].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gr(\.|$)(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[26].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Gr(\.|$)(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[27].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Gew(\.|$|\s)(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[28].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)kl(\.|$)(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[29].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Kl(\.|$|\s)(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[30].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Klasse(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[31].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)KL(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[32].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)m\.-gro\u00df(e|er|es|en)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[33].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gro\u00df(e|er|es|en)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[34].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)mittelgro\u00df(e|er|es|en)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[35].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)klein(e|er|es|en|ere)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[36].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s)+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[37].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove different units and amounts
# curr_ingredient = re.sub(r'(^|\s)(aus\sder\s|gr\.\s|kl.\s)?Dose(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[38].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Tasse(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[39].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Teil(/e)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[40].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Tube(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[41].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Prise(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[42].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Prise(\(n\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[43].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Prisen(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[44].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Kugel(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[45].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Kugeln(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[46].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Knolle(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[47].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Rippe(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[48].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Ecke(\(n\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[49].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Rispe(\(n\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[50].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Rolle(\()?(n)?(\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[51].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Staude(\(n\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[52].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Halm(\(e\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[53].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Zehe(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[54].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Stange(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[55].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Wurzel(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[56].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?Stange(\(n\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[57].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?Flasche(/n)?(n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[58].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Platte(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[59].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?Scheibe(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[60].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s|in\s)?Scheibe(\()?n(\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[61].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?Tablette(\(n\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[62].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Zweig(/e)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[63].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Ring(/e)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[64].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Tafel(e)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[65].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Beet(/e)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[66].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?Glas(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[67].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?Gl\u00e4ser(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[68].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Bl\u00e4tter(n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[69].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)K\u00e4stchen(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[70].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)T\u00fcte(/n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[71].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)W\u00fcrfel(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[72].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)B\u00fcndel(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[73].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?St\u00fcck(/e)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[74].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?St\u00fcck(\(e\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[75].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?St\u00fcck(\(n\))?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[76].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)St\u00e4ngel(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[77].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Stiel(e)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[78].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Stiel/e(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[79].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Streifen(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[80].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Paar(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[81].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Becher(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[82].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(in\s)?Ringe(n)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[83].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Sch\u00e4lchen(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[84].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Riegel(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[85].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()(R|r)öllchen(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[86].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)K\u00d6pfe(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[87].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)K\u00d6rner(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[88].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?Kopf(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[89].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(gr\.\s|kl.\s)?Bund(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[90].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Handvoll(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[91].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Beutel(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[92].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Eimer(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[93].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Blatt(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[94].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Schuss(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[95].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Spritzer(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[96].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)F\u00e4sschen(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[97].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Fl\u00e4schchen(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[98].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Handvoll(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[99].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Paar(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[100].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Topf(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[101].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Tropfen(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[102].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)daumengroß(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[336].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)walnussgroß(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[103].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Paket(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[104].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Pck\.(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[105].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)St\.(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[106].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Pkt\.(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[107].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Msp\.(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[108].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)msp\.(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[109].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)mm(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[337].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)ml(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[110].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)cl(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[111].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)dl(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[112].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)l(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[113].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Menge(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[114].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)L(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[115].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)XL(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[116].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Liter(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[117].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)liter(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[118].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Port\.(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[119].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)cm(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[120].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)M(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[121].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)S(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[122].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)mg(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[123].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)g(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[124].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)gramm(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[125].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Gramm(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[126].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Grad(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[127].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)grad(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[128].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Einwaage(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[129].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)kg(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[130].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)El(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[131].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)EL(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[132].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)TL(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[133].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Tl(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[134].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Bund(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[135].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Stich(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[136].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(,)*(\s)*gestr\.(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[137].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(,)*(\s)*geh\u00e4uft(e)?(r)?(\.)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[138].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)einige(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[139].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)dick(e)?(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[140].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)d\u00fcnne(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[141].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)viel(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[142].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)sehr(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[143].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)wenig(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[144].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)etwas(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[145].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)etwa(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[146].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)extra(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[147].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)mehr(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[148].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)einen(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[149].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)eine(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[150].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()eins(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[151].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()zwei(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[152].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)ein(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[153].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)nach\sGeschmack(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[154].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()Abtropfgewicht(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[360].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s)+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[155].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove %
# curr_ingredient = re.sub(r'%\sFett(gehalt)?(\s|\)|,|$)', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[335].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'%', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[15].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'Prozent', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[16].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'Fettgehalt', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[17].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'\u00B0', ' ', curr_ingredient) # Grad Zeichen
curr_ingredient = regex_list_ingr[18].sub(' ', curr_ingredient)
# remove everything after z.B.
# curr_ingredient = re.sub(r'z\.(\s)?B\..*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[156].sub(' ', curr_ingredient)
# remove n.B.
# curr_ingredient = re.sub(r'(^|\s|\(|,)n\.(\s)?B\.(\s|,|$|\))*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[157].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)+nach\sBedarf[,\s]*($|\()*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[158].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)+bei\sBedarf[,\s]*($|\()*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[159].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)+nach\sBelieben[,\s]*($|\()*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[160].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)+nach\sGeschmack[,\s]*($|\()*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[367].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove (n), (e) at beginning
# curr_ingredient = re.sub(r'^\(n\)(\s|,|\))+', '', curr_ingredient)
curr_ingredient = regex_list_ingr[161].sub('', curr_ingredient)
# curr_ingredient = re.sub(r'^\(e\)(\s|,|\))+', '', curr_ingredient)
curr_ingredient = regex_list_ingr[162].sub('', curr_ingredient)
# curr_ingredient = re.sub(r'(\s)+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[163].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove evtl.
# curr_ingredient = re.sub(r'(^|\s|\()evtl\.(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[164].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\()eventuell(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[165].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\()m\u00f6glichst(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[166].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\()optional(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[167].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\()auch(\s|,|$|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[168].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\()wahlweise(\s|,|$|\))+', '', curr_ingredient)
# curr_ingredient = re.sub(r'(\s)+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[169].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove context
# curr_ingredient = re.sub(r'(\s|\(|,)+f\u00fcr.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[170].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+(aus\sder\s|a\.d\.\s|a\.\sd\.\s)M\u00fchle(\s|$|\(|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[171].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+vom(\s|$|\(|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[172].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+von(\s|$|\(|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[173].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+etwaszum(\sBraten)?(\s|$|\(|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[339].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+in(\s|$|\(|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[174].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+zum.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[175].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+als.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[176].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+zur.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[177].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+wenn\sgewünscht.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[340].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+nach\sWahl.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[178].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+nach\sSaison.*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[390].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+bevorzugt[\w]{0,2}$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[393].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+nach\sPackungsan(weisung|leitung).*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[392].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,)+aus\s(dem|der).*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[338].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove nach/mit/und Geschmack
curr_ingredient = regex_list_ingr[403].sub(' ', curr_ingredient)
if "gemischt" in curr_ingredient or "halb" in curr_ingredient:
if "hack" in curr_ingredient or "Hack" in curr_ingredient:
curr_ingredient = "gemischtes Hack"
elif "Gulasch" in curr_ingredient or "gulasch" in curr_ingredient:
curr_ingredient = "Gulasch gemischt"
else:
# curr_ingredient = re.sub(r'(\s|\(|,|^)halb[\w]{0,2}(\s)?(/)?(-)?(\s)?(halb)?(\s|$|\(|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[179].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,|^)je[/\w\-,\s]*($|\(|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[180].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s|\(|,|^)davon(\s|$|\(|\))+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[181].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s)+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[182].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
if "Ei" in curr_ingredient and (
"koch" in curr_ingredient or "weich" in curr_ingredient or "hart" in curr_ingredient or "pell" in curr_ingredient):
curr_ingredient = "gekochtes Ei"
else:
# curr_ingredient = re.sub(r'(^|\s|\(|,)(ge)kocht[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[183].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)weich[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[184].sub(' ', curr_ingredient)
if "roh" in curr_ingredient and not (
"Schinken" in curr_ingredient or "Speck" in curr_ingredient or "speck" in curr_ingredient or "Bacon" in curr_ingredient or "zucker" in curr_ingredient or "Zucker" in curr_ingredient):
# curr_ingredient = re.sub(r'(^|\s|\(|,)roh[\w]{0,2}(\s|,|\)|$)+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[185].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# processing / preparation terms
# curr_ingredient = re.sub(r'(^|\s|\(|,)mit\sGrün(\s|,|\)|$)', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[341].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)und\sGrün(\s|,|\)|$)', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[342].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)mit\sStengeln(\s|,|\)|$)', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[343].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'[sS]palte(n)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[186].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'[sS]cheibe(n)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[187].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'Sorte', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[188].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'Sch\u00e4rfe', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[189].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)wenig[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[190].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)grob[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[191].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)fein[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[192].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)ganz[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[193].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)vollreif[\w]*[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[350].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)reif[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[194].sub(' ', curr_ingredient)
if "Biobin" not in curr_ingredient:
# curr_ingredient = re.sub(r'(^|\s|\(|,)bio[\s-]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[195].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Bio[\s-]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[196].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gehackt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[197].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gerieben[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[198].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(leicht\s)?verschlagen[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[199].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(leicht\s)?versprudelt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[200].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(leicht\s)?verr\u00fchrt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[201].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(leicht\s)?aufgeschlagen[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[202].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(leicht\s)?ver(k)?leppert[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[203].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(Steif|steif)?(\s)?(leicht\s)?geschlagen[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[204].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(in\s)?Stifte(n)?[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[205].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)zerbröckelt[\w]{0,2}[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[395].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gestifte(l)?t[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[206].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)pochiert[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[207].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(leicht\s)?verq(u)?irlt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[208].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(leicht\s)?verklopft[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[209].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gegart[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[210].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gesch\u00e4lt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[211].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)abgetropft[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[212].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)geraspelt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[213].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)zerkleinert[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[373].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gem\u00f6rsert[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[214].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)geschnetzelt[\w]*[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[215].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)selbst(\s)?gemacht[\w]*[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[344].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)nach\seigenem\sGeschmack[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[345].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gestoßen[\w]*[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[346].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)ungeschält[\w]*[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[347].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)netto[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[348].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gestückelt[\w]*[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[349].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)neu[\w]*[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[351].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Qualität[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[352].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)ohne\sVorkochen[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[353].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)geschrotet[\w]*[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[354].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(in)?Spalten[\s]?(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[216].sub(' ', curr_ingredient)
if "Kartoffel" not in curr_ingredient:
# curr_ingredient = re.sub(r'(^|\s|\(|,)kochend[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[217].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)entsteint[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[218].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)zerlassen[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[219].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)ohne\sStein[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[220].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gesiebt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[221].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)hei\u00df[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[222].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)lauwarm[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[223].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)nativ[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[224].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)zimmerwarm[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[225].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)warm[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[226].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(eis)?kalt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[227].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)geputzt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[228].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gewogen[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[229].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)ausgekratzt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[230].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)zerstampft[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[231].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)schaumig[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[232].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gepresst[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[233].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)mundgerecht[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[234].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)getrennt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[235].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)trennen[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[236].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)vorgekocht[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[237].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)vorbereitet[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[238].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)vorgewiegt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[239].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)abgezupft[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[240].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)ungespritzt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[241].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)unbehandelt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[242].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)kleingeschnitten[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[243].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)zerteilt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[244].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)wiegen[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[245].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gerebelt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[246].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)blanchiert[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[247].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Viertel[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[248].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)geviertelt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[249].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)geachtelt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[250].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)halbiert[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[251].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)st\u00fcckig[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[252].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(zer|ange)drückt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[253].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)zersto\u00dfen[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[254].sub(' ', curr_ingredient)
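# NOTE(review): \u00df is 'ß', so the pattern below spells 'zerbrßselt'; 'zerbröselt' (\u00f6) was presumably intended - confirm against regex_list_ingr[255].
# curr_ingredient = re.sub(r'(^|\s|\(|,)zerbr\u00dfselt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)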
curr_ingredient = regex_list_ingr[255].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gehobelt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[256].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(d\u00fcnn|dick)?(\s)?geschnitten[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[257].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)d\u00fcnn[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[258].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(ge)?mahlen[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[259].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gemörsert[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[368].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)mörsern[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[369].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gew\u00fcrfelt[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[260].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)w\u00fcrfeln[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[261].sub(' ', curr_ingredient)
if "Würfelzucker" not in curr_ingredient:
# curr_ingredient = re.sub(r'(^|\s|\(|,)Würfel[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[391].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)St\u00fccke[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[262].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)(ab)?gek\u00fchlt[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[263].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)vergine[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[264].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)St\u00fcckchen[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[265].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Raumtemperatur[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[266].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Zimmertemperatur[\w]*[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[267].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)ger\.[\s]*(,|\)|$)?', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[268].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(\s)+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[269].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# change certain ingredient names
# curr_ingredient = re.sub(r'(^|\s|,|\()Paprikaschote(\(n\))?(\s|$|,|\))', ' Paprika ', curr_ingredient)
curr_ingredient = regex_list_ingr[270].sub(' Paprika ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()Porree(\(s\))?(\s|$|,|\))', ' Lauch ', curr_ingredient)
curr_ingredient = regex_list_ingr[396].sub(' Lauch ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Zucchino(\s|$)', ' Zucchini ', curr_ingredient)
curr_ingredient = regex_list_ingr[271].sub(' Zucchini ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()Knoblauchzehe(n)?(\s|$)', ' Knoblauch ', curr_ingredient)
curr_ingredient = regex_list_ingr[272].sub(' Knoblauch ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Knoblauch(\s)?(-)?(\s)?(z|Z)ehe(n)?(\s|$)', ' Knoblauch ', curr_ingredient)
curr_ingredient = regex_list_ingr[273].sub(' Knoblauch ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()Schlagsahne(\s|$|,|\))', ' Sahne ', curr_ingredient)
curr_ingredient = regex_list_ingr[274].sub(' Sahne ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()Schlagobers(\s|$|,|\))', ' Sahne ', curr_ingredient)
curr_ingredient = regex_list_ingr[275].sub(' Sahne ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)s\u00fc\u00dfe\sSahne(\s|$)', ' Sahne ', curr_ingredient)
curr_ingredient = regex_list_ingr[276].sub(' Sahne ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Sahne\ss\u00fc\u00df(e)?(\s|$)', ' Sahne ', curr_ingredient)
curr_ingredient = regex_list_ingr[356].sub(' Sahne ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Zimtpulver(\s|$)', ' Zimt ', curr_ingredient)
curr_ingredient = regex_list_ingr[277].sub(' Zimt ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Wasser\sohne\sKohlens\u00e4ure(\s|$)', ' Wasser ', curr_ingredient)
curr_ingredient = regex_list_ingr[278].sub(' Wasser ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Mineralwasser\sohne\sKohlens\u00e4ure(\s|$)', ' Wasser ', curr_ingredient)
curr_ingredient = regex_list_ingr[357].sub(' Wasser ', curr_ingredient)
if "Wasser" in curr_ingredient or "wasser" in curr_ingredient:
if "still" in curr_ingredient or "Still" in curr_ingredient or "ohne Sprudel" in curr_ingredient or "ohne sprudel" in curr_ingredient:
curr_ingredient = "Wasser"
elif "mit Kohlensäure" in curr_ingredient or "mit Sprudel" in curr_ingredient or "Sprudelwasser" in curr_ingredient or "kohlensäure" in curr_ingredient:
curr_ingredient = "Sprudelwasser"
# curr_ingredient = re.sub(r'(^|\s)Mineralwasser$', ' Sprudelwasser ', curr_ingredient)
curr_ingredient = regex_list_ingr[358].sub(' Sprudelwasser ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Ingwerwurzel(\s|$)', ' Ingwer ', curr_ingredient)
curr_ingredient = regex_list_ingr[279].sub(' Ingwer ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Sodawasser(\s|$)', ' Sprudelwasser ', curr_ingredient)
curr_ingredient = regex_list_ingr[280].sub(' Sprudelwasser ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Tomate(\()?n(\))?(\sentkernt(e)?)?(\s|$|,)', ' Tomate ', curr_ingredient)
curr_ingredient = regex_list_ingr[281].sub(' Tomate ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Pilze(\()?n(\))?(\s|$|,)', ' Pilz ', curr_ingredient)
curr_ingredient = regex_list_ingr[282].sub(' Pilz ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Topfen(\s|$|,)', ' Quark ', curr_ingredient)
curr_ingredient = regex_list_ingr[283].sub(' Quark ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()Konfekt\s(?P<ingr>[\w\s\.]+)(\s|$|\)|,)', r"\g<ingr>", curr_ingredient)
curr_ingredient = regex_list_ingr[284].sub(r"\g<ingr>", curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()Möhre(\()?(n)?(\))?(\s|$|,)', ' Karotte ', curr_ingredient)
curr_ingredient = regex_list_ingr[285].sub(' Karotte ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Staubzucker(\s|$|,)', ' Puderzucker ', curr_ingredient)
curr_ingredient = regex_list_ingr[286].sub(' Puderzucker ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Eis(s)?(-)?(\s)?(w|W)asser(\s|$|,)', ' Wasser ', curr_ingredient)
curr_ingredient = regex_list_ingr[287].sub(' Wasser ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Zucker(\s)?,(\s)?wei\u00df(\s|$)', ' Zucker ', curr_ingredient)
curr_ingredient = regex_list_ingr[288].sub(' Zucker ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Mehl(\s)?(,)?\sglatt(es)?(\s|$)', ' Mehl ', curr_ingredient)
curr_ingredient = regex_list_ingr[355].sub(' Mehl ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Pfefferkörner(\s|$)', ' Pfeffer ', curr_ingredient)
curr_ingredient = regex_list_ingr[364].sub(' Pfeffer ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Pfefferbeeren(\s|$)', ' Pfeffer ', curr_ingredient)
curr_ingredient = regex_list_ingr[365].sub(' Pfeffer ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(\s)?mittelscharf[\w]{0,2}(\s|$)', ' mittelscharf ', curr_ingredient)
curr_ingredient = regex_list_ingr[289].sub(' mittelscharf ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(\s)?mild[\w]{0,2}(\s|$)', ' mild ', curr_ingredient)
curr_ingredient = regex_list_ingr[290].sub(' mild ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)(\s)?gesalzen[\w]{0,2}(\s|$)', ' gesalzen ', curr_ingredient)
curr_ingredient = regex_list_ingr[291].sub(' gesalzen ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()schwarz[\w]{0,2}(\s|$)', ' schwarz ', curr_ingredient)
curr_ingredient = regex_list_ingr[292].sub(' schwarz ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()rot[\w]{0,2}(\s|$)', ' rot ', curr_ingredient)
curr_ingredient = regex_list_ingr[293].sub(' rot ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()gr\u00fcn[\w]{0,2}(\s|$)', ' gr\u00fcn ', curr_ingredient)
curr_ingredient = regex_list_ingr[294].sub(' grün ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()gelb[\w]{0,2}(\s|$)', ' gelb ', curr_ingredient)
curr_ingredient = regex_list_ingr[295].sub(' gelb ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()wei(ß|ss)[\w]{0,2}(\s|$|,|\))', ' wei\u00df ', curr_ingredient)
curr_ingredient = regex_list_ingr[296].sub(' weiß ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()bunt[\w]{0,2}(\s|$)', ' bunt ', curr_ingredient)
curr_ingredient = regex_list_ingr[361].sub(' bunt ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()scharz[\w]{0,2}(\s|$)', ' schwarz ', curr_ingredient)
curr_ingredient = regex_list_ingr[362].sub(' schwarz ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()rosa[\w]{0,3}(\s|$)', ' rosa ', curr_ingredient)
curr_ingredient = regex_list_ingr[363].sub(' rosa ', curr_ingredient)
# 'Erdäpfel' always occurs together with 'Kartoffel', so it can simply be dropped
# curr_ingredient = re.sub(r'(^|\s|\()Erd\u00e4pfel(\)|\s|$)', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[297].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# eggs
if "Ei" in curr_ingredient and "roh" in curr_ingredient:
curr_ingredient = "Eier"
if "Eier" in curr_ingredient and "schlag" in curr_ingredient:
curr_ingredient = "Eiweiß"
if "Pfeffer" in curr_ingredient or "pfeffer" in curr_ingredient:
curr_ingredient = clean_pepper(curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()[\w\s]*(Eidotter|Dotter)[\s\w]*(\s|$|\)|,)', ' Eigelb ', curr_ingredient)
curr_ingredient = regex_list_ingr[298].sub(' Eigelb ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()[\w\s]*Freilandeier[\s\w]*(\s|$|\)|,)', ' Eier ', curr_ingredient)
curr_ingredient = regex_list_ingr[299].sub(' Eier ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()Vollei(er)?(\s|$|\)|,)', ' Eier ', curr_ingredient)
curr_ingredient = regex_list_ingr[300].sub(' Eier ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()[\w\s]*zu\sSchnee(\s|$|\)|,)', ' Eiwei\u00df ', curr_ingredient)
curr_ingredient = regex_list_ingr[301].sub(' Eiweiß ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()Eiklar(\s|$|\)|,)', ' Eiwei\u00df ', curr_ingredient)
curr_ingredient = regex_list_ingr[302].sub(' Eiwei\u00df ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|,|\()[\w\s]*Eischnee[\s\w]*(\s|$|\)|,)', ' Eiwei\u00df ', curr_ingredient)
curr_ingredient = regex_list_ingr[303].sub(' Eiweiß ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Ei(\()?er(\))?(\s)?(,)?(\s)?(davon\s)?(nur\s)?das\sEigelb(\s|$)', ' Eigelb ', curr_ingredient)
curr_ingredient = regex_list_ingr[304].sub(' Eigelb ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Ei(\()?er(\))?(\s)?(,)?(\s)?(nur\s)?das\sEigelb\sdavon(\s|$)', ' Eigelb ', curr_ingredient)
curr_ingredient = regex_list_ingr[305].sub(' Eigelb ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Ei(\()?er(\))?(\s)?(,)?(\s)?(nur\s)?das\sEiwei(\u00df|ss)\sdavon(\s|$)', ' Eiwei\u00df ', curr_ingredient)
curr_ingredient = regex_list_ingr[306].sub(' Eiweiß ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s)Ei(\()?er(\))?(\s)?(,)?(\s)?(davon\s)?(nur\s)?das\sEiwei(\u00df|ss)(\s|$)', ' Eiwei\u00df ', curr_ingredient)
curr_ingredient = regex_list_ingr[307].sub(' Eiwei\u00df ', curr_ingredient)
if "Eigelb" in curr_ingredient and "Eiwei\u00df" not in curr_ingredient:
curr_ingredient = "Eigelb"
elif "Eiwei\u00df" in curr_ingredient and "Eigelb" not in curr_ingredient:
curr_ingredient = "Eiwei\u00df"
elif "Eiwei\u00df" in curr_ingredient and "Eigelb" in curr_ingredient:
curr_ingredient = "Eier"
# curr_ingredient = re.sub(r'(\s)+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[308].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# clean citrus fruits
curr_ingredient = clean_citrus(curr_ingredient, or_set)
if "Eis" in curr_ingredient or "eis" in curr_ingredient or "Ice" in curr_ingredient or "ice" in curr_ingredient:
curr_ingredient = clean_ice(curr_ingredient, or_set)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# curr_ingredient = re.sub(r'-', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[370].sub(' ', curr_ingredient)
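# remove leftover tokens of one or two non-digit characters (per the pattern below);
# skipped when the text contains "Öl", "öl " or "Ei", which are themselves that short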
if "Öl" not in curr_ingredient and "öl " not in curr_ingredient and "Ei" not in curr_ingredient:
# curr_ingredient = re.sub(r'(^|\s|\(|,)((?!\d)\w){1,2}($|\s|\)|,)', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[309].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove annoying adjectives
# curr_ingredient = re.sub(r'(^|\s|\(|,)rund(^|\s|\)|,)', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[310].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)Freiland(^|\s|\)|,)', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[311].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'tiefgek\u00fchlt[\w]*', 'TK', curr_ingredient)
curr_ingredient = regex_list_ingr[312].sub('TK', curr_ingredient)
# curr_ingredient = re.sub(r'(tief)?gefroren[\w]*', 'TK', curr_ingredient)
curr_ingredient = regex_list_ingr[372].sub('TK', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)frisch\soder\sTK[\w]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[313].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)TK\soder\sfrisch[\w]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[314].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)reichlich[\w]{0,2}', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[366].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)TK[\w]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[315].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)beste[\w]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[316].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)frisch[\w]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[317].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)gut[\w]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[318].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)fein[\w]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[319].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)verschieden[\w]*', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[389].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'(^|\s|\(|,)bis', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[320].sub(' ', curr_ingredient)
# remove "mit Stein"
curr_ingredient = regex_list_ingr[400].sub(' ', curr_ingredient)
# remove "mit Kern(e|en)"
curr_ingredient = regex_list_ingr[401].sub(' ', curr_ingredient)
# remove "mit Kern(e|en)"
curr_ingredient = regex_list_ingr[402].sub('_Schale_und_Kerne', curr_ingredient)
# remove "mit Schale ..."
curr_ingredient = regex_list_ingr[405].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# curr_ingredient = re.sub(r'(\\u00e4)', 'ä', curr_ingredient)
# curr_ingredient = re.sub(r'(\\u00f6)', 'ö', curr_ingredient)
# curr_ingredient = re.sub(r'(\\u00fc)', 'ü', curr_ingredient)
# curr_ingredient = re.sub(r'(\\u00c4)', 'Ä', curr_ingredient)
# curr_ingredient = re.sub(r'(\\u00d6)', 'Ö', curr_ingredient)
# curr_ingredient = re.sub(r'(\\u00dc)', 'Ü', curr_ingredient)
# curr_ingredient = re.sub(r'(\\u00df)', 'ß', curr_ingredient)
# replace ( with ,
# curr_ingredient = re.sub(r'(?P<ingr>(\w)*)\(', r"\g<ingr>", curr_ingredient)
curr_ingredient = regex_list_ingr[321].sub(r"\g<ingr>", curr_ingredient)
# curr_ingredient = re.sub(r'\(', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[322].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'\)', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[323].sub(' ', curr_ingredient)
# remove single . in middle
# curr_ingredient = re.sub(r'\s\.\s', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[324].sub(' ', curr_ingredient)
# remove lone period at end
# curr_ingredient = re.sub(r'\s\.(\s|,|\))*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[325].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'\sund(\s|,|\))*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[326].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'\sin(\s|,|\))*$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[327].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'\soder(\s|,|\))*$', ' ', curr_ingredient)
# curr_ingredient = re.sub(r'\sODER(\s|,|\))*$', ' ', curr_ingredient)
# curr_ingredient = re.sub(r'\s+(\s)?$', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[328].sub(' ', curr_ingredient)
# remove leading whitespaces
curr_ingredient = regex_list_ingr[3].sub('', curr_ingredient)
# remove commas
# curr_ingredient = re.sub(r',', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[329].sub(' ', curr_ingredient)
# # remove commas at the end
# curr_ingredient = re.sub(r'(\s)+,(\s)+$', ' ', curr_ingredient)
# # remove commas at the beginning
# curr_ingredient = re.sub(r'^(\s)?,(\s)?', ' ', curr_ingredient)
# # remove multiple commas
# curr_ingredient = re.sub(r'((\s)*,(\s)*)+', ' , ', curr_ingredient)
# remove leading whitespaces, periods
# curr_ingredient = re.sub(r'^[\s\./]*', '', curr_ingredient)
curr_ingredient = regex_list_ingr[330].sub('', curr_ingredient)
# remove trailing whitespaces commas,...
# curr_ingredient = re.sub(r'[\s,()-+/]*$', '', curr_ingredient)
curr_ingredient = regex_list_ingr[331].sub('', curr_ingredient)
# remove trailing commas
# curr_ingredient = re.sub(r'[\s,]*$', '', curr_ingredient)
# double to single ingredient
# curr_ingredient = re.sub(r'^(?P<ingr>\w+)\s\1$', r"\g<ingr>", curr_ingredient)
curr_ingredient = regex_list_ingr[332].sub(r"\g<ingr>", curr_ingredient)
# Tomate Tomatensaft -> Tomatensaft
curr_ingredient = regex_list_ingr[334].sub(r"\g<long>", curr_ingredient)
# Ei Spiegelei
curr_ingredient = re.sub(r'^(:?\w+) (?P<long>\w+\1)$', r"\g<long>", curr_ingredient, flags=re.IGNORECASE)
# Spiegelei Ei
curr_ingredient = re.sub(r'^(?P<long>.+)(.+) \2$', r"\g<long>", curr_ingredient, flags=re.IGNORECASE)
# replace white spaces with _
# curr_ingredient = re.sub(r'[\s]+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[333].sub(' ', curr_ingredient)
# curr_ingredient = re.sub(r'[\s,()-+/]*$', '', curr_ingredient)
curr_ingredient = regex_list_ingr[331].sub('', curr_ingredient)
if "ohne " not in curr_ingredient:
if " und " in curr_ingredient:
temp_ingr_list = curr_ingredient.split(" und ")
if temp_ingr_list[1][0].isupper():
ingr_list = temp_ingr_list
else:
ingr_list = [curr_ingredient]
elif " + " in curr_ingredient:
temp_ingr_list = curr_ingredient.split(" + ")
if temp_ingr_list[1][0].isupper():
ingr_list = temp_ingr_list
else:
ingr_list = [curr_ingredient]
elif " & " in curr_ingredient:
temp_ingr_list = curr_ingredient.split(" & ")
if temp_ingr_list[1][0].isupper():
ingr_list = temp_ingr_list
else:
ingr_list = [curr_ingredient]
else:
ingr_list = [curr_ingredient]
else:
ingr_list = [curr_ingredient]
for inde, ing in enumerate(ingr_list):
# ing = re.sub(r'(\s)?mit(\s)', ' ', ing)
ing = regex_list_ingr[371].sub(' ', ing)
# ing = re.sub(r'(\s)?und(\s)', ' ', ing)
ing = regex_list_ingr[398].sub(' ', ing)
# curr_ingredient = re.sub(r'[\s]+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[333].sub(' ', curr_ingredient)
# Tomate Tomatensaft -> Tomatensaft
ing = regex_list_ingr[334].sub(r"\g<long>", ing)
# Ei Spiegelei
ing = re.sub(r'^(:?\w+) (?P<long>\w+\1)$', r"\g<long>", ing, flags=re.IGNORECASE)
# Spiegelei Ei
ing = re.sub(r'^(?P<long>.+)(.+) \2$', r"\g<long>", ing, flags=re.IGNORECASE)
# ing = re.sub(r'[^\d\w]', '', ing)
ing = regex_list_ingr[397].sub(' ', ing)
# curr_ingredient = re.sub(r'[\s]+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[333].sub(' ', curr_ingredient)
# nlp_ingr = nlp(ing)
# normalized_ingr_parts = []
# for token in nlp_ingr:
# normalized_ingr_parts.append(token.lemma_)
# space = " "
# ing = space.join(normalized_ingr_parts)
# ing = re.sub(r'(\s)?mit(\s)?', ' ', ing)
ing = regex_list_ingr[371].sub(' ', ing)
# curr_ingredient = re.sub(r'[\s]+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[333].sub(' ', curr_ingredient)
# Tomate Tomatensaft -> Tomatensaft
ing = regex_list_ingr[334].sub(r"\g<long>", ing)
# Ei Spiegelei
ing = re.sub(r'^(:?\w+) (?P<long>\w+\1)$', r"\g<long>", ing, flags=re.IGNORECASE)
# Spiegelei Ei
ing = re.sub(r'^(?P<long>.+)(.+) \2$', r"\g<long>", ing, flags=re.IGNORECASE)
# ing = re.sub(r'(\s)?mit(\s)?', ' ', ing)
ing = regex_list_ingr[371].sub(' ', ing)
# curr_ingredient = re.sub(r'[\s]+', ' ', curr_ingredient)
curr_ingredient = regex_list_ingr[333].sub(' ', curr_ingredient)
# Tomate Tomatensaft -> Tomatensaft
ing = regex_list_ingr[334].sub(r"\g<long>", ing)
# Ei Spiegelei
ing = re.sub(r'^(:?\w+) (?P<long>\w+\1)$', r"\g<long>", ing, flags=re.IGNORECASE)
# Spiegelei Ei
ing = re.sub(r'^(?P<long>.+)(.+) \2$', r"\g<long>", ing, flags=re.IGNORECASE)
ingr_list[inde] = ing
# out = []
# for ingr_word in ingr_list:
# nlp_ingr = nlp(ingr_word)
# normalized_ingr_parts = []
# for token in nlp_ingr:
# normalized_ingr_parts.append(token.lemma_)
# space = " "
# ingr = space.join(normalized_ingr_parts)
# out.append(ingr)
out = ingr_list
out2 = []
for ingr in out:
ingr = re.sub(r'^[\s]*', "", ingr)
ingr = re.sub(r'[\s]*$', "", ingr)
if ingr == "":
if "Blätter" in orig_ingredient:
ingr = "Blätter"
if ingr == "Crushed_Ice_Eiswürfel":
ingr = "Crushed_Ice"
out2.append(regex_list_ingr[399].sub('_', ingr))
with open(home_path + "ingredient_comparison.txt", "a") as myfile:
myfile.write(orig_ingredient + ": " + str(out2) + "\n")
return out2
# make new, cleaned dataset from old dataset and ingredient dict
# write new dataset to files (different encodings)
def edit_ingredients_dataset(whole_dataset, mult_ingredient_dict):
    """Filter every recipe down to its cleaned, known ingredients and save the result.

    Each raw ingredient string is passed through ``clean_ingredient`` (which
    returns a list of cleaned names); only names that occur as keys of
    ``mult_ingredient_dict`` are kept.

    Args:
        whole_dataset: mapping of recipe key -> recipe dict holding an
            ``'ingredients'`` list. It is modified in place.
        mult_ingredient_dict: mapping whose keys are the ingredient names to keep.

    Side effects:
        Overwrites ``data_path + "dataset_cleaned_nice.json"`` with the
        modified dataset (non-ASCII characters are written unescaped, using
        the platform's default text encoding).
    """
    print("Making dataset with cleaned ingredients")
    for count, recipe in enumerate(whole_dataset.values(), start=1):
        # progress output every 10000 recipes
        if count % 10000 == 0:
            print(f" edit: {count}")
        kept = []
        for raw_ingredient in recipe['ingredients']:
            kept.extend(
                name
                for name in clean_ingredient(raw_ingredient)
                if name in mult_ingredient_dict
            )
        recipe['ingredients'] = kept
    with open(data_path + "dataset_cleaned_nice.json", "w") as out_file:
        json.dump(whole_dataset, out_file, ensure_ascii=False, indent=4)
# make list of cleaned ingredients and amount from dataset (all and >20)
# write to files
def make_ingredient_list(whole_dataset):
    """Count how often each cleaned ingredient occurs and write the counts to disk.

    Args:
        whole_dataset: mapping of recipe key -> recipe dict holding an
            ``'ingredients'`` list of raw ingredient strings.

    Returns:
        A tuple ``(all_ingredients, mult_ingr)``: the occurrence count of every
        cleaned ingredient, and the subset that occurs more than 20 times.

    Side effects:
        Overwrites ``all_ingredients_nice.json`` and
        ``mult_ingredients_nice.json`` below ``data_path`` and prints
        progress and summary lines.
    """
    print("Making lists of cleaned ingredients")
    all_ingredients = {}
    for count, recipe in enumerate(whole_dataset.values(), start=1):
        # progress output every 10000 recipes
        if count % 10000 == 0:
            print(f" make: {count}")
        for raw_ingredient in recipe['ingredients']:
            # one raw entry can expand to several cleaned ingredients
            for name in clean_ingredient(raw_ingredient):
                all_ingredients[name] = all_ingredients.get(name, 0) + 1

    with open(data_path + "all_ingredients_nice.json", "w") as all_file:
        json.dump(all_ingredients, all_file, ensure_ascii=False, indent=4)

    # keep only ingredients that were seen more than 20 times
    mult_ingr = {
        name: amount
        for name, amount in all_ingredients.items()
        if amount > 20
    }
    with open(data_path + "mult_ingredients_nice.json", "w") as mult_file:
        json.dump(mult_ingr, mult_file, ensure_ascii=False, indent=4)

    print(f"Number of all ingredients: {len(all_ingredients)}")
    print(f"Number of ingredients > 20: {len(mult_ingr)}")
    return all_ingredients, mult_ingr
# print recipe amount
def print_dataset_length(whole_dataset):
    """Print the number of recipes (top-level entries) in ``whole_dataset``."""
    recipe_count = len(whole_dataset)
    print(f"Number of recipes in DB: {recipe_count}")
# load an ingredient dict from a JSON file in data_path
# default is mult_ingredients_nice.json
def load_ingr_file(file_name="mult_ingredients_nice.json"):
    """Read a JSON file located below ``data_path`` and return its parsed content.

    Args:
        file_name: name of the file relative to ``data_path``.

    Returns:
        The object decoded by ``json.load``.
    """
    with open(data_path + file_name, "r") as json_handle:
        return json.load(json_handle)
# take ingredient list and sort by amount
def make_sorted_ingr_file(mult_ingredient_dict,
                          file_name="mult_ingredients_sorted.json"):
    """Write the ingredients ordered by ascending amount, one per line.

    Every line has the form ``<ingredient>: <amount>``; despite the default
    ``.json`` file name the output is plain text, not JSON.

    Args:
        mult_ingredient_dict: mapping of ingredient name -> amount.
        file_name: target file name relative to ``data_path``.
    """
    print("Make sorted ingredient file")
    with open(data_path + file_name, "w") as sorted_file:
        by_amount = sorted(mult_ingredient_dict.items(), key=lambda pair: pair[1])
        sorted_file.writelines(
            name + ": " + str(amount) + "\n" for name, amount in by_amount
        )
def correct_crushed_cubed_ice(whole_dataset):
    """Patch two known bad ingredient names in every recipe.

    ``"Crushed_Ice_Eiswürfel"`` becomes ``"Crushed_Ice"`` and
    ``"Grapefruitnsaft"`` becomes ``"Grapefruitsaft"``; all other entries are
    kept unchanged and in order.

    Args:
        whole_dataset: mapping of recipe key -> recipe dict holding an
            ``'ingredients'`` list. It is modified in place.

    Returns:
        The same ``whole_dataset`` object.
    """
    for entry in whole_dataset.values():
        entry['ingredients'] = [
            "Crushed_Ice" if name == "Crushed_Ice_Eiswürfel"
            else "Grapefruitsaft" if name == "Grapefruitnsaft"
            else name
            for name in entry['ingredients']
        ]
    return whole_dataset
def main():
    """Run the ingredient-cleaning pipeline on ``data_path + "dataset_fin.json"``.

    Steps: count the cleaned ingredients (writing the count files), reload the
    dict of frequent ingredients from disk, write the amount-sorted listing and
    finally write the dataset reduced to those frequent ingredients.
    """
    dataset_file = data_path + "dataset_fin.json"
    with open(dataset_file, "r") as dataset_handle:
        recipes = json.load(dataset_handle)

    # Variant 1: recount all ingredients, then reload the frequent ones.
    make_ingredient_list(recipes)
    gc.collect()
    frequent_ingredients = load_ingr_file()
    make_sorted_ingr_file(frequent_ingredients)
    gc.collect()

    # Variant 2 (alternative to the block above): skip the recount and only
    # call load_ingr_file("mult_ingredients_nice.json") on an existing file.
    edit_ingredients_dataset(recipes, frequent_ingredients)

    # Optional follow-up that is currently disabled: load
    # "dataset_sep_sentences.json", run correct_crushed_cubed_ice and
    # make_ingredient_list on it and dump it as "fin_dataset_sep_sentences.json".
# NOTE(review): called unconditionally at module level, so the whole pipeline
# also runs whenever this module is imported, not only when it is executed.
main()