initial commit of project

This commit is contained in:
2021-04-11 19:51:12 +02:00
commit a21a8186d9
110 changed files with 16326178 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
# adapted from:
# Pellegrini, C., E. Özsoy, M. Wintergerst, and G. Groh (2021).
# “Exploiting Food Embeddings for Ingredient Substitution.”
# In: Proceedings of the 14th International Joint Conference on Biomedical
# Engineering Systems and Technologies - Volume 5: HEALTHINF, INSTICC.
# SciTePress, pp. 67–77. ISBN: 978-989-758-490-9. DOI: 10.5220/0010202000670077.
from pathlib import Path
import numpy as np
from annoy import AnnoyIndex
from tqdm import tqdm
# Full guide https://github.com/spotify/annoy
class ApproxKNNClassifier:
    """Approximate k-nearest-neighbour lookup over ingredient embeddings.

    Wraps an Annoy index that uses the ``'angular'`` metric. The index is
    loaded from ``save_path`` when that file already exists; otherwise it is
    built from ``all_ingredient_embeddings`` and written to ``save_path``.
    """

    # Number of neighbours requested per query on top of max_embedding_count.
    # NOTE(review): presumably padding so that a query is not answered only
    # with the queried ingredient's own embeddings -- confirm with callers.
    _EXTRA_NEIGHBORS = 200

    def __init__(self, all_ingredient_embeddings, max_embedding_count,
                 save_path=Path('data/eval/approx_knn_classifier.ann'), n_trees=10):
        """Load the index from ``save_path`` or build and save a new one.

        Args:
            all_ingredient_embeddings: Embeddings to index. Must expose
                ``.shape``, ``len()`` and iteration; the last axis is used as
                the vector length. Presumably a 2-D array of shape
                (n_embeddings, vector_length) -- TODO confirm.
            max_embedding_count: Base number of neighbours requested per
                query; ``k_nearest_neighbors`` asks for this many plus
                ``_EXTRA_NEIGHBORS``.
            save_path: Location of the Annoy index file (``Path`` or ``str``).
            n_trees: Number of trees passed to ``AnnoyIndex.build``; only
                used when a new index is built.
        """
        save_path = Path(save_path)  # also accept plain string paths
        vector_length = all_ingredient_embeddings.shape[-1]
        self.max_embedding_count = max_embedding_count
        self.approx_knn_classifier = AnnoyIndex(vector_length, 'angular')

        if save_path.exists():
            # NOTE: an existing file is reused as-is; it is not checked
            # against all_ingredient_embeddings beyond the vector length
            # given to AnnoyIndex above.
            print('Loading Existing Approx Classifier')
            self.approx_knn_classifier.load(str(save_path))
            return

        # Create the target directory up front so the save at the end cannot
        # fail on a missing directory after the whole index has been built.
        save_path.parent.mkdir(parents=True, exist_ok=True)

        # Item ids are the positions of the embeddings in the input.
        for item_id, embedding in tqdm(enumerate(all_ingredient_embeddings),
                                       total=len(all_ingredient_embeddings),
                                       desc='Creating Approx Classifier'):
            self.approx_knn_classifier.add_item(item_id, embedding)
        self.approx_knn_classifier.build(n_trees)

        print('Saving Approx Classifier')
        self.approx_knn_classifier.save(str(save_path))

    def k_nearest_neighbors(self, ingredient_embeddings):
        """Query the index for the nearest neighbours of each embedding.

        Args:
            ingredient_embeddings: Iterable of query vectors.

        Returns:
            A tuple ``(distances, indices)`` -- distances first -- of stacked
            arrays with one row per query vector. Each query requests
            ``max_embedding_count + _EXTRA_NEIGHBORS`` neighbours. For an
            empty input, two empty arrays of shape ``(0, requested)`` are
            returned.
        """
        n_neighbors = self.max_embedding_count + self._EXTRA_NEIGHBORS

        all_indices, all_distances = [], []
        for ingredient_embedding in ingredient_embeddings:
            # search_k is not passed, so Annoy's default accuracy/speed
            # trade-off applies.
            indices, distances = self.approx_knn_classifier.get_nns_by_vector(
                ingredient_embedding, n_neighbors, include_distances=True)
            all_indices.append(indices)
            all_distances.append(distances)

        if not all_indices:
            # np.stack refuses an empty list; return well-formed empty results.
            return (np.empty((0, n_neighbors)),
                    np.empty((0, n_neighbors), dtype=int))
        return np.stack(all_distances), np.stack(all_indices)