Browse Source

[fix] Properly handle the case when indexing does not find any embeddings

jherve 1 year ago
parent
commit
a702699ac4

+ 5 - 2
src/de_quoi_parle_le_monde/main.py

@@ -63,8 +63,11 @@ class Application:
 
     async def _run_similarity_index(self):
         logger.info("Starting index..")
-        await self.similarity_index.add_embeddings()
-        logger.info("Similarity index ready")
+        try:
+            await self.similarity_index.add_embeddings()
+            logger.info("Similarity index ready")
+        except ValueError:
+            ...
 
     @staticmethod
     async def create():

+ 8 - 0
src/de_quoi_parle_le_monde/similarity_search.py

@@ -14,6 +14,14 @@ class SimilaritySearch:
 
     async def add_embeddings(self):
         embeds = await self.storage.list_all_articles_embeddings()
+        if not embeds:
+            msg = (
+                f"Did not find any embeddings in storage. "
+                "A plausible cause is that they have not been computed yet"
+            )
+            logger.error(msg)
+            raise ValueError(msg)
+
         all_titles = np.array([e["title_embedding"] for e in embeds])
         faiss.normalize_L2(all_titles)
         self.index.add_with_ids(