Explorar el Código

Ensure the similarity index can be reloaded by web app when stale

jherve hace 1 año
padre
commit
b5c4a1705a
Se han modificado 2 ficheros con 43 adiciones y 17 borrados
  1. 33 16
      src/media_observer/similarity_index.py
  2. 10 1
      src/media_observer/web.py

+ 33 - 16
src/media_observer/similarity_index.py

@@ -1,4 +1,6 @@
 import asyncio
+import os
+from datetime import datetime
 import pickle
 from attrs import define
 from typing import Any, Callable, ClassVar
@@ -17,6 +19,7 @@ file_path_pickle_class = "./similarity.class"
 class SimilaritySearch:
     storage: Storage
     index: AnnoyIndex
+    build_dt: datetime | None = None
     index_id_to_title: dict[int, int] = {}
     title_to_index_id: dict[int, int] = {}
     instance: ClassVar[Any | None] = None
@@ -37,6 +40,7 @@ class SimilaritySearch:
             self.index_id_to_title[idx] = e["title_id"]
 
         self.index.build(20)
+        self.build_dt = datetime.now()
 
     async def search(
         self,
@@ -79,24 +83,37 @@ class SimilaritySearch:
         with open(file_path_pickle_class, "wb") as f:
             pickle.dump((self.index_id_to_title, self.title_to_index_id), f)
 
+    @classmethod
+    def _latest_index_file_modification_dt(cls) -> datetime:
+        """Return when the on-disk index file was last modified.
+
+        The value is a naive ``datetime`` built from the file's mtime.
+        ``OSError`` propagates when the file cannot be stat'ed.
+        """
+        mtime = os.path.getmtime(file_path_index)
+        return datetime.fromtimestamp(mtime)
+
+    @property
+    def stale(self) -> bool:
+        """Tell whether the index file on disk supersedes this instance.
+
+        Returns:
+            ``True`` when the index file exists and either this instance has
+            no ``build_dt`` yet (it was never built nor loaded successfully)
+            or the file was modified after ``build_dt``. ``False`` when the
+            index file cannot be stat'ed, as there is nothing to reload.
+        """
+        try:
+            file_dt = self._latest_index_file_modification_dt()
+        except OSError:
+            # A missing or unreadable index file must not break callers that
+            # merely poll for staleness: keep using the in-memory state.
+            return False
+
+        # Without a build time this instance holds no index data of its own,
+        # so whatever is on disk is newer and worth loading.
+        return self.build_dt is None or file_dt > self.build_dt
+
     @classmethod
     def load(cls, storage):
-        if cls.instance is None:
-            d = 1024
-            index = AnnoyIndex(d, "dot")
-            try:
-                index.load(file_path_index)
-                with open(file_path_pickle_class, "rb") as f:
-                    (index_to_title, title_to_index) = pickle.load(f)
-
-                cls.instance = SimilaritySearch(
-                    storage, index, index_to_title, title_to_index
-                )
-            except OSError:
-                logger.warning("Could not find index data")
-                cls.instance = SimilaritySearch(storage, index)
-
-        return cls.instance
+        """Load the similarity index and its id mappings from disk.
+
+        A new instance is created on every call; nothing is cached here.
+
+        Args:
+            storage: storage backend handed to the new instance.
+
+        Returns:
+            An instance whose ``build_dt`` is the modification time of the
+            index file, or, when the data files cannot be read, an instance
+            without id mappings and with ``build_dt`` left to ``None``.
+        """
+        d = 1024
+        index = AnnoyIndex(d, "dot")
+        try:
+            # Read the modification time *before* loading: if the file is
+            # rewritten while it is being loaded, the recorded time is then
+            # older than the file and the instance is reported stale on the
+            # next check instead of silently keeping outdated data.
+            build_dt = cls._latest_index_file_modification_dt()
+            index.load(file_path_index)
+            with open(file_path_pickle_class, "rb") as f:
+                (index_to_title, title_to_index) = pickle.load(f)
+        except OSError:
+            logger.warning("Could not find index data")
+            return SimilaritySearch(storage, index)
+
+        return SimilaritySearch(
+            storage,
+            index,
+            build_dt,
+            index_to_title,
+            title_to_index,
+        )
 
 
 async def main():

+ 10 - 1
src/media_observer/web.py

@@ -74,8 +74,17 @@ async def get_db():
     return storage
 
 
+sim_index: SimilaritySearch | None = None
+
+
 async def get_similarity_search(storage: Storage = Depends(get_db)):
-    return SimilaritySearch.load(storage)
+    """Provide the shared similarity index, reloading it when outdated.
+
+    The instance is kept in the module-level ``sim_index`` and replaced by
+    the result of ``SimilaritySearch.load(storage)`` when none exists yet or
+    when the current one reports itself as stale.
+    """
+    global sim_index
+
+    needs_reload = sim_index is None or sim_index.stale
+    if needs_reload:
+        sim_index = SimilaritySearch.load(storage)
+
+    return sim_index
 
 
 @app.get("/", response_class=HTMLResponse)