Browse Source

Move embeddings to its own process that can be started by a rye script

jherve 1 năm trước cách đây
mục cha
commit
8b73d72d51

+ 1 - 0
pyproject.toml

@@ -47,3 +47,4 @@ packages = ["src/de_quoi_parle_le_monde"]
 [tool.rye.scripts]
 web_server = {cmd = "hypercorn de_quoi_parle_le_monde.web:app"}
 snapshots = {call = "de_quoi_parle_le_monde.snapshots"}
+embeddings = {call = "de_quoi_parle_le_monde.embeddings"}

+ 31 - 0
src/de_quoi_parle_le_monde/embeddings.py

@@ -0,0 +1,31 @@
+import asyncio
+from loguru import logger
+
+
+from de_quoi_parle_le_monde.storage import Storage
+from de_quoi_parle_le_monde.workers.embeddings import (
+    EmbeddingsJob,
+    EmbeddingsWorker,
+)
+
+
+async def main():
+    """Run the embeddings service once, then return.
+
+    Creates the storage, asks ``EmbeddingsJob.create`` for the jobs to
+    process and, only when that result is truthy, builds an
+    ``EmbeddingsWorker`` and awaits ``worker.run(jobs)``. Start and exit are
+    logged in every case, including when there is nothing to do.
+    """
+    storage = await Storage.create()
+
+    logger.info("Starting embeddings service..")
+    jobs = await EmbeddingsJob.create(storage)
+    if jobs:
+        # We are inside a coroutine, so a loop is necessarily running:
+        # get_running_loop() is the documented way to obtain it and, unlike
+        # get_event_loop(), can never implicitly create a new loop.
+        loop = asyncio.get_running_loop()
+        # EmbeddingsWorker.create is called as a plain (non-awaited) callable
+        # in the default executor (None) so the event loop is not blocked
+        # while it runs.
+        # NOTE(review): presumably because loading the model is slow --
+        # confirm against workers.embeddings.
+        worker = await loop.run_in_executor(
+            None,
+            EmbeddingsWorker.create,
+            storage,
+            "dangvantuan/sentence-camembert-large",
+        )
+        await worker.run(jobs)
+
+    logger.info("Embeddings service exiting")
+
+
+# Entry point: only start the event loop when this module is executed
+# directly, not when it is imported.
+if __name__ == "__main__":
+    asyncio.run(main())

+ 0 - 20
src/de_quoi_parle_le_monde/main.py

@@ -5,10 +5,6 @@ from attrs import frozen
 
 from de_quoi_parle_le_monde.http import HttpClient
 from de_quoi_parle_le_monde.storage import Storage
-from de_quoi_parle_le_monde.workers.embeddings import (
-    EmbeddingsJob,
-    EmbeddingsWorker,
-)
 from de_quoi_parle_le_monde.similarity_search import SimilaritySearch
 
 
@@ -21,25 +17,9 @@ class Application:
     async def run(self):
         await asyncio.gather(
             self._run_similarity_index(),
-            self._run_embeddings_worker(),
         )
         logger.info("Will quit now..")
 
-    async def _run_embeddings_worker(self):
-        logger.info("Starting embeddings service..")
-        jobs = await EmbeddingsJob.create(self.storage)
-        if jobs:
-            loop = asyncio.get_event_loop()
-            worker = await loop.run_in_executor(
-                None,
-                EmbeddingsWorker.create,
-                self.storage,
-                "dangvantuan/sentence-camembert-large",
-            )
-            await worker.run(jobs)
-
-        logger.info("Embeddings service exiting")
-
     async def _run_similarity_index(self):
         logger.info("Starting index..")
         try: