ソースを参照

Simplify database schema

jherve 1 年前
コミット
6d305a47bb

+ 3 - 1
pyproject.toml

@@ -45,7 +45,9 @@ build-backend = "hatchling.build"
 
 [tool.rye]
 managed = true
-dev-dependencies = []
+dev-dependencies = [
+    "ipython>=8.25.0",
+]
 
 [tool.hatch.metadata]
 allow-direct-references = true

+ 144 - 25
requirements-dev.lock

@@ -3,7 +3,7 @@
 #
 # last locked with the following flags:
 #   pre: false
-#   features: []
+#   features: ["embeddings"]
 #   all-features: false
 #   with-sources: false
 
@@ -18,13 +18,13 @@ aiofiles==23.2.1
 aiohttp==3.9.5
     # via aiobotocore
     # via aiohttp-client-cache
-    # via de-quoi-parle-le-monde
+    # via media-observer
 aiohttp-client-cache==0.11.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 aioitertools==0.11.0
     # via aiobotocore
 aiolimiter==1.1.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 aiosignal==1.3.1
     # via aiohttp
 aiosqlite==0.20.0
@@ -32,25 +32,27 @@ aiosqlite==0.20.0
 annotated-types==0.6.0
     # via pydantic
 annoy==1.17.3
-    # via de-quoi-parle-le-monde
+    # via media-observer
 anyio==4.3.0
     # via httpx
     # via starlette
     # via watchfiles
+asttokens==2.4.1
+    # via stack-data
 async-timeout==4.0.3
     # via asyncpg
 asyncpg==0.29.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 attrs==23.2.0
     # via aiohttp
     # via aiohttp-client-cache
     # via cattrs
-    # via de-quoi-parle-le-monde
+    # via media-observer
     # via requests-cache
 babel==2.15.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 beautifulsoup4==4.12.3
-    # via de-quoi-parle-le-monde
+    # via media-observer
 boto3==1.34.69
     # via aiobotocore
 botocore==1.34.69
@@ -58,7 +60,7 @@ botocore==1.34.69
     # via boto3
     # via s3transfer
 cattrs==23.2.3
-    # via de-quoi-parle-le-monde
+    # via media-observer
     # via requests-cache
 certifi==2024.2.2
     # via httpcore
@@ -69,21 +71,33 @@ charset-normalizer==3.3.2
 click==8.1.7
     # via typer
     # via uvicorn
+decorator==5.1.1
+    # via ipython
 dnspython==2.6.1
     # via email-validator
     # via pymongo
 dynaconf==3.2.5
-    # via de-quoi-parle-le-monde
+    # via media-observer
 email-validator==2.1.1
     # via fastapi
+executing==2.0.1
+    # via stack-data
 fastapi==0.111.0
-    # via de-quoi-parle-le-monde
     # via fastapi-cli
+    # via media-observer
 fastapi-cli==0.0.3
     # via fastapi
+filelock==3.14.0
+    # via huggingface-hub
+    # via torch
+    # via transformers
+    # via triton
 frozenlist==1.4.1
     # via aiohttp
     # via aiosignal
+fsspec==2024.6.0
+    # via huggingface-hub
+    # via torch
 h11==0.14.0
     # via httpcore
     # via hypercorn
@@ -99,10 +113,14 @@ httptools==0.6.1
     # via uvicorn
 httpx==0.27.0
     # via fastapi
+huggingface-hub==0.23.3
+    # via sentence-transformers
+    # via tokenizers
+    # via transformers
 humanize==4.9.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 hypercorn==0.16.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 hyperframe==6.0.1
     # via h2
 idna==3.7
@@ -111,46 +129,105 @@ idna==3.7
     # via httpx
     # via requests
     # via yarl
+ipython==8.25.0
 itsdangerous==2.2.0
     # via aiohttp-client-cache
+jedi==0.19.1
+    # via ipython
 jinja2==3.1.4
-    # via de-quoi-parle-le-monde
     # via fastapi
+    # via media-observer
+    # via torch
 jmespath==1.0.1
     # via boto3
     # via botocore
+joblib==1.4.2
+    # via scikit-learn
 loguru==0.7.2
-    # via de-quoi-parle-le-monde
+    # via media-observer
 lxml==5.2.2
-    # via de-quoi-parle-le-monde
+    # via media-observer
 markdown-it-py==3.0.0
     # via rich
 markupsafe==2.1.5
     # via jinja2
+matplotlib-inline==0.1.7
+    # via ipython
 mdurl==0.1.2
     # via markdown-it-py
 motor==3.4.0
     # via aiohttp-client-cache
+mpmath==1.3.0
+    # via sympy
 multidict==6.0.5
     # via aiohttp
     # via yarl
+networkx==3.3
+    # via torch
 numpy==1.26.4
-    # via de-quoi-parle-le-monde
+    # via media-observer
+    # via scikit-learn
+    # via scipy
+    # via sentence-transformers
+    # via transformers
+nvidia-cublas-cu12==12.1.3.1
+    # via nvidia-cudnn-cu12
+    # via nvidia-cusolver-cu12
+    # via torch
+nvidia-cuda-cupti-cu12==12.1.105
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.1.105
+    # via torch
+nvidia-cuda-runtime-cu12==12.1.105
+    # via torch
+nvidia-cudnn-cu12==8.9.2.26
+    # via torch
+nvidia-cufft-cu12==11.0.2.54
+    # via torch
+nvidia-curand-cu12==10.3.2.106
+    # via torch
+nvidia-cusolver-cu12==11.4.5.107
+    # via torch
+nvidia-cusparse-cu12==12.1.0.106
+    # via nvidia-cusolver-cu12
+    # via torch
+nvidia-nccl-cu12==2.20.5
+    # via torch
+nvidia-nvjitlink-cu12==12.5.40
+    # via nvidia-cusolver-cu12
+    # via nvidia-cusparse-cu12
+nvidia-nvtx-cu12==12.1.105
+    # via torch
 orjson==3.10.3
     # via fastapi
 packaging==24.0
-    # via de-quoi-parle-le-monde
+    # via huggingface-hub
+    # via media-observer
+    # via transformers
+parso==0.8.4
+    # via jedi
+pexpect==4.9.0
+    # via ipython
+pillow==10.3.0
+    # via sentence-transformers
 platformdirs==4.2.1
     # via requests-cache
 priority==2.0.0
     # via hypercorn
+prompt-toolkit==3.0.46
+    # via ipython
 protobuf==5.26.1
-    # via de-quoi-parle-le-monde
+    # via media-observer
+ptyprocess==0.7.0
+    # via pexpect
+pure-eval==0.2.2
+    # via stack-data
 pydantic==2.7.1
     # via fastapi
 pydantic-core==2.18.2
     # via pydantic
 pygments==2.18.0
+    # via ipython
     # via rich
 pymongo==4.7.2
     # via motor
@@ -161,25 +238,41 @@ python-dotenv==1.0.1
 python-multipart==0.0.9
     # via fastapi
 pyyaml==6.0.1
+    # via huggingface-hub
+    # via transformers
     # via uvicorn
 redis==5.0.4
     # via aiohttp-client-cache
+regex==2024.5.15
+    # via transformers
 requests==2.31.0
-    # via de-quoi-parle-le-monde
+    # via huggingface-hub
+    # via media-observer
     # via requests-cache
+    # via transformers
 requests-cache==1.2.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 rich==13.7.1
     # via typer
 ruff==0.4.4
-    # via de-quoi-parle-le-monde
+    # via media-observer
 s3transfer==0.10.1
     # via boto3
+safetensors==0.4.3
+    # via transformers
+scikit-learn==1.5.0
+    # via sentence-transformers
+scipy==1.13.1
+    # via scikit-learn
+    # via sentence-transformers
+sentence-transformers==3.0.1
+    # via media-observer
 sentencepiece==0.2.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 shellingham==1.5.4
     # via typer
 six==1.16.0
+    # via asttokens
     # via python-dateutil
     # via url-normalize
 sniffio==1.3.1
@@ -187,15 +280,39 @@ sniffio==1.3.1
     # via httpx
 soupsieve==2.5
     # via beautifulsoup4
+stack-data==0.6.3
+    # via ipython
 starlette==0.37.2
     # via fastapi
+sympy==1.12.1
+    # via torch
+threadpoolctl==3.5.0
+    # via scikit-learn
+tokenizers==0.19.1
+    # via transformers
+torch==2.3.1
+    # via sentence-transformers
+tqdm==4.66.4
+    # via huggingface-hub
+    # via sentence-transformers
+    # via transformers
+traitlets==5.14.3
+    # via ipython
+    # via matplotlib-inline
+transformers==4.41.2
+    # via sentence-transformers
+triton==2.3.1
+    # via torch
 typer==0.12.3
     # via fastapi-cli
 typing-extensions==4.11.0
     # via aiosqlite
     # via fastapi
+    # via huggingface-hub
+    # via ipython
     # via pydantic
     # via pydantic-core
+    # via torch
     # via typer
 ujson==5.9.0
     # via fastapi
@@ -213,6 +330,8 @@ uvloop==0.19.0
     # via uvicorn
 watchfiles==0.21.0
     # via uvicorn
+wcwidth==0.2.13
+    # via prompt-toolkit
 websockets==12.0
     # via uvicorn
 wrapt==1.16.0
@@ -221,4 +340,4 @@ wsproto==1.2.0
     # via hypercorn
 yarl==1.9.4
     # via aiohttp
-    # via de-quoi-parle-le-monde
+    # via media-observer

+ 113 - 25
requirements.lock

@@ -3,7 +3,7 @@
 #
 # last locked with the following flags:
 #   pre: false
-#   features: []
+#   features: ["embeddings"]
 #   all-features: false
 #   with-sources: false
 
@@ -18,13 +18,13 @@ aiofiles==23.2.1
 aiohttp==3.9.5
     # via aiobotocore
     # via aiohttp-client-cache
-    # via de-quoi-parle-le-monde
+    # via media-observer
 aiohttp-client-cache==0.11.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 aioitertools==0.11.0
     # via aiobotocore
 aiolimiter==1.1.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 aiosignal==1.3.1
     # via aiohttp
 aiosqlite==0.20.0
@@ -32,7 +32,7 @@ aiosqlite==0.20.0
 annotated-types==0.6.0
     # via pydantic
 annoy==1.17.3
-    # via de-quoi-parle-le-monde
+    # via media-observer
 anyio==4.3.0
     # via httpx
     # via starlette
@@ -40,17 +40,17 @@ anyio==4.3.0
 async-timeout==4.0.3
     # via asyncpg
 asyncpg==0.29.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 attrs==23.2.0
     # via aiohttp
     # via aiohttp-client-cache
     # via cattrs
-    # via de-quoi-parle-le-monde
+    # via media-observer
     # via requests-cache
 babel==2.15.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 beautifulsoup4==4.12.3
-    # via de-quoi-parle-le-monde
+    # via media-observer
 boto3==1.34.69
     # via aiobotocore
 botocore==1.34.69
@@ -58,7 +58,7 @@ botocore==1.34.69
     # via boto3
     # via s3transfer
 cattrs==23.2.3
-    # via de-quoi-parle-le-monde
+    # via media-observer
     # via requests-cache
 certifi==2024.2.2
     # via httpcore
@@ -73,17 +73,25 @@ dnspython==2.6.1
     # via email-validator
     # via pymongo
 dynaconf==3.2.5
-    # via de-quoi-parle-le-monde
+    # via media-observer
 email-validator==2.1.1
     # via fastapi
 fastapi==0.111.0
-    # via de-quoi-parle-le-monde
     # via fastapi-cli
+    # via media-observer
 fastapi-cli==0.0.3
     # via fastapi
+filelock==3.14.0
+    # via huggingface-hub
+    # via torch
+    # via transformers
+    # via triton
 frozenlist==1.4.1
     # via aiohttp
     # via aiosignal
+fsspec==2024.6.0
+    # via huggingface-hub
+    # via torch
 h11==0.14.0
     # via httpcore
     # via hypercorn
@@ -99,10 +107,14 @@ httptools==0.6.1
     # via uvicorn
 httpx==0.27.0
     # via fastapi
+huggingface-hub==0.23.3
+    # via sentence-transformers
+    # via tokenizers
+    # via transformers
 humanize==4.9.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 hypercorn==0.16.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 hyperframe==6.0.1
     # via h2
 idna==3.7
@@ -114,15 +126,18 @@ idna==3.7
 itsdangerous==2.2.0
     # via aiohttp-client-cache
 jinja2==3.1.4
-    # via de-quoi-parle-le-monde
     # via fastapi
+    # via media-observer
+    # via torch
 jmespath==1.0.1
     # via boto3
     # via botocore
+joblib==1.4.2
+    # via scikit-learn
 loguru==0.7.2
-    # via de-quoi-parle-le-monde
+    # via media-observer
 lxml==5.2.2
-    # via de-quoi-parle-le-monde
+    # via media-observer
 markdown-it-py==3.0.0
     # via rich
 markupsafe==2.1.5
@@ -131,21 +146,61 @@ mdurl==0.1.2
     # via markdown-it-py
 motor==3.4.0
     # via aiohttp-client-cache
+mpmath==1.3.0
+    # via sympy
 multidict==6.0.5
     # via aiohttp
     # via yarl
+networkx==3.3
+    # via torch
 numpy==1.26.4
-    # via de-quoi-parle-le-monde
+    # via media-observer
+    # via scikit-learn
+    # via scipy
+    # via sentence-transformers
+    # via transformers
+nvidia-cublas-cu12==12.1.3.1
+    # via nvidia-cudnn-cu12
+    # via nvidia-cusolver-cu12
+    # via torch
+nvidia-cuda-cupti-cu12==12.1.105
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.1.105
+    # via torch
+nvidia-cuda-runtime-cu12==12.1.105
+    # via torch
+nvidia-cudnn-cu12==8.9.2.26
+    # via torch
+nvidia-cufft-cu12==11.0.2.54
+    # via torch
+nvidia-curand-cu12==10.3.2.106
+    # via torch
+nvidia-cusolver-cu12==11.4.5.107
+    # via torch
+nvidia-cusparse-cu12==12.1.0.106
+    # via nvidia-cusolver-cu12
+    # via torch
+nvidia-nccl-cu12==2.20.5
+    # via torch
+nvidia-nvjitlink-cu12==12.5.40
+    # via nvidia-cusolver-cu12
+    # via nvidia-cusparse-cu12
+nvidia-nvtx-cu12==12.1.105
+    # via torch
 orjson==3.10.3
     # via fastapi
 packaging==24.0
-    # via de-quoi-parle-le-monde
+    # via huggingface-hub
+    # via media-observer
+    # via transformers
+pillow==10.3.0
+    # via sentence-transformers
 platformdirs==4.2.1
     # via requests-cache
 priority==2.0.0
     # via hypercorn
 protobuf==5.26.1
-    # via de-quoi-parle-le-monde
+    # via media-observer
 pydantic==2.7.1
     # via fastapi
 pydantic-core==2.18.2
@@ -161,22 +216,37 @@ python-dotenv==1.0.1
 python-multipart==0.0.9
     # via fastapi
 pyyaml==6.0.1
+    # via huggingface-hub
+    # via transformers
     # via uvicorn
 redis==5.0.4
     # via aiohttp-client-cache
+regex==2024.5.15
+    # via transformers
 requests==2.31.0
-    # via de-quoi-parle-le-monde
+    # via huggingface-hub
+    # via media-observer
     # via requests-cache
+    # via transformers
 requests-cache==1.2.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 rich==13.7.1
     # via typer
 ruff==0.4.4
-    # via de-quoi-parle-le-monde
+    # via media-observer
 s3transfer==0.10.1
     # via boto3
+safetensors==0.4.3
+    # via transformers
+scikit-learn==1.5.0
+    # via sentence-transformers
+scipy==1.13.1
+    # via scikit-learn
+    # via sentence-transformers
+sentence-transformers==3.0.1
+    # via media-observer
 sentencepiece==0.2.0
-    # via de-quoi-parle-le-monde
+    # via media-observer
 shellingham==1.5.4
     # via typer
 six==1.16.0
@@ -189,13 +259,31 @@ soupsieve==2.5
     # via beautifulsoup4
 starlette==0.37.2
     # via fastapi
+sympy==1.12.1
+    # via torch
+threadpoolctl==3.5.0
+    # via scikit-learn
+tokenizers==0.19.1
+    # via transformers
+torch==2.3.1
+    # via sentence-transformers
+tqdm==4.66.4
+    # via huggingface-hub
+    # via sentence-transformers
+    # via transformers
+transformers==4.41.2
+    # via sentence-transformers
+triton==2.3.1
+    # via torch
 typer==0.12.3
     # via fastapi-cli
 typing-extensions==4.11.0
     # via aiosqlite
     # via fastapi
+    # via huggingface-hub
     # via pydantic
     # via pydantic-core
+    # via torch
     # via typer
 ujson==5.9.0
     # via fastapi
@@ -221,4 +309,4 @@ wsproto==1.2.0
     # via hypercorn
 yarl==1.9.4
     # via aiohttp
-    # via de-quoi-parle-le-monde
+    # via media-observer

+ 4 - 12
src/media_observer/embeddings.py

@@ -26,21 +26,13 @@ def batched(iterable, n):
 
 @frozen
 class EmbeddingsJob:
-    article_id: int
+    title_id: int
     text: NDArray
 
     @staticmethod
     async def create(storage: Storage):
-        all_snapshots = await storage.list_all_featured_article_snapshots()
-        all_embeds_ids = set(
-            await storage.list_all_embedded_featured_article_snapshot_ids()
-        )
-
-        all_snapshots_not_stored = (
-            s for s in all_snapshots if s["id"] not in all_embeds_ids
-        )
-
-        return [EmbeddingsJob(s["id"], s["title"]) for s in all_snapshots_not_stored]
+        all_titles = await storage.list_all_titles_without_embedding()
+        return [EmbeddingsJob(t["id"], t["text"]) for t in all_titles]
 
 
 @frozen
@@ -73,7 +65,7 @@ class EmbeddingsWorker:
         batch_size = 64
         for batch in batched(jobs, batch_size):
             embeddings_by_id = self.compute_embeddings_for(
-                {j.article_id: j.text for j in batch}
+                {j.title_id: j.text for j in batch}
             )
             await self.store_embeddings(embeddings_by_id)
 

+ 17 - 19
src/media_observer/similarity_index.py

@@ -17,12 +17,12 @@ file_path_pickle_class = "./similarity.class"
 class SimilaritySearch:
     storage: Storage
     index: AnnoyIndex
-    embedding_to_featured: dict[int, int] = {}
-    featured_to_embedding: dict[int, int] = {}
+    index_id_to_title: dict[int, int] = {}
+    title_to_index_id: dict[int, int] = {}
     instance: ClassVar[Any | None] = None
 
     async def add_embeddings(self):
-        embeds = await self.storage.list_all_articles_embeddings()
+        embeds = await self.storage.list_all_embeddings()
         if not embeds:
             msg = (
                 "Did not find any embeddings in storage. "
@@ -31,41 +31,39 @@ class SimilaritySearch:
             logger.error(msg)
             raise ValueError(msg)
 
-        for e in embeds:
-            self.index.add_item(e["id"], e["title_embedding"])
-            self.embedding_to_featured[e["id"]] = e["featured_article_snapshot_id"]
-            self.featured_to_embedding[e["featured_article_snapshot_id"]] = e["id"]
+        for idx, e in enumerate(embeds):
+            self.index.add_item(idx, e["vector"])
+            self.title_to_index_id[e["title_id"]] = idx
+            self.index_id_to_title[idx] = e["title_id"]
 
         self.index.build(20)
 
     async def search(
         self,
-        featured_article_snapshot_ids: list[int],
+        title_ids: list[int],
         nb_results: int,
         score_func: Callable[[float], bool],
     ):
         try:
-            [embed_id] = [
-                self.featured_to_embedding[id_] for id_ in featured_article_snapshot_ids
-            ]
+            [title_id] = [self.title_to_index_id[id] for id in title_ids]
         except KeyError as e:
             msg = (
-                f"Could not find all embedding(s) in storage for {featured_article_snapshot_ids}. "
+                f"Could not find all embedding(s) in storage for {title_ids}. "
                 "A plausible cause is that they have not been computed yet"
             )
             logger.error(msg)
             raise e
 
         indices, distances = self.index.get_nns_by_item(
-            embed_id, nb_results, include_distances=True
+            title_id, nb_results, include_distances=True
         )
         return [
             (
-                embed_id,
+                title_id,
                 [
-                    (self.embedding_to_featured[i], d)
+                    (self.index_id_to_title[i], d)
                     for i, d in (zip(indices, distances))
-                    if i != embed_id and score_func(d)
+                    if i != title_id and score_func(d)
                 ],
             )
         ]
@@ -82,7 +80,7 @@ class SimilaritySearch:
     async def save(self):
         self.index.save(file_path_index)
         with open(file_path_pickle_class, "wb") as f:
-            pickle.dump((self.embedding_to_featured, self.featured_to_embedding), f)
+            pickle.dump((self.index_id_to_title, self.title_to_index_id), f)
 
     @classmethod
     def load(cls, storage):
@@ -92,10 +90,10 @@ class SimilaritySearch:
             try:
                 index.load(file_path_index)
                 with open(file_path_pickle_class, "rb") as f:
-                    (embedding_to_featured, featured_to_embedding) = pickle.load(f)
+                    (index_to_title, title_to_index) = pickle.load(f)
 
                 cls.instance = SimilaritySearch(
-                    storage, index, embedding_to_featured, featured_to_embedding
+                    storage, index, index_to_title, title_to_index
                 )
             except OSError:
                 logger.warning("Could not find index data")

+ 169 - 161
src/media_observer/storage.py

@@ -46,35 +46,35 @@ class Storage(StorageAbc):
             ],
         ),
         Table(
-            name="featured_articles",
+            name="articles",
             columns=[
                 Column(name="id", primary_key=True),
                 Column(name="url", type_="TEXT"),
             ],
         ),
         Table(
-            name="featured_article_snapshots",
+            name="titles",
             columns=[
                 Column(name="id", primary_key=True),
-                Column(
-                    name="featured_article_id",
-                    references="featured_articles (id) ON DELETE CASCADE",
-                ),
-                Column(name="title", type_="TEXT"),
-                Column(name="url", type_="TEXT"),
+                Column(name="text", type_="TEXT"),
             ],
         ),
         Table(
             name="main_articles",
             columns=[
                 Column(name="id", primary_key=True),
+                Column(name="url", type_="TEXT"),
                 Column(
                     name="snapshot_id",
                     references="snapshots (id) ON DELETE CASCADE",
                 ),
                 Column(
-                    name="featured_article_snapshot_id",
-                    references="featured_article_snapshots (id) ON DELETE CASCADE",
+                    name="article_id",
+                    references="articles (id) ON DELETE CASCADE",
+                ),
+                Column(
+                    name="title_id",
+                    references="titles (id) ON DELETE CASCADE",
                 ),
             ],
         ),
@@ -82,26 +82,28 @@ class Storage(StorageAbc):
             name="top_articles",
             columns=[
                 Column(name="id", primary_key=True),
+                Column(name="url", type_="TEXT"),
+                Column(name="rank", type_="INTEGER"),
                 Column(
                     name="snapshot_id",
                     references="snapshots (id) ON DELETE CASCADE",
                 ),
                 Column(
-                    name="featured_article_snapshot_id",
-                    references="featured_article_snapshots (id) ON DELETE CASCADE",
+                    name="article_id",
+                    references="articles (id) ON DELETE CASCADE",
+                ),
+                Column(
+                    name="title_id",
+                    references="titles (id) ON DELETE CASCADE",
                 ),
-                Column(name="rank", type_="INTEGER"),
             ],
         ),
         Table(
-            name="articles_embeddings",
+            name="embeddings",
             columns=[
                 Column(name="id", primary_key=True),
-                Column(
-                    name="featured_article_snapshot_id",
-                    references="featured_article_snapshots (id) ON DELETE CASCADE",
-                ),
-                Column(name="title_embedding", type_="bytea"),
+                Column(name="title_id", references="titles (id) ON DELETE CASCADE"),
+                Column(name="vector", type_="bytea"),
             ],
         ),
     ]
@@ -135,8 +137,8 @@ class Storage(StorageAbc):
             name="main_page_apparitions",
             column_names=[
                 "id",
-                "featured_article_id",
                 "title",
+                "title_id",
                 "url_archive",
                 "url_article",
                 "main_in_snapshot_id",
@@ -145,18 +147,32 @@ class Storage(StorageAbc):
             ],
             create_stmt="""
                 SELECT
-                    fas.id,
-                    fas.featured_article_id,
-                    fas.title,
-                    fas.url AS url_archive,
-                    fa.url AS url_article,
-                    m.snapshot_id AS main_in_snapshot_id,
-                    t.snapshot_id AS top_in_snapshot_id,
-                    t.rank
-                FROM featured_article_snapshots fas
-                JOIN featured_articles fa ON fa.id = fas.featured_article_id
-                LEFT JOIN main_articles m ON m.featured_article_snapshot_id = fas.id
-                LEFT JOIN top_articles t ON t.featured_article_snapshot_id = fas.id
+                    a.id,
+                    t.text AS title,
+                    t.id AS title_id,
+                    ma.url AS url_archive,
+                    a.url AS url_article,
+                    ma.snapshot_id AS main_in_snapshot_id,
+                    NULL AS top_in_snapshot_id,
+                    NULL AS rank
+                FROM articles a
+                JOIN main_articles ma ON ma.article_id = a.id
+                JOIN titles t ON t.id = ma.title_id
+
+                UNION ALL
+
+                SELECT
+                    a.id,
+                    t.text AS title,
+                    t.id AS title_id,
+                    ta.url AS url_archive,
+                    a.url AS url_article,
+                    NULL AS main_in_snapshot_id,
+                    ta.snapshot_id AS top_in_snapshot_id,
+                    ta.rank
+                FROM articles a
+                JOIN top_articles ta ON ta.article_id = a.id
+                JOIN titles t ON t.id = ta.title_id
                 """,
         ),
         View(
@@ -168,9 +184,9 @@ class Storage(StorageAbc):
                 "site_original_url",
                 "timestamp",
                 "timestamp_virtual",
-                "featured_article_snapshot_id",
-                "featured_article_id",
+                "article_id",
                 "title",
+                "title_id",
                 "url_archive",
                 "url_article",
                 "is_main",
@@ -178,22 +194,22 @@ class Storage(StorageAbc):
             ],
             create_stmt="""
                 SELECT
-                    sv.id as snapshot_id,
+                    sv.id AS snapshot_id,
                     sv.site_id,
                     sv.site_name,
                     sv.site_original_url,
-                    sv.timestamp,
+                    sv."timestamp",
                     sv.timestamp_virtual,
-                    mpa.id AS featured_article_snapshot_id,
-                    mpa.featured_article_id,
+                    mpa.id AS article_id,
                     mpa.title,
+                    mpa.title_id,
                     mpa.url_archive,
                     mpa.url_article,
                     mpa.main_in_snapshot_id IS NOT NULL AS is_main,
                     mpa.rank
                 FROM main_page_apparitions mpa
                 JOIN snapshots_view sv ON sv.id = mpa.main_in_snapshot_id OR sv.id = mpa.top_in_snapshot_id
-                """,
+            """,
         ),
     ]
 
@@ -209,29 +225,29 @@ class Storage(StorageAbc):
             columns=["timestamp_virtual", "site_id"],
         ),
         UniqueIndex(
-            name="main_articles_unique_idx_snapshot_id",
-            table="main_articles",
-            columns=["snapshot_id"],
+            name="articles_unique_url",
+            table="articles",
+            columns=["url"],
         ),
         UniqueIndex(
-            name="featured_articles_unique_url",
-            table="featured_articles",
-            columns=["url"],
+            name="titles_unique_text",
+            table="titles",
+            columns=["text"],
         ),
         UniqueIndex(
-            name="featured_article_snapshots_unique_idx_featured_article_id_url",
-            table="featured_article_snapshots",
-            columns=["featured_article_id", "url"],
+            name="main_articles_unique_idx_snapshot_id_article_id",
+            table="main_articles",
+            columns=["snapshot_id", "article_id"],
         ),
         UniqueIndex(
-            name="top_articles_unique_idx_snapshot_id_rank",
+            name="top_articles_unique_idx_snapshot_id_article_id_rank",
             table="top_articles",
-            columns=["snapshot_id", "rank"],
+            columns=["snapshot_id", "article_id", "rank"],
         ),
         UniqueIndex(
-            name="articles_embeddings_unique_idx_featured_article_snapshot_id",
-            table="articles_embeddings",
-            columns=["featured_article_snapshot_id"],
+            name="embeddings_unique_title_id",
+            table="embeddings",
+            columns=["title_id"],
         ),
     ]
 
@@ -288,91 +304,12 @@ class Storage(StorageAbc):
 
         return exists != []
 
-    async def list_all_featured_article_snapshots(self):
-        async with self.backend.get_connection() as conn:
-            rows = await conn.execute_fetchall(
-                """
-                    SELECT *
-                    FROM featured_article_snapshots
-                """,
-            )
-
-            return [
-                self._from_row(r, self._table_by_name["featured_article_snapshots"])
-                for r in rows
-            ]
-
-    async def list_snapshot_apparitions(self, featured_article_snapshot_ids: list[int]):
-        if len(featured_article_snapshot_ids) == 0:
-            return []
-
-        async with self.backend.get_connection() as conn:
-            rows = await conn.execute_fetchall(
-                f"""
-                    SELECT *
-                    FROM snapshot_apparitions
-                    WHERE featured_article_snapshot_id IN ({self._placeholders(*featured_article_snapshot_ids)})
-                """,
-                *featured_article_snapshot_ids,
-            )
-
-            return [
-                self._from_row(r, self._view_by_name["snapshot_apparitions"])
-                for r in rows
-            ]
-
     @classmethod
     def _from_row(cls, r, table_or_view: Table | View):
         columns = table_or_view.column_names
 
         return {col: r[idx] for idx, col in enumerate(columns)}
 
-    async def list_all_embedded_featured_article_snapshot_ids(self) -> list[int]:
-        async with self.backend.get_connection() as conn:
-            rows = await conn.execute_fetchall(
-                """
-                    SELECT featured_article_snapshot_id
-                    FROM articles_embeddings
-                """,
-            )
-
-            return [r[0] for r in rows]
-
-    async def list_all_articles_embeddings(self):
-        async with self.backend.get_connection() as conn:
-            rows = await conn.execute_fetchall(
-                """
-                    SELECT *
-                    FROM articles_embeddings
-                """,
-            )
-
-            return [self._from_articles_embeddings_row(r) for r in rows]
-
-    @classmethod
-    def _from_articles_embeddings_row(cls, r):
-        [embeds_table] = [t for t in cls.tables if t.name == "articles_embeddings"]
-        d = cls._from_row(r, embeds_table)
-        d.update(title_embedding=np.frombuffer(d["title_embedding"], dtype="float32"))
-
-        return d
-
-    async def add_embedding(self, featured_article_snapshot_id: int, embedding):
-        async with self.backend.get_connection() as conn:
-            await conn.execute_insert(
-                self._insert_stmt(
-                    "articles_embeddings",
-                    ["featured_article_snapshot_id", "title_embedding"],
-                ),
-                featured_article_snapshot_id,
-                embedding,
-            )
-
-    async def list_sites(self):
-        async with self.backend.get_connection() as conn:
-            sites = await conn.execute_fetchall("SELECT * FROM sites")
-            return [self._from_row(s, self._table_by_name["sites"]) for s in sites]
-
     async def list_neighbouring_main_articles(
         self,
         site_id: int,
@@ -431,6 +368,70 @@ class Storage(StorageAbc):
                 for a in main_articles
             ]
 
+    async def list_all_titles_without_embedding(self):
+        async with self.backend.get_connection() as conn:
+            rows = await conn.execute_fetchall("""
+                SELECT t.*
+                FROM public.titles AS t
+                WHERE NOT EXISTS (SELECT 1 FROM embeddings WHERE title_id = t.id)
+            """)
+
+            return [self._from_row(r, self._table_by_name["titles"]) for r in rows]
+
+    async def list_all_embeddings(self):
+        async with self.backend.get_connection() as conn:
+            rows = await conn.execute_fetchall(
+                """
+                    SELECT *
+                    FROM embeddings
+                """,
+            )
+
+            return [self._from_embeddings_row(r) for r in rows]
+
+    async def list_snapshot_apparitions(self, title_ids: list[int]):
+        if len(title_ids) == 0:
+            return []
+
+        async with self.backend.get_connection() as conn:
+            rows = await conn.execute_fetchall(
+                f"""
+                    SELECT *
+                    FROM snapshot_apparitions
+                    WHERE title_id IN ({self._placeholders(*title_ids)})
+                """,
+                *title_ids,
+            )
+
+            return [
+                self._from_row(r, self._view_by_name["snapshot_apparitions"])
+                for r in rows
+            ]
+
+    @classmethod
+    def _from_embeddings_row(cls, r):
+        [embeds_table] = [t for t in cls.tables if t.name == "embeddings"]
+        d = cls._from_row(r, embeds_table)
+        d.update(vector=np.frombuffer(d["vector"], dtype="float32"))
+
+        return d
+
+    async def add_embedding(self, title_id: int, embedding):
+        async with self.backend.get_connection() as conn:
+            await conn.execute_insert(
+                self._insert_stmt(
+                    "embeddings",
+                    ["title_id", "vector"],
+                ),
+                title_id,
+                embedding,
+            )
+
+    async def list_sites(self):
+        async with self.backend.get_connection() as conn:
+            sites = await conn.execute_fetchall("SELECT * FROM sites")
+            return [self._from_row(s, self._table_by_name["sites"]) for s in sites]
+
     async def add_page(self, collection, page, dt):
         assert dt.tzinfo is not None
 
@@ -440,23 +441,23 @@ class Storage(StorageAbc):
                 snapshot_id = await self._add_snapshot(
                     conn, site_id, page.snapshot.id, dt
                 )
-                article_id = await self._add_featured_article(
+                article_id = await self._add_article(
                     conn, page.main_article.article.original
                 )
-                main_article_snap_id = await self._add_featured_article_snapshot(
-                    conn, article_id, page.main_article.article
+                title_id = await self._add_title(conn, page.main_article.article.title)
+                await self._add_main_article(
+                    conn,
+                    snapshot_id,
+                    article_id,
+                    title_id,
+                    page.main_article.article.url,
                 )
-                await self._add_main_article(conn, snapshot_id, main_article_snap_id)
 
                 for t in page.top_articles:
-                    article_id = await self._add_featured_article(
-                        conn, t.article.original
-                    )
-                    top_article_snap_id = await self._add_featured_article_snapshot(
-                        conn, article_id, t.article
-                    )
+                    article_id = await self._add_article(conn, t.article.original)
+                    title_id = await self._add_title(conn, t.article.title)
                     await self._add_top_article(
-                        conn, snapshot_id, top_article_snap_id, t
+                        conn, snapshot_id, article_id, title_id, t.article.url, t.rank
                     )
 
         return site_id
@@ -490,49 +491,56 @@ class Storage(StorageAbc):
             [virtual, site_id],
         )
 
-    async def _add_featured_article(self, conn, article: FeaturedArticle):
+    async def _add_article(self, conn, article: FeaturedArticle):
         return await self._insert_or_get(
             conn,
-            self._insert_stmt("featured_articles", ["url"]),
+            self._insert_stmt("articles", ["url"]),
             [str(article.url)],
-            "SELECT id FROM featured_articles WHERE url = $1",
+            "SELECT id FROM articles WHERE url = $1",
             [str(article.url)],
         )
 
-    async def _add_featured_article_snapshot(
-        self, conn, featured_article_id: int, article: FeaturedArticleSnapshot
-    ):
+    async def _add_title(self, conn, title: str):
         return await self._insert_or_get(
             conn,
-            self._insert_stmt(
-                "featured_article_snapshots",
-                ["title", "url", "featured_article_id"],
-            ),
-            [article.title, str(article.url), featured_article_id],
-            "SELECT id FROM featured_article_snapshots WHERE featured_article_id = $1 AND url = $2",
-            [featured_article_id, str(article.url)],
+            self._insert_stmt("titles", ["text"]),
+            [title],
+            "SELECT id FROM titles WHERE text = $1",
+            [title],
         )
 
-    async def _add_main_article(self, conn, snapshot_id: int, article_id: int):
+    async def _add_main_article(
+        self, conn, snapshot_id: int, article_id: int, title_id: int, url: str
+    ):
         await conn.execute_insert(
             self._insert_stmt(
-                "main_articles", ["snapshot_id", "featured_article_snapshot_id"]
+                "main_articles", ["snapshot_id", "article_id", "title_id", "url"]
             ),
             snapshot_id,
             article_id,
+            title_id,
+            str(url),
         )
 
     async def _add_top_article(
-        self, conn, snapshot_id: int, article_id: int, article: TopArticle
+        self,
+        conn,
+        snapshot_id: int,
+        article_id: int,
+        title_id: int,
+        url: str,
+        rank: int,
     ):
         await conn.execute_insert(
             self._insert_stmt(
                 "top_articles",
-                ["snapshot_id", "featured_article_snapshot_id", "rank"],
+                ["snapshot_id", "article_id", "title_id", "url", "rank"],
             ),
             snapshot_id,
             article_id,
-            article.rank,
+            title_id,
+            str(url),
+            rank,
         )
 
     async def _insert_or_get(

+ 2 - 2
src/media_observer/storage_abstraction.py

@@ -86,7 +86,7 @@ class StorageAbc(ABC):
     async def list_all_featured_article_snapshots(self):
         raise NotImplementedError()
 
-    async def list_snapshot_apparitions(self, featured_article_snapshot_ids: list[int]):
+    async def list_snapshot_apparitions(self, title_ids: list[int]):
         raise NotImplementedError()
 
     async def list_all_embedded_featured_article_snapshot_ids(self) -> list[int]:
@@ -95,7 +95,7 @@ class StorageAbc(ABC):
     async def list_all_articles_embeddings(self):
         raise NotImplementedError()
 
-    async def add_embedding(self, featured_article_snapshot_id: int, embedding):
+    async def add_embedding(self, title_id: int, embedding):
         raise NotImplementedError()
 
     async def list_sites(self):

+ 5 - 4
src/media_observer/web.py

@@ -105,6 +105,7 @@ async def site_main_article_snapshot(
         )
 
     main_articles = await storage.list_neighbouring_main_articles(id, timestamp)
+
     [focused_article] = [
         a for a in main_articles if a["site_id"] == id and a["time_diff"] == 0
     ]
@@ -116,10 +117,10 @@ async def site_main_article_snapshot(
         a for a in main_articles if a["site_id"] == id and a["time_diff"] != 0
     ]
 
-    focused_article_id = focused_article["featured_article_snapshot_id"]
+    focused_title_id = focused_article["title_id"]
     try:
         [(_, similar)] = await sim_index.search(
-            [focused_article_id],
+            [focused_title_id],
             20,
             lambda s: s < 100 and s >= 25,
         )
@@ -133,9 +134,9 @@ async def site_main_article_snapshot(
     # A list of articles and score, sorted by descending score
     similar_articles_and_score = sorted(
         [
-            (a, similar_by_id[a["featured_article_snapshot_id"]])
+            (a, similar_by_id[a["title_id"]])
             for a in similar_articles
-            if a["featured_article_snapshot_id"] != focused_article_id
+            if a["title_id"] != focused_title_id
         ],
         key=lambda a: a[1],
         reverse=True,