Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F113389
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Size
6 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/lib/mix/tasks/pleroma/search/indexer.ex b/lib/mix/tasks/pleroma/search/indexer.ex
index ffa2f3c94..326646b69 100644
--- a/lib/mix/tasks/pleroma/search/indexer.ex
+++ b/lib/mix/tasks/pleroma/search/indexer.ex
@@ -1,60 +1,66 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Mix.Tasks.Pleroma.Search.Indexer do
import Mix.Pleroma
import Ecto.Query
alias Pleroma.Workers.SearchIndexingWorker
+ def run(["create_index"]) do
+ Application.ensure_all_started(:pleroma)
+
+ Pleroma.Config.get([Pleroma.Search, :module]).create_index()
+ end
+
def run(["index" | options]) do
{options, [], []} =
OptionParser.parse(
options,
strict: [
limit: :integer
]
)
start_pleroma()
limit = Keyword.get(options, :limit, 100_000)
per_step = 1000
chunks = max(div(limit, per_step), 1)
1..chunks
|> Enum.each(fn step ->
q =
from(a in Pleroma.Activity,
limit: ^per_step,
offset: ^per_step * (^step - 1),
select: [:id],
order_by: [desc: :id]
)
{:ok, ids} =
Pleroma.Repo.transaction(fn ->
Pleroma.Repo.stream(q, timeout: :infinity)
|> Enum.map(fn a ->
a.id
end)
end)
IO.puts("Got #{length(ids)} activities, adding to indexer")
ids
|> Enum.chunk_every(100)
|> Enum.each(fn chunk ->
IO.puts("Adding #{length(chunk)} activities to indexing queue")
chunk
|> Enum.map(fn id ->
SearchIndexingWorker.new(%{"op" => "add_to_index", "activity" => id})
end)
|> Oban.insert_all()
end)
end)
end
end
diff --git a/lib/pleroma/search/qdrant_search.ex b/lib/pleroma/search/qdrant_search.ex
index 31e7754ae..315262cb3 100644
--- a/lib/pleroma/search/qdrant_search.ex
+++ b/lib/pleroma/search/qdrant_search.ex
@@ -1,117 +1,118 @@
defmodule Pleroma.Search.QdrantSearch do
@behaviour Pleroma.Search.SearchBackend
import Ecto.Query
alias Pleroma.Activity
alias __MODULE__.QdrantClient
alias __MODULE__.OllamaClient
import Pleroma.Search.Meilisearch, only: [object_to_search_data: 1]
- def initialize_index() do
+ @impl true
+ def create_index() do
payload = Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_index_configuration])
QdrantClient.put("/collections/posts", payload)
end
def drop_index() do
QdrantClient.delete("/collections/posts")
end
def get_embedding(text) do
with {:ok, %{body: %{"embedding" => embedding}}} <-
OllamaClient.post("/api/embeddings", %{
prompt: text,
model: Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_model])
}) do
{:ok, embedding}
else
_ ->
{:error, "Failed to get embedding"}
end
end
defp build_index_payload(activity, embedding) do
%{
points: [
%{
id: activity.id |> FlakeId.from_string() |> Ecto.UUID.cast!(),
vector: embedding
}
]
}
end
defp build_search_payload(embedding) do
%{
vector: embedding,
limit: 20
}
end
@impl true
def add_to_index(activity) do
# This will only index public or unlisted notes
maybe_search_data = object_to_search_data(activity.object)
if activity.data["type"] == "Create" and maybe_search_data do
with {:ok, embedding} <- get_embedding(maybe_search_data.content),
{:ok, %{status: 200}} <-
QdrantClient.put(
"/collections/posts/points",
build_index_payload(activity, embedding)
) do
:ok
else
e -> {:error, e}
end
else
:ok
end
end
@impl true
def search(_user, query, _options) do
query = "Represent this sentence for searching relevant passages: #{query}"
with {:ok, embedding} <- get_embedding(query),
{:ok, %{body: %{"result" => result}}} <-
QdrantClient.post("/collections/posts/points/search", build_search_payload(embedding)) do
ids =
Enum.map(result, fn %{"id" => id} ->
Ecto.UUID.dump!(id)
end)
from(a in Activity, where: a.id in ^ids)
|> Activity.with_preloaded_object()
|> Activity.restrict_deactivated_users()
|> Ecto.Query.order_by([a], fragment("array_position(?, ?)", ^ids, a.id))
|> Pleroma.Repo.all()
else
_ ->
[]
end
end
@impl true
def remove_from_index(_object) do
:ok
end
end
defmodule Pleroma.Search.QdrantSearch.OllamaClient do
use Tesla
plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :ollama_url]))
plug(Tesla.Middleware.JSON)
end
defmodule Pleroma.Search.QdrantSearch.QdrantClient do
use Tesla
plug(Tesla.Middleware.BaseUrl, Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_url]))
plug(Tesla.Middleware.JSON)
plug(Tesla.Middleware.Headers, [
{"api-key", Pleroma.Config.get([Pleroma.Search.QdrantSearch, :qdrant_api_key])}
])
end
diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex
index 68bc48cec..5be0169d0 100644
--- a/lib/pleroma/search/search_backend.ex
+++ b/lib/pleroma/search/search_backend.ex
@@ -1,24 +1,29 @@
defmodule Pleroma.Search.SearchBackend do
@doc """
Search statuses with a query, restricting to only those the user should have access to.
"""
@callback search(user :: Pleroma.User.t(), query :: String.t(), options :: [any()]) :: [
Pleroma.Activity.t()
]
@doc """
Add the object associated with the activity to the search index.
The whole activity is passed, to allow filtering on things such as scope.
"""
@callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()}
@doc """
Remove the object from the index.
Just the object, as opposed to the whole activity, is passed, since the object
is what contains the actual content and there is no need for filtering when removing
from index.
"""
@callback remove_from_index(object :: Pleroma.Object.t()) :: :ok | {:error, any()}
+
+ @doc """
+ Create the index
+ """
+ @callback create_index() :: :ok | {:error, any()}
end
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Nov 25, 5:11 AM (1 d, 10 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
39606
Default Alt Text
(6 KB)
Attached To
Mode
rPUBE pleroma-upstream
Attached
Detach File
Event Timeline
Log In to Comment