paperless-ngx/src/paperless/ai/llms.py


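"""LLM client glue for paperless-ngx's AI features: a minimal llama-index LLM backed by a local Ollama server."""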
import httpx
from llama_index.core.base.llms.types import ChatMessage
from llama_index.core.base.llms.types import ChatResponse
from llama_index.core.base.llms.types import ChatResponseAsyncGen
from llama_index.core.base.llms.types import ChatResponseGen
from llama_index.core.base.llms.types import CompletionResponse
from llama_index.core.base.llms.types import CompletionResponseAsyncGen
from llama_index.core.base.llms.types import CompletionResponseGen
from llama_index.core.base.llms.types import LLMMetadata
from llama_index.core.base.llms.types import MessageRole
from llama_index.core.llms.llm import LLM
from pydantic import Field


class OllamaLLM(LLM):
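    """
    Minimal llama-index LLM wrapper that talks to a local Ollama server over
    its HTTP API. Only blocking, non-streaming calls are implemented.

    Usage sketch (assumes an Ollama server reachable at ``base_url`` with the
    requested model already pulled):

        llm = OllamaLLM(model="llama3", base_url="http://localhost:11434")
        print(llm.complete("Summarize this document: ...").text)
    """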

    model: str = Field(default="llama3")
    base_url: str = Field(default="http://localhost:11434")

    @property
    def metadata(self) -> LLMMetadata:
        return LLMMetadata(
            model_name=self.model,
            is_chat_model=False,
            context_window=4096,
            num_output=512,
            is_function_calling_model=False,
        )

    def complete(self, prompt: str, **kwargs) -> CompletionResponse:
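        """
        Generate a completion for ``prompt`` via Ollama's non-streaming
        ``/api/generate`` endpoint; the reply text is expected under the
        ``response`` key of the JSON body.
        """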
        with httpx.Client(timeout=120.0) as client:
            response = client.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": self.model,
                    "prompt": prompt,
                    "stream": False,
                },
            )
            response.raise_for_status()
            data = response.json()
            return CompletionResponse(text=data["response"])

    def chat(self, messages: list[ChatMessage], **kwargs) -> ChatResponse:
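        """
        Run a chat turn via Ollama's non-streaming ``/api/chat`` endpoint.
        Assumes the documented response shape, with the assistant reply under
        ``message.content`` in the JSON body.
        """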
        with httpx.Client(timeout=120.0) as client:
            response = client.post(
                f"{self.base_url}/api/chat",
                json={
                    "model": self.model,
                    "messages": [
                        {
                            "role": message.role,
                            "content": message.content,
                        }
                        for message in messages
                    ],
                    "stream": False,
                },
            )
            response.raise_for_status()
            data = response.json()
            # Non-streaming /api/chat returns the assistant reply under "message".
            return ChatResponse(
                message=ChatMessage(
                    role=MessageRole.ASSISTANT,
                    content=data["message"]["content"],
                ),
            )

    # -- Required stubs for the LLM ABC (streaming/async are not implemented):

    def stream_complete(
        self,
        prompt: str,
        **kwargs,
    ) -> CompletionResponseGen:  # pragma: no cover
        raise NotImplementedError("stream_complete not supported")

    def stream_chat(
        self,
        messages: list[ChatMessage],
        **kwargs,
    ) -> ChatResponseGen:  # pragma: no cover
        raise NotImplementedError("stream_chat not supported")

    async def achat(
        self,
        messages: list[ChatMessage],
        **kwargs,
    ) -> ChatResponse:  # pragma: no cover
        raise NotImplementedError("async chat not supported")

    async def astream_chat(
        self,
        messages: list[ChatMessage],
        **kwargs,
    ) -> ChatResponseAsyncGen:  # pragma: no cover
        raise NotImplementedError("async stream_chat not supported")

    async def acomplete(
        self,
        prompt: str,
        **kwargs,
    ) -> CompletionResponse:  # pragma: no cover
        raise NotImplementedError("async complete not supported")

    async def astream_complete(
        self,
        prompt: str,
        **kwargs,
    ) -> CompletionResponseAsyncGen:  # pragma: no cover
        raise NotImplementedError("async stream_complete not supported")