From e6233f237be75789f184463aadbbad541005775c Mon Sep 17 00:00:00 2001 From: zphinx Date: Mon, 4 May 2026 04:30:14 +0200 Subject: [PATCH] update Co-authored-by: Copilot --- README.md | 64 +++++++++++++++++++++++++++++++++++++++++++++++--- pyproject.toml | 1 + 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 74ed425..53e77c5 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,65 @@ A troubleshooter receives a ticket reporting that the Apache service on a remote | Component | Tool | |-----------|------| -| AI inference backend | [vLLM](https://github.com/vllm-project/vllm) | -| Model | `gemma4:a4b` | +| AI inference backend | [Ollama](https://ollama.com) | +| Model | `gemma3:4b`, `llama3.1:8b`, or `qwen2.5:7b` | +| Language | Python 3.11+ | -> **Note:** A suitable implementation language for this project is yet to be determined. +--- + +## How-To: Setting Up the AI Backend (Arch Linux + RTX 3080) + +`tai` uses [Ollama](https://ollama.com) as its local AI backend. It exposes an OpenAI-compatible HTTP API that `tai` talks to — no cloud services, no data leaving your machine. + +An RTX 3080 (10 GB VRAM) comfortably runs 7–8B parameter models at 4-bit quantisation. + +### 1. Install CUDA and Ollama + +```bash +# CUDA runtime (skip if already installed) +sudo pacman -S cuda + +# Ollama with CUDA support from the AUR +yay -S ollama-cuda +# or: paru -S ollama-cuda + +# Enable and start the service +sudo systemctl enable --now ollama +``` + +### 2. Pull a model + +```bash +ollama pull gemma3:4b # ~3 GB — fast, good for sysadmin tasks +ollama pull llama3.1:8b # ~5 GB — stronger reasoning +ollama pull qwen2.5:7b # ~4.5 GB — strong structured output +``` + +### 3. Verify the model works + +```bash +ollama run gemma3:4b "what causes a systemd service to enter failed state?" +``` + +### 4. 
Verify the HTTP API is running + +`tai` communicates with Ollama over HTTP. Confirm the server is reachable using Ollama's native `/api/generate` endpoint (the OpenAI-compatible API that `tai` uses is served on the same port under `/v1`): + +```bash +curl http://localhost:11434/api/generate \ + -d '{"model":"gemma3:4b","prompt":"hello","stream":false}' +``` + +A JSON response with a `response` field confirms everything is working. + +### 5. Point tai at your Ollama instance + +Once `tai` AI integration is complete, use these flags: + +```bash +tai "nginx failing to start" --host web01 \ + --ai-host http://localhost:11434 \ + --model gemma3:4b +``` + +The default values for `--ai-host` and `--model` will be `http://localhost:11434` and `gemma3:4b` respectively, so for local use you won't need to specify them explicitly. diff --git a/pyproject.toml b/pyproject.toml index 3c80449..348fc5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "typer>=0.12,<1.0", "rich>=13.7,<14.0", "asyncssh>=2.14,<3.0", + "openai>=1.30,<2.0", ] [project.optional-dependencies]