Spaces:
Sleeping
Commit ·
295bc31
1
Parent(s): ce1386b
Fix README for docker SDK
Browse files
- .README.md.swp +0 -0
- README.md +25 -7
- book_ingestor.egg-info/PKG-INFO +24 -49
- check_qdrant.py +0 -59
- rag_agent_api/README.md +9 -9
- rag_agent_api/__init__.py +2 -2
- rag_agent_api/agent.py +0 -363
- rag_agent_api/config.py +1 -0
- rag_agent_api/main.py +11 -6
- rag_agent_api/retrieval.py +35 -126
- requirements.txt +11 -9
- test_retrieval.py +0 -60
- tests/test_integration.py +21 -18
.README.md.swp
DELETED
|
Binary file (1.02 kB)
|
|
|
README.md
CHANGED
|
@@ -1,14 +1,32 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
-
sdk_version: "3.10"
|
| 8 |
-
app_file: app/main.py
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
-
#
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Backend Deploy
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: docker
|
|
|
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# RAG Agent and API Layer
|
| 11 |
|
| 12 |
+
This is a FastAPI application that provides a question-answering API using Gemini agents and Qdrant retrieval for RAG (Retrieval Augmented Generation) functionality.
|
| 13 |
+
|
| 14 |
+
## API Endpoints
|
| 15 |
+
|
| 16 |
+
- `GET /` - Root endpoint with API information
|
| 17 |
+
- `POST /ask` - Main question-answering endpoint
|
| 18 |
+
- `GET /health` - Health check endpoint
|
| 19 |
+
- `GET /ready` - Readiness check endpoint
|
| 20 |
+
- `/docs` - API documentation (Swagger UI)
|
| 21 |
+
- `/redoc` - API documentation (Redoc)
|
| 22 |
+
|
| 23 |
+
## Configuration
|
| 24 |
+
|
| 25 |
+
The application requires the following environment variables:
|
| 26 |
+
- `GEMINI_API_KEY` - API key for Google Gemini
|
| 27 |
+
- `QDRANT_URL` - URL for Qdrant vector database
|
| 28 |
+
- `QDRANT_API_KEY` - API key for Qdrant database
|
| 29 |
+
|
| 30 |
+
## Deployment
|
| 31 |
+
|
| 32 |
+
This application is configured for deployment on Hugging Face Spaces using Docker.
|
book_ingestor.egg-info/PKG-INFO
CHANGED
|
@@ -14,60 +14,35 @@ Requires-Dist: uvicorn>=0.24.0
|
|
| 14 |
Requires-Dist: openai>=1.0.0
|
| 15 |
Requires-Dist: pydantic>=2.0.0
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
| 22 |
|
| 23 |
-
|
| 24 |
-
```bash
|
| 25 |
-
cd backend
|
| 26 |
-
uv sync
|
| 27 |
-
```
|
| 28 |
|
| 29 |
-
|
| 30 |
-
``
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
``
|
|
|
|
| 34 |
|
| 35 |
-
##
|
| 36 |
|
| 37 |
-
|
| 38 |
-
- `
|
| 39 |
-
- `
|
| 40 |
-
- `
|
| 41 |
|
| 42 |
-
##
|
| 43 |
|
| 44 |
-
|
| 45 |
-
```bash
|
| 46 |
-
cd backend
|
| 47 |
-
uv run python main.py
|
| 48 |
-
```
|
| 49 |
-
|
| 50 |
-
This will:
|
| 51 |
-
1. Collect all URLs from the target book (https://sanilahmed.github.io/hackathon-ai-book/)
|
| 52 |
-
2. Extract text content from each URL
|
| 53 |
-
3. Chunk the content into fixed-size segments
|
| 54 |
-
4. Generate embeddings using Cohere
|
| 55 |
-
5. Store embeddings with metadata in Qdrant Cloud collection named "rag_embedding"
|
| 56 |
-
|
| 57 |
-
### Run the verification pipeline:
|
| 58 |
-
```bash
|
| 59 |
-
cd backend
|
| 60 |
-
python -m verify_retrieval.main
|
| 61 |
-
```
|
| 62 |
-
|
| 63 |
-
Or with specific options:
|
| 64 |
-
```bash
|
| 65 |
-
python -m verify_retrieval.main --query "transformer architecture in NLP" --top-k 10
|
| 66 |
-
```
|
| 67 |
-
|
| 68 |
-
The verification system will:
|
| 69 |
-
1. Load vectors and metadata stored in Qdrant from the original ingestion
|
| 70 |
-
2. Implement retrieval functions to query Qdrant using sample keywords or phrases
|
| 71 |
-
3. Validate that retrieved chunks are accurate and relevant
|
| 72 |
-
4. Check that metadata (URL, title, chunk_id) matches source content
|
| 73 |
-
5. Log results and confirm the pipeline executes end-to-end without errors
|
|
|
|
| 14 |
Requires-Dist: openai>=1.0.0
|
| 15 |
Requires-Dist: pydantic>=2.0.0
|
| 16 |
|
| 17 |
+
---
|
| 18 |
+
title: Backend Deploy
|
| 19 |
+
emoji: 🚀
|
| 20 |
+
colorFrom: blue
|
| 21 |
+
colorTo: purple
|
| 22 |
+
sdk: docker
|
| 23 |
+
pinned: false
|
| 24 |
+
---
|
| 25 |
|
| 26 |
+
# RAG Agent and API Layer
|
| 27 |
|
| 28 |
+
This is a FastAPI application that provides a question-answering API using Gemini agents and Qdrant retrieval for RAG (Retrieval Augmented Generation) functionality.
|
| 29 |
|
| 30 |
+
## API Endpoints
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
- `GET /` - Root endpoint with API information
|
| 33 |
+
- `POST /ask` - Main question-answering endpoint
|
| 34 |
+
- `GET /health` - Health check endpoint
|
| 35 |
+
- `GET /ready` - Readiness check endpoint
|
| 36 |
+
- `/docs` - API documentation (Swagger UI)
|
| 37 |
+
- `/redoc` - API documentation (Redoc)
|
| 38 |
|
| 39 |
+
## Configuration
|
| 40 |
|
| 41 |
+
The application requires the following environment variables:
|
| 42 |
+
- `GEMINI_API_KEY` - API key for Google Gemini
|
| 43 |
+
- `QDRANT_URL` - URL for Qdrant vector database
|
| 44 |
+
- `QDRANT_API_KEY` - API key for Qdrant database
|
| 45 |
|
| 46 |
+
## Deployment
|
| 47 |
|
| 48 |
+
This application is configured for deployment on Hugging Face Spaces using Docker.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
check_qdrant.py
DELETED
|
@@ -1,59 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Script to check if Qdrant collection exists and has data.
|
| 4 |
-
"""
|
| 5 |
-
import os
|
| 6 |
-
from qdrant_client import QdrantClient
|
| 7 |
-
from dotenv import load_dotenv
|
| 8 |
-
|
| 9 |
-
# Load environment variables
|
| 10 |
-
load_dotenv()
|
| 11 |
-
|
| 12 |
-
# Get environment variables
|
| 13 |
-
qdrant_url = os.getenv('QDRANT_URL')
|
| 14 |
-
qdrant_api_key = os.getenv('QDRANT_API_KEY')
|
| 15 |
-
|
| 16 |
-
if not qdrant_url or not qdrant_api_key:
|
| 17 |
-
print("Error: QDRANT_URL or QDRANT_API_KEY not found in environment variables")
|
| 18 |
-
exit(1)
|
| 19 |
-
|
| 20 |
-
# Initialize Qdrant client
|
| 21 |
-
client = QdrantClient(
|
| 22 |
-
url=qdrant_url,
|
| 23 |
-
api_key=qdrant_api_key,
|
| 24 |
-
timeout=30
|
| 25 |
-
)
|
| 26 |
-
|
| 27 |
-
try:
|
| 28 |
-
# List all collections
|
| 29 |
-
collections = client.get_collections()
|
| 30 |
-
print("Available collections:")
|
| 31 |
-
for collection in collections.collections:
|
| 32 |
-
# For newer Qdrant versions, get the collection info to get point count
|
| 33 |
-
collection_info = client.get_collection(collection.name)
|
| 34 |
-
print(f" - {collection.name} (points: {collection_info.points_count})")
|
| 35 |
-
|
| 36 |
-
# Check specifically for the rag_embedding collection
|
| 37 |
-
try:
|
| 38 |
-
collection_info = client.get_collection("rag_embedding")
|
| 39 |
-
print(f"\nCollection 'rag_embedding' exists with {collection_info.points_count} points")
|
| 40 |
-
|
| 41 |
-
if collection_info.points_count > 0:
|
| 42 |
-
# Get a sample point to verify data exists
|
| 43 |
-
points = client.scroll(
|
| 44 |
-
collection_name="rag_embedding",
|
| 45 |
-
limit=1
|
| 46 |
-
)
|
| 47 |
-
if len(points[0]) > 0:
|
| 48 |
-
sample_point = points[0][0]
|
| 49 |
-
print(f"Sample point ID: {sample_point.id}")
|
| 50 |
-
print(f"Sample point payload keys: {list(sample_point.payload.keys())}")
|
| 51 |
-
print(f"Sample text preview: {sample_point.payload.get('text', '')[:100]}...")
|
| 52 |
-
else:
|
| 53 |
-
print("Collection 'rag_embedding' exists but is empty")
|
| 54 |
-
|
| 55 |
-
except Exception as e:
|
| 56 |
-
print(f"\nCollection 'rag_embedding' does not exist: {e}")
|
| 57 |
-
|
| 58 |
-
except Exception as e:
|
| 59 |
-
print(f"Error connecting to Qdrant: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rag_agent_api/README.md
CHANGED
|
@@ -1,17 +1,17 @@
|
|
| 1 |
# RAG Agent and API Layer
|
| 2 |
|
| 3 |
-
A FastAPI-based question-answering system that uses
|
| 4 |
|
| 5 |
## Overview
|
| 6 |
|
| 7 |
-
The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an
|
| 8 |
|
| 9 |
## Architecture
|
| 10 |
|
| 11 |
The system consists of several key components:
|
| 12 |
|
| 13 |
- **FastAPI Application**: Main entry point for the question-answering API
|
| 14 |
-
- **
|
| 15 |
- **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
|
| 16 |
- **Configuration Manager**: Handles environment variables and settings
|
| 17 |
- **Data Models**: Pydantic models for API requests/responses
|
|
@@ -22,7 +22,7 @@ The system consists of several key components:
|
|
| 22 |
### Prerequisites
|
| 23 |
|
| 24 |
- Python 3.9+
|
| 25 |
-
-
|
| 26 |
- Qdrant Cloud instance with book content embeddings
|
| 27 |
- Cohere API key (for query embeddings)
|
| 28 |
|
|
@@ -42,7 +42,7 @@ The system consists of several key components:
|
|
| 42 |
|
| 43 |
3. Edit `.env` with your API keys and configuration:
|
| 44 |
```env
|
| 45 |
-
|
| 46 |
QDRANT_URL=your-qdrant-instance-url
|
| 47 |
QDRANT_API_KEY=your-qdrant-api-key
|
| 48 |
QDRANT_COLLECTION_NAME=rag_embedding
|
|
@@ -103,7 +103,7 @@ Root endpoint with API information.
|
|
| 103 |
|
| 104 |
### Environment Variables
|
| 105 |
|
| 106 |
-
- `
|
| 107 |
- `QDRANT_URL`: URL of your Qdrant instance
|
| 108 |
- `QDRANT_API_KEY`: Your Qdrant API key
|
| 109 |
- `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
|
|
@@ -123,8 +123,8 @@ Pydantic models for API request/response schemas.
|
|
| 123 |
### Schemas (`schemas.py`)
|
| 124 |
Additional schemas for internal data structures.
|
| 125 |
|
| 126 |
-
### Agent (`
|
| 127 |
-
|
| 128 |
|
| 129 |
### Retrieval (`retrieval.py`)
|
| 130 |
Qdrant integration for content retrieval with semantic search.
|
|
@@ -160,7 +160,7 @@ pytest
|
|
| 160 |
|
| 161 |
# Run specific test files
|
| 162 |
pytest tests/test_api.py
|
| 163 |
-
pytest tests/
|
| 164 |
pytest tests/test_retrieval.py
|
| 165 |
```
|
| 166 |
|
|
|
|
| 1 |
# RAG Agent and API Layer
|
| 2 |
|
| 3 |
+
A FastAPI-based question-answering system that uses OpenRouter Agents and Qdrant retrieval to generate grounded responses based on book content.
|
| 4 |
|
| 5 |
## Overview
|
| 6 |
|
| 7 |
+
The RAG Agent and API Layer provides a question-answering API that retrieves relevant content from Qdrant and uses an OpenRouter agent to generate accurate, source-grounded responses. The system ensures that all answers are based only on the provided context to prevent hallucinations.
|
| 8 |
|
| 9 |
## Architecture
|
| 10 |
|
| 11 |
The system consists of several key components:
|
| 12 |
|
| 13 |
- **FastAPI Application**: Main entry point for the question-answering API
|
| 14 |
+
- **OpenRouter Agent**: Generates responses based on retrieved context
|
| 15 |
- **Qdrant Retriever**: Retrieves relevant content chunks from Qdrant database
|
| 16 |
- **Configuration Manager**: Handles environment variables and settings
|
| 17 |
- **Data Models**: Pydantic models for API requests/responses
|
|
|
|
| 22 |
### Prerequisites
|
| 23 |
|
| 24 |
- Python 3.9+
|
| 25 |
+
- OpenRouter API key
|
| 26 |
- Qdrant Cloud instance with book content embeddings
|
| 27 |
- Cohere API key (for query embeddings)
|
| 28 |
|
|
|
|
| 42 |
|
| 43 |
3. Edit `.env` with your API keys and configuration:
|
| 44 |
```env
|
| 45 |
+
OPENROUTER_API_KEY=your-openrouter-api-key-here
|
| 46 |
QDRANT_URL=your-qdrant-instance-url
|
| 47 |
QDRANT_API_KEY=your-qdrant-api-key
|
| 48 |
QDRANT_COLLECTION_NAME=rag_embedding
|
|
|
|
| 103 |
|
| 104 |
### Environment Variables
|
| 105 |
|
| 106 |
+
- `OPENROUTER_API_KEY`: Your OpenRouter API key
|
| 107 |
- `QDRANT_URL`: URL of your Qdrant instance
|
| 108 |
- `QDRANT_API_KEY`: Your Qdrant API key
|
| 109 |
- `QDRANT_COLLECTION_NAME`: Name of the collection with book embeddings (default: `rag_embedding`)
|
|
|
|
| 123 |
### Schemas (`schemas.py`)
|
| 124 |
Additional schemas for internal data structures.
|
| 125 |
|
| 126 |
+
### Agent (`openrouter_agent.py`)
|
| 127 |
+
OpenRouter agent implementation with context injection and response validation.
|
| 128 |
|
| 129 |
### Retrieval (`retrieval.py`)
|
| 130 |
Qdrant integration for content retrieval with semantic search.
|
|
|
|
| 160 |
|
| 161 |
# Run specific test files
|
| 162 |
pytest tests/test_api.py
|
| 163 |
+
pytest tests/test_openrouter_agent.py
|
| 164 |
pytest tests/test_retrieval.py
|
| 165 |
```
|
| 166 |
|
rag_agent_api/__init__.py
CHANGED
|
@@ -10,7 +10,7 @@ __license__ = "MIT"
|
|
| 10 |
# Import main components for easy access
|
| 11 |
from .main import app
|
| 12 |
from .config import Config, get_config, validate_config
|
| 13 |
-
from .
|
| 14 |
from .retrieval import QdrantRetriever
|
| 15 |
|
| 16 |
# Define what gets imported with "from rag_agent_api import *"
|
|
@@ -19,6 +19,6 @@ __all__ = [
|
|
| 19 |
"Config",
|
| 20 |
"get_config",
|
| 21 |
"validate_config",
|
| 22 |
-
"
|
| 23 |
"QdrantRetriever"
|
| 24 |
]
|
|
|
|
| 10 |
# Import main components for easy access
|
| 11 |
from .main import app
|
| 12 |
from .config import Config, get_config, validate_config
|
| 13 |
+
from .openrouter_agent import OpenRouterAgent
|
| 14 |
from .retrieval import QdrantRetriever
|
| 15 |
|
| 16 |
# Define what gets imported with "from rag_agent_api import *"
|
|
|
|
| 19 |
"Config",
|
| 20 |
"get_config",
|
| 21 |
"validate_config",
|
| 22 |
+
"OpenRouterAgent",
|
| 23 |
"QdrantRetriever"
|
| 24 |
]
|
rag_agent_api/agent.py
DELETED
|
@@ -1,363 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Google Gemini Agent module for the RAG Agent and API Layer system.
|
| 3 |
-
|
| 4 |
-
This module provides functionality for creating and managing a Google Gemini agent
|
| 5 |
-
that generates responses based on retrieved context.
|
| 6 |
-
"""
|
| 7 |
-
import asyncio
|
| 8 |
-
import logging
|
| 9 |
-
from typing import List, Dict, Any, Optional
|
| 10 |
-
import google.generativeai as genai
|
| 11 |
-
from .config import get_config
|
| 12 |
-
from .schemas import AgentContext, AgentResponse, SourceChunkSchema
|
| 13 |
-
from .utils import format_confidence_score
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
class GeminiAgent:
|
| 17 |
-
"""
|
| 18 |
-
A class to manage the Google Gemini agent for generating responses based on context.
|
| 19 |
-
"""
|
| 20 |
-
def __init__(self, model_name: str = "gemini-2.5-flash"):
|
| 21 |
-
"""
|
| 22 |
-
Initialize the Google Gemini agent with configuration.
|
| 23 |
-
|
| 24 |
-
Args:
|
| 25 |
-
model_name: Name of the Gemini model to use (default: gemini-2.5-flash)
|
| 26 |
-
"""
|
| 27 |
-
config = get_config()
|
| 28 |
-
api_key = config.gemini_api_key
|
| 29 |
-
|
| 30 |
-
if not api_key:
|
| 31 |
-
raise ValueError("GEMINI_API_KEY environment variable not set")
|
| 32 |
-
|
| 33 |
-
# Configure the Gemini client
|
| 34 |
-
genai.configure(api_key=api_key)
|
| 35 |
-
|
| 36 |
-
# Create the generative model instance
|
| 37 |
-
self.model = genai.GenerativeModel(model_name)
|
| 38 |
-
self.model_name = model_name
|
| 39 |
-
self.default_temperature = config.default_temperature
|
| 40 |
-
|
| 41 |
-
logging.info(f"Gemini agent initialized with model: {model_name}")
|
| 42 |
-
|
| 43 |
-
async def generate_response(self, context: AgentContext) -> AgentResponse:
|
| 44 |
-
"""
|
| 45 |
-
Generate a response based on the provided context.
|
| 46 |
-
|
| 47 |
-
Args:
|
| 48 |
-
context: AgentContext containing the query and retrieved context chunks
|
| 49 |
-
|
| 50 |
-
Returns:
|
| 51 |
-
AgentResponse with the generated answer and metadata
|
| 52 |
-
"""
|
| 53 |
-
# Check if retrieved context is empty (no chunks at all)
|
| 54 |
-
if not context.retrieved_chunks:
|
| 55 |
-
return AgentResponse(
|
| 56 |
-
raw_response="I could not find this information in the book.",
|
| 57 |
-
used_sources=[],
|
| 58 |
-
confidence_score=0.0,
|
| 59 |
-
is_valid=True,
|
| 60 |
-
validation_details="No context chunks retrieved from the database",
|
| 61 |
-
unsupported_claims=[]
|
| 62 |
-
)
|
| 63 |
-
|
| 64 |
-
# Check if context is insufficient (very short content)
|
| 65 |
-
total_context_length = sum(len(chunk.content) for chunk in context.retrieved_chunks)
|
| 66 |
-
if total_context_length < 10: # Much lower threshold, but still meaningful
|
| 67 |
-
return AgentResponse(
|
| 68 |
-
raw_response="I could not find this information in the book.",
|
| 69 |
-
used_sources=[],
|
| 70 |
-
confidence_score=0.0,
|
| 71 |
-
is_valid=True,
|
| 72 |
-
validation_details="No sufficient context provided to answer the question",
|
| 73 |
-
unsupported_claims=[]
|
| 74 |
-
)
|
| 75 |
-
|
| 76 |
-
try:
|
| 77 |
-
# Prepare the system message with instructions for grounding responses
|
| 78 |
-
system_message = self._create_system_message(context)
|
| 79 |
-
|
| 80 |
-
# Prepare the user message with the query
|
| 81 |
-
user_message = self._create_user_message(context)
|
| 82 |
-
|
| 83 |
-
# For Google Gemini, we need to format the prompt differently
|
| 84 |
-
# Combine system instructions and user query
|
| 85 |
-
full_prompt = f"{system_message}\n\n{user_message}"
|
| 86 |
-
|
| 87 |
-
# Generate response from Google Gemini
|
| 88 |
-
# For async generation, we need to use the appropriate async method
|
| 89 |
-
chat = self.model.start_chat()
|
| 90 |
-
response = await chat.send_message_async(
|
| 91 |
-
full_prompt,
|
| 92 |
-
generation_config={
|
| 93 |
-
"temperature": context.source_policy if hasattr(context, 'temperature') else self.default_temperature,
|
| 94 |
-
"max_output_tokens": 1000
|
| 95 |
-
}
|
| 96 |
-
)
|
| 97 |
-
|
| 98 |
-
# Extract the response text
|
| 99 |
-
raw_response = response.text if response and hasattr(response, 'text') else str(response)
|
| 100 |
-
|
| 101 |
-
# If the response indicates no information was found, return the exact message
|
| 102 |
-
if "I could not find this information in the book" in raw_response:
|
| 103 |
-
return AgentResponse(
|
| 104 |
-
raw_response="I could not find this information in the book.",
|
| 105 |
-
used_sources=[],
|
| 106 |
-
confidence_score=0.0,
|
| 107 |
-
is_valid=True,
|
| 108 |
-
validation_details="No relevant information found in the provided context",
|
| 109 |
-
unsupported_claims=[]
|
| 110 |
-
)
|
| 111 |
-
|
| 112 |
-
# Determine which sources were used (this is a simplified approach)
|
| 113 |
-
used_sources = self._identify_used_sources(raw_response, context.retrieved_chunks)
|
| 114 |
-
|
| 115 |
-
# Calculate confidence score (based on similarity scores of used sources)
|
| 116 |
-
confidence_score = self._calculate_confidence_score(used_sources, context.retrieved_chunks)
|
| 117 |
-
|
| 118 |
-
# Validate that the response is grounded in the provided context
|
| 119 |
-
grounding_validation = self._validate_response_grounding(
|
| 120 |
-
raw_response, context.retrieved_chunks, context.query
|
| 121 |
-
)
|
| 122 |
-
|
| 123 |
-
# Create and return the agent response
|
| 124 |
-
agent_response = AgentResponse(
|
| 125 |
-
raw_response=raw_response,
|
| 126 |
-
used_sources=used_sources,
|
| 127 |
-
confidence_score=confidence_score,
|
| 128 |
-
is_valid=grounding_validation["is_valid"],
|
| 129 |
-
validation_details=grounding_validation["details"],
|
| 130 |
-
unsupported_claims=grounding_validation["unsupported_claims"]
|
| 131 |
-
)
|
| 132 |
-
|
| 133 |
-
logging.info(f"Agent response generated successfully. Confidence: {confidence_score:.2f}")
|
| 134 |
-
return agent_response
|
| 135 |
-
|
| 136 |
-
except Exception as e:
|
| 137 |
-
logging.error(f"Error generating response from Google Gemini agent: {e}", exc_info=True)
|
| 138 |
-
# Return the specific message when there's an error
|
| 139 |
-
return AgentResponse(
|
| 140 |
-
raw_response="I could not find this information in the book.",
|
| 141 |
-
used_sources=[],
|
| 142 |
-
confidence_score=0.0,
|
| 143 |
-
is_valid=False,
|
| 144 |
-
validation_details=f"Error generating response: {str(e)}",
|
| 145 |
-
unsupported_claims=[]
|
| 146 |
-
)
|
| 147 |
-
|
| 148 |
-
def _create_system_message(self, context: AgentContext) -> str:
|
| 149 |
-
"""
|
| 150 |
-
Create the system message that instructs the agent on how to behave.
|
| 151 |
-
|
| 152 |
-
Args:
|
| 153 |
-
context: AgentContext containing the query and retrieved context chunks
|
| 154 |
-
|
| 155 |
-
Returns:
|
| 156 |
-
Formatted system message string
|
| 157 |
-
"""
|
| 158 |
-
system_prompt = """You are a documentation-based assistant.
|
| 159 |
-
Answer ONLY using the provided context from the book
|
| 160 |
-
"Physical AI & Humanoid Robotics".
|
| 161 |
-
If the answer is not found, reply EXACTLY:
|
| 162 |
-
"I could not find this information in the book."""
|
| 163 |
-
return system_prompt
|
| 164 |
-
|
| 165 |
-
def _create_user_message(self, context: AgentContext) -> str:
|
| 166 |
-
"""
|
| 167 |
-
Create the user message containing the query.
|
| 168 |
-
|
| 169 |
-
Args:
|
| 170 |
-
context: AgentContext containing the query and retrieved context chunks
|
| 171 |
-
|
| 172 |
-
Returns:
|
| 173 |
-
Formatted user message string
|
| 174 |
-
"""
|
| 175 |
-
return f"""CONTEXT:
|
| 176 |
-
{self._format_context_chunks(context.retrieved_chunks)}
|
| 177 |
-
|
| 178 |
-
QUESTION:
|
| 179 |
-
{context.query}"""
|
| 180 |
-
|
| 181 |
-
def _format_context_chunks(self, chunks: List[SourceChunkSchema]) -> str:
|
| 182 |
-
"""
|
| 183 |
-
Format the context chunks for the prompt.
|
| 184 |
-
|
| 185 |
-
Args:
|
| 186 |
-
chunks: List of source chunks to format
|
| 187 |
-
|
| 188 |
-
Returns:
|
| 189 |
-
Formatted context string
|
| 190 |
-
"""
|
| 191 |
-
if not chunks:
|
| 192 |
-
return ""
|
| 193 |
-
|
| 194 |
-
formatted_chunks = []
|
| 195 |
-
for i, chunk in enumerate(chunks):
|
| 196 |
-
formatted_chunks.append(f"[Chunk {i+1}]\n{chunk.content}\n[/Chunk {i+1}]")
|
| 197 |
-
|
| 198 |
-
return "\n".join(formatted_chunks)
|
| 199 |
-
|
| 200 |
-
def _create_context_messages(self, context: AgentContext) -> List[Dict[str, str]]:
|
| 201 |
-
"""
|
| 202 |
-
Create context messages from the retrieved chunks.
|
| 203 |
-
With the new format, context is now provided in the user message,
|
| 204 |
-
so this method returns an empty list to avoid duplication.
|
| 205 |
-
|
| 206 |
-
Args:
|
| 207 |
-
context: AgentContext containing the query and retrieved context chunks
|
| 208 |
-
|
| 209 |
-
Returns:
|
| 210 |
-
Empty list since context is now in user message
|
| 211 |
-
"""
|
| 212 |
-
return []
|
| 213 |
-
|
| 214 |
-
def _identify_used_sources(self, response: str, chunks: List[SourceChunkSchema]) -> List[str]:
|
| 215 |
-
"""
|
| 216 |
-
Identify which sources were likely used in the response.
|
| 217 |
-
This is a simplified approach - in a real implementation, you might use
|
| 218 |
-
more sophisticated techniques like semantic similarity.
|
| 219 |
-
|
| 220 |
-
Args:
|
| 221 |
-
response: The agent's response text
|
| 222 |
-
chunks: List of source chunks that were provided to the agent
|
| 223 |
-
|
| 224 |
-
Returns:
|
| 225 |
-
List of source IDs that were likely used
|
| 226 |
-
"""
|
| 227 |
-
used_sources = []
|
| 228 |
-
response_lower = response.lower()
|
| 229 |
-
|
| 230 |
-
for chunk in chunks:
|
| 231 |
-
# Check if any significant words from the chunk appear in the response
|
| 232 |
-
content_words = set(chunk.content.lower().split()[:20]) # Check first 20 words
|
| 233 |
-
response_words = set(response_lower.split())
|
| 234 |
-
|
| 235 |
-
# If there's significant overlap, consider this chunk as used
|
| 236 |
-
overlap = content_words.intersection(response_words)
|
| 237 |
-
if len(overlap) > 2: # Arbitrary threshold
|
| 238 |
-
used_sources.append(chunk.id)
|
| 239 |
-
|
| 240 |
-
# If no sources were identified, return all sources (conservative approach)
|
| 241 |
-
if not used_sources:
|
| 242 |
-
used_sources = [chunk.id for chunk in chunks]
|
| 243 |
-
|
| 244 |
-
return used_sources
|
| 245 |
-
|
| 246 |
-
def _calculate_confidence_score(self, used_sources: List[str], chunks: List[SourceChunkSchema]) -> float:
|
| 247 |
-
"""
|
| 248 |
-
Calculate a confidence score based on the quality of the used sources.
|
| 249 |
-
|
| 250 |
-
Args:
|
| 251 |
-
used_sources: List of source IDs that were used
|
| 252 |
-
chunks: List of all source chunks that were provided to the agent
|
| 253 |
-
|
| 254 |
-
Returns:
|
| 255 |
-
Confidence score between 0.0 and 1.0
|
| 256 |
-
"""
|
| 257 |
-
if not used_sources:
|
| 258 |
-
return 0.1 # Low confidence if no sources were used
|
| 259 |
-
|
| 260 |
-
# Calculate average similarity score of used sources
|
| 261 |
-
total_similarity = 0.0
|
| 262 |
-
used_count = 0
|
| 263 |
-
|
| 264 |
-
for chunk in chunks:
|
| 265 |
-
if chunk.id in used_sources:
|
| 266 |
-
total_similarity += chunk.similarity_score
|
| 267 |
-
used_count += 1
|
| 268 |
-
|
| 269 |
-
if used_count == 0:
|
| 270 |
-
return 0.1 # Low confidence if no matching chunks found
|
| 271 |
-
|
| 272 |
-
avg_similarity = total_similarity / used_count
|
| 273 |
-
|
| 274 |
-
# If similarity scores are very low (e.g., due to embedding issues),
|
| 275 |
-
# but we have content, still provide some confidence
|
| 276 |
-
if avg_similarity < 0.1 and len(used_sources) > 0:
|
| 277 |
-
# If we have relevant content but low similarity scores,
|
| 278 |
-
# it might be due to embedding issues, not lack of relevance
|
| 279 |
-
# So we'll set a minimum confidence if content exists
|
| 280 |
-
return 0.3 # Low but not zero confidence
|
| 281 |
-
else:
|
| 282 |
-
# Normalize the confidence score (adjust based on your requirements)
|
| 283 |
-
# Higher similarity scores contribute to higher confidence
|
| 284 |
-
confidence = avg_similarity
|
| 285 |
-
|
| 286 |
-
return format_confidence_score(confidence)
|
| 287 |
-
|
| 288 |
-
def _validate_response_grounding(self, response: str, chunks: List[SourceChunkSchema], query: str) -> Dict[str, Any]:
|
| 289 |
-
"""
|
| 290 |
-
Validate that the response is grounded in the provided context.
|
| 291 |
-
|
| 292 |
-
Args:
|
| 293 |
-
response: The agent's response text
|
| 294 |
-
chunks: List of source chunks that were provided to the agent
|
| 295 |
-
query: The original query
|
| 296 |
-
|
| 297 |
-
Returns:
|
| 298 |
-
Dictionary with validation results
|
| 299 |
-
"""
|
| 300 |
-
# Check if the response contains elements from the provided context
|
| 301 |
-
response_lower = response.lower()
|
| 302 |
-
context_text = " ".join([chunk.content.lower() for chunk in chunks])
|
| 303 |
-
|
| 304 |
-
# Simple heuristic: check if response contains significant terms from context
|
| 305 |
-
response_words = set(response_lower.split())
|
| 306 |
-
context_words = set(context_text.split())
|
| 307 |
-
|
| 308 |
-
# Calculate overlap between response and context
|
| 309 |
-
overlap = response_words.intersection(context_words)
|
| 310 |
-
total_response_words = len(response_words)
|
| 311 |
-
overlap_count = len(overlap)
|
| 312 |
-
|
| 313 |
-
# If less than 30% of response words come from context, flag as potentially ungrounded
|
| 314 |
-
is_grounded = True
|
| 315 |
-
unsupported_claims = []
|
| 316 |
-
|
| 317 |
-
if total_response_words > 0:
|
| 318 |
-
grounding_ratio = overlap_count / total_response_words
|
| 319 |
-
is_grounded = grounding_ratio >= 0.3 # At least 30% of words should come from context
|
| 320 |
-
|
| 321 |
-
# For now, we'll just return the basic validation
|
| 322 |
-
# In a more sophisticated implementation, you'd analyze the response more deeply
|
| 323 |
-
details = f"Response grounding validation completed. Context overlap ratio: {overlap_count/total_response_words if total_response_words > 0 else 0:.2f}"
|
| 324 |
-
|
| 325 |
-
return {
|
| 326 |
-
"is_valid": is_grounded,
|
| 327 |
-
"details": details,
|
| 328 |
-
"unsupported_claims": unsupported_claims
|
| 329 |
-
}
|
| 330 |
-
|
| 331 |
-
async def validate_response_quality(self, response: str, context: AgentContext) -> bool:
|
| 332 |
-
"""
|
| 333 |
-
Validate the quality of the agent's response.
|
| 334 |
-
|
| 335 |
-
Args:
|
| 336 |
-
response: The agent's response text
|
| 337 |
-
context: AgentContext containing the query and retrieved context chunks
|
| 338 |
-
|
| 339 |
-
Returns:
|
| 340 |
-
True if response meets quality standards, False otherwise
|
| 341 |
-
"""
|
| 342 |
-
# Check for common signs of poor quality responses
|
| 343 |
-
if not response or response.strip() == "":
|
| 344 |
-
logging.warning("Agent returned an empty response")
|
| 345 |
-
return False
|
| 346 |
-
|
| 347 |
-
# Check if response contains generic fallback phrases
|
| 348 |
-
lower_response = response.lower()
|
| 349 |
-
if "i don't know" in lower_response or "i don't have" in lower_response:
|
| 350 |
-
# This might be a valid response if there's no relevant context
|
| 351 |
-
if len(context.retrieved_chunks) == 0:
|
| 352 |
-
return True # Valid response if no context was provided
|
| 353 |
-
else:
|
| 354 |
-
# Check if the response is justified given the context
|
| 355 |
-
# For now, we'll consider it valid if it acknowledges the lack of relevant information
|
| 356 |
-
return True
|
| 357 |
-
|
| 358 |
-
# In a more sophisticated implementation, you'd validate against the context more rigorously
|
| 359 |
-
return True
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
# Global agent instance (if needed)
|
| 363 |
-
# agent_instance = OpenAIAgent()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rag_agent_api/config.py
CHANGED
|
@@ -19,6 +19,7 @@ class Config:
|
|
| 19 |
|
| 20 |
def __init__(self):
|
| 21 |
"""Initialize configuration by loading environment variables."""
|
|
|
|
| 22 |
self.cohere_api_key = os.getenv('COHERE_API_KEY')
|
| 23 |
self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
|
| 24 |
self.qdrant_url = os.getenv('QDRANT_URL')
|
|
|
|
| 19 |
|
| 20 |
def __init__(self):
|
| 21 |
"""Initialize configuration by loading environment variables."""
|
| 22 |
+
self.openai_api_key = os.getenv('OPENAI_API_KEY')
|
| 23 |
self.cohere_api_key = os.getenv('COHERE_API_KEY')
|
| 24 |
self.openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
|
| 25 |
self.qdrant_url = os.getenv('QDRANT_URL')
|
rag_agent_api/main.py
CHANGED
|
@@ -82,22 +82,22 @@ async def health_check() -> HealthResponse:
|
|
| 82 |
HealthResponse with status of services
|
| 83 |
"""
|
| 84 |
# Check if all required components are initialized
|
| 85 |
-
|
| 86 |
qdrant_status = "up" if retriever else "down"
|
| 87 |
agent_status = "up" if agent else "down"
|
| 88 |
|
| 89 |
# Determine overall status
|
| 90 |
overall_status = "healthy"
|
| 91 |
-
if
|
| 92 |
overall_status = "unhealthy"
|
| 93 |
-
elif
|
| 94 |
overall_status = "degraded"
|
| 95 |
|
| 96 |
return HealthResponse(
|
| 97 |
status=overall_status,
|
| 98 |
timestamp=format_timestamp(),
|
| 99 |
services={
|
| 100 |
-
"
|
| 101 |
"qdrant": qdrant_status,
|
| 102 |
"agent": agent_status
|
| 103 |
}
|
|
@@ -194,7 +194,7 @@ async def root() -> Dict[str, Any]:
|
|
| 194 |
return {
|
| 195 |
"message": "RAG Agent and API Layer",
|
| 196 |
"version": "1.0.0",
|
| 197 |
-
"description": "Question-answering API using
|
| 198 |
"endpoints": {
|
| 199 |
"POST /ask": "Main question-answering endpoint",
|
| 200 |
"GET /health": "Health check endpoint",
|
|
@@ -243,4 +243,9 @@ async def readiness_check() -> Dict[str, str]:
|
|
| 243 |
if retriever and agent:
|
| 244 |
return {"status": "ready"}
|
| 245 |
else:
|
| 246 |
-
raise HTTPException(status_code=503, detail="Service not ready")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
HealthResponse with status of services
|
| 83 |
"""
|
| 84 |
# Check if all required components are initialized
|
| 85 |
+
openrouter_status = "up" if agent else "down"
|
| 86 |
qdrant_status = "up" if retriever else "down"
|
| 87 |
agent_status = "up" if agent else "down"
|
| 88 |
|
| 89 |
# Determine overall status
|
| 90 |
overall_status = "healthy"
|
| 91 |
+
if openrouter_status == "down" or qdrant_status == "down":
|
| 92 |
overall_status = "unhealthy"
|
| 93 |
+
elif openrouter_status == "degraded" or qdrant_status == "degraded":
|
| 94 |
overall_status = "degraded"
|
| 95 |
|
| 96 |
return HealthResponse(
|
| 97 |
status=overall_status,
|
| 98 |
timestamp=format_timestamp(),
|
| 99 |
services={
|
| 100 |
+
"openrouter": openrouter_status,
|
| 101 |
"qdrant": qdrant_status,
|
| 102 |
"agent": agent_status
|
| 103 |
}
|
|
|
|
| 194 |
return {
|
| 195 |
"message": "RAG Agent and API Layer",
|
| 196 |
"version": "1.0.0",
|
| 197 |
+
"description": "Question-answering API using OpenRouter Agents and Qdrant retrieval",
|
| 198 |
"endpoints": {
|
| 199 |
"POST /ask": "Main question-answering endpoint",
|
| 200 |
"GET /health": "Health check endpoint",
|
|
|
|
| 243 |
if retriever and agent:
|
| 244 |
return {"status": "ready"}
|
| 245 |
else:
|
| 246 |
+
raise HTTPException(status_code=503, detail="Service not ready")
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
if __name__ == "__main__":
|
| 250 |
+
import uvicorn
|
| 251 |
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
rag_agent_api/retrieval.py
CHANGED
|
@@ -76,16 +76,6 @@ class QdrantRetriever:
|
|
| 76 |
# Embed the query using Cohere
|
| 77 |
query_embedding = await self._embed_query(query)
|
| 78 |
|
| 79 |
-
# Check if we got a zero vector fallback (indicating embedding service failure)
|
| 80 |
-
is_zero_vector = all(x == 0.0 for x in query_embedding)
|
| 81 |
-
|
| 82 |
-
if is_zero_vector:
|
| 83 |
-
# If we have a zero vector, try a different approach - keyword search
|
| 84 |
-
logging.warning("Zero vector detected, attempting keyword-based fallback search")
|
| 85 |
-
retrieved_chunks = await self._keyword_search_fallback(query, top_k)
|
| 86 |
-
logging.info(f"Keyword fallback search retrieved {len(retrieved_chunks)} chunks from Qdrant")
|
| 87 |
-
return retrieved_chunks
|
| 88 |
-
|
| 89 |
# Perform semantic search in Qdrant
|
| 90 |
search_results = await self.client.query_points(
|
| 91 |
collection_name=self.collection_name,
|
|
@@ -126,134 +116,53 @@ class QdrantRetriever:
|
|
| 126 |
# Return empty list instead of raising exception to allow graceful handling
|
| 127 |
return []
|
| 128 |
|
| 129 |
-
async def
|
| 130 |
"""
|
| 131 |
-
|
| 132 |
|
| 133 |
Args:
|
| 134 |
-
query: The
|
| 135 |
-
top_k: Number of results to return (default: 5)
|
| 136 |
|
| 137 |
Returns:
|
| 138 |
-
List of
|
| 139 |
"""
|
| 140 |
try:
|
| 141 |
-
# Use
|
| 142 |
-
#
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
all_points = await self.client.scroll(
|
| 148 |
-
collection_name=self.collection_name,
|
| 149 |
-
limit=10000, # Get up to 10000 points (or as many as exist)
|
| 150 |
-
with_payload=True,
|
| 151 |
-
with_vectors=False
|
| 152 |
)
|
| 153 |
|
| 154 |
-
# Extract
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
# Score points based on keyword matching
|
| 158 |
-
scored_chunks = []
|
| 159 |
-
query_lower = query.lower()
|
| 160 |
-
query_words = set(query_lower.split())
|
| 161 |
-
|
| 162 |
-
for point in points:
|
| 163 |
-
payload = point.payload if hasattr(point, 'payload') else point
|
| 164 |
-
content = payload.get('text', '') if isinstance(payload, dict) else getattr(payload, 'text', '')
|
| 165 |
-
content_lower = content.lower()
|
| 166 |
-
|
| 167 |
-
# Calculate a simple keyword match score
|
| 168 |
-
content_words = set(content_lower.split())
|
| 169 |
-
overlap = query_words.intersection(content_words)
|
| 170 |
-
score = len(overlap) / len(query_words) if query_words else 0 # Jaccard similarity
|
| 171 |
-
|
| 172 |
-
if score > 0 or query_lower in content_lower: # Only include if there's some match
|
| 173 |
-
chunk = SourceChunkSchema(
|
| 174 |
-
id=point.id if hasattr(point, 'id') else getattr(point, 'point_id', None),
|
| 175 |
-
url=payload.get('url', '') if isinstance(payload, dict) else getattr(payload, 'url', ''),
|
| 176 |
-
title=payload.get('title', '') if isinstance(payload, dict) else getattr(payload, 'title', ''),
|
| 177 |
-
content=content,
|
| 178 |
-
similarity_score=score,
|
| 179 |
-
chunk_index=payload.get('chunk_index', 0) if isinstance(payload, dict) else getattr(payload, 'chunk_index', 0)
|
| 180 |
-
)
|
| 181 |
-
|
| 182 |
-
if self._validate_chunk(chunk):
|
| 183 |
-
scored_chunks.append((chunk, score))
|
| 184 |
-
|
| 185 |
-
# Sort by score and return top_k
|
| 186 |
-
scored_chunks.sort(key=lambda x: x[1], reverse=True)
|
| 187 |
-
top_chunks = [chunk for chunk, score in scored_chunks[:top_k]]
|
| 188 |
-
|
| 189 |
-
return top_chunks
|
| 190 |
-
|
| 191 |
except Exception as e:
|
| 192 |
-
logging.error(f"Error
|
| 193 |
-
return []
|
| 194 |
-
|
| 195 |
-
async def _embed_query(self, query: str) -> List[float]:
|
| 196 |
-
"""
|
| 197 |
-
Embed the query using Cohere to prepare for semantic search with retry logic for rate limits.
|
| 198 |
-
|
| 199 |
-
Args:
|
| 200 |
-
query: The query string to embed
|
| 201 |
-
|
| 202 |
-
Returns:
|
| 203 |
-
List of floats representing the query embedding
|
| 204 |
-
"""
|
| 205 |
-
import time
|
| 206 |
-
import random
|
| 207 |
-
from cohere.errors.too_many_requests_error import TooManyRequestsError
|
| 208 |
|
| 209 |
-
|
| 210 |
-
for attempt in range(3): # Try up to 3 times
|
| 211 |
try:
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
logging.error(f"Error embedding query with Cohere: {e}", exc_info=True)
|
| 233 |
-
break # Don't retry for other types of errors
|
| 234 |
-
|
| 235 |
-
# If Cohere fails, try using OpenAI embeddings as fallback if available
|
| 236 |
-
try:
|
| 237 |
-
from openai import OpenAI
|
| 238 |
-
from .config import get_config
|
| 239 |
-
config = get_config()
|
| 240 |
-
|
| 241 |
-
if config.openai_api_key:
|
| 242 |
-
client = OpenAI(api_key=config.openai_api_key)
|
| 243 |
-
response = client.embeddings.create(
|
| 244 |
-
input=query,
|
| 245 |
-
model="text-embedding-ada-002"
|
| 246 |
-
)
|
| 247 |
-
embedding = response.data[0].embedding
|
| 248 |
-
logging.info("Successfully used OpenAI embedding as fallback")
|
| 249 |
-
return embedding
|
| 250 |
-
except Exception as openai_error:
|
| 251 |
-
logging.warning(f"OpenAI fallback also failed: {openai_error}")
|
| 252 |
-
|
| 253 |
-
# If all fail, return a zero vector of the correct size (1024) as a last resort
|
| 254 |
-
# This will result in poor semantic matches but won't crash the system
|
| 255 |
-
logging.warning("Using zero vector as final fallback for query embedding")
|
| 256 |
-
return [0.0] * 1024
|
| 257 |
|
| 258 |
def _validate_chunk(self, chunk: SourceChunkSchema) -> bool:
|
| 259 |
"""
|
|
|
|
| 76 |
# Embed the query using Cohere
|
| 77 |
query_embedding = await self._embed_query(query)
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# Perform semantic search in Qdrant
|
| 80 |
search_results = await self.client.query_points(
|
| 81 |
collection_name=self.collection_name,
|
|
|
|
| 116 |
# Return empty list instead of raising exception to allow graceful handling
|
| 117 |
return []
|
| 118 |
|
| 119 |
+
async def _embed_query(self, query: str) -> List[float]:
|
| 120 |
"""
|
| 121 |
+
Embed the query using Cohere to prepare for semantic search.
|
| 122 |
|
| 123 |
Args:
|
| 124 |
+
query: The query string to embed
|
|
|
|
| 125 |
|
| 126 |
Returns:
|
| 127 |
+
List of floats representing the query embedding
|
| 128 |
"""
|
| 129 |
try:
|
| 130 |
+
# Use Cohere to embed the query
|
| 131 |
+
# The original book content was likely embedded with Cohere embed-english-v3.0
|
| 132 |
+
response = await self.cohere_client.embed(
|
| 133 |
+
texts=[query],
|
| 134 |
+
model="embed-english-v3.0", # 1024-dimensional embedding model
|
| 135 |
+
input_type="search_query" # Specify this is a search query
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
)
|
| 137 |
|
| 138 |
+
# Extract the embedding from the response
|
| 139 |
+
embedding = response.embeddings[0] # Get the first (and only) embedding
|
| 140 |
+
return embedding
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
except Exception as e:
|
| 142 |
+
logging.error(f"Error embedding query with Cohere: {e}", exc_info=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
+
# Try using OpenAI embeddings as fallback if available
|
|
|
|
| 145 |
try:
|
| 146 |
+
from openai import OpenAI
|
| 147 |
+
from .config import get_config
|
| 148 |
+
config = get_config()
|
| 149 |
+
|
| 150 |
+
if config.openai_api_key:
|
| 151 |
+
client = OpenAI(api_key=config.openai_api_key)
|
| 152 |
+
response = client.embeddings.create(
|
| 153 |
+
input=query,
|
| 154 |
+
model="text-embedding-ada-002"
|
| 155 |
+
)
|
| 156 |
+
embedding = response.data[0].embedding
|
| 157 |
+
logging.info("Successfully used OpenAI embedding as fallback")
|
| 158 |
+
return embedding
|
| 159 |
+
except Exception as openai_error:
|
| 160 |
+
logging.warning(f"OpenAI fallback also failed: {openai_error}")
|
| 161 |
+
|
| 162 |
+
# If both fail, return a zero vector of the correct size (1024) as a last resort
|
| 163 |
+
# This will result in poor semantic matches but won't crash the system
|
| 164 |
+
logging.warning("Using zero vector as final fallback for query embedding")
|
| 165 |
+
return [0.0] * 1024
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
def _validate_chunk(self, chunk: SourceChunkSchema) -> bool:
|
| 168 |
"""
|
requirements.txt
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
python-dotenv>=1.0.0
|
| 5 |
-
httpx>=0.25.0
|
| 6 |
cohere>=4.9.0
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Backend Service Dependencies
|
| 2 |
+
requests>=2.31.0
|
| 3 |
+
beautifulsoup4>=4.12.0
|
|
|
|
|
|
|
| 4 |
cohere>=4.9.0
|
| 5 |
+
qdrant-client>=1.7.0
|
| 6 |
+
python-dotenv>=1.0.0
|
| 7 |
+
fastapi>=0.104.0
|
| 8 |
+
uvicorn>=0.24.0
|
| 9 |
+
openai>=1.0.0
|
| 10 |
+
pydantic>=2.0.0
|
| 11 |
+
numpy>=1.21.0
|
| 12 |
+
httpx>=0.27.0
|
test_retrieval.py
DELETED
|
@@ -1,60 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Test script to directly test the Qdrant retrieval functionality
|
| 4 |
-
"""
|
| 5 |
-
import asyncio
|
| 6 |
-
import os
|
| 7 |
-
from dotenv import load_dotenv
|
| 8 |
-
from rag_agent_api.retrieval import QdrantRetriever
|
| 9 |
-
from rag_agent_api.config import get_config
|
| 10 |
-
|
| 11 |
-
# Load environment variables
|
| 12 |
-
load_dotenv()
|
| 13 |
-
|
| 14 |
-
async def test_retrieval():
|
| 15 |
-
print("Testing Qdrant retrieval functionality...")
|
| 16 |
-
|
| 17 |
-
# Create a QdrantRetriever instance
|
| 18 |
-
retriever = QdrantRetriever()
|
| 19 |
-
|
| 20 |
-
print("1. Testing collection existence...")
|
| 21 |
-
exists = await retriever.validate_collection_exists()
|
| 22 |
-
print(f" Collection exists: {exists}")
|
| 23 |
-
|
| 24 |
-
if exists:
|
| 25 |
-
print("2. Getting total points in collection...")
|
| 26 |
-
total_points = await retriever.get_total_points()
|
| 27 |
-
print(f" Total points: {total_points}")
|
| 28 |
-
|
| 29 |
-
print("3. Testing query embedding...")
|
| 30 |
-
try:
|
| 31 |
-
query = "what about this book?"
|
| 32 |
-
embedding = await retriever._embed_query(query)
|
| 33 |
-
print(f" Query embedding successful, length: {len(embedding)}")
|
| 34 |
-
except Exception as e:
|
| 35 |
-
print(f" Query embedding failed: {e}")
|
| 36 |
-
return
|
| 37 |
-
|
| 38 |
-
print("4. Testing direct search...")
|
| 39 |
-
try:
|
| 40 |
-
results = await retriever.retrieve_context(query, top_k=5)
|
| 41 |
-
print(f" Retrieved {len(results)} results")
|
| 42 |
-
|
| 43 |
-
if results:
|
| 44 |
-
print(" Sample results:")
|
| 45 |
-
for i, result in enumerate(results[:2]): # Show first 2 results
|
| 46 |
-
print(f" Result {i+1}:")
|
| 47 |
-
print(f" ID: {result.id}")
|
| 48 |
-
print(f" Title: {result.title}")
|
| 49 |
-
print(f" Content preview: {result.content[:100]}...")
|
| 50 |
-
print(f" Similarity: {result.similarity_score}")
|
| 51 |
-
print(f" URL: {result.url}")
|
| 52 |
-
else:
|
| 53 |
-
print(" No results retrieved - this indicates the main issue")
|
| 54 |
-
except Exception as e:
|
| 55 |
-
print(f" Direct search failed: {e}")
|
| 56 |
-
import traceback
|
| 57 |
-
traceback.print_exc()
|
| 58 |
-
|
| 59 |
-
if __name__ == "__main__":
|
| 60 |
-
asyncio.run(test_retrieval())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_integration.py
CHANGED
|
@@ -7,7 +7,7 @@ from fastapi.testclient import TestClient
|
|
| 7 |
from unittest.mock import Mock, patch, AsyncMock
|
| 8 |
from rag_agent_api.main import app, retriever, agent
|
| 9 |
from rag_agent_api.retrieval import QdrantRetriever
|
| 10 |
-
from rag_agent_api.
|
| 11 |
from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
|
| 12 |
|
| 13 |
|
|
@@ -17,13 +17,13 @@ def test_full_query_flow_with_mocked_components():
|
|
| 17 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 18 |
'QDRANT_API_KEY': 'test-api-key',
|
| 19 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 20 |
-
'
|
| 21 |
}):
|
| 22 |
with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
|
| 23 |
-
with patch('rag_agent_api.main.
|
| 24 |
# Create mock instances
|
| 25 |
mock_retriever = Mock(spec=QdrantRetriever)
|
| 26 |
-
mock_agent = Mock(spec=
|
| 27 |
|
| 28 |
# Configure the class mocks to return our instance mocks
|
| 29 |
mock_retriever_class.return_value = mock_retriever
|
|
@@ -84,11 +84,11 @@ async def test_agent_context_creation():
|
|
| 84 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 85 |
'QDRANT_API_KEY': 'test-api-key',
|
| 86 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 87 |
-
'
|
| 88 |
}):
|
| 89 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 90 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 91 |
-
with patch('rag_agent_api.
|
| 92 |
# Mock the Qdrant client
|
| 93 |
mock_qdrant_instance = Mock()
|
| 94 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
@@ -101,7 +101,7 @@ async def test_agent_context_creation():
|
|
| 101 |
|
| 102 |
# Initialize components
|
| 103 |
retriever = QdrantRetriever(collection_name="test_collection")
|
| 104 |
-
agent =
|
| 105 |
|
| 106 |
# Create test chunks
|
| 107 |
test_chunk = SourceChunkSchema(
|
|
@@ -145,7 +145,7 @@ def test_health_endpoint_integration():
|
|
| 145 |
assert "services" in data
|
| 146 |
|
| 147 |
# Check that services status is included
|
| 148 |
-
assert "
|
| 149 |
assert "qdrant" in data["services"]
|
| 150 |
assert "agent" in data["services"]
|
| 151 |
|
|
@@ -157,11 +157,11 @@ async def test_retrieval_and_agent_integration():
|
|
| 157 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 158 |
'QDRANT_API_KEY': 'test-api-key',
|
| 159 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 160 |
-
'
|
| 161 |
}):
|
| 162 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 163 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 164 |
-
with patch('rag_agent_api.
|
| 165 |
# Mock the Qdrant client
|
| 166 |
mock_qdrant_instance = Mock()
|
| 167 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
@@ -172,18 +172,21 @@ async def test_retrieval_and_agent_integration():
|
|
| 172 |
mock_cohere_client.return_value = mock_cohere_instance
|
| 173 |
mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
|
| 174 |
|
| 175 |
-
# Mock the
|
| 176 |
-
|
| 177 |
-
|
| 178 |
mock_completion = Mock()
|
| 179 |
-
mock_completion.
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
# Initialize components
|
| 185 |
test_retriever = QdrantRetriever(collection_name="test_collection")
|
| 186 |
-
test_agent =
|
| 187 |
|
| 188 |
# Mock the retrieval result
|
| 189 |
mock_chunk = SourceChunkSchema(
|
|
|
|
| 7 |
from unittest.mock import Mock, patch, AsyncMock
|
| 8 |
from rag_agent_api.main import app, retriever, agent
|
| 9 |
from rag_agent_api.retrieval import QdrantRetriever
|
| 10 |
+
from rag_agent_api.openrouter_agent import OpenRouterAgent
|
| 11 |
from rag_agent_api.schemas import SourceChunkSchema, AgentResponse, AgentContext
|
| 12 |
|
| 13 |
|
|
|
|
| 17 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 18 |
'QDRANT_API_KEY': 'test-api-key',
|
| 19 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 20 |
+
'OPENROUTER_API_KEY': 'test-openrouter-key'
|
| 21 |
}):
|
| 22 |
with patch('rag_agent_api.main.QdrantRetriever') as mock_retriever_class:
|
| 23 |
+
with patch('rag_agent_api.main.OpenRouterAgent') as mock_agent_class:
|
| 24 |
# Create mock instances
|
| 25 |
mock_retriever = Mock(spec=QdrantRetriever)
|
| 26 |
+
mock_agent = Mock(spec=OpenRouterAgent)
|
| 27 |
|
| 28 |
# Configure the class mocks to return our instance mocks
|
| 29 |
mock_retriever_class.return_value = mock_retriever
|
|
|
|
| 84 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 85 |
'QDRANT_API_KEY': 'test-api-key',
|
| 86 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 87 |
+
'OPENROUTER_API_KEY': 'test-openrouter-key'
|
| 88 |
}):
|
| 89 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 90 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 91 |
+
with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient'):
|
| 92 |
# Mock the Qdrant client
|
| 93 |
mock_qdrant_instance = Mock()
|
| 94 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
|
|
| 101 |
|
| 102 |
# Initialize components
|
| 103 |
retriever = QdrantRetriever(collection_name="test_collection")
|
| 104 |
+
agent = OpenRouterAgent(model_name="gpt-4-test")
|
| 105 |
|
| 106 |
# Create test chunks
|
| 107 |
test_chunk = SourceChunkSchema(
|
|
|
|
| 145 |
assert "services" in data
|
| 146 |
|
| 147 |
# Check that services status is included
|
| 148 |
+
assert "openrouter" in data["services"]
|
| 149 |
assert "qdrant" in data["services"]
|
| 150 |
assert "agent" in data["services"]
|
| 151 |
|
|
|
|
| 157 |
'QDRANT_URL': 'http://test-qdrant:6333',
|
| 158 |
'QDRANT_API_KEY': 'test-api-key',
|
| 159 |
'COHERE_API_KEY': 'test-cohere-key',
|
| 160 |
+
'OPENROUTER_API_KEY': 'test-openrouter-key'
|
| 161 |
}):
|
| 162 |
with patch('rag_agent_api.retrieval.AsyncQdrantClient') as mock_qdrant_client:
|
| 163 |
with patch('rag_agent_api.retrieval.cohere.Client') as mock_cohere_client:
|
| 164 |
+
with patch('rag_agent_api.openrouter_agent.httpx.AsyncClient') as mock_httpx_client:
|
| 165 |
# Mock the Qdrant client
|
| 166 |
mock_qdrant_instance = Mock()
|
| 167 |
mock_qdrant_client.return_value = mock_qdrant_instance
|
|
|
|
| 172 |
mock_cohere_client.return_value = mock_cohere_instance
|
| 173 |
mock_cohere_instance.embed.return_value = Mock(embeddings=[[0.1, 0.2, 0.3]])
|
| 174 |
|
| 175 |
+
# Mock the httpx client for OpenRouter
|
| 176 |
+
mock_httpx_instance = Mock()
|
| 177 |
+
mock_httpx_client.return_value.__aenter__.return_value = mock_httpx_instance
|
| 178 |
mock_completion = Mock()
|
| 179 |
+
mock_completion.json.return_value = {
|
| 180 |
+
"choices": [
|
| 181 |
+
{"message": {"content": "This is a test response"}}
|
| 182 |
+
]
|
| 183 |
+
}
|
| 184 |
+
mock_httpx_instance.post = AsyncMock(return_value=mock_completion)
|
| 185 |
+
mock_httpx_instance.post.return_value.status_code = 200
|
| 186 |
|
| 187 |
# Initialize components
|
| 188 |
test_retriever = QdrantRetriever(collection_name="test_collection")
|
| 189 |
+
test_agent = OpenRouterAgent(model_name="gpt-4-test")
|
| 190 |
|
| 191 |
# Mock the retrieval result
|
| 192 |
mock_chunk = SourceChunkSchema(
|