Charles Grandjean committed on
Commit
851f2ed
·
0 Parent(s):

first commit

Browse files
Files changed (10) hide show
  1. Dockerfile +93 -0
  2. README.md +313 -0
  3. agent_api.py +257 -0
  4. agent_state.py +101 -0
  5. docker-compose.yml +24 -0
  6. langraph_agent.py +260 -0
  7. prompts.py +133 -0
  8. requirements.txt +19 -0
  9. startup.sh +49 -0
  10. utils.py +274 -0
Dockerfile ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use Python 3.11 slim base image
2
+ FROM python:3.11-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Set environment variables
8
+ ENV PYTHONPATH=/app
9
+ ENV PYTHONUNBUFFERED=1
10
+ ENV PYTHONIOENCODING=utf-8
11
+ ENV LIGHTRAG_HOST=127.0.0.1
12
+ ENV LIGHTRAG_PORT=9621
13
+ ENV API_PORT=8000
14
+
15
+ # Install system dependencies
16
+ RUN apt-get update && apt-get install -y \
17
+ build-essential \
18
+ curl \
19
+ && rm -rf /var/lib/apt/lists/*
20
+
21
+ # Copy requirements first for better caching
22
+ COPY requirements.txt .
23
+
24
+ # Install Python dependencies
25
+ RUN pip install --no-cache-dir --upgrade pip && \
26
+ pip install --no-cache-dir -r requirements.txt
27
+
28
+ # Copy application files
29
+ COPY . .
30
+
31
+ # Create non-root user for security
32
+ RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
33
+ USER appuser
34
+
35
+ # Create startup script
36
+ RUN echo '#!/bin/bash\n\
37
+ set -e\n\
38
+ \n\
39
+ echo "🚀 Starting CyberLegal AI Stack..."\n\
40
+ echo "Step 1: Starting LightRAG server..."\n\
41
+ \n\
42
+ # Start LightRAG server in background\n\
43
+ lightrag-server --host $LIGHTRAG_HOST --port $LIGHTRAG_PORT &\n\
44
+ LIGHTRAG_PID=$!\n\
45
+ \n\
46
+ # Wait for LightRAG to be ready\n\
47
+ echo "Waiting for LightRAG server to be ready..."\n\
48
+ max_attempts=30\n\
49
+ attempt=1\n\
50
+ while [ $attempt -le $max_attempts ]; do\n\
51
+ if curl -f http://$LIGHTRAG_HOST:$LIGHTRAG_PORT/health > /dev/null 2>&1; then\n\
52
+ echo "✅ LightRAG server is ready!"\n\
53
+ break\n\
54
+ fi\n\
55
+ echo "Attempt $attempt/$max_attempts: LightRAG not ready yet..."\n\
56
+ sleep 2\n\
57
+ attempt=$((attempt + 1))\n\
58
+ done\n\
59
+ \n\
60
+ if [ $attempt -gt $max_attempts ]; then\n\
61
+ echo "❌ LightRAG server failed to start"\n\
62
+ exit 1\n\
63
+ fi\n\
64
+ \n\
65
+ echo "Step 2: Starting LangGraph API server..."\n\
66
+ echo "🌐 API will be available at: http://localhost:$API_PORT"\n\
67
+ echo "📚 LightRAG server running at: http://$LIGHTRAG_HOST:$LIGHTRAG_PORT"\n\
68
+ echo ""\n\
69
+ echo "Available endpoints:"\n\
70
+ echo " - GET /health - Health check"\n\
71
+ echo " - GET / - API info"\n\
72
+ echo " - POST /chat - Chat with assistant"\n\
73
+ echo ""\n\
74
+ echo "🎉 CyberLegal AI is ready!"\n\
75
+ \n\
76
+ # Start the API server\n\
77
+ python agent_api.py\n\
78
+ \n\
79
+ # Cleanup\n\
80
+ kill $LIGHTRAG_PID 2>/dev/null || true\n\
81
+ ' > /app/startup.sh
82
+
83
+ RUN chmod +x /app/startup.sh
84
+
85
+ # Expose ports (API only for security, LightRAG stays internal)
86
+ EXPOSE 8000
87
+
88
+ # Health check for the API
89
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
90
+ CMD curl -f http://localhost:8000/health || exit 1
91
+
92
+ # Run the startup script
93
+ CMD ["/app/startup.sh"]
README.md ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CyberLegal AI - LangGraph Agent
2
+
3
+ Advanced cyber-legal assistant powered by LangGraph + LightRAG + GPT-5-Nano for European regulations expertise.
4
+
5
+ ## 🏗️ Architecture
6
+
7
+ ```
8
+ ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐
9
+ │ Client API │────│ LangGraph Agent │────│ LightRAG Server│
10
+ │ (Port 8000) │ │ (Orchestration)│ │ (Port 9621) │
11
+ └─────────────────┘ └──────────────────┘ └─────────────────┘
12
+
13
+ ┌──────────────┐
14
+ │ GPT-5-Nano │
15
+ │ (Reasoning) │
16
+ └──────────────┘
17
+ ```
18
+
19
+ ## 🚀 Quick Start
20
+
21
+ ### Using Docker Compose (Recommended)
22
+
23
+ 1. **Environment Setup**
24
+ ```bash
25
+ # Copy and configure environment
26
+ cp .env.example .env
27
+
28
+ # Edit .env with your API keys
29
+ # OPENAI_API_KEY=your_openai_key
30
+ # LIGHTRAG_API_KEY=your_lightrag_key (optional)
31
+ ```
32
+
33
+ 2. **Deploy**
34
+ ```bash
35
+ docker-compose up -d
36
+ ```
37
+
38
+ 3. **Verify Deployment**
39
+ ```bash
40
+ curl http://localhost:8000/health
41
+ ```
42
+
43
+ ### Using Docker Directly
44
+
45
+ ```bash
46
+ # Build the image
47
+ docker build -t cyberlegal-ai .
48
+
49
+ # Run the container
50
+ docker run -d \
51
+ --name cyberlegal-ai \
52
+ -p 8000:8000 \
53
+ -e OPENAI_API_KEY=your_key \
54
+ -v $(pwd)/rag_storage:/app/rag_storage \
55
+ cyberlegal-ai
56
+ ```
57
+
58
+ ## 📡 API Usage
59
+
60
+ ### Base URL
61
+ ```
62
+ http://localhost:8000
63
+ ```
64
+
65
+ ### Endpoints
66
+
67
+ #### Chat with Assistant
68
+ ```bash
69
+ curl -X POST "http://localhost:8000/chat" \
70
+ -H "Content-Type: application/json" \
71
+ -d '{
72
+ "message": "What are the main obligations under GDPR?",
73
+ "role": "client",
74
+ "jurisdiction": "EU",
75
+ "conversationHistory": []
76
+ }'
77
+ ```
78
+
79
+ #### Health Check
80
+ ```bash
81
+ curl http://localhost:8000/health
82
+ ```
83
+
84
+ #### API Info
85
+ ```bash
86
+ curl http://localhost:8000/
87
+ ```
88
+
89
+ ## 📝 Request Format
90
+
91
+ ```json
92
+ {
93
+ "message": "User's legal question",
94
+ "role": "client" | "lawyer",
95
+ "jurisdiction": "EU" | "France" | "Germany" | "Italy" | "Spain" | "Romania" | "Netherlands" | "Belgium",
96
+ "conversationHistory": [
97
+ {"role": "user|assistant", "content": "Previous message"}
98
+ ]
99
+ }
100
+ ```
101
+
102
+ ## 📤 Response Format
103
+
104
+ ```json
105
+ {
106
+ "response": "Detailed legal answer with references",
107
+ "confidence": 0.85,
108
+ "processing_time": 2.34,
109
+ "references": ["gdpr_2016_679.txt", "nis2_2022_2555.txt"],
110
+ "timestamp": "2025-01-15T10:30:00Z",
111
+ "error": null
112
+ }
113
+ ```
114
+
115
+ ## 🧠 Expertise Areas
116
+
117
+ - **GDPR** (General Data Protection Regulation)
118
+ - **NIS2** (Network and Information Systems Directive 2)
119
+ - **DORA** (Digital Operational Resilience Act)
120
+ - **CRA** (Cyber Resilience Act)
121
+ - **eIDAS 2.0** (Electronic Identification, Authentication and Trust Services)
122
+ - Romanian Civil Code provisions
123
+
124
+ ## 🔄 Workflow
125
+
126
+ 1. **User Query** → API receives request with role/jurisdiction context
127
+ 2. **LightRAG Retrieval** → Searches legal documents for relevant information
128
+ 3. **LangGraph Processing** → Orchestrates the workflow through nodes:
129
+ - Query validation
130
+ - LightRAG integration
131
+ - Context enhancement with GPT-5-Nano
132
+ - Response formatting
133
+ 4. **Enhanced Response** → Returns structured answer with confidence score
134
+
135
+ ## 🛠️ Development
136
+
137
+ ### Local Development
138
+
139
+ ```bash
140
+ # Install dependencies
141
+ pip install -r requirements.txt
142
+
143
+ # Start LightRAG server (required)
144
+ lightrag-server --host 127.0.0.1 --port 9621
145
+
146
+ # Start the API
147
+ python agent_api.py
148
+ ```
149
+
150
+ ### Environment Variables
151
+
152
+ ```bash
153
+ OPENAI_API_KEY=your_openai_api_key
154
+ LIGHTRAG_API_KEY=your_lightrag_api_key
155
+ LIGHTRAG_HOST=127.0.0.1
156
+ LIGHTRAG_PORT=9621
157
+ API_PORT=8000
158
+ ```
159
+
160
+ ## 📁 Project Structure
161
+
162
+ ```
163
+ CyberlegalAI/
164
+ ├── agent_api.py # FastAPI server
165
+ ├── langraph_agent.py # Main LangGraph workflow
166
+ ├── agent_state.py # State management
167
+ ├── prompts.py # System prompts
168
+ ├── utils.py # LightRAG integration
169
+ ├── requirements.txt # Python dependencies
170
+ ├── Dockerfile # Container configuration
171
+ ├── docker-compose.yml # Orchestration
172
+ ├── rag_storage/ # LightRAG data persistence
173
+ └── .env # Environment variables
174
+ ```
175
+
176
+ ## 🔧 Configuration
177
+
178
+ ### Port Management
179
+ - **Port 8000**: API (exposed externally)
180
+ - **Port 9621**: LightRAG (internal only, for security)
181
+
182
+ ### Security Features
183
+ - LightRAG server not exposed externally
184
+ - API key authentication support
185
+ - Non-root container execution
186
+ - Health checks and monitoring
187
+
188
+ ## 📊 Monitoring
189
+
190
+ ### Health Checks
191
+ ```bash
192
+ # Container health
193
+ docker ps
194
+
195
+ # Service health
196
+ curl http://localhost:8000/health
197
+
198
+ # Logs
199
+ docker logs cyberlegal-ai
200
+ ```
201
+
202
+ ### Performance Metrics
203
+ The API returns:
204
+ - Processing time per request
205
+ - Confidence scores
206
+ - Referenced documents
207
+ - Error tracking
208
+
209
+ ## 🚨 Error Handling
210
+
211
+ The API gracefully handles:
212
+ - LightRAG server unavailability
213
+ - OpenAI API errors
214
+ - Invalid request format
215
+ - Network timeouts
216
+
217
+ ## 📚 API Examples
218
+
219
+ ### Client Role Example
220
+ ```json
221
+ {
222
+ "message": "What should my small business do to comply with GDPR?",
223
+ "role": "client",
224
+ "jurisdiction": "France"
225
+ }
226
+ ```
227
+
228
+ ### Lawyer Role Example
229
+ ```json
230
+ {
231
+ "message": "Analyze the legal implications of NIS2 for financial institutions",
232
+ "role": "lawyer",
233
+ "jurisdiction": "EU"
234
+ }
235
+ ```
236
+
237
+ ### Comparison Query
238
+ ```json
239
+ {
240
+ "message": "Compare incident reporting requirements between NIS2 and DORA",
241
+ "role": "client",
242
+ "jurisdiction": "EU"
243
+ }
244
+ ```
245
+
246
+ ## 🤝 Integration Examples
247
+
248
+ ### Python Client
249
+ ```python
250
+ import requests
251
+
252
+ response = requests.post("http://localhost:8000/chat", json={
253
+ "message": "What are GDPR penalties?",
254
+ "role": "client",
255
+ "jurisdiction": "EU",
256
+ "conversationHistory": []
257
+ })
258
+
259
+ result = response.json()
260
+ print(result["response"])
261
+ ```
262
+
263
+ ### JavaScript Client
264
+ ```javascript
265
+ const response = await fetch('http://localhost:8000/chat', {
266
+ method: 'POST',
267
+ headers: { 'Content-Type': 'application/json' },
268
+ body: JSON.stringify({
269
+ message: 'GDPR requirements',
270
+ role: 'client',
271
+ jurisdiction: 'EU',
272
+ conversationHistory: []
273
+ })
274
+ });
275
+
276
+ const result = await response.json();
277
+ console.log(result.response);
278
+ ```
279
+
280
+ ## 📋 Troubleshooting
281
+
282
+ ### Common Issues
283
+
284
+ 1. **LightRAG Connection Failed**
285
+ - Verify LightRAG server is running on port 9621
286
+ - Check container logs: `docker logs cyberlegal-ai`
287
+
288
+ 2. **OpenAI API Errors**
289
+ - Verify OPENAI_API_KEY is set correctly
290
+ - Check API key permissions and quota
291
+
292
+ 3. **Slow Responses**
293
+ - Monitor processing time in API response
294
+ - Check LightRAG document indexing
295
+
296
+ ### Debug Mode
297
+
298
+ Enable debug logging:
299
+ ```bash
300
+ docker-compose logs -f cyberlegal-api
301
+ ```
302
+
303
+ ## 📜 License
304
+
305
+ This project provides general legal information and is not a substitute for professional legal advice.
306
+
307
+ ## 🔄 Updates
308
+
309
+ The system automatically:
310
+ - Retrieves latest regulatory documents
311
+ - Updates knowledge base through LightRAG
312
+ - Maintains conversation context
313
+ - Provides confidence scoring
agent_api.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ FastAPI interface for the LangGraph cyber-legal assistant
4
+ """
5
+
6
+ import os
7
+ import asyncio
8
+ from typing import Dict, List, Any, Optional
9
+ from datetime import datetime
10
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
11
+ from pydantic import BaseModel, Field
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+ from fastapi.responses import JSONResponse
14
+ import uvicorn
15
+ from dotenv import load_dotenv
16
+
17
+ from langraph_agent import CyberLegalAgent
18
+ from agent_state import ConversationManager
19
+ from utils import validate_query
20
+
21
+ # Load environment variables
22
+ load_dotenv(dotenv_path=".env", override=False)
23
+
24
+ # Initialize FastAPI app
25
+ app = FastAPI(
26
+ title="CyberLegal AI API",
27
+ description="LangGraph-powered cyber-legal assistant API",
28
+ version="1.0.0"
29
+ )
30
+
31
+ # Add CORS middleware
32
+ app.add_middleware(
33
+ CORSMiddleware,
34
+ allow_origins=["*"],
35
+ allow_credentials=True,
36
+ allow_methods=["*"],
37
+ allow_headers=["*"],
38
+ )
39
+
40
+ # Pydantic models for request/response
41
+ class Message(BaseModel):
42
+ role: str = Field(..., description="Role: 'user' or 'assistant'")
43
+ content: str = Field(..., description="Message content")
44
+
45
class ChatRequest(BaseModel):
    """Incoming /chat payload: the question plus role/jurisdiction context."""

    message: str = Field(..., description="User's question")
    role: str = Field(..., description="User role: 'client' or 'lawyer'")
    jurisdiction: str = Field(..., description="Selected jurisdiction")
    # default_factory gives each model its own list (idiomatic for mutable
    # defaults; equivalent to default=[] under pydantic but explicit).
    conversationHistory: Optional[List[Message]] = Field(default_factory=list, description="Previous conversation messages")
50
+
51
class ChatResponse(BaseModel):
    """Outgoing /chat payload: the answer plus run metadata."""

    response: str = Field(..., description="Assistant's response")
    confidence: float = Field(..., description="Confidence score (0.0-1.0)")
    processing_time: float = Field(..., description="Processing time in seconds")
    # default_factory gives each model its own list (idiomatic for mutable
    # defaults; equivalent to default=[] under pydantic but explicit).
    references: List[str] = Field(default_factory=list, description="Referenced documents")
    timestamp: str = Field(..., description="Response timestamp")
    error: Optional[str] = Field(None, description="Error message if any")
58
+
59
+ class HealthResponse(BaseModel):
60
+ status: str = Field(..., description="Health status")
61
+ agent_ready: bool = Field(..., description="Whether agent is ready")
62
+ lightrag_healthy: bool = Field(..., description="Whether LightRAG is healthy")
63
+ timestamp: str = Field(..., description="Health check timestamp")
64
+
65
+ # Global agent instance
66
+ agent_instance = None
67
+
68
class CyberLegalAPI:
    """
    API wrapper for the LangGraph agent.

    Adapts HTTP request/response models to the agent's plain-dict interface.
    """

    def __init__(self):
        self.agent = CyberLegalAgent()
        self.conversation_manager = ConversationManager()

    async def process_request(self, request: ChatRequest) -> ChatResponse:
        """
        Validate, enrich and run a chat request through the agent.

        Raises:
            HTTPException: 400 on an invalid message, 500 on processing failure.
        """
        # Validate message
        is_valid, error_msg = validate_query(request.message)
        if not is_valid:
            raise HTTPException(status_code=400, detail=error_msg)

        # Convert conversation history into the plain dicts the agent expects
        conversation_history = [
            {"role": msg.role, "content": msg.content}
            for msg in request.conversationHistory or []
        ]

        try:
            # Create enhanced query with context
            enhanced_query = self._create_enhanced_query(request)

            # Process through agent
            result = await self.agent.process_query(
                user_query=enhanced_query,
                conversation_history=conversation_history
            )

            return ChatResponse(
                response=result["response"],
                confidence=result.get("confidence", 0.0),
                processing_time=result.get("processing_time", 0.0),
                references=result.get("references", []),
                timestamp=result.get("timestamp", datetime.now().isoformat()),
                error=result.get("error")
            )

        except HTTPException:
            # BUG FIX: don't re-wrap deliberate HTTP errors as generic 500s.
            raise
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Processing failed: {str(e)}"
            )

    def _create_enhanced_query(self, request: ChatRequest) -> str:
        """
        Wrap the user's question with role and jurisdiction context so the
        agent can tailor its answer.
        """
        base_query = request.message

        # Role-specific framing
        role_context = ""
        if request.role == "client":
            role_context = "Answer from the perspective of advising a client who needs practical guidance."
        elif request.role == "lawyer":
            role_context = "Answer from the perspective of providing legal analysis for a legal professional."

        # Jurisdiction-specific framing
        jurisdiction_context = f"Focus on the legal framework in {request.jurisdiction}."

        enhanced_query = f"""{base_query}

Context:
- User Role: {request.role}
- Jurisdiction: {request.jurisdiction}
- Special Instructions: {role_context} {jurisdiction_context}

Please provide a response tailored to this context."""

        return enhanced_query

    async def health_check(self) -> HealthResponse:
        """
        Report API and LightRAG health. Never raises: any failure while
        probing collapses to an "unhealthy" response.
        """
        try:
            lightrag_healthy = self.agent.lightrag_client.health_check()

            return HealthResponse(
                status="healthy" if lightrag_healthy else "degraded",
                agent_ready=True,
                lightrag_healthy=lightrag_healthy,
                timestamp=datetime.now().isoformat()
            )

        except Exception:
            # (unused exception variable removed)
            return HealthResponse(
                status="unhealthy",
                agent_ready=False,
                lightrag_healthy=False,
                timestamp=datetime.now().isoformat()
            )
172
+
173
+ # Initialize API instance
174
+ api = CyberLegalAPI()
175
+
176
@app.on_event("startup")
async def startup_event():
    """Print the startup banner and the list of available routes."""
    banner = [
        "🚀 Starting CyberLegal AI API...",
        "🔧 Powered by: LangGraph + LightRAG + GPT-5-Nano",
        "📍 API endpoints:",
        " - POST /chat - Chat with the assistant",
        " - GET /health - Health check",
        " - GET / - API info",
    ]
    for line in banner:
        print(line)
187
+
188
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """
    Chat with the cyber-legal assistant.

    Args:
        request: Chat request with message, role, jurisdiction, and history

    Returns:
        ChatResponse with assistant's response and metadata
    """
    result = await api.process_request(request)
    return result
200
+
201
@app.get("/health", response_model=HealthResponse)
async def health_endpoint():
    """
    Health check endpoint.

    Returns:
        HealthResponse with system status
    """
    status_report = await api.health_check()
    return status_report
210
+
211
@app.get("/")
async def root():
    """Describe the API: endpoints, jurisdictions, roles and expertise areas."""
    endpoints = {
        "chat": "POST /chat - Chat with the assistant",
        "health": "GET /health - Health check"
    }
    jurisdictions = [
        "EU", "France", "Germany", "Italy", "Spain", "Romania", "Netherlands", "Belgium"
    ]
    expertise = [
        "GDPR", "NIS2", "DORA", "Cyber Resilience Act", "eIDAS 2.0"
    ]
    return {
        "name": "CyberLegal AI API",
        "version": "1.0.0",
        "description": "LangGraph-powered cyber-legal assistant API",
        "technology": "LangGraph + LightRAG + GPT-5-Nano",
        "endpoints": endpoints,
        "supported_jurisdictions": jurisdictions,
        "user_roles": ["client", "lawyer"],
        "expertise": expertise
    }
233
+
234
@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
    """Convert any unhandled exception into a JSON 500 response."""
    payload = {
        "error": "Internal server error",
        "detail": str(exc),
        "timestamp": datetime.now().isoformat()
    }
    return JSONResponse(status_code=500, content=payload)
247
+
248
if __name__ == "__main__":
    # Prefer a platform-injected PORT, then API_PORT, then the 8000 default.
    listen_port = int(os.getenv("PORT", os.getenv("API_PORT", "8000")))

    uvicorn.run(
        "agent_api:app",
        host="0.0.0.0",
        port=listen_port,
        reload=False,
        log_level="info",
    )
agent_state.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Agent state management for the LangGraph cyber-legal assistant
4
+ """
5
+
6
+ from typing import TypedDict, List, Dict, Any, Optional
7
+ from datetime import datetime
8
+
9
+
10
+ class AgentState(TypedDict):
11
+ """
12
+ State definition for the LangGraph agent workflow
13
+ """
14
+ # User interaction
15
+ user_query: str
16
+ conversation_history: List[Dict[str, str]]
17
+
18
+ # LightRAG integration
19
+ lightrag_response: Optional[Dict[str, Any]]
20
+ lightrag_error: Optional[str]
21
+
22
+ # Context processing
23
+ processed_context: Optional[str]
24
+ relevant_documents: List[str]
25
+
26
+ # Agent reasoning
27
+ analysis_thoughts: Optional[str]
28
+ needs_clarification: bool
29
+ clarification_question: Optional[str]
30
+
31
+ # Final output
32
+ final_response: Optional[str]
33
+ confidence_score: Optional[float]
34
+
35
+ # Metadata
36
+ query_timestamp: str
37
+ processing_time: Optional[float]
38
+ query_type: Optional[str] # "comparison", "explanation", "compliance", "general"
39
+
40
+
41
class ConversationManager:
    """
    Manages conversation history and context.

    History entries are ``{"role", "content", "timestamp"}`` dicts. Methods
    never mutate the caller's list — they return new lists.
    """

    def __init__(self, max_history: int = 10):
        # Maximum number of user/assistant exchange *pairs* to retain.
        self.max_history = max_history

    def add_exchange(self, history: List[Dict[str, str]], user_query: str, agent_response: str) -> List[Dict[str, str]]:
        """
        Return a copy of *history* with one new user/assistant exchange
        appended, truncated to the most recent ``max_history`` pairs.
        """
        # One timestamp shared by both halves of the exchange, so the pair
        # sorts/groups together.
        now = datetime.now().isoformat()
        updated_history = history + [
            {"role": "user", "content": user_query, "timestamp": now},
            {"role": "assistant", "content": agent_response, "timestamp": now},
        ]

        # Keep only the last max_history exchanges (pairs)
        if len(updated_history) > self.max_history * 2:
            updated_history = updated_history[-self.max_history * 2:]

        return updated_history

    def format_for_lightrag(self, history: List[Dict[str, str]]) -> List[Dict[str, str]]:
        """
        Format conversation history for the LightRAG API: keep only the
        role/content keys (drops timestamps).
        """
        return [
            {"role": exchange["role"], "content": exchange["content"]}
            for exchange in history
        ]

    def get_context_summary(self, history: List[Dict[str, str]]) -> str:
        """
        Render the last 3 exchanges (6 messages) as a readable transcript.
        """
        if not history:
            return "No previous conversation context."

        context_parts = []
        for exchange in history[-6:]:  # Last 3 exchanges
            role = "User" if exchange["role"] == "user" else "Assistant"
            context_parts.append(f"{role}: {exchange['content']}")

        return "\n".join(context_parts)
docker-compose.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ cyberlegal-api:
5
+ build: .
6
+ container_name: cyberlegal-ai
7
+ ports:
8
+ - "8000:8000" # API port only
9
+ environment:
10
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
11
+ - LIGHTRAG_API_KEY=${LIGHTRAG_API_KEY}
12
+ - LIGHTRAG_HOST=127.0.0.1
13
+ - LIGHTRAG_PORT=9621
14
+ - API_PORT=8000
15
+ volumes:
16
+ - ./rag_storage:/app/rag_storage # Persist LightRAG data
17
+ - ./.env:/app/.env # Environment file
18
+ restart: unless-stopped
19
+ healthcheck:
20
+ test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
21
+ interval: 30s
22
+ timeout: 10s
23
+ retries: 3
24
+ start_period: 60s
langraph_agent.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simplified LangGraph agent implementation for cyber-legal assistant
4
+ """
5
+
6
+ import os
7
+ from typing import Dict, Any, List, Optional
8
+ from datetime import datetime
9
+ from langgraph.graph import StateGraph, END
10
+ from langchain_openai import ChatOpenAI
11
+ from langchain_core.messages import HumanMessage, SystemMessage
12
+
13
+ from agent_state import AgentState, ConversationManager
14
+ from prompts import SYSTEM_PROMPT, ERROR_HANDLING_PROMPT
15
+ from utils import LightRAGClient, ConversationFormatter, PerformanceMonitor
16
+
17
+
18
class CyberLegalAgent:
    """
    Simplified LangGraph-based cyber-legal assistant agent.

    Workflow: query_lightrag -> answer_with_context on success, or
    query_lightrag -> handle_error when retrieval fails.
    """

    def __init__(self, openai_api_key: Optional[str] = None):
        """
        Args:
            openai_api_key: Explicit API key; falls back to OPENAI_API_KEY.
        """
        # NOTE(review): confirm this model id is actually served for the key
        # in use (the original comment still referred to gpt-4o-mini).
        self.llm = ChatOpenAI(
            model="gpt-5-nano-2025-08-07",
            temperature=0.1,
            openai_api_key=openai_api_key or os.getenv("OPENAI_API_KEY")
        )

        # Initialize components
        self.lightrag_client = LightRAGClient()
        self.conversation_manager = ConversationManager()
        self.performance_monitor = PerformanceMonitor()

        # Build and compile the workflow graph
        self.workflow = self._build_workflow()

    def _build_workflow(self) -> StateGraph:
        """
        Build the simplified LangGraph workflow.

        Routing out of query_lightrag is handled exclusively by the
        conditional edge.
        """
        workflow = StateGraph(AgentState)

        # Add nodes
        workflow.add_node("query_lightrag", self._query_lightrag)
        workflow.add_node("answer_with_context", self._answer_with_context)
        workflow.add_node("handle_error", self._handle_error)

        workflow.set_entry_point("query_lightrag")

        # BUG FIX: the previous version also added an unconditional edge
        # query_lightrag -> answer_with_context on top of this conditional
        # edge, which made the answer branch run even when an error was
        # routed to handle_error. The conditional edge alone does the routing.
        workflow.add_conditional_edges(
            "query_lightrag",
            self._should_handle_error,
            {
                "error": "handle_error",
                "continue": "answer_with_context"
            }
        )
        workflow.add_edge("answer_with_context", END)
        workflow.add_edge("handle_error", END)

        return workflow.compile()

    def _should_handle_error(self, state: AgentState) -> str:
        """Route to the error node when LightRAG retrieval recorded an error."""
        if state.get("lightrag_error"):
            return "error"
        return "continue"

    async def _query_lightrag(self, state: AgentState) -> AgentState:
        """
        Query LightRAG for legal information; store the raw response and the
        extracted references (or an error message) on the state.
        """
        self.performance_monitor.start_timer("lightrag_query")
        try:
            # Fail fast if the retrieval backend is down.
            if not self.lightrag_client.health_check():
                state["lightrag_error"] = "LightRAG server is not healthy"
                return state

            # Prepare conversation history for LightRAG
            history = state.get("conversation_history", [])
            formatted_history = ConversationFormatter.build_conversation_history(history)

            response = self.lightrag_client.query(
                query=state["user_query"],
                conversation_history=formatted_history
            )

            if "error" in response:
                state["lightrag_error"] = response["error"]
            else:
                state["lightrag_response"] = response
                state["relevant_documents"] = self.lightrag_client.get_references(response)

        except Exception as e:
            state["lightrag_error"] = f"LightRAG query failed: {str(e)}"
        finally:
            # BUG FIX: end the timer on every path (the early return above
            # previously skipped it).
            self.performance_monitor.end_timer("lightrag_query")

        return state

    async def _answer_with_context(self, state: AgentState) -> AgentState:
        """
        Generate the final answer from the retrieved LightRAG context,
        appending references and the standard disclaimer.
        """
        self.performance_monitor.start_timer("answer_generation")
        try:
            if not state.get("lightrag_response"):
                state["lightrag_error"] = "No response from LightRAG"
                return state

            # Extract context from LightRAG response
            context = state["lightrag_response"].get("response", "")
            if not context:
                state["final_response"] = "I apologize, but I couldn't find relevant information for your query."
                return state

            # Prompt the LLM to answer strictly from the retrieved context
            answer_prompt = f"""Based on the following retrieved legal information, please answer the user's question accurately and comprehensively.

**User Question:** {state["user_query"]}

**Retrieved Legal Context:**
{context}

**Instructions:**
1. Answer the user's question directly based on the provided context
2. If the context doesn't fully answer the question, acknowledge the limitations
3. Provide specific legal references when available in the context
4. Include practical implications for organizations
5. Add a disclaimer that this is for guidance purposes only

Please provide a clear, well-structured response."""

            messages = [
                SystemMessage(content=SYSTEM_PROMPT),
                HumanMessage(content=answer_prompt)
            ]
            response = await self.llm.ainvoke(messages)
            answer = response.content

            # Append up to 3 source references
            references = state.get("relevant_documents", [])
            if references:
                answer += "\n\n**📚 References:**\n"
                for ref in references[:3]:
                    answer += f"• {ref}\n"

            # Standard disclaimer
            answer += "\n\n**Disclaimer:** This information is for guidance purposes only and not legal advice. For specific legal matters, consult with qualified legal counsel."

            state["final_response"] = answer
            state["confidence_score"] = 0.8  # High confidence when LightRAG provides good context

        except Exception as e:
            state["lightrag_error"] = f"Answer generation failed: {str(e)}"
        finally:
            # BUG FIX: timing/metadata is now recorded on every path,
            # including the early returns above.
            self.performance_monitor.end_timer("answer_generation")
            state["processing_time"] = self._total_processing_time()
            state["query_timestamp"] = datetime.now().isoformat()

        return state

    async def _handle_error(self, state: AgentState) -> AgentState:
        """
        Produce a graceful user-facing message when the workflow failed.
        """
        error = state.get("lightrag_error", "Unknown error occurred")
        error_prompt = ERROR_HANDLING_PROMPT.format(error_message=error)

        try:
            messages = [
                SystemMessage(content=SYSTEM_PROMPT),
                HumanMessage(content=error_prompt)
            ]
            response = await self.llm.ainvoke(messages)
            state["final_response"] = response.content

        except Exception:
            # The LLM is unavailable too — fall back to a static apology.
            state["final_response"] = f"I apologize, but an error occurred: {error}"

        state["confidence_score"] = 0.2  # Low confidence for errors
        # BUG FIX: previously the whole metrics *dict* was assigned here even
        # though processing_time is declared Optional[float].
        state["processing_time"] = self._total_processing_time()
        state["query_timestamp"] = datetime.now().isoformat()

        return state

    def _total_processing_time(self) -> float:
        """Sum the recorded per-stage durations into one total, in seconds."""
        metrics = self.performance_monitor.get_metrics()
        return sum(
            metrics.get(f"{op}_duration", 0)
            for op in ("lightrag_query", "answer_generation")
        )

    async def process_query(
        self,
        user_query: str,
        conversation_history: Optional[List[Dict[str, str]]] = None
    ) -> Dict[str, Any]:
        """
        Process a user query through the agent workflow.

        Returns a plain dict: response, confidence, processing_time,
        references, error, timestamp.
        """
        initial_state: AgentState = {
            "user_query": user_query,
            "conversation_history": conversation_history or [],
            "lightrag_response": None,
            "lightrag_error": None,
            "processed_context": None,
            "relevant_documents": [],
            "analysis_thoughts": None,
            "needs_clarification": False,
            "clarification_question": None,
            "final_response": None,
            "confidence_score": None,
            "query_timestamp": datetime.now().isoformat(),
            "processing_time": None,
            "query_type": None
        }

        # Fresh timings for each query
        self.performance_monitor.reset()

        try:
            final_state = await self.workflow.ainvoke(initial_state)

            return {
                "response": final_state.get("final_response", ""),
                "confidence": final_state.get("confidence_score", 0.0),
                "processing_time": final_state.get("processing_time", 0.0),
                "references": final_state.get("relevant_documents", []),
                "error": final_state.get("lightrag_error"),
                "timestamp": final_state.get("query_timestamp")
            }

        except Exception as e:
            return {
                "response": f"I apologize, but a critical error occurred: {str(e)}",
                "confidence": 0.0,
                "processing_time": 0.0,
                "references": [],
                "error": str(e),
                "timestamp": datetime.now().isoformat()
            }
prompts.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
System prompts for the LangGraph cyber-legal assistant
"""

# Base persona injected into every LLM call: defines the assistant's
# regulatory scope, response structure, and the mandatory
# not-legal-advice disclaimer.
SYSTEM_PROMPT = """You are an expert cyber-legal assistant specializing in European Union regulations and directives.
Your expertise covers:

- GDPR (General Data Protection Regulation)
- NIS2 Directive (Network and Information Systems Directive 2)
- DORA (Digital Operational Resilience Act)
- Cyber Resilience Act (CRA)
- eIDAS 2.0 (Electronic Identification, Authentication and Trust Services)
- Romanian Civil Code provisions relevant to cyber security

**Your Role:**
Provide accurate, clear, and practical information about cyber-legal regulations. Always base your responses on the retrieved legal documents and context provided.

**Guidelines:**
1. Be precise and accurate with legal information
2. Provide practical examples when helpful
3. Clarify jurisdiction (EU-wide vs member state implementation)
4. Mention important dates, deadlines, or transitional periods
5. Include relevant penalties or enforcement mechanisms when applicable
6. Suggest official sources for further reading

**Response Structure:**
1. Direct answer to the user's question
2. Relevant legal basis (specific articles, sections)
3. Practical implications
4. Related compliance requirements
5. References to source documents

**Important Disclaimer:**
Always include a note that this information is for guidance purposes and not legal advice. For specific legal matters, consult with qualified legal counsel."""

# Template used to rewrite a raw LightRAG answer into a fuller response.
# Format placeholders: {lightrag_response}, {conversation_context}, {user_query}.
CONTEXT_ENHANCEMENT_PROMPT = """Based on the following RAG response about European cyber-legal regulations, enhance the information by:

1. **Structuring**: Organize the information in a clear, logical manner
2. **Context**: Add relevant background information about the regulation
3. **Practicality**: Include practical implications for organizations
4. **Completeness**: Fill in gaps with general knowledge about EU regulations
5. **Clarity**: Ensure complex legal concepts are explained clearly

**RAG Response:**
{lightrag_response}

**Conversation Context:**
{conversation_context}

**User Query:**
{user_query}

Please provide an enhanced response that is more comprehensive and user-friendly while maintaining accuracy."""

# User-facing fallback shown when the RAG backend fails.
# Format placeholder: {error_message}.
ERROR_HANDLING_PROMPT = """I apologize, but I encountered an issue while retrieving information from the legal database.

**Error Details:**
{error_message}

**What you can do:**
1. Try rephrasing your question
2. Check if the regulation name is spelled correctly
3. Ask about a specific aspect of the regulation
4. Try a more general question about the topic

**Available Regulations:**
- GDPR (Data Protection)
- NIS2 (Cybersecurity for critical entities)
- DORA (Financial sector operational resilience)
- Cyber Resilience Act (Product security requirements)
- eIDAS 2.0 (Digital identity and trust services)

Would you like to try asking your question in a different way?"""

# Asks the user for more detail before querying the database.
# Format placeholders: {user_query}, {clarification_question}.
CLARIFICATION_PROMPT = """To provide you with the most accurate information, I need a bit more detail about your question.

**Your Question:** {user_query}

**Clarification Needed:** {clarification_question}

This will help me search the specific legal provisions that are most relevant to your situation."""

# Formatting instructions applied to the final answer.
# Format placeholders: {content}, {user_query}.
RESPONSE_FORMATTING_PROMPT = """Format the final response according to these guidelines:

1. **Clear Heading**: Start with a clear, direct answer
2. **Legal Basis**: Reference specific articles or sections when available
3. **Key Points**: Use bullet points for important information
4. **Practical Impact**: Explain what this means for organizations
5. **References**: List source documents
6. **Disclaimer**: Include the standard legal disclaimer

**Content to Format:**
{content}

**User Query:** {user_query}"""

# Generates 3-4 follow-up question suggestions for the user.
# Format placeholder: {user_query}.
FOLLOW_UP_SUGGESTIONS_PROMPT = """Based on the user's query about "{user_query}", suggest relevant follow-up questions that might be helpful:

Consider:
1. Related regulations they might need to know about
2. Implementation or compliance aspects
3. Similar scenarios or use cases
4. Recent updates or changes

Provide 3-4 relevant follow-up suggestions."""

# Summarises a whole conversation for context carry-over.
# Format placeholder: {conversation_history}.
CONVERSATION_SUMMARY_PROMPT = """Summarize the key points discussed in this conversation about European cyber-legal regulations:

**Conversation History:**
{conversation_history}

**Focus Areas:**
- Main regulations discussed
- Key compliance points mentioned
- Important deadlines or requirements
- Any specific scenarios covered

Provide a concise summary that captures the essence of the legal discussion."""

# Asks the LLM to self-score an answer on a 0.0-1.0 scale.
# Format placeholders: {response}, {user_query}.
CONFIDENCE_ASSESSMENT_PROMPT = """Assess the confidence level of the provided response based on:

1. **Source Quality**: How reliable are the referenced documents?
2. **Information Completeness**: Does the response fully address the query?
3. **Legal Specificity**: How specific and accurate are the legal references?
4. **Context Relevance**: How well does it match the user's needs?

**Response to Assess:**
{response}

**User Query:** {user_query}

Provide a confidence score (0.0-1.0) and brief reasoning."""
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ gradio>=4.0.0
3
+ requests>=2.25.0
4
+ python-dotenv
5
+ lightrag-hku[api]
6
+
7
+ # LangGraph and LangChain dependencies
8
+ langgraph>=0.0.26
9
+ langchain>=0.1.0
10
+ langchain-openai>=0.1.0
11
+ langchain-community>=0.0.20
12
+
13
+ # FastAPI and server dependencies
14
+ fastapi>=0.104.0
15
+ uvicorn[standard]>=0.24.0
16
+
17
+ # Additional utilities
18
+ pydantic>=2.0.0
19
+ typing-extensions>=4.0.0
startup.sh ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Boot script: launch the LightRAG backend, wait for its health check to
# pass, then start the public-facing LangGraph API server in the foreground.
set -euo pipefail

LIGHTRAG_HOST="${LIGHTRAG_HOST:-127.0.0.1}"
LIGHTRAG_PORT="${LIGHTRAG_PORT:-9621}"

# Platform public port (Render/Koyeb/etc) or local fallback
PUBLIC_PORT="${PORT:-${API_PORT:-8000}}"

echo "🚀 Starting CyberLegal AI Stack..."
echo "Step 1: Starting LightRAG server on ${LIGHTRAG_HOST}:${LIGHTRAG_PORT} ..."

lightrag-server --host "${LIGHTRAG_HOST}" --port "${LIGHTRAG_PORT}" &
LIGHTRAG_PID=$!

# Always tear down the background RAG process, whether we exit normally,
# fail, or get interrupted.
cleanup() {
    echo "🧹 Shutting down..."
    kill -TERM "${LIGHTRAG_PID}" 2>/dev/null || true
    wait "${LIGHTRAG_PID}" 2>/dev/null || true
}
trap cleanup EXIT INT TERM

echo "Waiting for LightRAG server to be ready..."
max_attempts=30
ready=0
for attempt in $(seq 1 "${max_attempts}"); do
    if curl -fsS "http://${LIGHTRAG_HOST}:${LIGHTRAG_PORT}/health" >/dev/null 2>&1; then
        echo "✅ LightRAG server is ready!"
        ready=1
        break
    fi
    echo "Attempt ${attempt}/${max_attempts}: LightRAG not ready yet..."
    sleep 2
done

if [ "${ready}" -ne 1 ]; then
    echo "❌ LightRAG server failed to start"
    exit 1
fi

echo "Step 2: Starting LangGraph API server on 0.0.0.0:${PUBLIC_PORT} ..."
echo "🌐 API: http://localhost:${PUBLIC_PORT}"
echo "📚 RAG: http://${LIGHTRAG_HOST}:${LIGHTRAG_PORT}"
echo "🎉 Ready!"

# Ensure FastAPI reads the correct port on platforms
export PORT="${PUBLIC_PORT}"

python agent_api.py
utils.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Utility functions for LightRAG integration and agent operations
4
+ """
5
+
6
+ import os
7
+ import requests
8
+ import time
9
+ from typing import Dict, List, Any, Optional, Tuple
10
+ from dotenv import load_dotenv
11
+ from datetime import datetime
12
+ import logging
13
+
14
# Load environment variables from a local .env file; values already set in
# the process environment win because override=False.
load_dotenv(dotenv_path=".env", override=False)

# Configure module-level logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# LightRAG configuration: where the backing RAG server listens and the
# optional API key forwarded on every request.
LIGHTRAG_PORT = int(os.getenv("LIGHTRAG_PORT", "9621"))
LIGHTRAG_HOST = os.getenv("LIGHTRAG_HOST", "127.0.0.1")
SERVER_URL = f"http://{LIGHTRAG_HOST}:{LIGHTRAG_PORT}"
API_KEY = os.getenv("LIGHTRAG_API_KEY")  # None -> no X-API-Key header sent
26
+
27
class LightRAGClient:
    """
    Thin HTTP client for the LightRAG server.

    Wraps the /health and /query endpoints, attaching the optional API key
    header and adding retry-with-backoff behaviour around queries.
    """

    def __init__(self, server_url: str = SERVER_URL, api_key: Optional[str] = API_KEY):
        self.server_url = server_url
        self.api_key = api_key
        self.timeout = 60  # seconds allowed per /query request

    def health_check(self, timeout: float = 1.5) -> bool:
        """
        Check if LightRAG server is healthy.

        Returns True only on an HTTP 200 from /health; any connection
        error or timeout is logged and reported as unhealthy.
        """
        try:
            response = requests.get(f"{self.server_url}/health", timeout=timeout)
            return response.status_code == 200
        except Exception as e:
            logger.warning(f"Health check failed: {e}")
            return False

    def query(
        self,
        query: str,
        mode: str = "mix",
        include_references: bool = True,
        conversation_history: Optional[List[Dict[str, str]]] = None,
        max_retries: int = 3
    ) -> Dict[str, Any]:
        """
        Query LightRAG server with retry logic.

        Transient failures (timeouts, connection errors, 5xx) are retried
        with exponential backoff. Client errors (4xx other than 408/429)
        are NOT retried, since repeating an invalid request cannot succeed.

        Returns the parsed JSON response on success, otherwise a dict with
        an "error" key.
        """
        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["X-API-Key"] = self.api_key

        payload = {
            "query": query,
            "mode": mode,
            "include_references": include_references,
            "conversation_history": conversation_history or [],
        }

        for attempt in range(max_retries):
            try:
                response = requests.post(
                    f"{self.server_url}/query",
                    json=payload,
                    headers=headers,
                    timeout=self.timeout
                )

                if response.status_code == 200:
                    # May still raise if the body is not valid JSON; that is
                    # caught below and treated as a retryable failure.
                    return response.json()

                logger.warning(f"Query failed with status {response.status_code}, attempt {attempt + 1}")

                # Fix: do not retry non-transient client errors. 408
                # (request timeout) and 429 (rate limited) remain retryable.
                if 400 <= response.status_code < 500 and response.status_code not in (408, 429):
                    return {"error": f"Query rejected with status {response.status_code}"}

            except requests.exceptions.Timeout:
                logger.warning(f"Query timeout, attempt {attempt + 1}")
            except Exception as e:
                logger.warning(f"Query error: {e}, attempt {attempt + 1}")

            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff

        return {"error": f"Query failed after {max_retries} attempts"}

    def get_references(self, response_data: Dict[str, Any]) -> List[str]:
        """
        Extract reference file names from a LightRAG response.

        Takes the top 5 entries of the "references" list and returns just
        the basename of each referenced file path.
        """
        references = response_data.get("references", []) or []
        ref_list = []

        for ref in references[:5]:  # Limit to top 5 references
            file_name = str(ref.get("file_path", "Unknown file")).split("/")[-1]
            ref_list.append(file_name)

        return ref_list
106
+
107
+
108
class ResponseProcessor:
    """
    Helpers for post-processing LightRAG responses.
    """

    @staticmethod
    def extract_main_content(response: Dict[str, Any]) -> str:
        """Return the main answer text, with a fixed fallback when absent."""
        return response.get("response", "No response available.")

    @staticmethod
    def format_references(references: List[str]) -> str:
        """
        Render a bulleted reference section for display.

        Returns an empty string when there are no references.
        """
        if not references:
            return ""

        bullets = "".join(f"• {ref}\n" for ref in references)
        return "\n\n**📚 References:**\n" + bullets

    @staticmethod
    def extract_key_entities(response: Dict[str, Any]) -> List[str]:
        """
        Return the EU regulations mentioned in the response text.

        Matching is a case-insensitive substring scan over a fixed list of
        regulation names; duplicates are collapsed (result order is
        therefore unspecified).
        """
        # Could be enhanced if LightRAG ever exposes entity information.
        text = response.get("response", "").lower()
        known_regulations = ["GDPR", "NIS2", "DORA", "CRA", "eIDAS", "Cyber Resilience Act"]

        matches = {name for name in known_regulations if name.lower() in text}
        return list(matches)
151
+
152
+
153
class ConversationFormatter:
    """
    Format conversation data for different purposes.
    """

    @staticmethod
    def build_conversation_history(history: List[Dict[str, str]], max_turns: int = 10) -> List[Dict[str, str]]:
        """
        Normalise chat history for the LightRAG API.

        Keeps only the most recent ``max_turns`` user/assistant pairs and
        strips each message down to its role and content fields.
        """
        if not history:
            return []

        window = history[-(max_turns * 2):]
        return [{"role": msg["role"], "content": msg["content"]} for msg in window]

    @staticmethod
    def create_context_summary(history: List[Dict[str, str]]) -> str:
        """
        Produce a short transcript of the last two exchanges, truncating
        each message to 100 characters.
        """
        if not history:
            return "No previous conversation."

        lines = []
        for msg in history[-4:]:  # last 2 user/assistant exchanges
            speaker = "User" if msg["role"] == "user" else "Assistant"
            text = msg["content"]
            if len(text) > 100:
                text = text[:100] + "..."
            lines.append(f"{speaker}: {text}")

        return "\n".join(lines)
195
+
196
+
197
class PerformanceMonitor:
    """
    Collects wall-clock timings for named agent operations.

    Timings live in one flat dict: "<op>_start" holds the start timestamp
    and "<op>_duration" the elapsed seconds.
    """

    def __init__(self):
        self.metrics = {}

    def start_timer(self, operation: str) -> None:
        """Record the start timestamp for *operation*."""
        self.metrics[f"{operation}_start"] = time.time()

    def end_timer(self, operation: str) -> float:
        """
        Stop timing *operation* and return its duration in seconds.

        Returns 0.0 when no matching start_timer() call was recorded.
        """
        started = self.metrics.get(f"{operation}_start")
        if not started:
            return 0.0
        elapsed = time.time() - started
        self.metrics[f"{operation}_duration"] = elapsed
        return elapsed

    def get_metrics(self) -> Dict[str, Any]:
        """Return a shallow copy of all collected metrics."""
        return self.metrics.copy()

    def reset(self) -> None:
        """Discard all recorded metrics."""
        self.metrics.clear()
233
+
234
+
235
def validate_query(query: str) -> Tuple[bool, Optional[str]]:
    """
    Check that a user query is non-empty and within the length limit.

    Returns (True, None) for valid input, otherwise (False, reason).
    """
    if not (query and query.strip()):
        return False, "Query cannot be empty."

    if len(query) > 1000:
        return False, "Query is too long. Please keep it under 1000 characters."

    return True, None
246
+
247
+
248
def format_error_message(error: str) -> str:
    """
    Map raw error text to a friendly, user-facing message.

    The first known pattern found (case-insensitively) in the error text
    wins; unrecognised errors fall back to a generic wrapper.
    """
    friendly = {
        "Server unreachable": "❌ The legal database is currently unavailable. Please try again in a moment.",
        "timeout": "❌ The request timed out. Please try again.",
        "invalid json": "❌ There was an issue processing the response. Please try again.",
        "health check failed": "❌ The system is initializing. Please wait a moment and try again."
    }

    lowered = error.lower()
    matched = next((msg for key, msg in friendly.items() if key.lower() in lowered), None)
    return matched if matched is not None else f"❌ An error occurred: {error}"
264
+
265
+
266
def create_safe_filename(query: str, timestamp: str) -> str:
    """
    Build a filesystem-safe log file name from a query and timestamp.

    Drops every character that is not alphanumeric, space, dash, or
    underscore, then caps the query portion at 50 characters.
    """
    allowed_punct = (' ', '-', '_')
    cleaned = "".join(ch for ch in query if ch.isalnum() or ch in allowed_punct).strip()
    return f"{timestamp}_{cleaned[:50]}.log"