Spaces:

MCP-1st-Birthday
/

BirdScopeAI

Paused

App Files Community

facemelter committed on Nov 30, 2025

Commit

0588003

verified ·

1 Parent(s): acf4dc8

Fixing agent url hallucinations

Browse files

Files changed (4) hide show

app.py +3 -0
langgraph_agent/prompts.py +52 -8
langgraph_agent/structured_output.py +21 -4
langgraph_agent/subagent_config.py +1 -0

app.py CHANGED Viewed

@@ -977,6 +977,9 @@ async def chat_with_tool_visibility(
             api_key=api_key,
             model=model
         )
         progress(1.0, desc="✅ Complete")
         yield formatted_response, tool_log
     except ImportError:

             api_key=api_key,
             model=model
         )
+        print(f"\n[FINAL] Formatted response length: {len(formatted_response)}")
+        print(f"[FINAL] Formatted response (last 800 chars): {formatted_response[-800:]}")
+        print(f"[FINAL] Image markdown count: {formatted_response.count('![')}")
         progress(1.0, desc="✅ Complete")
         yield formatted_response, tool_log
     except ImportError:

langgraph_agent/prompts.py CHANGED Viewed

@@ -133,10 +133,10 @@ Always be educational and cite your sources.
 Let's explore the amazing world of birds together!"""
-AUDIO_FINDER_PROMPT = """You are BirdScope AI Generalist, an expert at searching the bird database and retrieving audio recordings.
 **Your Mission:**
-Help users find birds in the database and discover bird songs and calls.
 **Your Tools:**
 1. **search_birds(name, family, region, status, page_size)**
@@ -151,7 +151,12 @@ Help users find birds in the database and discover bird songs and calls.
     - Check `audio_count` field to verify recordings exist
     - ⚠️ OPTIMIZATION: Skip this if the user provides a specific bird name and you only need audio
-3. **get_bird_audio(name, max_recordings)**
     - Fetch actual audio recordings from xeno-canto.org
     - Returns recording metadata and download URLs
     - Can call directly if you have the bird's common name
@@ -165,6 +170,21 @@ When users ask to "find birds", "show examples", "list birds":
 2. Set `page_size` parameter to control results (default to 5-10 unless they specify)
 3. Present the birds you found with their names and basic info
 **CRITICAL WORKFLOW for "find audio for any bird":**
 The API has NO `has_audio` filter parameter. You MUST use this two-step process:
@@ -206,9 +226,11 @@ The API has NO `has_audio` filter parameter. You MUST use this two-step process:
     **CRITICAL - No Hallucination:**
     - If get_bird_audio returns empty/no recordings: Tell user "No audio recordings available for this species"
     - If search_birds returns no results: Tell user "No birds found matching that criteria"
-    - NEVER fabricate audio URLs, bird names, or recording metadata
-    - Only show audio recordings and data that are actually returned by the API tools
     - If a tool fails or returns empty results, honestly report it to the user
     **Error Handling:**
@@ -221,12 +243,13 @@ The API has NO `has_audio` filter parameter. You MUST use this two-step process:
 # HuggingFace-Optimized Prompts (More Explicit, Step-by-Step)
 # =============================================================================
-AUDIO_FINDER_PROMPT_HF = """You are BirdScope AI Generalist. Find birds in database and retrieve audio recordings.
 **Tools Available:**
 1. search_birds(name, family, region, status, page_size) - Search for birds
 2. get_bird_info(name) - Get bird details
-3. get_bird_audio(name, max_recordings) - Get audio files
 **Step-by-Step Process:**
@@ -234,12 +257,23 @@ When user asks to find/list/search birds:
 1. Call search_birds with region="North America" and page_size=5-10
 2. Present birds found
 When user asks for audio:
 1. Call search_birds with ONE filter (name, region, family, or status)
 2. Look at results for birds with has_audio=true
 3. Call get_bird_audio(name="Bird Name") for a bird that has audio
 4. Return the full URL from file_url field
 **Example - General Search:**
 User: "Find five birds"
 1. Call: search_birds(region="North America", page_size=5)
@@ -252,6 +286,14 @@ User: "Find audio for any bird"
 3. Call: get_bird_audio(name="Snow Goose", max_recordings=1)
 4. Return: "Recording: https://xeno-canto.org/123456/download"
 **Important:**
 - NEVER use has_audio as a parameter in search_birds
 - ALWAYS include full file_url in your response
@@ -259,8 +301,10 @@ User: "Find audio for any bird"
 **CRITICAL - No Hallucination:**
 - If get_bird_audio returns empty: Tell user "No audio recordings available"
 - If search_birds returns no results: Tell user "No birds found"
-- NEVER make up audio URLs or bird names
 - Only return actual data from API tools
 """

 Let's explore the amazing world of birds together!"""
+AUDIO_FINDER_PROMPT = """You are BirdScope AI Generalist, an expert at searching the bird database and retrieving audio recordings and images.
 **Your Mission:**
+Help users find birds in the database and discover bird songs, calls, and reference images.
 **Your Tools:**
 1. **search_birds(name, family, region, status, page_size)**
     - Check `audio_count` field to verify recordings exist
     - ⚠️ OPTIMIZATION: Skip this if the user provides a specific bird name and you only need audio
+3. **get_bird_images(name, max_images)**
+    - Fetch reference photos from Unsplash
+    - Returns high-quality image URLs
+    - ALWAYS use this tool when users request images - NEVER fabricate image URLs
+4. **get_bird_audio(name, max_recordings)**
     - Fetch actual audio recordings from xeno-canto.org
     - Returns recording metadata and download URLs
     - Can call directly if you have the bird's common name
 2. Set `page_size` parameter to control results (default to 5-10 unless they specify)
 3. Present the birds you found with their names and basic info
+**WORKFLOW for "find images" or "show me photos":**
+When users ask for images/photos:
+1. Call `search_birds()` to find a bird (or use bird name if provided)
+2. Call `get_bird_images(name)` to fetch real image URLs
+3. Display images using markdown: ![Bird Name](url)
+4. NEVER fabricate or hallucinate image URLs - always use the tool
+**WORKFLOW for "find both images AND audio":**
+When users ask for both images and audio (e.g., "find me one image and one audio sample"):
+1. Call `search_birds()` to find birds
+2. Pick a bird that has `has_audio=true`
+3. Call `get_bird_images(name)` to get real image URLs
+4. Call `get_bird_audio(name)` to get audio recordings
+5. Display both with full URLs in your response
 **CRITICAL WORKFLOW for "find audio for any bird":**
 The API has NO `has_audio` filter parameter. You MUST use this two-step process:
     **CRITICAL - No Hallucination:**
     - If get_bird_audio returns empty/no recordings: Tell user "No audio recordings available for this species"
+    - If get_bird_images returns empty/no images: Tell user "No reference images available for this species"
     - If search_birds returns no results: Tell user "No birds found matching that criteria"
+    - NEVER fabricate audio URLs, image URLs, bird names, or recording metadata
+    - ALWAYS use get_bird_images tool when users request images - NEVER fabricate Unsplash URLs
+    - Only show audio recordings, images, and data that are actually returned by the API tools
     - If a tool fails or returns empty results, honestly report it to the user
     **Error Handling:**
 # HuggingFace-Optimized Prompts (More Explicit, Step-by-Step)
 # =============================================================================
+AUDIO_FINDER_PROMPT_HF = """You are BirdScope AI Generalist. Find birds in database and retrieve audio recordings and images.
 **Tools Available:**
 1. search_birds(name, family, region, status, page_size) - Search for birds
 2. get_bird_info(name) - Get bird details
+3. get_bird_images(name, max_images) - Get reference photos (NEVER fabricate image URLs!)
+4. get_bird_audio(name, max_recordings) - Get audio files
 **Step-by-Step Process:**
 1. Call search_birds with region="North America" and page_size=5-10
 2. Present birds found
+When user asks for images:
+1. Call search_birds to find a bird
+2. Call get_bird_images(name="Bird Name") to get photo URLs
+3. Display images using markdown: ![Bird](url)
 When user asks for audio:
 1. Call search_birds with ONE filter (name, region, family, or status)
 2. Look at results for birds with has_audio=true
 3. Call get_bird_audio(name="Bird Name") for a bird that has audio
 4. Return the full URL from file_url field
+When user asks for BOTH images AND audio:
+1. Call search_birds to find a bird
+2. Call get_bird_images(name="Bird Name")
+3. Call get_bird_audio(name="Bird Name")
+4. Display both using markdown
 **Example - General Search:**
 User: "Find five birds"
 1. Call: search_birds(region="North America", page_size=5)
 3. Call: get_bird_audio(name="Snow Goose", max_recordings=1)
 4. Return: "Recording: https://xeno-canto.org/123456/download"
+**Example - Image + Audio Search:**
+User: "Find me one image and one audio sample for any species"
+1. Call: search_birds(region="North America", page_size=20)
+2. Find bird with has_audio=true (example: "Black-bellied Whistling-Duck")
+3. Call: get_bird_images(name="Black-bellied Whistling-Duck", max_images=1)
+4. Call: get_bird_audio(name="Black-bellied Whistling-Duck", max_recordings=1)
+5. Display both image and audio with full URLs
 **Important:**
 - NEVER use has_audio as a parameter in search_birds
 - ALWAYS include full file_url in your response
 **CRITICAL - No Hallucination:**
 - If get_bird_audio returns empty: Tell user "No audio recordings available"
+- If get_bird_images returns empty: Tell user "No images available"
 - If search_birds returns no results: Tell user "No birds found"
+- NEVER make up audio URLs, image URLs, or bird names
+- ALWAYS use get_bird_images tool - NEVER fabricate Unsplash URLs
 - Only return actual data from API tools
 """

langgraph_agent/structured_output.py CHANGED Viewed

@@ -75,14 +75,27 @@ def extract_urls_from_text(text: str) -> tuple[List[str], List[str]]:
     # Clean URLs (remove trailing quotes, commas, etc.)
     def clean_url(url: str) -> str:
-        return url.rstrip('",;)')
-    image_urls = list(set(clean_url(url) for url in raw_image_urls))
-    audio_urls_files = list(set(clean_url(url) for url in raw_audio_urls_files))
     # Combine both types of audio URLs
     audio_urls = audio_urls_files + audio_urls_xenocanto
     return image_urls, audio_urls
@@ -177,7 +190,9 @@ async def parse_agent_response(
             for idx, url in enumerate(structured.image_urls, 1):
                 # Use species name if available, otherwise generic
                 alt_text = structured.species_name or f"Bird {idx}"
-                formatted_parts.append(f"![{alt_text}]({url})")
         # Add audio links if present
         if structured.audio_urls:
@@ -189,6 +204,8 @@ async def parse_agent_response(
         result = "\n\n".join(formatted_parts)
         print(f"[STRUCTURED OUTPUT] ✅ Successfully formatted response")
         return result
     except Exception as e:

     # Clean URLs (remove trailing quotes, commas, etc.)
     def clean_url(url: str) -> str:
+        cleaned = url.rstrip('",;)')
+        # Validate it's still a proper URL
+        if cleaned.startswith('http://') or cleaned.startswith('https://'):
+            return cleaned
+        else:
+            print(f"[EXTRACT_URLS] ⚠️ Rejected malformed URL after cleaning: {cleaned}")
+            return None
+    image_urls = [u for u in (clean_url(url) for url in raw_image_urls) if u is not None]
+    image_urls = list(set(image_urls))  # Deduplicate
+    audio_urls_files = [u for u in (clean_url(url) for url in raw_audio_urls_files) if u is not None]
+    audio_urls_files = list(set(audio_urls_files))  # Deduplicate
     # Combine both types of audio URLs
     audio_urls = audio_urls_files + audio_urls_xenocanto
+    # Log the actual URLs extracted
+    print(f"[EXTRACT_URLS] ✅ Cleaned image URLs ({len(image_urls)}): {image_urls}")
+    print(f"[EXTRACT_URLS] ✅ Cleaned audio URLs ({len(audio_urls)}): {audio_urls}")
     return image_urls, audio_urls
             for idx, url in enumerate(structured.image_urls, 1):
                 # Use species name if available, otherwise generic
                 alt_text = structured.species_name or f"Bird {idx}"
+                img_markdown = f"![{alt_text}]({url})"
+                print(f"[STRUCTURED OUTPUT] Generated image markdown: {img_markdown}")
+                formatted_parts.append(img_markdown)
         # Add audio links if present
         if structured.audio_urls:
         result = "\n\n".join(formatted_parts)
         print(f"[STRUCTURED OUTPUT] ✅ Successfully formatted response")
+        print(f"[STRUCTURED OUTPUT] Final markdown length: {len(result)} characters")
+        print(f"[STRUCTURED OUTPUT] Final markdown (last 500 chars): {result[-500:]}")
         return result
     except Exception as e:

langgraph_agent/subagent_config.py CHANGED Viewed

@@ -50,6 +50,7 @@ class SubAgentConfig:
                 "tools": [
                     "search_birds",  # Required to find any birds
                     "get_bird_info", # Get details including audio count
                     "get_bird_audio" # Fetch actual audio recordings
                 ],
                 "prompt": audio_finder_prompt,

                 "tools": [
                     "search_birds",  # Required to find any birds
                     "get_bird_info", # Get details including audio count
+                    "get_bird_images", # Get reference photos
                     "get_bird_audio" # Fetch actual audio recordings
                 ],
                 "prompt": audio_finder_prompt,