# app.py # ============================================================ # BactAI-D — Microbiology Identification (LLM-Toggle + RAG) # # - LLM parser OFF by default (safe for HF Spaces) # - Checkbox to enable LLM parser: # "Enable LLM Parser (Finetuned Flan-T5-baseextraction)" # - Tri-Fusion + ML hybrid identification + RAG # - Hybrid weighting & Gated Confidence # - Confidence bands: # <65% → Low Discrimination # 65–79 → Acceptable Identification # 80–89 → Good Identification # ≥90 → Excellent Identification # - RAG (Bart-Large) always enabled for top genera # - Commit-to-HF kept with all key artefacts # # TOP-5 TABLE (DECISION AID) RULE: # ✅ Confidence is assigned AFTER unified scoring. # ✅ Only Rank #1 may be Acceptable/Good/Excellent. # ✅ If Rank #1 is Low Discrimination, ALL ranks are Low Discrimination. # ✅ Ranks #2–#5 are always Low Discrimination (even if their % is high). # # TOP-5 TABLE (DECISION AID) COLUMNS: # ✅ Genus # ✅ Probability % (within TOP-5, sums to 100%) # ✅ Probability (Odds) — human-friendly ("1 in X") # ✅ Confidence (decision_band logic above) # ============================================================ from __future__ import annotations import os from datetime import datetime from typing import Dict, Any, List, Tuple import pandas as pd import gradio as gr # ============================================================ # ENGINE IMPORTS # ============================================================ from engine.bacteria_identifier import BacteriaIdentifier from engine.parser_rules import parse_text_rules from engine.parser_ext import parse_text_extended from engine.parser_fusion import parse_text_fused # We will *not* import parser_llm directly here. # LLM usage is controlled via the `use_llm` flag passed into parse_text_fused HAS_LLM = True # Architecturally supported; UI toggle decides whether to use it. 
# ============================================================
# ML GENUS PREDICTOR
# ============================================================
# Optional dependency: the app degrades gracefully (flag set False)
# when the XGBoost genus model or its module is missing.
try:
    from engine.genus_predictor import predict_genus_from_fused
    HAS_GENUS_ML = True
except Exception as e:
    print(f"[app] ML predictor unavailable: {type(e).__name__}: {e}")
    HAS_GENUS_ML = False

# ============================================================
# TRAINING MODULES
# ============================================================
# Each training utility is optional; a HAS_* flag guards its UI callback.
try:
    from training.parser_eval import run_parser_eval
    HAS_PARSER_EVAL = True
except Exception as e:
    print(f"[app] parser_eval unavailable: {type(e).__name__}: {e}")
    HAS_PARSER_EVAL = False

try:
    from training.gold_trainer import train_from_gold
    HAS_GOLD_TRAINER = True
except Exception as e:
    print(f"[app] gold_trainer unavailable: {type(e).__name__}: {e}")
    HAS_GOLD_TRAINER = False

try:
    from training.field_weight_trainer import train_field_weights
    HAS_FIELD_WEIGHT_TRAINER = True
except Exception as e:
    print(f"[app] field_weight_trainer unavailable: {type(e).__name__}: {e}")
    HAS_FIELD_WEIGHT_TRAINER = False

try:
    from engine.train_genus_model import train_genus_model
    HAS_GENUS_TRAINER = True
except Exception as e:
    print(f"[app] genus trainer unavailable: {type(e).__name__}: {e}")
    HAS_GENUS_TRAINER = False

# ============================================================
# RAG INDEX BUILDER
# ============================================================
try:
    from training.rag_index_builder import build_rag_index
    HAS_RAG_INDEX_BUILDER = True
except Exception as e:
    print(f"[app] rag_index_builder unavailable: {type(e).__name__}: {e}")
    HAS_RAG_INDEX_BUILDER = False

# ============================================================
# PHASE 1 — OVERALL RANKER
# ============================================================
from scoring.overall_ranker import compute_overall_scores

# ============================================================
# DIAGNOSTIC ANCHORS (OVERRIDES)
# ============================================================
from scoring.diagnostic_anchors import apply_diagnostic_overrides

# ============================================================
# RAG IMPORTS (Mistral + Retriever)
# ============================================================
from rag.rag_retriever import retrieve_rag_context
from rag.rag_generator import generate_genus_rag_explanation
from rag.species_scorer import score_species_for_genus

# ============================================================
# DEFAULT EXAMPLE (HF DEMO)
# ============================================================
# Pre-filled phenotype used as the Identification tab's default input.
DEFAULT_EXAMPLE_PHENOTYPE = (
    "Gram positive short rods. Facultative anaerobe. Catalase positive and oxidase negative. "
    "Motile at room temperature with tumbling motility at 37°C. Beta haemolysis on Blood Agar. "
    "Esculin positive. Non-spore forming. Colonies are small, translucent, and smooth. "
    "Pigment negative and odor none. Rhamnose positive, xylose negative"
)

# ============================================================
# DATA LOADING
# ============================================================
def load_db() -> Tuple[pd.DataFrame, str]:
    """Load the bacteria reference workbook.

    Looks for ``bacteria_db.xlsx`` first under ``data/``, then in the
    project root.

    Returns:
        Tuple of (DataFrame with whitespace-stripped column names,
        file mtime formatted as ``YYYY-MM-DD`` for display in the UI).

    Raises:
        FileNotFoundError: if the workbook is in neither location.
    """
    primary = os.path.join("data", "bacteria_db.xlsx")
    fallback = "bacteria_db.xlsx"
    if os.path.exists(primary):
        path = primary
    elif os.path.exists(fallback):
        path = fallback
    else:
        raise FileNotFoundError(
            "bacteria_db.xlsx not found in 'data/' or project root."
        )
    df = pd.read_excel(path)
    # Column headers in the workbook may carry stray whitespace.
    df.columns = [c.strip() for c in df.columns]
    mtime = os.path.getmtime(path)
    return df, datetime.fromtimestamp(mtime).strftime("%Y-%m-%d")


# Loaded once at import time; the identifier engine wraps the DataFrame.
DB, DB_LAST_UPDATED = load_db()
ENG = BacteriaIdentifier(DB)


# ============================================================
# CONFIDENCE BANDS (FINAL CONTRACT)
# ============================================================
def _confidence_band_local(p: float) -> str:
    """
    Confidence band based on the FINAL contract:
        <0.65     -> Low Discrimination
        0.65-0.79 -> Acceptable Identification
        0.80-0.89 -> Good Identification
        >=0.90    -> Excellent Identification
    """
    if p >= 0.90:
        return "Excellent Identification"
    if p >= 0.80:
        return "Good Identification"
    if p >= 0.65:
        return "Acceptable Identification"
    return "Low Discrimination"


def _apply_top5_decision_confidence(unified_ranking: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    TOP-5 TABLE DECISION RULE:
      - Only rank #1 can be Acceptable/Good/Excellent.
      - If rank #1 is Low Discrimination -> ALL ranks Low Discrimination.
      - Ranks #2-#5 ALWAYS Low Discrimination.

    We store this as:
      item["decision_band"]  (for the top-5 table + UI labels if desired)

    Mutates the dicts in-place and returns the same list.
    """
    if not unified_ranking:
        return unified_ranking

    # Determine rank-1 band based on unified combined_score
    top = unified_ranking[0]
    top_score = float(top.get("combined_score", 0.0) or 0.0)
    top_band = _confidence_band_local(top_score)

    if top_band == "Low Discrimination":
        # All LD
        for item in unified_ranking:
            item["decision_band"] = "Low Discrimination"
        return unified_ranking

    # Rank1 gets its true band; everyone else forced LD
    unified_ranking[0]["decision_band"] = top_band
    for item in unified_ranking[1:]:
        item["decision_band"] = "Low Discrimination"
    return unified_ranking


def _format_odds_human_friendly(odds_1000: int) -> str:
    """
    Convert odds per 1000 into a human-friendly "1 in X".

    Example:
        odds_1000 = 500 -> 1 in 2
        odds_1000 = 333 -> 1 in 3
        odds_1000 = 125 -> 1 in 8

    Non-numeric or non-positive input renders as an em dash placeholder.
    """
    try:
        o = int(odds_1000)
    except Exception:
        o = 0
    if o <= 0:
        return "—"
    # 1000/o gives expected "1 in X"
    x = int(round(1000.0 / float(o)))
    if x <= 1:
        return "1 in 1"
    return f"1 in {x}"


def _safe_float(x, default: float = 0.0) -> float:
    """Best-effort float conversion; returns *default* on any failure."""
    try:
        return float(x)
    except Exception:
        return default


# ============================================================
# CORE IDENTIFICATION PIPELINE
# ============================================================
def compute_trifusion_and_ml(text: str, use_llm_parser: bool = False) -> Dict[str, Any]:
    """Run the full identification pipeline over a free-text phenotype.

    Stages:
      1. Tri-Fusion parsing (optionally with the LLM parser) + rule engine.
      2. ML genus model predictions (if available).
      3. Unified hybrid ranking blending both score sources.
      4. Overall ranker for top-5 normalisation.

    Args:
        text: Free-text phenotype description (may be empty).
        use_llm_parser: Forwarded to ``parse_text_fused`` as ``use_llm``;
            silently ignored if that function does not accept the kwarg.

    Returns:
        Dict with parsed fields, per-stage markdown summaries, the unified
        ranking, overall scores, and the raw fusion payload. On empty input,
        returns the same shape with ``error`` set and empty values.
    """
    text = text or ""
    if not text.strip():
        return {
            "error": "Please enter a description.",
            "fused_fields": {},
            "tri_fusion_results": [],
            "tri_fusion_summary_markdown": "",
            "ml_genus_results": [],
            "ml_summary_markdown": "",
            "unified_summary_markdown": "",
            "unified_ranking": [],
            "overall_scores": {},
            "raw": {},
        }

    # 1) Tri-Fusion
    # Older parse_text_fused signatures may not take use_llm; fall back.
    try:
        fusion = parse_text_fused(text, use_llm=use_llm_parser)
    except TypeError:
        fusion = parse_text_fused(text)

    fused_fields = fusion.get("fused_fields", {})
    results = ENG.identify(fused_fields)

    # Tri-Fusion summary
    tri_lines: List[str] = []
    if not results:
        tri_lines.append("No matches found.")
    else:
        tri_lines.append("Tri-Fusion Identification Results:\n")
        for r in results:
            blended = r.blended_confidence_percent()
            core = r.confidence_percent()
            true = r.true_confidence()
            emoji = "🟢" if blended >= 75 else "🟡" if blended >= 50 else "🔴"
            tri_lines.append(
                f"- **{r.genus}** — {emoji} {blended}% "
                f"(Core: {core}%, True: {true}%)"
            )
    tri_md = "\n".join(tri_lines)

    # 2) ML GENUS MODEL
    ml_results_raw: List[Dict[str, Any]] = []
    ml_lines: List[str] = []
    if not HAS_GENUS_ML:
        ml_lines.append("ML genus model not available.")
    else:
        try:
            # preds is iterated as (genus, probability, band) triples —
            # assumed contract of predict_genus_from_fused; confirm there.
            preds = predict_genus_from_fused(fused_fields, top_k=10)
            if preds:
                ml_lines.append("ML Genus Model Results (XGBoost, Stage 12D):\n")
                band_emoji = {
                    "Excellent Identification": "🟢",
                    "Good Identification": "🟡",
                    "Acceptable Identification": "🟠",
                    "Low Discrimination": "🔴",
                }
                rank = 1
                for genus, prob, band in preds:
                    perc = prob * 100.0
                    emo = band_emoji.get(band, "⚪")
                    ml_lines.append(
                        f"{rank}. **{genus}** — {emo} {perc:.1f}% ({band})"
                    )
                    ml_results_raw.append(
                        {
                            "genus": genus,
                            "probability": prob,
                            "probability_percent": perc,
                            "confidence_band": band,
                        }
                    )
                    rank += 1
            else:
                ml_lines.append("ML model returned no predictions.")
        except Exception as e:
            ml_lines.append(f"ML genus model error: {type(e).__name__}: {e}")
    ml_md = "\n".join(ml_lines)

    # 3) UNIFIED HYBRID RANKING
    unified_lines: List[str] = []
    unified_ranking: List[Dict[str, Any]] = []

    # Keep the best (max) blended Tri-Fusion score seen per genus, as a fraction.
    tri_blended_by_genus: Dict[str, float] = {}
    for r in results:
        g = str(r.genus)
        s = (r.blended_confidence_percent() or 0.0) / 100.0
        if s > tri_blended_by_genus.get(g, 0.0):
            tri_blended_by_genus[g] = s

    ml_by_genus: Dict[str, float] = {
        item["genus"]: float(item["probability"]) for item in ml_results_raw
    }

    all_genera = set(tri_blended_by_genus.keys()) | set(ml_by_genus.keys())

    band_emoji = {
        "Excellent Identification": "🟢",
        "Good Identification": "🟡",
        "Acceptable Identification": "🟠",
        "Low Discrimination": "🔴",
    }

    if all_genera:
        # Build raw unified scores.
        # Weighting scheme: near-zero ML probability dominates (0.01/0.99),
        # very confident ML (>=0.90) is weighted 0.3/0.7, otherwise 50/50.
        for g in all_genera:
            tf = tri_blended_by_genus.get(g, 0.0)
            ml = ml_by_genus.get(g, 0.0)
            if ml <= 0.01:
                combined = 0.01 * tf + 0.99 * ml
            elif ml >= 0.90:
                combined = 0.3 * tf + 0.7 * ml
            else:
                combined = 0.5 * tf + 0.5 * ml

            # TF Gate: a weak Tri-Fusion score caps the combined score,
            # so ML alone cannot promote a genus Tri-Fusion disagrees with.
            TF_GATE = 0.30
            if tf <= TF_GATE:
                combined = min(combined, tf)

            band = _confidence_band_local(combined)
            unified_ranking.append(
                {
                    "genus": g,
                    "combined_score": combined,
                    "combined_percent": combined * 100.0,
                    "tri_fusion_blended_percent": tf * 100.0,
                    "ml_prob_percent": ml * 100.0,
                    "ml_band": band,  # band based on combined score
                }
            )

        # Apply diagnostic anchor overrides
        unified_ranking = apply_diagnostic_overrides(text, unified_ranking)

        # Sort after overrides
        unified_ranking.sort(
            key=lambda d: d.get("combined_score", 0.0), reverse=True
        )

        # Apply TOP-5 decision confidence rule (rank1-only)
        unified_ranking = _apply_top5_decision_confidence(unified_ranking)

        # Build markdown summary
        unified_lines.append("Unified Hybrid Ranking (Tri-Fusion + ML Genus Model):\n")
        for rank, item in enumerate(unified_ranking[:10], start=1):
            g = item["genus"]
            combined = item["combined_score"]
            band = item.get("decision_band") or item.get("ml_band") or "Low Discrimination"
            emo = band_emoji.get(band, "⚪")
            tf = item["tri_fusion_blended_percent"] / 100.0
            ml = item["ml_prob_percent"] / 100.0
            unified_lines.append(
                f"{rank}. **{g}** — {emo} Combined: {combined*100:.1f}% "
                f"(Tri-Fusion: {tf*100:.1f}% | ML: {ml*100:.1f}% — {band})"
            )

    unified_md = "\n".join(unified_lines)

    # 4) OVERALL RANKER (TOP-5 NORMALISATION)
    try:
        # NOTE: keep this contract stable for now; we will refactor overall_ranker next.
        tri_scores_map = {item["genus"]: float(item.get("combined_score", 0.0) or 0.0) for item in unified_ranking}
        overall_scores = compute_overall_scores(
            ml_scores=ml_results_raw,
            tri_scores=tri_scores_map,
            top_k=5,
        )
    except Exception as e:
        # Shape mirrors the success payload so downstream .get() calls work.
        overall_scores = {
            "error": f"overall_ranker failed: {type(e).__name__}: {e}",
            "overall": [],
            "normalized_share_percent": [],
            "probabilities_1000": [],
        }

    return {
        "error": None,
        "fused_fields": fused_fields,
        "tri_fusion_results": results,
        "tri_fusion_summary_markdown": tri_md,
        "ml_genus_results": ml_results_raw,
        "ml_summary_markdown": ml_md,
        "unified_summary_markdown": unified_md,
        "unified_ranking": unified_ranking,
        "overall_scores": overall_scores,
        "raw": fusion,
    }


# ============================================================
# GENUS CARD RENDERER
# ============================================================
def _genus_card_markdown(
    item: Dict[str, Any],
    rank: int,
    rag_text: str | None = None,
) -> str:
    """Render one ranked genus as a markdown card for its accordion.

    Args:
        item: One entry of the unified ranking (keys: genus,
            combined_percent, tri_fusion_blended_percent, ml_prob_percent,
            decision_band / ml_band).
        rank: 1-based display rank.
        rag_text: Optional RAG explanation appended as its own section.
    """
    genus = item["genus"]
    combined = item["combined_percent"]
    tf = item["tri_fusion_blended_percent"]
    ml = item["ml_prob_percent"]

    # Show the DECISION confidence band (rank1-only rule)
    decision_band = item.get("decision_band") or item.get("ml_band") or "Low Discrimination"

    if combined >= 80:
        bar_color = "#1e88e5"
    elif combined >= 65:
        bar_color = "#43a047"
    elif combined >= 50:
        bar_color = "#fb8c00"
    else:
        bar_color = "#e53935"

    # NOTE(review): the body of this f-string appears empty/truncated in this
    # extract — bar_color is computed above but never interpolated here.
    # Presumably the original contained an HTML progress-bar <div> using
    # bar_color and combined; confirm against the repository copy.
    bar_html = f"""
"""

    rag_section = ""
    if rag_text:
        rag_section = f"""
#### RAG Interpretation (Genus-Level)

{rag_text}
"""

    return f"""
### Rank {rank}: **{genus}**

{bar_html}

- **Combined Score:** {combined:.1f}%
- **Tri-Fusion (Blended):** {tf:.1f}%
- **ML Probability:** {ml:.1f}%
- **Decision Confidence:** {decision_band}

{rag_section}
"""


# ============================================================
# IDENTIFICATION CALLBACK
# ============================================================
def run_identification(text: str, use_llm_parser: bool):
    """Gradio callback for the Identification tab.

    Runs the pipeline, builds the Top-5 decision table, fetches a RAG
    explanation for the rank-1 genus, and fills five accordion cards.

    Returns (in output order expected by analyse_btn.click):
        debug_payload, top5_rows, five accordion gr.update objects,
        five markdown card strings.
    """
    result = compute_trifusion_and_ml(text, use_llm_parser=use_llm_parser)

    # DEBUG payload
    debug_payload = {
        "fused_fields": result["fused_fields"],
        "tri_fusion_summary_markdown": result["tri_fusion_summary_markdown"],
        "ml_genus_results": result["ml_genus_results"],
        "unified_summary_markdown": result["unified_summary_markdown"],
        "unified_ranking": result["unified_ranking"],
        "overall_scores": result["overall_scores"],
        "raw": result["raw"],
    }

    ranking = result["unified_ranking"] or []

    # ------------------------------------------------------------
    # Top-5 Decision Table (ROBUST, APP-SIDE)
    # ------------------------------------------------------------
    # We do NOT trust overall_ranker yet.
    # We defensively reconstruct probabilities so the table always fills.
    # ------------------------------------------------------------
    top5_rows: List[List[str]] = []
    overall = result.get("overall_scores") or {}
    overall_list = overall.get("overall") or []
    probs_1000_list = overall.get("probabilities_1000") or []

    share_by_genus: Dict[str, float] = {}
    odds_by_genus: Dict[str, int] = {}

    # 1) Normalized share — accept several possible key spellings from
    #    overall_ranker; values >1.0 are treated as percentages.
    for it in overall_list:
        if not isinstance(it, dict):
            continue
        g = str(it.get("genus") or "").strip()
        if not g:
            continue
        share = (
            it.get("normalized_share")
            or it.get("share")
            or it.get("normalized_share_percent")
        )
        if share is not None:
            s = _safe_float(share)
            if s > 1.0:
                # percent → fraction
                s = s / 100.0
            share_by_genus[g] = max(0.0, min(1.0, s))

    # 2) Odds /1000
    for it in probs_1000_list:
        if not isinstance(it, dict):
            continue
        g = str(it.get("genus") or "").strip()
        if not g:
            continue
        o = it.get("odds_1000") or it.get("prob_1000")
        if isinstance(o, (int, float)):
            odds_by_genus[g] = int(round(o))

    # 3) HARD FALLBACK — derive from unified_ranking if needed
    if not share_by_genus:
        total = sum(float(item.get("combined_score", 0.0) or 0.0) for item in ranking[:5]) or 1.0
        for item in ranking[:5]:
            genus = str(item.get("genus") or "").strip()
            if genus:
                share_by_genus[genus] = float(item.get("combined_score", 0.0) or 0.0) / total

    # 4) Build table rows IN RANK ORDER
    top1_band = ranking[0].get("decision_band") if ranking else "Low Discrimination"
    for idx, item in enumerate(ranking[:5], start=1):
        genus = str(item.get("genus") or "").strip()
        share = share_by_genus.get(genus, 0.0)
        # If overall_ranker doesn't provide odds, approximate odds_1000 from share.
        odds_1000 = odds_by_genus.get(genus, int(round(share * 1000)))

        prob_pct = f"{share * 100.0:.2f}%"
        odds_text = _format_odds_human_friendly(odds_1000)

        # Rank-1-only confidence rule (see _apply_top5_decision_confidence).
        if top1_band == "Low Discrimination":
            confidence = "Low Discrimination"
        else:
            confidence = top1_band if idx == 1 else "Low Discrimination"

        top5_rows.append([
            genus,
            prob_pct,
            odds_text,
            confidence,
        ])

    # RAG explanations for top genera (rank 1)
    rag_summaries: Dict[str, str] = {}
    if ranking:
        top_item = ranking[0]
        genus = top_item["genus"]
        try:
            ctx = retrieve_rag_context(
                phenotype_text=text,
                target_genus=genus,
                top_k=5,
                parsed_fields=result["fused_fields"],  # 🔑 enables species scoring
            )

            # 🔍 HF SPACES DEBUG LOGGING
            print("\n" + "=" * 80)
            print("RAG DEBUG — GENERATOR INPUT")
            print("=" * 80)
            print("\n[PHENOTYPE]")
            print(text)
            print("\n[LLM CONTEXT]")
            print(ctx.get("llm_context_shaped", ""))
            print("\n[DEBUG CONTEXT]")
            print(ctx.get("debug_context", ""))
            print("=" * 80 + "\n")
            # 🔍 END DEBUG

            explanation = generate_genus_rag_explanation(
                phenotype_text=text,
                rag_context=ctx.get("llm_context_shaped", "") or ctx.get("llm_context", ""),
                genus=genus,
            )

            # -------------------------------
            # SPECIES BEST MATCH
            # -------------------------------
            # Best-effort: any failure degrades to "Not specified" rather
            # than losing the genus-level explanation.
            try:
                species_out = score_species_for_genus(
                    target_genus=genus,
                    parsed_fields=result["fused_fields"],
                    top_n=1,
                )
                ranked = species_out.get("ranked", []) if isinstance(species_out, dict) else []
                if ranked:
                    best = ranked[0]
                    full_name = str(best.get("full_name") or "").strip()
                    score = best.get("score")
                    if full_name:
                        if isinstance(score, (int, float)):
                            explanation += f"\n\n**Species Best Match:** {full_name} ({float(score) * 100.0:.1f}%)"
                        else:
                            explanation += f"\n\n**Species Best Match:** {full_name}"
                    else:
                        explanation += "\n\n**Species Best Match:** Not specified"
            except Exception:
                explanation += "\n\n**Species Best Match:** Not specified"

            rag_summaries[genus] = explanation
        except Exception as e:
            rag_summaries[genus] = f"(RAG error: {type(e).__name__}: {e})"

    # Accordions — pre-fill all five as hidden, then reveal those with data.
    accordion_updates = []
    markdown_updates = []
    for _ in range(5):
        accordion_updates.append(gr.update(visible=False, open=False, label=""))
        markdown_updates.append("")

    for idx, item in enumerate(ranking[:5]):
        decision_band = item.get("decision_band") or "Low Discrimination"
        label = f"{item['genus']} — {item['combined_percent']:.1f}% — {decision_band}"
        accordion_updates[idx] = gr.update(
            visible=True,
            open=(idx == 0),  # only rank 1 starts expanded
            label=label,
        )
        rag_text = rag_summaries.get(item["genus"])
        markdown_updates[idx] = _genus_card_markdown(
            item,
            rank=idx + 1,
            rag_text=rag_text,
        )

    return debug_payload, top5_rows, *accordion_updates, *markdown_updates


# ============================================================
# PARSER DEBUG CALLBACKS
# ============================================================
def run_rule_parser(text: str):
    """Debug-tab callback: run only the rule-based parser."""
    return gr.update(visible=True, open=True), parse_text_rules(text or "")


def run_extended_parser(text: str):
    """Debug-tab callback: run only the extended-test parser."""
    return gr.update(visible=True, open=True), parse_text_extended(text or "")


def run_trifusion_debug(text: str, use_llm_parser: bool):
    """Debug-tab callback: run the full pipeline and expose all summaries."""
    result = compute_trifusion_and_ml(text or "", use_llm_parser=use_llm_parser)
    return (
        gr.update(visible=True, open=True),
        result,
        result["tri_fusion_summary_markdown"],
        result["ml_summary_markdown"],
        result["unified_summary_markdown"],
    )


# ============================================================
# TRAINING CALLBACKS
# ============================================================
# Each callback returns (accordion gr.update, JSON payload); when its
# module failed to import, the payload is an {ok: False, message} stub.
def run_parser_evaluation():
    """Evaluate the rule + extended parsers against the gold set."""
    if not HAS_PARSER_EVAL:
        return gr.update(visible=True, open=True), {
            "ok": False,
            "message": "parser_eval not available.",
        }
    return gr.update(visible=True, open=True), run_parser_eval(mode="rules+extended")


def run_gold_training():
    """Train from the gold test set."""
    if not HAS_GOLD_TRAINER:
        return gr.update(visible=True, open=True), {
            "ok": False,
            "message": "gold_trainer not available.",
        }
    return gr.update(visible=True, open=True), train_from_gold()


def run_field_weight_training():
    """Tune per-field parser weights (LLM parser excluded)."""
    if not HAS_FIELD_WEIGHT_TRAINER:
        return gr.update(visible=True, open=True), {
            "ok": False,
            "message": "field_weight_trainer not available.",
        }
    out = train_field_weights(include_llm=False)
    return gr.update(visible=True, open=True), out


def run_genus_training():
    """Retrain the genus-level ML model."""
    if not HAS_GENUS_TRAINER:
        return gr.update(visible=True, open=True), {
            "ok": False,
            "message": "genus trainer not available.",
        }
    out = train_genus_model()
    return gr.update(visible=True, open=True), out


def run_rag_index_builder():
    """Rebuild the RAG retrieval index."""
    if not HAS_RAG_INDEX_BUILDER:
        return gr.update(visible=True, open=True), {
            "ok": False,
            "message": "rag_index_builder not available.",
        }
    out = build_rag_index()
    return gr.update(visible=True, open=True), out


def commit_to_hf():
    """Push artefacts back to the HF Space repository.

    Import is deferred so the app loads even without hf_sync deps.
    The path list is intentionally empty in this demo build.
    """
    from training.hf_sync import push_to_hf
    # Paths removed for demo
    paths = [
    ]
    return push_to_hf(paths)


# ============================================================
# UI + BACKGROUND
# ============================================================
# Glassmorphism theme over a fixed background image.
CSS = """
html, body { height: 100%; }

body {
    background-image: url('static/eph.jpeg');
    background-size: cover;
    background-position: center center;
    background-attachment: fixed;
    font-family: 'Inter', sans-serif !important;
}

.gradio-container {
    background: rgba(0, 0, 0, 0.55) !important;
    backdrop-filter: blur(14px);
    border-radius: 16px !important;
}

textarea, input[type="text"] {
    background: rgba(255,255,255,0.05) !important;
    border: 1px solid rgba(255,255,255,0.18) !important;
    color: #e5e7eb !important;
    border-radius: 10px !important;
}

button {
    background: rgba(255,255,255,0.08) !important;
    border: 1px solid rgba(255,255,255,0.20) !important;
    color: #ffffff !important;
    border-radius: 10px !important;
    transition: 0.2s ease;
}

button:hover {
    background: rgba(255,255,255,0.16) !important;
    border-color: #90caf9 !important;
}

.gr-accordion {
    background: rgba(255,255,255,0.06) !important;
    border-radius: 12px !important;
    border: 1px solid rgba(255,255,255,0.16) !important;
}

.gr-accordion:hover {
    border-color: rgba(255,255,255,0.32) !important;
}

/* Ensure expanded accordion content is not clipped */
.gr-accordion .wrap,
.gr-accordion .gr-markdown {
    max-height: none !important;
    overflow: visible !important;
}

/* Improve readability of long RAG text */
.gr-accordion .gr-markdown {
    line-height: 1.6;
    padding-bottom: 12px;
}
"""


# ============================================================
# BUILD UI
# ============================================================
def create_app():
    """Assemble and return the Gradio Blocks application."""
    with gr.Blocks(
        css=CSS,
        title="BactAI-D — Microbiology Identification",
    ) as demo:
        gr.Markdown(
            f"# 🧫 BactAI-D — Microbiology Phenotype Identification\n"
            f"**Database updated:** {DB_LAST_UPDATED}\n\n"
            "BactAI-D is a schema-driven microbiology identification system that combines "
            "deterministic phenotype parsing, an extended laboratory test schema, a genus-level "
            "machine learning classifier, and retrieval-augmented generation (RAG) to provide "
            "evidence-grounded genus interpretation and a structured decision aid. (First Analysis may take 30 seconds)"
        )

        # Shared toggle: wired into both the Identification tab and the
        # Parsers debug tab.
        llm_toggle = gr.Checkbox(
            label="Enable LLM Parser (Awaiting GPU) ",
            value=False,
        )

        with gr.Tabs():
            # --------------------------------------------------------
            # TAB 1 — IDENTIFICATION
            # --------------------------------------------------------
            with gr.Tab("🧬 Identification"):
                text_in = gr.Textbox(
                    label="Phenotype Description",
                    lines=8,
                    value=DEFAULT_EXAMPLE_PHENOTYPE,
                    placeholder="Paste your microbiology description here…",
                )
                analyse_btn = gr.Button("🔍 Analyse & Identify")

                debug_json = gr.JSON(
                    label="Debug: fused fields + ML + unified ranking + overall"
                )

                # UPDATED table (Decision Table)
                top5_table = gr.Dataframe(
                    headers=["Genus", "Probability % (Top 5)", "Probability (Odds)", "Confidence"],
                    row_count=5,
                    col_count=4,
                    interactive=False,
                    label="Top 5 Genus Predictions (Decision Table)",
                )

                # Five fixed accordion slots; run_identification shows/hides
                # them to match the ranking length.
                genus_accordions = []
                genus_markdowns = []
                for i in range(5):
                    with gr.Accordion(
                        f"Rank {i+1}",
                        visible=False,
                        open=False,
                    ) as acc:
                        md = gr.Markdown("")
                    genus_accordions.append(acc)
                    genus_markdowns.append(md)

                analyse_btn.click(
                    fn=run_identification,
                    inputs=[text_in, llm_toggle],
                    outputs=[debug_json, top5_table, *genus_accordions, *genus_markdowns],
                )

            # --------------------------------------------------------
            # TAB 2 — SUPPORTED FIELDS (NEW)
            # --------------------------------------------------------
            with gr.Tab("📋 Supported Phenotypes"):
                gr.Markdown(
                    """
### Supported Phenotype Fields (Core Schema)

This page summarizes the **core fields currently supported** by the deterministic parsers
and the unified scoring engine.

Only **recognized** fields influence scoring; unrecognized descriptors are retained in raw
text but not used for structured matching. BactAI-D is capable of extending it's own schema via testing phases. These are documented and handled by the Trifusion model of parsing.

---

#### 1) Gram / Morphology
- **Gram stain:** Positive, Negative, Variable, Unknown
- **Shape:** Cocci, Bacilli, Rods, Short Rods, Yeast, Spiral, Variable, Unknown

---

#### 2) Oxygen & Motility
- **Oxygen requirement:** Aerobic, Anaerobic, Facultative, Microaerophilic, Unknown
- **Motility:** Positive, Negative, Variable, Unknown
- **Motility type (if provided):** Peritrichous, Polar, Tumbling, Swarming, Unknown

---

#### 3) Colony / Growth
- **Colony morphology:** free-text descriptors (e.g., “Small; Translucent; Smooth”)
- **Colony pattern:** Smooth, Rough, Mucoid, Dry, Variable, Unknown
- **Pigment:** Positive / Negative (or specific pigment text if your schema supports it)
- **Odor:** None / specific odor text / Unknown
- **Haemolysis:** Positive / Negative and type (Alpha/Beta/Gamma) if present in input

---

#### 4) Core Biochemistry (examples)
- Catalase, Oxidase, Indole, Urease
- Citrate, Methyl Red, VP
- H2S
- Nitrate reduction
- Lysine decarboxylase, Ornithine decarboxylase, Arginine dihydrolase
- Esculin hydrolysis, Gelatin hydrolysis, DNase
- ONPG
- NaCl tolerance

---

#### 5) Carbohydrate Utilisation (examples)
- Glucose fermentation, Lactose fermentation, Sucrose fermentation
- Additional sugars where present in your extended schema (e.g., xylose, rhamnose)
"""
                )

            # --------------------------------------------------------
            # TAB 3 — PARSERS DEBUG
            # --------------------------------------------------------
            with gr.Tab("🧪 Parsers (Debug)"):
                text2 = gr.Textbox(
                    label="Microbiology description",
                    lines=6,
                    placeholder="Paste description…",
                )
                rule_btn = gr.Button("Parse (Rule Parser)")
                ext_btn = gr.Button("Parse (Extended Tests)")
                tri_btn = gr.Button("Parse & Identify (Tri-Fusion + ML)")

                with gr.Accordion("Rule Parser Output", open=False, visible=False) as rule_panel:
                    rule_json = gr.JSON()
                with gr.Accordion("Extended Parser Output", open=False, visible=False) as ext_panel:
                    ext_json = gr.JSON()
                with gr.Accordion("Tri-Fusion Debug Output", open=False, visible=False) as tri_panel:
                    tri_json = gr.JSON()
                    tri_summary = gr.Markdown()
                    tri_ml_summary = gr.Markdown()
                    tri_unified_summary = gr.Markdown()

                rule_btn.click(run_rule_parser, [text2], [rule_panel, rule_json])
                ext_btn.click(run_extended_parser, [text2], [ext_panel, ext_json])
                tri_btn.click(
                    run_trifusion_debug,
                    [text2, llm_toggle],
                    [tri_panel, tri_json, tri_summary, tri_ml_summary, tri_unified_summary],
                )

            # --------------------------------------------------------
            # TAB 4 — TRAINING
            # --------------------------------------------------------
            with gr.Tab("📚 Training & Sync"):
                gr.Markdown(
                    "Evaluate parsers, train from gold tests, tune parser weights, "
                    "train the genus-level model, build the RAG index, and commit "
                    "artefacts back to the HF Space repository."
                )

                eval_btn = gr.Button("📊 Evaluate Parsers")
                train_btn = gr.Button("🧬 Train from Gold Tests")
                weight_btn = gr.Button("⚖️ Train Parser Weights")
                genus_btn = gr.Button("🧬 Train Genus Model")
                rag_btn = gr.Button("🧱 Build RAG Index")
                commit_btn = gr.Button("⬆️ Commit to HF")

                with gr.Accordion("Parser Evaluation Summary", open=False, visible=False) as eval_panel:
                    eval_json = gr.JSON()
                with gr.Accordion("Gold Training Summary", open=False, visible=False) as train_panel:
                    train_json = gr.JSON()
                with gr.Accordion("Field Weight Training Summary", open=False, visible=False) as weight_panel:
                    weight_json = gr.JSON()
                with gr.Accordion("Genus Model Training Summary", open=False, visible=False) as genus_panel:
                    genus_json = gr.JSON()
                with gr.Accordion("RAG Index Build Summary", open=False, visible=False) as rag_panel:
                    rag_json = gr.JSON()

                commit_output = gr.JSON(label="Commit Output")

                eval_btn.click(run_parser_evaluation, [], [eval_panel, eval_json])
                train_btn.click(run_gold_training, [], [train_panel, train_json])
                weight_btn.click(run_field_weight_training, [], [weight_panel, weight_json])
                genus_btn.click(run_genus_training, [], [genus_panel, genus_json])
                rag_btn.click(run_rag_index_builder, [], [rag_panel, rag_json])
                commit_btn.click(commit_to_hf, None, commit_output)

        # NOTE(review): in this extract the footer call is not valid Python
        # ('gr.Markdown("' spanning raw lines) — the original HTML wrapper
        # (presumably a <div> footer) appears to have been stripped, leaving
        # only the text. Reconstructed minimally; confirm markup and exact
        # placement (Blocks level vs. inside the last tab) against the repo.
        gr.Markdown("Built by Zain Asad")

    return demo


demo = create_app()

if __name__ == "__main__":
    demo.launch()