Initial deploy FastAPI backend to Hugging Face Space
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .env +0 -0
- .gitattributes +2 -0
- .gitignore +4 -0
- Dockerfile +10 -0
- main.py +502 -0
- requirements.txt +123 -0
- retrieval/data/0.html +0 -0
- retrieval/data/1.html +0 -0
- retrieval/data/10.html +0 -0
- retrieval/data/100.html +0 -0
- retrieval/data/101.html +0 -0
- retrieval/data/102.html +0 -0
- retrieval/data/103.html +0 -0
- retrieval/data/104.html +0 -0
- retrieval/data/105.html +0 -0
- retrieval/data/106.html +0 -0
- retrieval/data/107.html +0 -0
- retrieval/data/108.html +0 -0
- retrieval/data/109.html +0 -0
- retrieval/data/11.html +0 -0
- retrieval/data/110.html +0 -0
- retrieval/data/111.html +0 -0
- retrieval/data/112.html +0 -0
- retrieval/data/113.html +0 -0
- retrieval/data/114.html +0 -0
- retrieval/data/115.html +0 -0
- retrieval/data/116.html +0 -0
- retrieval/data/117.html +0 -0
- retrieval/data/118.html +0 -0
- retrieval/data/119.html +0 -0
- retrieval/data/12.html +0 -0
- retrieval/data/120.html +0 -0
- retrieval/data/121.html +0 -0
- retrieval/data/122.html +0 -0
- retrieval/data/123.html +0 -0
- retrieval/data/124.html +0 -0
- retrieval/data/125.html +0 -0
- retrieval/data/126.html +0 -0
- retrieval/data/127.html +0 -0
- retrieval/data/128.html +0 -0
- retrieval/data/129.html +0 -0
- retrieval/data/13.html +0 -0
- retrieval/data/130.html +2 -0
- retrieval/data/131.html +0 -0
- retrieval/data/132.html +0 -0
- retrieval/data/133.html +0 -0
- retrieval/data/134.html +0 -0
- retrieval/data/135.html +0 -0
- retrieval/data/136.html +0 -0
- retrieval/data/137.html +0 -0
.env
ADDED
|
File without changes
|
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
retrieval/docs_no_stop.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
retrieval/ind/*.seg filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
.venv/
|
| 3 |
+
venv/
|
| 4 |
+
env/
|
Dockerfile
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
COPY requirements.txt .
|
| 6 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
+
|
| 8 |
+
COPY . .
|
| 9 |
+
|
| 10 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
main.py
ADDED
|
@@ -0,0 +1,502 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from typing import List, Dict, Optional
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from whoosh import index, qparser
|
| 7 |
+
from whoosh.qparser import MultifieldParser
|
| 8 |
+
from whoosh.scoring import BM25F
|
| 9 |
+
from whoosh.index import open_dir
|
| 10 |
+
import os
|
| 11 |
+
import nltk
|
| 12 |
+
from nltk import sent_tokenize
|
| 13 |
+
import re
|
| 14 |
+
import unicodedata
|
| 15 |
+
from pyvi import ViTokenizer
|
| 16 |
+
|
| 17 |
+
# Fetch NLTK resources when the module is imported. sent_tokenize() is called
# further down in this file.
# NOTE(review): presumably 'punkt_tab' is what sent_tokenize() needs, and the
# 'stopwords' corpus does not appear to be read by the visible code — confirm.
for _nltk_resource in ('punkt_tab', 'stopwords'):
    nltk.download(_nltk_resource)

app = FastAPI(title="Document Search API")

# CORS configuration: browser origins that are allowed to call this API.
_ALLOWED_ORIGINS = [
    "http://localhost:3000",  # Next.js development
    "http://localhost:3001",  # Alternative port
    "https://blue-information-retrieval.vercel.app",  # Production frontend URL
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configuration: locations of the on-disk artefacts, relative to the working
# directory the server is started from.
INDEX_DIR = "./retrieval/ind"
META_CSV = "./retrieval/final_document_tfidf_pagerank.csv"
DATA_CLEAN_DIR = "./retrieval/data_clean"
IMAGE_CSV = "./retrieval/docs_with_images.csv"  # CSV file holding the image URLs
STOPWORDS_PATH = "./retrieval/vietnamese-stopwords-dash.txt"

# Global state. startup_event() fills in everything except docs_cache, which
# load_document_content() populates lazily.
ix = None
meta_df = None
image_df = None  # DataFrame mapping doc_id -> image_url
docs_cache = {}
pagerank_dict = {}
vi_stopwords = None
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class SearchRequest(BaseModel):
    """Request body accepted by the ``POST /search`` endpoint."""

    # Raw user query; the handler rejects it when it is empty after stripping.
    query: str
    # Ranking model name. The /search handler in this file does not read it.
    model: str = "bm25"
    # Maximum number of hits requested from the index and returned.
    limit: int = 100
    # BM25F parameters, forwarded unchanged to whoosh.scoring.BM25F.
    B: float = 0.75
    K1: float = 1.2
    # Per-field boosts handed to the MultifieldParser.
    title_boost: float = 1.5
    content_boost: float = 1.0
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class SearchResult(BaseModel):
    """One hit returned by ``POST /search``."""

    # Always populated by the /search handler.
    doc_id: str
    title: str
    url: str
    snippet: str
    score: float  # raw BM25 score, rounded by the handler
    relevance_percentage: float  # score relative to the best hit (0-100)

    # Optional enrichment; None when the value is missing.
    image_url: str | None = None
    pagerank_score: float | None = None
    total_words: int | None = None
    unique_words: int | None = None
    top_words: str | None = None
    top_tfidf: str | None = None
    avg_tfidf: float | None = None
    final_score: float | None = None
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def split_sentences(text):
    """Return the sentences of *text* as produced by NLTK's ``sent_tokenize``."""
    sentences = sent_tokenize(text)
    return sentences
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def tokenize_vi_sentence_level(text: str) -> list[str]:
    """Tokenize *text* one sentence at a time.

    The text is split into sentences with ``sent_tokenize``. Each non-blank
    sentence is passed through ``ViTokenizer.tokenize`` and the resulting
    string is split on whitespace.

    Returns:
        All tokens, in document order.
    """
    collected: list[str] = []
    for raw_sentence in sent_tokenize(text):
        sentence = raw_sentence.strip()
        if sentence:
            collected += ViTokenizer.tokenize(sentence).split()
    return collected
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# Alphabet accepted in a token: lowercase ASCII letters, lowercase Vietnamese
# letters (precomposed forms), digits and underscore. Uppercase letters are
# not part of the class, so callers are expected to lowercase first.
VI_TOKEN_REGEX = re.compile(
    r"[a-zàáạảãâầấậẩẫăằắặẳẵ"
    r"èéẹẻẽêềếệểễ"
    r"ìíịỉĩ"
    r"òóọỏõôồốộổỗơờớợởỡ"
    r"ùúụủũưừứựửữ"
    r"ỳýỵỷỹđ0-9_]+$"
)


def is_valid_vi_token(token: str) -> bool:
    """Return True when *token* consists solely of characters in VI_TOKEN_REGEX.

    The whole string must match, so the empty string and any token containing
    punctuation, whitespace or uppercase letters are rejected.
    """
    return VI_TOKEN_REGEX.fullmatch(token) is not None
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def load_stopwords(path):
    """Read a UTF-8 stopword list with one entry per line.

    Args:
        path: Location of the stopword file.

    Returns:
        A set of the entries, stripped and lowercased. Blank lines are skipped.
    """
    with open(path, "r", encoding="utf-8") as handle:
        entries = (raw.strip() for raw in handle)
        return {entry.lower() for entry in entries if entry}
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def clean_text(text):
    """Normalise raw text before tokenisation.

    Steps, in order: NFC normalisation, removal of ``http...`` / ``www...``
    runs, replacement of ``. , ! ?`` runs with a space, and collapsing of
    whitespace.

    Args:
        text: Input string, or None.

    Returns:
        The cleaned string; ``""`` when *text* is None.
    """
    if text is None:
        return ""

    normalized = unicodedata.normalize("NFC", text)
    without_urls = re.sub(r"http\S+|www\S+", "", normalized)
    without_punct = re.sub(r"[.,!?]+", " ", without_urls)
    return re.sub(r"\s+", " ", without_punct).strip()
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def preprocess_query(query: str, stopwords: set[str] | None = None) -> str:
    """Turn a raw user query into the space-separated token string to search.

    The query is cleaned with ``clean_text``, tokenised with
    ``tokenize_vi_sentence_level`` and lowercased. A token is dropped when it
    fails ``is_valid_vi_token``, is purely numeric, or is in *stopwords*.

    Args:
        query: Raw query text.
        stopwords: Optional set of lowercase tokens to discard.

    Returns:
        The surviving tokens joined by single spaces (possibly ``""``).
    """

    def _keep(token: str) -> bool:
        # Same three filters as before, evaluated in the same order.
        if not is_valid_vi_token(token) or token.isnumeric():
            return False
        return not (stopwords and token in stopwords)

    lowered = (tok.lower() for tok in tokenize_vi_sentence_level(clean_text(query)))
    return " ".join(tok for tok in lowered if _keep(tok))
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def load_pagerank(meta_csv: str) -> Dict[str, float]:
    """Build a ``doc id -> PageRank`` mapping from the metadata CSV.

    Args:
        meta_csv: Path of a CSV expected to have ``id`` and ``pagerank`` columns.

    Returns:
        A dict keyed by the stringified ``id``. An empty dict is returned when
        either column is missing or the file cannot be read; in the latter
        case a warning is printed.
    """
    try:
        frame = pd.read_csv(meta_csv)
        if not {'id', 'pagerank'}.issubset(frame.columns):
            return {}
        doc_ids = frame['id'].astype(str)
        return dict(zip(doc_ids, frame['pagerank']))
    except Exception as e:
        # Best effort: the caller carries on without PageRank scores.
        print(f"Warning: Could not load PageRank scores: {e}")
        return {}
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def load_images_csv(image_csv: str) -> pd.DataFrame:
    """Load the ``doc_id -> image_url`` table from a CSV file.

    Args:
        image_csv: Path of a CSV expected to have ``doc_id`` and ``image_url``
            columns.

    Returns:
        A DataFrame whose ``doc_id`` column is cast to ``str`` and whose rows
        with a null or empty ``image_url`` are removed. An empty DataFrame
        with those two columns is returned (after printing a message) when the
        file is missing, lacks a required column, or cannot be read.
    """

    def _empty_frame() -> pd.DataFrame:
        return pd.DataFrame(columns=['doc_id', 'image_url'])

    try:
        if not os.path.exists(image_csv):
            print(f"⚠️ Image CSV not found: {image_csv}")
            return _empty_frame()

        frame = pd.read_csv(image_csv)

        # Both columns must be present.
        if not {'doc_id', 'image_url'}.issubset(frame.columns):
            print("⚠️ Image CSV missing required columns: doc_id, image_url")
            return _empty_frame()

        # String ids make the later lookup by doc_id straightforward.
        frame['doc_id'] = frame['doc_id'].astype(str)

        # Discard rows whose image_url is null or empty.
        has_url = frame['image_url'].notna() & (frame['image_url'] != '')
        frame = frame[has_url]

        print(f"✅ Loaded {len(frame)} image URLs from CSV")
        return frame

    except Exception as e:
        print(f"❌ Error loading image CSV: {e}")
        return _empty_frame()
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def get_image_url(doc_id: str) -> Optional[str]:
    """Look up the image URL recorded for *doc_id* in the module-level ``image_df``.

    Args:
        doc_id: Document identifier; compared as ``str(doc_id)``.

    Returns:
        The URL of the first matching row as a string. None is returned when
        ``image_df`` is unset or empty, no row matches, the stored value is
        null or blank, or the lookup raises (the error is printed).
    """
    # image_df is only read here, so no ``global`` declaration is required.
    if image_df is None or image_df.empty:
        return None

    try:
        # Select the rows for this document id.
        rows = image_df[image_df['doc_id'] == str(doc_id)]
        if rows.empty:
            return None

        candidate = rows.iloc[0]['image_url']
        # Reject null or whitespace-only values.
        if pd.isna(candidate) or str(candidate).strip() == '':
            return None
        return str(candidate)

    except Exception as e:
        print(f"Error getting image URL for doc {doc_id}: {e}")
        return None
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def load_document_content(doc_id: str) -> str:
    """Return the text of ``<DATA_CLEAN_DIR>/<doc_id>.txt``, cached in memory.

    Successful reads are stored in the module-level ``docs_cache`` and served
    from there on later calls. Failed lookups are not cached.

    Args:
        doc_id: Document identifier. It can originate from the
            ``/document/{doc_id}`` request path, so it is treated as untrusted.

    Returns:
        The file content, or ``""`` when the id is not a plain file name, the
        file does not exist, or it cannot be read or decoded. A message is
        printed in each of those cases.
    """
    if doc_id in docs_cache:
        return docs_cache[doc_id]

    file_name = f"{doc_id}.txt"
    # Refuse ids carrying directory components (e.g. "../x") so a lookup can
    # never read outside DATA_CLEAN_DIR.
    if os.path.basename(file_name) != file_name:
        print(f"Warning: Refusing unsafe document id: {doc_id!r}")
        return ""

    file_path = os.path.join(DATA_CLEAN_DIR, file_name)
    try:
        # Open directly instead of checking existence first: there is no
        # window in which the file can vanish between check and open.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Warning: File not found: {file_path}")
        return ""
    except (OSError, ValueError) as e:
        # OSError: unreadable path. ValueError: undecodable bytes
        # (UnicodeDecodeError) or an invalid path such as an embedded NUL.
        print(f"Error loading document {doc_id}: {e}")
        return ""

    docs_cache[doc_id] = content
    return content
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def get_snippet(doc_id: str, query_terms: List[str], max_length: int = 200) -> str:
    """Extract a preview of a document centred on the query terms.

    The document is split on whitespace and scanned with a sliding window of
    up to 30 words. The first window containing the largest number of distinct
    query terms (case-insensitive substring match) is used. With no usable
    query terms the snippet is simply the start of the document.

    Args:
        doc_id: Identifier passed to ``load_document_content``.
        query_terms: Terms to look for; blank entries are ignored.
        max_length: Maximum number of characters kept before ``"..."`` is
            appended.

    Returns:
        The snippet, prefixed with ``"..."`` when it does not start at the
        beginning of the document. A fixed Vietnamese placeholder is returned
        when the document has no content, and a fixed error message when
        snippet generation raises.
    """
    content = load_document_content(doc_id)

    if not content or not content.strip():
        return "Không có nội dung xem trước."

    try:
        words = content.split()
        window_size = min(30, len(words))
        terms = [term.lower() for term in query_terms if term.strip()]

        best_pos = 0
        if terms:
            # Lowercase each word once instead of re-lowercasing every window.
            lowered = [word.lower() for word in words]
            best_matches = 0
            for start in range(max(1, len(words) - window_size + 1)):
                window = ' '.join(lowered[start:start + window_size])
                matches = sum(1 for term in terms if term in window)
                if matches > best_matches:
                    best_matches, best_pos = matches, start
                    # No later window can beat one that has every term.
                    if matches == len(terms):
                        break

        snippet = ' '.join(words[best_pos:best_pos + window_size])

        if len(snippet) > max_length:
            snippet = snippet[:max_length] + "..."

        if best_pos > 0:
            snippet = "..." + snippet

        return snippet

    except Exception as e:
        # Best effort: a broken snippet must not fail the whole search.
        print(f"Error generating snippet for doc {doc_id}: {e}")
        return "Lỗi khi tạo đoạn trích."
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def bm25_search(ix, query_str: str, vi_stopwords: set[str] | None = None, top_k: int = 100,
                B: float = 0.75, K1: float = 1.2,
                title_boost: float = 1.5, content_boost: float = 1.0) -> Dict[str, float]:
    """Run a BM25F query over the ``title`` and ``content`` fields.

    Args:
        ix: Open Whoosh index.
        query_str: Raw query; it is passed through ``preprocess_query`` first.
        vi_stopwords: Optional stopword set used during preprocessing.
        top_k: Maximum number of hits requested from the searcher.
        B: BM25F ``B`` parameter.
        K1: BM25F ``K1`` parameter.
        title_boost: Boost applied to the ``title`` field.
        content_boost: Boost applied to the ``content`` field.

    Returns:
        Mapping of stringified ``docid`` to the hit score as a float.
    """
    cleaned_query = preprocess_query(query_str, stopwords=vi_stopwords)
    scorer = BM25F(B=B, K1=K1)

    with ix.searcher(weighting=scorer) as searcher:
        # Terms are OR-ed together across both fields.
        parser = MultifieldParser(
            ["title", "content"],
            schema=ix.schema,
            fieldboosts={"title": title_boost, "content": content_boost},
            group=qparser.OrGroup,
        )
        parsed_query = parser.parse(cleaned_query)

        # Hits must be consumed while the searcher is still open.
        return {
            str(hit["docid"]): float(hit.score)
            for hit in searcher.search(parsed_query, limit=top_k)
        }
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def normalize_scores(scores: Dict[str, float]) -> Dict[str, float]:
    """Rescale scores so that the largest one becomes 100.

    Args:
        scores: Mapping of document id to raw score.

    Returns:
        A new mapping with the same keys. Every value is ``0.0`` when the
        maximum score is 0, and the result is empty when *scores* is empty.
    """
    if not scores:
        return {}

    top = max(scores.values())
    if top == 0:
        return dict.fromkeys(scores, 0.0)

    return {doc_id: (raw / top) * 100 for doc_id, raw in scores.items()}
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
# NOTE(review): FastAPI's documentation describes on_event as deprecated in
# favour of lifespan handlers — consider migrating when the app is next touched.
@app.on_event("startup")
async def startup_event():
    """Populate the module-level search state when the application starts.

    Sets ``ix``, ``meta_df``, ``image_df``, ``pagerank_dict`` and
    ``vi_stopwords``. A missing index, metadata CSV or data directory is only
    reported; the corresponding global keeps its initial value. Any exception
    is printed and re-raised.
    """
    global ix, meta_df, image_df, pagerank_dict, vi_stopwords

    try:
        # Whoosh index
        if not os.path.exists(INDEX_DIR):
            print("❌ Index directory not found:", INDEX_DIR)
        else:
            ix = open_dir(INDEX_DIR)
            print("✅ Loaded Whoosh index")

        # Document metadata
        if not os.path.exists(META_CSV):
            print("❌ Metadata CSV not found:", META_CSV)
        else:
            meta_df = pd.read_csv(META_CSV)
            print(f"✅ Loaded {len(meta_df)} documents metadata")

        # Image URL table
        image_df = load_images_csv(IMAGE_CSV)

        # Cleaned text files (only counted here; they are read on demand)
        if not os.path.exists(DATA_CLEAN_DIR):
            print("❌ Data clean directory not found:", DATA_CLEAN_DIR)
        else:
            num_files = sum(1 for name in os.listdir(DATA_CLEAN_DIR) if name.endswith('.txt'))
            print(f"✅ Found {num_files} text files in {DATA_CLEAN_DIR}")

        # PageRank scores
        pagerank_dict = load_pagerank(META_CSV)
        print(f"✅ Loaded PageRank scores for {len(pagerank_dict)} documents")

        # Stopwords; an empty set is used when the file is absent
        if not os.path.exists(STOPWORDS_PATH):
            print("⚠️ Stopwords file not found, continuing without stopwords")
            vi_stopwords = set()
        else:
            vi_stopwords = load_stopwords(STOPWORDS_PATH)
            print(f"✅ Loaded {len(vi_stopwords)} Vietnamese stopwords")

    except Exception as e:
        print(f"❌ Error during startup: {e}")
        raise
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
@app.get("/")
async def root():
    """Health check: report that the API is up, plus counts and configured paths."""
    document_count = 0 if meta_df is None else len(meta_df)
    image_count = 0 if image_df is None else len(image_df)

    return {
        "status": "ok",
        "message": "Document Search API is running",
        "total_documents": document_count,
        "total_images": image_count,
        "data_clean_dir": DATA_CLEAN_DIR,
        "image_csv": IMAGE_CSV,
        "index_dir": INDEX_DIR,
    }
|
| 376 |
+
|
| 377 |
+
|
| 378 |
+
def _optional_meta(row, column, convert):
    """Return ``convert(row[column])``, or None when the cell is missing or NaN."""
    value = row.get(column)
    return convert(value) if pd.notna(value) else None


def _round6(value) -> float:
    """Convert *value* to float and round it to six decimal places."""
    return round(float(value), 6)


@app.post("/search", response_model=List[SearchResult])
async def search(request: SearchRequest):
    """Search documents using the BM25 algorithm.

    Args:
        request: Query text, result limit, BM25F parameters and field boosts.

    Returns:
        Up to ``request.limit`` results sorted by descending raw score. Hits
        whose id has no row in the metadata table are skipped.

    Raises:
        HTTPException: 400 when the query is blank or ``limit`` is below 1,
            503 when the index or metadata is not loaded, 500 when the search
            itself fails.
    """
    if not request.query.strip():
        raise HTTPException(status_code=400, detail="Query cannot be empty")

    # Reject a non-positive limit up front as a client error rather than
    # letting it fail inside the searcher and come back as a 500.
    if request.limit < 1:
        raise HTTPException(status_code=400, detail="limit must be at least 1")

    if ix is None or meta_df is None:
        raise HTTPException(status_code=503, detail="Search index not initialized")

    try:
        raw_scores = bm25_search(
            ix,
            request.query,
            vi_stopwords=vi_stopwords,
            top_k=request.limit,
            B=request.B,
            K1=request.K1,
            title_boost=request.title_boost,
            content_boost=request.content_boost,
        )

        if not raw_scores:
            return []

        normalized_scores = normalize_scores(raw_scores)
        query_terms = request.query.split()

        # Stringify the id column once per request, not once per hit.
        meta_ids = meta_df['id'].astype(str)

        results = []
        for doc_id, score in raw_scores.items():
            doc_row = meta_df[meta_ids == doc_id]
            if doc_row.empty:
                continue

            doc_info = doc_row.iloc[0]
            pr_score = pagerank_dict.get(doc_id)

            results.append(SearchResult(
                doc_id=doc_id,
                title=str(doc_info.get('title', 'Untitled')),
                url=str(doc_info.get('url', '')),
                snippet=get_snippet(doc_id, query_terms, max_length=300),
                score=round(score, 4),
                relevance_percentage=round(normalized_scores[doc_id], 2),
                # Image URL comes from the image CSV table.
                image_url=get_image_url(doc_id),
                # Test for "missing", not falsiness, so a PageRank of 0.0 is
                # reported as 0.0 (as /document/{doc_id} already does).
                pagerank_score=_round6(pr_score) if pd.notna(pr_score) else None,
                total_words=_optional_meta(doc_info, 'total_words', int),
                unique_words=_optional_meta(doc_info, 'unique_words', int),
                top_words=_optional_meta(doc_info, 'top_words', str),
                top_tfidf=_optional_meta(doc_info, 'top_tfidf', str),
                avg_tfidf=_optional_meta(doc_info, 'avg_tfidf', _round6),
                final_score=_optional_meta(doc_info, 'final_score', _round6),
            ))

        results.sort(key=lambda x: x.score, reverse=True)
        return results[:request.limit]

    except Exception as e:
        # Chain the cause so the original traceback is kept in the logs.
        raise HTTPException(status_code=500, detail=f"Search error: {str(e)}") from e
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
@app.get("/stats")
async def get_stats():
    """Report counts for the loaded metadata, images, text files and caches.

    Raises:
        HTTPException: 503 when the metadata table has not been loaded.
    """
    if meta_df is None:
        raise HTTPException(status_code=503, detail="Index not initialized")

    # Count the .txt files currently present; 0 when the directory is absent.
    if os.path.exists(DATA_CLEAN_DIR):
        txt_total = sum(1 for entry in os.listdir(DATA_CLEAN_DIR) if entry.endswith('.txt'))
    else:
        txt_total = 0

    return {
        "total_documents": len(meta_df),
        "total_images": 0 if image_df is None else len(image_df),
        "text_files_available": txt_total,
        "cached_documents": len(docs_cache),
        "pagerank_scores": len(pagerank_dict),
        "index_directory": INDEX_DIR,
        "data_clean_directory": DATA_CLEAN_DIR,
        "image_csv": IMAGE_CSV,
    }
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
@app.get("/document/{doc_id}")
async def get_document(doc_id: str):
    """Return the full content and metadata of one document.

    Args:
        doc_id: Identifier matched against the stringified ``id`` column of
            the metadata table.

    Raises:
        HTTPException: 503 when the metadata table has not been loaded, 404
            when no row has this id.
    """
    if meta_df is None:
        raise HTTPException(status_code=503, detail="Index not initialized")

    matches = meta_df[meta_df['id'].astype(str) == doc_id]
    if matches.empty:
        raise HTTPException(status_code=404, detail="Document not found")

    doc_info = matches.iloc[0]

    def _field(name, cast):
        # None when the column is absent or the cell is NaN.
        value = doc_info.get(name)
        return cast(value) if pd.notna(value) else None

    content = load_document_content(doc_id)
    image_url = get_image_url(doc_id)

    return {
        "doc_id": doc_id,
        "title": str(doc_info.get('title', 'Untitled')),
        "url": str(doc_info.get('url', '')),
        "content": content,
        "image_url": image_url,
        "pagerank": _field('pagerank', float),
        "total_words": _field('total_words', int),
        "unique_words": _field('unique_words', int),
        "top_words": _field('top_words', str),
        "top_tfidf": _field('top_tfidf', str),
        "avg_tfidf": _field('avg_tfidf', float),
        "final_score": _field('final_score', float),
    }
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
if __name__ == "__main__":
    # Direct-run entry point: serves on port 8000. The Dockerfile in this
    # commit starts uvicorn itself on port 7860 and does not go through here.
    from uvicorn import run as run_server

    run_server(app, host="0.0.0.0", port=8000)
|
requirements.txt
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
alembic==1.17.2
|
| 2 |
+
annotated-doc==0.0.4
|
| 3 |
+
annotated-types==0.7.0
|
| 4 |
+
anyio==4.12.0
|
| 5 |
+
asttokens==3.0.0
|
| 6 |
+
attrs==25.4.0
|
| 7 |
+
bcrypt==5.0.0
|
| 8 |
+
beautifulsoup4==4.14.2
|
| 9 |
+
certifi==2025.10.5
|
| 10 |
+
cffi==2.0.0
|
| 11 |
+
charset-normalizer==3.4.4
|
| 12 |
+
click==8.3.0
|
| 13 |
+
colorama==0.4.6
|
| 14 |
+
colorlog==6.10.1
|
| 15 |
+
comm==0.2.3
|
| 16 |
+
contourpy==1.3.3
|
| 17 |
+
cryptography==46.0.3
|
| 18 |
+
cycler==0.12.1
|
| 19 |
+
datasketch==1.6.5
|
| 20 |
+
debugpy==1.8.17
|
| 21 |
+
decorator==5.2.1
|
| 22 |
+
ecdsa==0.19.1
|
| 23 |
+
emoji==2.15.0
|
| 24 |
+
executing==2.2.1
|
| 25 |
+
fastapi==0.128.0
|
| 26 |
+
filelock==3.20.0
|
| 27 |
+
fonttools==4.61.1
|
| 28 |
+
fsspec==2025.9.0
|
| 29 |
+
gensim==4.4.0
|
| 30 |
+
greenlet==3.3.0
|
| 31 |
+
h11==0.16.0
|
| 32 |
+
huggingface-hub==0.36.0
|
| 33 |
+
idna==3.11
|
| 34 |
+
ipykernel==7.0.1
|
| 35 |
+
ipython==9.6.0
|
| 36 |
+
ipython_pygments_lexers==1.1.1
|
| 37 |
+
jedi==0.19.2
|
| 38 |
+
Jinja2==3.1.6
|
| 39 |
+
joblib==1.5.2
|
| 40 |
+
jupyter_client==8.6.3
|
| 41 |
+
jupyter_core==5.9.1
|
| 42 |
+
kiwisolver==1.4.9
|
| 43 |
+
Mako==1.3.10
|
| 44 |
+
MarkupSafe==3.0.3
|
| 45 |
+
matplotlib==3.10.8
|
| 46 |
+
matplotlib-inline==0.1.7
|
| 47 |
+
mpmath==1.3.0
|
| 48 |
+
nest-asyncio==1.6.0
|
| 49 |
+
networkx==3.5
|
| 50 |
+
nltk==3.9.2
|
| 51 |
+
numpy==2.3.4
|
| 52 |
+
optuna==4.6.0
|
| 53 |
+
outcome==1.3.0.post0
|
| 54 |
+
packaging==25.0
|
| 55 |
+
pandas==2.3.3
|
| 56 |
+
parso==0.8.5
|
| 57 |
+
passlib==1.7.4
|
| 58 |
+
pillow==12.0.0
|
| 59 |
+
platformdirs==4.5.0
|
| 60 |
+
prompt_toolkit==3.0.52
|
| 61 |
+
protobuf==6.33.0
|
| 62 |
+
psutil==7.1.1
|
| 63 |
+
psycopg2-binary==2.9.11
|
| 64 |
+
pure_eval==0.2.3
|
| 65 |
+
py_vncorenlp==0.1.4
|
| 66 |
+
pyasn1==0.6.1
|
| 67 |
+
pycparser==2.23
|
| 68 |
+
pydantic==2.12.5
|
| 69 |
+
pydantic_core==2.41.5
|
| 70 |
+
Pygments==2.19.2
|
| 71 |
+
pyjnius==1.7.0
|
| 72 |
+
pyparsing==3.2.5
|
| 73 |
+
PySocks==1.7.1
|
| 74 |
+
python-crfsuite==0.9.11
|
| 75 |
+
python-dateutil==2.9.0.post0
|
| 76 |
+
python-dotenv==1.2.1
|
| 77 |
+
python-jose==3.5.0
|
| 78 |
+
pytrec_eval-terrier==0.5.10
|
| 79 |
+
pytz==2025.2
|
| 80 |
+
pyvi==0.1.1
|
| 81 |
+
PyYAML==6.0.3
|
| 82 |
+
pyzmq==27.1.0
|
| 83 |
+
regex==2025.10.23
|
| 84 |
+
requests==2.32.5
|
| 85 |
+
rsa==4.9.1
|
| 86 |
+
safetensors==0.7.0
|
| 87 |
+
scikit-learn==1.7.2
|
| 88 |
+
scipy==1.16.2
|
| 89 |
+
selenium==4.37.0
|
| 90 |
+
sentence-transformers==5.2.0
|
| 91 |
+
setuptools==80.9.0
|
| 92 |
+
six==1.17.0
|
| 93 |
+
sklearn-crfsuite==0.5.0
|
| 94 |
+
smart_open==7.5.0
|
| 95 |
+
sniffio==1.3.1
|
| 96 |
+
sortedcontainers==2.4.0
|
| 97 |
+
soupsieve==2.8
|
| 98 |
+
SQLAlchemy==2.0.45
|
| 99 |
+
stack-data==0.6.3
|
| 100 |
+
stanza==1.11.0
|
| 101 |
+
starlette==0.50.0
|
| 102 |
+
sympy==1.14.0
|
| 103 |
+
tabulate==0.9.0
|
| 104 |
+
threadpoolctl==3.6.0
|
| 105 |
+
tokenizers==0.22.1
|
| 106 |
+
torch==2.9.0
|
| 107 |
+
tornado==6.5.2
|
| 108 |
+
tqdm==4.67.1
|
| 109 |
+
traitlets==5.14.3
|
| 110 |
+
transformers==4.57.3
|
| 111 |
+
trio==0.31.0
|
| 112 |
+
trio-websocket==0.12.2
|
| 113 |
+
typing-inspection==0.4.2
|
| 114 |
+
typing_extensions==4.15.0
|
| 115 |
+
tzdata==2025.2
|
| 116 |
+
urllib3==2.5.0
|
| 117 |
+
uvicorn==0.40.0
|
| 118 |
+
wcwidth==0.2.14
|
| 119 |
+
websocket-client==1.9.0
|
| 120 |
+
Whoosh==2.7.4
|
| 121 |
+
wordcloud==1.9.5
|
| 122 |
+
wrapt==2.0.1
|
| 123 |
+
wsproto==1.2.0
|
retrieval/data/0.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/1.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/10.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/100.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/101.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/102.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/103.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/104.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/105.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/106.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/107.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/108.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/109.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/11.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/110.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/111.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/112.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/113.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/114.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/115.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/116.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/117.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/118.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/119.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/12.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/120.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/121.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/122.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/123.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/124.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/125.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/126.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/127.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/128.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/129.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/13.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/130.html
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<html lang="en-US" dir="ltr"><head><title>Just a moment...</title><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=Edge"><meta name="robots" content="noindex,nofollow"><meta name="viewport" content="width=device-width,initial-scale=1"><style>*{box-sizing:border-box;margin:0;padding:0}html{line-height:1.15;-webkit-text-size-adjust:100%;color:#313131;font-family:system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji"}body{display:flex;flex-direction:column;height:100vh;min-height:100vh}.main-content{margin:8rem auto;padding-left:1.5rem;max-width:60rem}@media (width <= 720px){.main-content{margin-top:4rem}}.h2{line-height:2.25rem;font-size:1.5rem;font-weight:500}@media (width <= 720px){.h2{line-height:1.5rem;font-size:1.25rem}}#challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI0IyMEYwMyIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjQjIwRjAzIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+");background-repeat:no-repeat;background-size:contain;padding-left:34px}@media (prefers-color-scheme: dark){body{background-color:#222;color:#d9d9d9}}</style><meta http-equiv="refresh" content="360"><script 
src="/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9b19c72b8f6c85c1"></script><style>*{box-sizing:border-box;margin:0;padding:0}html{line-height:1.15;-webkit-text-size-adjust:100%;color:#313131;font-family:system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji"}button{font-family:system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji"}body{display:flex;flex-direction:column;height:100vh;min-height:100vh}body.theme-dark{background-color:#222;color:#d9d9d9}body.theme-dark a{color:#fff}body.theme-dark a:hover{text-decoration:underline;color:#ee730a}body.theme-dark .lds-ring div{border-color:#999 rgba(0,0,0,0) rgba(0,0,0,0)}body.theme-dark .font-red{color:#b20f03}body.theme-dark .ctp-button{background-color:#4693ff;color:#1d1d1d}body.theme-dark #challenge-success-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSIgdmlld0JveD0iMCAwIDI2IDI2Ij48cGF0aCBmaWxsPSIjZDlkOWQ5IiBkPSJNMTMgMGExMyAxMyAwIDEgMCAwIDI2IDEzIDEzIDAgMCAwIDAtMjZtMCAyNGExMSAxMSAwIDEgMSAwLTIyIDExIDExIDAgMCAxIDAgMjIiLz48cGF0aCBmaWxsPSIjZDlkOWQ5IiBkPSJtMTAuOTU1IDE2LjA1NS0zLjk1LTQuMTI1LTEuNDQ1IDEuMzg1IDUuMzcgNS42MSA5LjQ5NS05LjYtMS40Mi0xLjQwNXoiLz48L3N2Zz4")}body.theme-dark 
#challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI0IyMEYwMyIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjQjIwRjAzIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+")}body.theme-light{background-color:#fff;color:#313131}body.theme-light a{color:#0051c3}body.theme-light a:hover{text-decoration:underline;color:#ee730a}body.theme-light .lds-ring div{border-color:#595959 rgba(0,0,0,0) rgba(0,0,0,0)}body.theme-light .font-red{color:#fc574a}body.theme-light .ctp-button{border-color:#003681;background-color:#003681;color:#fff}body.theme-light #challenge-success-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSIgdmlld0JveD0iMCAwIDI2IDI2Ij48cGF0aCBmaWxsPSIjMzEzMTMxIiBkPSJNMTMgMGExMyAxMyAwIDEgMCAwIDI2IDEzIDEzIDAgMCAwIDAtMjZtMCAyNGExMSAxMSAwIDEgMSAwLTIyIDExIDExIDAgMCAxIDAgMjIiLz48cGF0aCBmaWxsPSIjMzEzMTMxIiBkPSJtMTAuOTU1IDE2LjA1NS0zLjk1LTQuMTI1LTEuNDQ1IDEuMzg1IDUuMzcgNS42MSA5LjQ5NS05LjYtMS40Mi0xLjQwNXoiLz48L3N2Zz4=")}body.theme-light 
#challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI2ZjNTc0YSIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjZmM1NzRhIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+")}a{transition:color 150ms ease;background-color:rgba(0,0,0,0);text-decoration:none;color:#0051c3}a:hover{text-decoration:underline;color:#ee730a}.main-content{margin:8rem auto;padding-right:1.5rem;padding-left:1.5rem;width:100%;max-width:60rem}.main-content .loading-verifying{height:76.391px}.spacer{margin:2rem 0}.spacer-top{margin-top:4rem}.spacer-bottom{margin-bottom:2rem}.heading-favicon{margin-right:.5rem;width:2rem;height:2rem}@media (width <= 720px){.main-content{margin-top:4rem}.heading-favicon{width:1.5rem;height:1.5rem}}.main-wrapper{display:flex;flex:1;flex-direction:column;align-items:center}.font-red{color:#b20f03}.h1{line-height:3.75rem;font-size:2.5rem;font-weight:500}.h2{line-height:2.25rem;font-size:1.5rem;font-weight:500}.core-msg{line-height:2.25rem;font-size:1.5rem;font-weight:400}.body-text{line-height:1.25rem;font-size:1rem;font-weight:400}@media (width <= 
720px){.h1{line-height:1.75rem;font-size:1.5rem}.h2{line-height:1.5rem;font-size:1.25rem}.core-msg{line-height:1.5rem;font-size:1rem}}#challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI2ZjNTc0YSIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjZmM1NzRhIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+");background-repeat:no-repeat;background-size:contain;padding-left:34px}#challenge-success-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSIgdmlld0JveD0iMCAwIDI2IDI2Ij48cGF0aCBmaWxsPSIjMzEzMTMxIiBkPSJNMTMgMGExMyAxMyAwIDEgMCAwIDI2IDEzIDEzIDAgMCAwIDAtMjZtMCAyNGExMSAxMSAwIDEgMSAwLTIyIDExIDExIDAgMCAxIDAgMjIiLz48cGF0aCBmaWxsPSIjMzEzMTMxIiBkPSJtMTAuOTU1IDE2LjA1NS0zLjk1LTQuMTI1LTEuNDQ1IDEuMzg1IDUuMzcgNS42MSA5LjQ5NS05LjYtMS40Mi0xLjQwNXoiLz48L3N2Zz4=");background-repeat:no-repeat;background-size:contain;padding-left:42px}.text-center{text-align:center}.ctp-button{transition-duration:200ms;transition-property:background-color,border-color,color;transition-timing-function:ease;margin:2rem 0;border:.063rem solid #0051c3;border-radius:.313rem;background-color:#0051c3;cursor:pointer;padding:.375rem 1rem;line-height:1.313rem;color:#fff;font-size:.875rem}.ctp-button:hover{border-color:#003681;background-color:#003681;cursor:pointer;color:#fff}.footer{margin:0 auto;padding-right:1.5rem;padding-left:1.5rem;width:100%;max-width:60rem;line-height:1.125rem;font-size:.75rem}.footer-inner{border-top:1px solid 
#d9d9d9;padding-top:1rem;padding-bottom:1rem}.clearfix::after{display:table;clear:both;content:""}.clearfix .column{float:left;padding-right:1.5rem;width:50%}.diagnostic-wrapper{margin-bottom:.5rem}.footer .ray-id{text-align:center}.footer .ray-id code{font-family:monaco,courier,monospace}.core-msg,.zone-name-title{overflow-wrap:break-word}@media (width <= 720px){.diagnostic-wrapper{display:flex;flex-wrap:wrap;justify-content:center}.clearfix::after{display:initial;clear:none;text-align:center;content:none}.column{padding-bottom:2rem}.clearfix .column{float:none;padding:0;width:auto;word-break:keep-all}.zone-name-title{margin-bottom:1rem}}.loading-verifying{height:76.391px}.lds-ring{display:inline-block;position:relative;width:1.875rem;height:1.875rem}.lds-ring div{box-sizing:border-box;display:block;position:absolute;border:.3rem solid #595959;border-radius:50%;border-color:#313131 rgba(0,0,0,0) rgba(0,0,0,0);width:1.875rem;height:1.875rem;animation:lds-ring 1.2s cubic-bezier(.5, 0, .5, 1) infinite}.lds-ring div:nth-child(1){animation-delay:-.45s}.lds-ring div:nth-child(2){animation-delay:-.3s}.lds-ring div:nth-child(3){animation-delay:-.15s}@keyframes lds-ring{0%{transform:rotate(0deg)}100%{transform:rotate(360deg)}}.rtl .heading-favicon{margin-right:0;margin-left:.5rem}.rtl #challenge-success-text{background-position:right;padding-right:42px;padding-left:0}.rtl #challenge-error-text{background-position:right;padding-right:34px;padding-left:0}.challenge-content .loading-verifying{height:76.391px}@media (prefers-color-scheme: dark){body{background-color:#222;color:#d9d9d9}body a{color:#fff}body a:hover{text-decoration:underline;color:#ee730a}body .lds-ring div{border-color:#999 rgba(0,0,0,0) rgba(0,0,0,0)}body .font-red{color:#b20f03}body .ctp-button{background-color:#4693ff;color:#1d1d1d}body 
#challenge-success-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSIgdmlld0JveD0iMCAwIDI2IDI2Ij48cGF0aCBmaWxsPSIjZDlkOWQ5IiBkPSJNMTMgMGExMyAxMyAwIDEgMCAwIDI2IDEzIDEzIDAgMCAwIDAtMjZtMCAyNGExMSAxMSAwIDEgMSAwLTIyIDExIDExIDAgMCAxIDAgMjIiLz48cGF0aCBmaWxsPSIjZDlkOWQ5IiBkPSJtMTAuOTU1IDE2LjA1NS0zLjk1LTQuMTI1LTEuNDQ1IDEuMzg1IDUuMzcgNS42MSA5LjQ5NS05LjYtMS40Mi0xLjQwNXoiLz48L3N2Zz4")}body #challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI0IyMEYwMyIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjQjIwRjAzIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+")}}</style><script src="https://challenges.cloudflare.com/turnstile/v0/g/d39f91d70ce1/api.js?onload=REiSI4&render=explicit" async="" defer="" crossorigin="anonymous"></script></head><body><div class="main-wrapper" role="main"><div class="main-content"><h1 class="zone-name-title h1">vinpearl.com</h1><p id="zutqc7" class="h2 spacer-bottom">Verifying you are human. 
This may take a few seconds.</p><div id="SoGDz7" style="display: grid;"><div><div><input type="hidden" name="cf-turnstile-response" id="cf-chl-widget-bzbso_response"></div></div></div><div id="UfiHG7" class="spacer loading-verifying" style="display: none; visibility: hidden;"><div class="lds-ring"><div></div><div></div><div></div><div></div></div></div><div id="NGtV8" class="core-msg spacer spacer-top">vinpearl.com needs to review the security of your connection before proceeding.</div><div id="TuMjR6" style="display: none;"><div id="challenge-success-text" class="h2">Verification successful</div><div class="core-msg spacer">Waiting for vinpearl.com to respond...</div></div><noscript><div class="h2"><span id="challenge-error-text">Enable JavaScript and cookies to continue</span></div></noscript></div></div><script>(function(){window._cf_chl_opt = {cvId: '3',cZone: 'vinpearl.com',cType: 'managed',cRay: '9b19c72b8f6c85c1',cH: 'iEdYnWyAIF4BRxyi.ybPoMX_JuiV3juldoI_TG7Tuyo-1766345258-1.2.1.1-iE2GMa9ivjpmYJWkY.pu7plkbUsnhanuJrG0U_sXNDM0WxbjaGNa0stwmasxRmkh',cUPMDTk:"\/vi\/du-lich-pho-co-hoi-an-review-day-du-va-chi-tiet-tu-a-z?__cf_chl_tk=zhEwxnR6r4Ghsmx2r.3u2Sm7y9dL5PVIHGhqIf0MWEs-1766345258-1.0.1.1-QM9oIdMWPcM9mJfu5GJYeOQMAJa8_vyLl02TYuikXy4",cFPWv: 'g',cITimeS: '1766345258',cTplC:0,cTplV:5,cTplB: '0',fa:"\/vi\/du-lich-pho-co-hoi-an-review-day-du-va-chi-tiet-tu-a-z?__cf_chl_f_tk=zhEwxnR6r4Ghsmx2r.3u2Sm7y9dL5PVIHGhqIf0MWEs-1766345258-1.0.1.1-QM9oIdMWPcM9mJfu5GJYeOQMAJa8_vyLl02TYuikXy4",md: 
'VSJHYv7_XFPKYpMzf5YBhtYFrxnMZYc2gLJHnf_cGRc-1766345258-1.2.1.1-zQglMRnSnLs9OofJjNR7Ajg9D1GSO_TS2RVDA7LOxxJehfc2lMG1fg0SLJap._RHREs9Wd65bBk0oOpcFp3wayAP3HpEvolccYlmYgYq_Z7EUdu7gQJBFJdAN.LjBQfaqsBA3JKOSqvN2KIkUzlwDfbLiJi4hr6akKvC1137Nvw4KsJo9JUDeOLFdY9Amzpmu_JRQTe821AMgonXkDaBptEA8OqG8bA4rbFV0gEhpaxkCUmOSXfjbk316Lnn6_wOLKrwNTj5EE65UTpJD3tIyQ4TWaBxLWKWWmokedfIp9ebVUjiOwmoXT79J0tVkLzazfXzVSDp8dHvO3gnun3GKM6HEVAr.DJrkVeJX62aO4FVNsrmrlcc8Z6CIOihz6w9y6Oq9fHNu.V3X9IDd.XTBEAluWOf8ko.QROsBCc.d3L2zgVFqTKCqGX3D2sv8S9yEabTB7.2PlVVeYZebSczPnYY2mgoXfQ62MSCVOSswx1toTyVYieUKFBoYjyAJ8M33gOqG1s38lwkajtyZci5ccxU8KtMcOXhYNUK9ALWgBotG807OqW9RflSwv.TeFHEvDTMQ87SmutmS_1B64qniQ16VhiQQHZacrx.STCsx114piAtPX1jE7mJr.6ot_oBR9EMYWk5091KGoZb1DGIucjuRbw0hnM8qSS0XYvAJxedKMaPFqurdRl9_XP53n4xYo75pk95JYi1MA7JjebIqqXW4vZ8bZs1uV6GYFTJRN_acEe2lXbtOtCFuvCvpBRMrD964iqXMdzKL9171WUd1Tbts5hwwNN9ooK1uP8tvrPictcpx60STN7CYY6Rv6FkbepMHWaKFjY6LdaqBcPJBKaGbX_bS21iX24E0VfyYjw7CLfoTwSUG1ugbmFF6zVtYyR0xaX5zGjVYSIeAjHzVIImOfSI0DPcg.RJIFX5PTOgmkcmLjNPSy9K0Wzkq1aZJi7liRfT0g5yJAoMSIl7kqRCcskhsfhWFwNU3Ivnkf2KrtcwR416IuKCn.2Tg4vqn01fEkqiw61ZemVIbEq2mNnCyIGhcIzj_Whyh1JMhViQZ_YCkroLiNj2vHYfv2bCxOAIQ_OCZVBcrnK82kEglLcm5AeW76_7qHYP5qqcASB4uYlmF.2dts6cQ72mwIEECKVc4MsA5G1dO8cH26mwgPjlZKgNXBPIInmZb69fSzk',mdrd: 
'qLUfM4KoVZQYNY1ga8ucVMMijBfoxxI3wrEGbsMOj2o-1766345258-1.2.1.1-08vC7l0z1V.bDD7ut0KojSrJ.qzo5l.ZjjzjrYKZivQUPRyh8g.E_OhjM5IOdTX.b3Y7yfYyAd_6S5jAZZeNTIigETrFkia12ugkYKlOulEjmsH29Uo41EYN4HMrShgUbA89QovTGvmgkHHgJZ30QYzS.1XpXBaRW2gInqp4s2i2x2XEeZ19UHcXFx2stLooWeEK.nhVG6dkjTn_Xxc4SNSMYx.2C.sZ8T_0yKez.8lE9AZ1Q.xaU_DpgS_pvRpO3l_ojWgNY0BVFciTwCqb.Ii4l7BrSBV6KwGJfu9zRG_cDDHU4m_ETmICSjegBINNKk1WVH32AcGNOmUdbEky6_nmxRwFdGw0FYTAJm_ZLqdqIr5.622al1KOqXlTHkkH5QUF_VUE86MkOZLzm9HOuyyhxSaxY47mGmzInUCcchgfz8XAHMLRryb89roXO7DExAznK36QjZk1HUjpIOUpYwoioG_faRiOM8uawZfIyibuSmQwJucuBdmxmfFxSCTiIwyT4qMxD3CYizM9bhBiaeA1.ZemUqv5ab86hy9Eq5j.UaE8GQTE8.qDAwJqJY3CPRqJjGKJnDQkNfXu8rw0icNbJY01OEj7zTTWWDl_j35XkKnYooUUh7X0Y5qovsnSWclRLgArL1pMoKsOxWjPrIMCJtQRXufJJGsrPSZv.j3rIjkYCeWR0Q5IdLh4LEH72cpExcnRJHkS1eG07ZHzpBnRRSZLcrDftmLPDw9oWFgwfZh1HZHE0BUxfFv92JGsW9f32wgTy4OebZLBYgF5KIIeo1T8_ZOz1IldDciQpIWseOVQGsOQRqlDEbHz6D3xGxJRcYQ0K8asJYaCrtmBbIaYimJvufK79pV44UpfFGrmrwj65s6byB0E9KT3FHik.ecwrBFs7Qv4COlns03Jy4rJaTia_wcihGj18pNN0nE3a7LdV.4GQ5VYJQ0oFMQE.5JVoQJVuYHVX214n7scwydI5I15GFUZOOUnYfsiHnyfwVTmvyOWetTz0__O.out7s_BlKbrrCEJNa10LLk2Xu_LfZ_.K4QBNBar0zyDY3nfvYibKTzDS1XAAl0mubKqSeGX_pSR7sEFVLtGvmAT5UH0Ol3SxZj32c6wqseB8lErdx8h0ChOlSywkv8hYGQ0PgFJc0wDcBNh2I9farUdZyMsmaJwAxDHE3gANc0DNwrVaJMfLcwPWhKLD0lMJ7lMJCXF49xs2va.74lahPJMjNdekxvPy3XIfC4.ducqU9kGUEd2KXRMAUbC2q0FpuN0qleJK4XDe7mSOGFNKxFzgOSqtFIA1bjOagpRSRq_k7CpQPeAxbyymY.IZrAEnEYEgA9Sd3gloz7zdL67QtlOCkzuRsUomWDdnjhMrLhOqiIENa9zu8lZjRtK4gcXx72n6V8YDeCkmq8CCp5ldY8s0RqeoHVKSpGwzrcLCrwf1kw6611GojhPjGqu9h7vZyLTrqUAFz6i_zsZ4SZVTYzmd9ewLwyByvr8dtRcdb2NXMGyfIFKXaTLzlxASkeEdmVW7PsLYFTDRW8_HduO8KUvAi.qXb4HrrlVKoR5AS9We8LAk2YEAawH6SosGfktztu0DtVZDsGY561IveChSfILYFxuUcTCogcP2TExZNlCrpjq7JFeAChrK3lk.5tuL1Np8CUznXq4RG84OPpduJtLmDNS0DCb9KG2ol31ik04Ntw8TiUO2NJLgn7EtBhkSjbl8vtNgwC_G49tZcs2IQI8fs_F.T26Nvt7XG3NbzH_eucovsfecbbncNwy6F0p4q6xGoXAOrzooaaw8Z0lWv6qguNjxsIEoTw.O96ovWcIvzLt_Qek.OIEu5FnOXtMTPiz24sKxzVa8KXKdirUy3nJP5HlF_yb64FDg7Nj.g7uT5BxxV1JJGWr8qTHCl0_Q8vBokIO4JoynszsyJJX
9wlvWSSuh2MVZurb5umorBFLprZhCcXyfJhjK32.3.xPfOxUrcRJVLl2kFGKDRF4TEfyZFXotqXabKxh5XIvgotZDsvKR0jjasdpCP2bL8KTmyZxhW3muCGtDcYxkXEIkcKEzXRbRlvtKjkv_MdJHsqwkXFevpBq3GD6ms.cYMbZxpYWnfJeWygpJ_6gXnAgyCf7pB_PWk1JhgPw9Taaga6dLNhK9q4NS3RY1hVcV.051hUqSWTAzN_hV1t6s6kI6ob7nPyw.hYIk6UUHP0L5b5RW57PASeK8JzM6awDc2Ak07XhqnIqOmkJixx6ORd22fymO2Eo_jOLU4e4jrQrrG.oEuD1aB6VmZ.8cYMnT2HCjLgF98JdADQL9sB6lzlIIcLvm5wEdU2.bAaeSAvPUCCDS27BcZbwKYkoasyGJLWthNjzM9tZ1DhPD_w4PDdXqWZSWw7nAVxRSNTb9FHIFs5z1OKf39Aq7BJ6hkGJlfPdr22seIRCJsPRWBRqfi0lUvHV0HtcxbY59_hXC_6Hm1SSEovfyS.GziU5C3JVmNpWTCaSRyqsKhYwLa0KRfjhaVRYK6GcgSlP6DKUBfwBaFeNelQY7R5jHJdlbsGpL9Y5k6bPXibM.c4zVenOTcM1pUZDQ23luTZ_fbrrDm.ux8EgSnrCewOW4ic_LeCFIPK03RyaZ1K_kbjb.vcb5M6v9vgSHMuSUZyWw38WKdjPs6XoY_g',};var a = document.createElement('script');a.src = '/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9b19c72b8f6c85c1';window._cf_chl_opt.cOgUHash = location.hash === '' && location.href.indexOf('#') !== -1 ? '#' : location.hash;window._cf_chl_opt.cOgUQuery = location.search === '' && location.href.slice(0, location.href.length - window._cf_chl_opt.cOgUHash.length).indexOf('?') !== -1 ? '?' 
: location.search;if (window.history && window.history.replaceState) {var ogU = location.pathname + window._cf_chl_opt.cOgUQuery + window._cf_chl_opt.cOgUHash;history.replaceState(null, null,"\/vi\/du-lich-pho-co-hoi-an-review-day-du-va-chi-tiet-tu-a-z?__cf_chl_rt_tk=zhEwxnR6r4Ghsmx2r.3u2Sm7y9dL5PVIHGhqIf0MWEs-1766345258-1.0.1.1-QM9oIdMWPcM9mJfu5GJYeOQMAJa8_vyLl02TYuikXy4"+ window._cf_chl_opt.cOgUHash);a.onload = function() {history.replaceState(null, null, ogU);}}document.getElementsByTagName('head')[0].appendChild(a);}());</script><script defer="" src="https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015" integrity="sha512-ZpsOmlRQV6y907TI0dKBHq9Md29nnaEIPlkf84rnaERnq6zvWvPUqr2ft8M1aS28oN72PdrCzSjY4U6VaAw1EQ==" data-cf-beacon="{"rayId":"9b19c72b8f6c85c1","version":"2025.9.1","serverTiming":{"name":{"cfExtPri":true,"cfEdge":true,"cfOrigin":true,"cfL4":true,"cfSpeedBrain":true,"cfCacheStatus":true}},"token":"65fee8db7c864d448cf726041d01eb37","b":1}" crossorigin="anonymous"></script>
|
| 2 |
+
<div class="footer" role="contentinfo"><div class="footer-inner"><div class="clearfix diagnostic-wrapper"><div class="ray-id">Ray ID: <code>9b19c72b8f6c85c1</code></div></div><div class="text-center" id="footer-text">Performance & security by <a rel="noopener noreferrer" href="https://www.cloudflare.com?utm_source=challenge&utm_campaign=m" target="_blank">Cloudflare</a></div></div></div></body></html>
|
retrieval/data/131.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/132.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/133.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/134.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/135.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/136.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retrieval/data/137.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|