dngan0365 committed on
Commit
d9255cf
·
1 Parent(s): 1f5eadf

Initial deploy FastAPI backend to Hugging Face Space

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
.env ADDED
File without changes
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ retrieval/docs_no_stop.json filter=lfs diff=lfs merge=lfs -text
37
+ retrieval/ind/*.seg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__/
2
+ .venv/
3
+ venv/
4
+ env/
Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# and work inside its home directory so the app can write there at runtime
# (main.py calls nltk.download() at import time, which needs a writable
# location).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Install dependencies first so this layer is cached across code-only changes.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user . .

# 7860 is the port Hugging Face Spaces expects the app to listen on.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ from typing import List, Dict, Optional
5
+ import pandas as pd
6
+ from whoosh import index, qparser
7
+ from whoosh.qparser import MultifieldParser
8
+ from whoosh.scoring import BM25F
9
+ from whoosh.index import open_dir
10
+ import os
11
+ import nltk
12
+ from nltk import sent_tokenize
13
+ import re
14
+ import unicodedata
15
+ from pyvi import ViTokenizer
16
+
17
# NLTK resources are fetched when the module is imported.
# NOTE(review): 'punkt_tab' is presumably what sent_tokenize needs; the
# 'stopwords' corpus is downloaded but not read anywhere in this file.
nltk.download('punkt_tab')
nltk.download('stopwords')

app = FastAPI(title="Document Search API")

# CORS configuration: only these browser origins may call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",  # Next.js development
        "http://localhost:3001",  # Alternative port
        "https://blue-information-retrieval.vercel.app",  # Production frontend URL
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configuration: locations of the on-disk retrieval artefacts, relative to the
# working directory.
INDEX_DIR = "./retrieval/ind"
META_CSV = "./retrieval/final_document_tfidf_pagerank.csv"
DATA_CLEAN_DIR = "./retrieval/data_clean"
IMAGE_CSV = "./retrieval/docs_with_images.csv"  # CSV file containing image URLs
STOPWORDS_PATH = "./retrieval/vietnamese-stopwords-dash.txt"

# Global state, populated by the startup handler.
ix = None
meta_df = None
image_df = None  # DataFrame mapping doc_id -> image_url
docs_cache = {}  # doc_id -> document text, filled lazily on first read
pagerank_dict = {}
vi_stopwords = None
49
+
50
+
51
class SearchRequest(BaseModel):
    """Request body accepted by the POST /search endpoint."""

    # Raw query text as typed by the user.
    query: str
    # Ranking model name (not consulted by the /search handler in this file).
    model: str = "bm25"
    # Maximum number of results to return.
    limit: int = 100
    # BM25F parameters, passed straight to whoosh.scoring.BM25F.
    B: float = 0.75
    K1: float = 1.2
    # Per-field boosts given to the multi-field query parser.
    title_boost: float = 1.5
    content_boost: float = 1.0
59
+
60
+
61
class SearchResult(BaseModel):
    """One ranked hit returned by the POST /search endpoint."""

    # Identity and display fields.
    doc_id: str
    title: str
    url: str
    snippet: str

    # Raw BM25 score and the same score scaled relative to the best hit.
    score: float
    relevance_percentage: float

    # Optional enrichment; None when the value is unavailable for the document.
    image_url: Optional[str] = None
    pagerank_score: Optional[float] = None

    # Optional statistics copied from the metadata CSV row of the document.
    total_words: Optional[int] = None
    unique_words: Optional[int] = None
    top_words: Optional[str] = None
    top_tfidf: Optional[str] = None
    avg_tfidf: Optional[float] = None
    final_score: Optional[float] = None
76
+
77
+
78
def split_sentences(text):
    """Split *text* into sentences with NLTK's ``sent_tokenize``."""
    sentences = sent_tokenize(text)
    return sentences
80
+
81
+
82
def tokenize_vi_sentence_level(text: str) -> list[str]:
    """Tokenize Vietnamese text one sentence at a time.

    The text is split into sentences with ``sent_tokenize``; every non-blank
    sentence is segmented by ``ViTokenizer.tokenize`` and the resulting string
    is split on whitespace. Tokens of all sentences are returned in order.
    """
    stripped_sentences = (sentence.strip() for sentence in sent_tokenize(text))
    return [
        token
        for sentence in stripped_sentences
        if sentence  # skip sentences that were only whitespace
        for token in ViTokenizer.tokenize(sentence).split()
    ]
92
+
93
+
94
# Accepts tokens made only of lowercase ASCII letters, lowercase Vietnamese
# letters (with diacritics), digits and underscores. Uppercase letters are not
# part of the class, so callers are expected to lowercase tokens first.
VI_TOKEN_REGEX = re.compile(
    r"[a-zàáạảãâầấậẩẫăằắặẳẵ"
    r"èéẹẻẽêềếệểễ"
    r"ìíịỉĩ"
    r"òóọỏõôồốộổỗơờớợởỡ"
    r"ùúụủũưừứựửữ"
    r"ỳýỵỷỹđ0-9_]+$"
)


def is_valid_vi_token(token: str) -> bool:
    """Return True when the whole *token* matches ``VI_TOKEN_REGEX``."""
    return VI_TOKEN_REGEX.fullmatch(token) is not None
106
+
107
+
108
def load_stopwords(path):
    """Load a stopword list (one entry per line) into a set.

    Each line is stripped, NFC-normalised and lowercased; blank lines are
    skipped. NFC normalisation matters because ``clean_text`` normalises
    queries to NFC, so a stopword stored in decomposed form would otherwise
    never match a query token.

    Args:
        path: Path of a UTF-8 text file with one stopword per line.

    Returns:
        The set of normalised stopwords.

    Raises:
        OSError: If the file cannot be opened.
    """
    stopwords = set()
    # "utf-8-sig" drops a leading byte-order mark, which would otherwise stay
    # glued to the first stopword (str.strip() does not remove U+FEFF).
    with open(path, "r", encoding="utf-8-sig") as f:
        for line in f:
            word = unicodedata.normalize("NFC", line.strip()).lower()
            if word:
                stopwords.add(word)
    return stopwords
112
+
113
+
114
def clean_text(text):
    """Normalise raw text before tokenization.

    Applies, in order: Unicode NFC normalisation, removal of ``http...`` and
    ``www...`` runs, replacement of ``. , ! ?`` runs with a space, and
    whitespace collapsing. ``None`` yields an empty string.
    """
    if text is None:
        return ""

    cleaned = unicodedata.normalize("NFC", text)
    substitutions = (
        (r"http\S+|www\S+", ""),  # URLs go first, while their dots are intact
        (r"[.,!?]+", " "),
        (r"\s+", " "),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
122
+
123
+
124
def preprocess_query(query: str, stopwords: set[str] | None = None) -> str:
    """Turn a raw user query into a space-separated string of search tokens.

    The query is cleaned with ``clean_text`` and tokenized with
    ``tokenize_vi_sentence_level``. Each token is lowercased and kept only if
    it passes ``is_valid_vi_token``, is not purely numeric, and is not in
    *stopwords* (when a non-empty set is supplied).
    """
    lowered_tokens = (
        token.lower() for token in tokenize_vi_sentence_level(clean_text(query))
    )
    kept = [
        token
        for token in lowered_tokens
        if is_valid_vi_token(token)
        and not token.isnumeric()
        and not (stopwords and token in stopwords)
    ]
    return " ".join(kept)
138
+
139
+
140
def load_pagerank(meta_csv: str) -> Dict[str, float]:
    """Load PageRank scores from the metadata CSV.

    Args:
        meta_csv: Path of a CSV file with ``id`` and ``pagerank`` columns.

    Returns:
        A mapping from document id (as a string) to PageRank score. Rows whose
        ``pagerank`` cell is empty are skipped so that callers never receive
        NaN as a score. An empty dict is returned when the file cannot be read
        or lacks one of the two columns.
    """
    try:
        df = pd.read_csv(meta_csv)
        if 'pagerank' not in df.columns or 'id' not in df.columns:
            return {}
        # Drop rows without a score instead of storing NaN for them.
        scored = df.dropna(subset=['pagerank'])
        return dict(zip(scored['id'].astype(str), scored['pagerank'].tolist()))
    except Exception as e:
        # Best effort: PageRank is optional enrichment, so a failure here is
        # reported and the service continues without scores.
        print(f"Warning: Could not load PageRank scores: {e}")
        return {}
150
+
151
+
152
def load_images_csv(image_csv: str) -> pd.DataFrame:
    """Load the doc_id -> image_url table from a CSV file.

    Returns a DataFrame whose ``doc_id`` column is converted to strings and
    from which rows with a null or empty ``image_url`` have been removed.
    When the file is missing, lacks one of the two columns, or cannot be
    read, an empty DataFrame with those two columns is returned instead.
    """
    required = ['doc_id', 'image_url']

    try:
        if not os.path.exists(image_csv):
            print(f"⚠️ Image CSV not found: {image_csv}")
            return pd.DataFrame(columns=required)

        frame = pd.read_csv(image_csv)

        # Both columns must be present.
        if any(column not in frame.columns for column in required):
            print("⚠️ Image CSV missing required columns: doc_id, image_url")
            return pd.DataFrame(columns=required)

        # Store doc_id as a string so lookups can compare against string ids.
        frame['doc_id'] = frame['doc_id'].astype(str)

        # Drop rows whose image_url is null or empty.
        urls = frame['image_url']
        frame = frame[urls.notna() & (urls != '')]

        print(f"✅ Loaded {len(frame)} image URLs from CSV")
        return frame

    except Exception as e:
        print(f"❌ Error loading image CSV: {e}")
        return pd.DataFrame(columns=required)
178
+
179
+
180
def get_image_url(doc_id: str) -> Optional[str]:
    """Return the image URL recorded for *doc_id*, or None when there is none.

    Reads the module-level ``image_df`` table. None is returned when the table
    is not loaded or empty, when no row matches, when the stored URL is null
    or blank, or when the lookup raises.
    """
    global image_df

    if image_df is None or image_df.empty:
        return None

    try:
        # Rows whose doc_id equals the requested id; the first one wins.
        matches = image_df[image_df['doc_id'] == str(doc_id)]
        if matches.empty:
            return None

        candidate = matches.iloc[0]['image_url']
        # Reject null or whitespace-only URLs.
        if pd.isna(candidate) or str(candidate).strip() == '':
            return None
        return str(candidate)

    except Exception as e:
        print(f"Error getting image URL for doc {doc_id}: {e}")
        return None
202
+
203
+
204
def load_document_content(doc_id: str) -> str:
    """Return the cleaned text of a document, reading it from disk on first use.

    The text is read from ``<DATA_CLEAN_DIR>/<doc_id>.txt`` and kept in the
    module-level ``docs_cache`` so later calls for the same id skip the disk.

    Args:
        doc_id: Document identifier; also the file name without ``.txt``.

    Returns:
        The file content, or an empty string when the id does not name a file
        directly inside ``DATA_CLEAN_DIR``, the file is missing, or it cannot
        be read.
    """
    global docs_cache

    if doc_id in docs_cache:
        return docs_cache[doc_id]

    file_path = os.path.join(DATA_CLEAN_DIR, f"{doc_id}.txt")

    # doc_id can originate from a request URL, so refuse anything that would
    # resolve outside the data directory (path separators, "..", absolute
    # paths).
    base_dir = os.path.abspath(DATA_CLEAN_DIR)
    if os.path.dirname(os.path.abspath(file_path)) != base_dir:
        print(f"Warning: Rejected document id outside data directory: {doc_id!r}")
        return ""

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Warning: File not found: {file_path}")
        return ""
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for files that are not
        # valid UTF-8.
        print(f"Error loading document {doc_id}: {e}")
        return ""

    docs_cache[doc_id] = content
    return content
227
+
228
+
229
def get_snippet(doc_id: str, query_terms: List[str], max_length: int = 200) -> str:
    """Extract a short preview of a document, centred on the query terms.

    A window of up to 30 words is slid over the document. The first window
    that contains the most query terms (case-insensitive substring match) is
    chosen. Without usable query terms the preview starts at the beginning.

    Args:
        doc_id: Identifier of the document to preview.
        query_terms: Terms to look for; blank terms are ignored.
        max_length: Maximum number of characters kept from the window before
            an ellipsis is appended.

    Returns:
        The snippet, prefixed with "..." when it does not start at the first
        word. A fixed Vietnamese placeholder is returned when the document has
        no content or snippet generation fails.
    """
    content = load_document_content(doc_id)

    if not content or not content.strip():
        return "Không có nội dung xem trước."

    try:
        words = content.split()
        terms = [term.lower() for term in query_terms if term.strip()]
        window_size = min(30, len(words))

        best_pos = 0
        if terms:
            # Lowercase every word once rather than once per window.
            lowered = [word.lower() for word in words]
            max_matches = 0
            for i in range(max(1, len(words) - window_size + 1)):
                window = ' '.join(lowered[i:i + window_size])
                matches = sum(1 for term in terms if term in window)
                if matches > max_matches:
                    max_matches = matches
                    best_pos = i
                    # No later window can beat one that has every term.
                    if matches == len(terms):
                        break

        snippet = ' '.join(words[best_pos:best_pos + window_size])

        if len(snippet) > max_length:
            snippet = snippet[:max_length] + "..."

        if best_pos > 0:
            snippet = "..." + snippet

        return snippet

    except Exception as e:
        print(f"Error generating snippet for doc {doc_id}: {e}")
        return "Lỗi khi tạo đoạn trích."
275
+
276
+
277
def bm25_search(ix, query_str: str, vi_stopwords: set[str] | None = None, top_k: int = 100,
                B: float = 0.75, K1: float = 1.2,
                title_boost: float = 1.5, content_boost: float = 1.0) -> Dict[str, float]:
    """Run a BM25F search over the ``title`` and ``content`` fields.

    The query is preprocessed with ``preprocess_query`` and parsed with a
    multi-field parser that ORs terms together and applies the given field
    boosts.

    Returns:
        A mapping from each hit's stored ``docid`` (as a string) to its score,
        holding at most *top_k* entries.
    """
    processed_query = preprocess_query(query_str, stopwords=vi_stopwords)

    with ix.searcher(weighting=BM25F(B=B, K1=K1)) as searcher:
        parser = MultifieldParser(
            ["title", "content"],
            schema=ix.schema,
            fieldboosts={"title": title_boost, "content": content_boost},
            group=qparser.OrGroup,
        )
        hits = searcher.search(parser.parse(processed_query), limit=top_k)
        # Build the result while the searcher is still open.
        scores = {str(hit["docid"]): float(hit.score) for hit in hits}

    return scores
305
+
306
+
307
def normalize_scores(scores: Dict[str, float]) -> Dict[str, float]:
    """Scale scores to percentages of the best score, within 0-100.

    Args:
        scores: Mapping from key to raw score.

    Returns:
        A mapping with the same keys. The highest score maps to 100 and the
        others proportionally. When the highest score is not positive every
        value is 0.0, and individual negative scores are clamped to 0.0 so the
        result never leaves the 0-100 range.
    """
    if not scores:
        return {}
    max_score = max(scores.values())
    # A zero maximum would divide by zero; a negative one would flip the
    # ordering and yield values above 100.
    if max_score <= 0:
        return {k: 0.0 for k in scores}
    return {k: max(0.0, (v / max_score) * 100) for k, v in scores.items()}
315
+
316
+
317
@app.on_event("startup")
async def startup_event():
    """Populate the module-level search state when the application starts.

    Opens the Whoosh index, reads the metadata and image CSVs, reports how
    many text files are available, and loads PageRank scores and stopwords.
    Missing resources are reported and skipped; any exception is logged and
    re-raised.
    """
    global ix, meta_df, image_df, pagerank_dict, vi_stopwords

    try:
        # Whoosh index
        if not os.path.exists(INDEX_DIR):
            print("❌ Index directory not found:", INDEX_DIR)
        else:
            ix = open_dir(INDEX_DIR)
            print("✅ Loaded Whoosh index")

        # Document metadata
        if not os.path.exists(META_CSV):
            print("❌ Metadata CSV not found:", META_CSV)
        else:
            meta_df = pd.read_csv(META_CSV)
            print(f"✅ Loaded {len(meta_df)} documents metadata")

        # Image URL table
        image_df = load_images_csv(IMAGE_CSV)

        # Cleaned text files (only counted here; they are read on demand)
        if not os.path.exists(DATA_CLEAN_DIR):
            print("❌ Data clean directory not found:", DATA_CLEAN_DIR)
        else:
            num_files = sum(
                1 for name in os.listdir(DATA_CLEAN_DIR) if name.endswith('.txt')
            )
            print(f"✅ Found {num_files} text files in {DATA_CLEAN_DIR}")

        # PageRank scores
        pagerank_dict = load_pagerank(META_CSV)
        print(f"✅ Loaded PageRank scores for {len(pagerank_dict)} documents")

        # Stopwords; fall back to an empty set when the file is absent
        if not os.path.exists(STOPWORDS_PATH):
            print("⚠️ Stopwords file not found, continuing without stopwords")
            vi_stopwords = set()
        else:
            vi_stopwords = load_stopwords(STOPWORDS_PATH)
            print(f"✅ Loaded {len(vi_stopwords)} Vietnamese stopwords")

    except Exception as e:
        print(f"❌ Error during startup: {e}")
        raise
362
+
363
+
364
@app.get("/")
async def root():
    """Health check: report status, loaded row counts and configured paths."""
    # Counts fall back to 0 while the corresponding table is not loaded.
    document_count = 0 if meta_df is None else len(meta_df)
    image_count = 0 if image_df is None else len(image_df)

    return {
        "status": "ok",
        "message": "Document Search API is running",
        "total_documents": document_count,
        "total_images": image_count,
        "data_clean_dir": DATA_CLEAN_DIR,
        "image_csv": IMAGE_CSV,
        "index_dir": INDEX_DIR,
    }
376
+
377
+
378
def _optional_field(row, column, cast):
    """Return ``cast(row[column])``, or None when the column is absent or NaN."""
    value = row.get(column)
    return cast(value) if pd.notna(value) else None


@app.post("/search", response_model=List[SearchResult])
async def search(request: SearchRequest):
    """Search documents with BM25 and return enriched, ranked results.

    Each hit is joined with its metadata row (hits without one are skipped)
    and enriched with a snippet, an image URL and a PageRank score.

    Raises:
        HTTPException: 400 for an empty query or a limit below 1, 503 when the
            index or metadata is not loaded, 500 when the search itself fails.
    """
    if not request.query.strip():
        raise HTTPException(status_code=400, detail="Query cannot be empty")

    # Whoosh rejects limits below 1; answer with a client error instead of
    # letting that surface as a 500.
    if request.limit < 1:
        raise HTTPException(status_code=400, detail="Limit must be at least 1")

    if ix is None or meta_df is None:
        raise HTTPException(status_code=503, detail="Search index not initialized")

    try:
        raw_scores = bm25_search(
            ix,
            request.query,
            vi_stopwords=vi_stopwords,
            top_k=request.limit,
            B=request.B,
            K1=request.K1,
            title_boost=request.title_boost,
            content_boost=request.content_boost
        )

        if not raw_scores:
            return []

        normalized_scores = normalize_scores(raw_scores)
        query_terms = request.query.split()

        # Cast the id column to strings once and remember the position of the
        # first metadata row of every hit, instead of re-casting and scanning
        # the whole column for each result.
        id_strings = meta_df['id'].astype(str)
        hit_mask = id_strings.isin(list(raw_scores))
        first_row = {}
        for position, key in zip(hit_mask.to_numpy().nonzero()[0], id_strings[hit_mask]):
            first_row.setdefault(key, int(position))

        results = []
        for doc_id, score in raw_scores.items():
            position = first_row.get(doc_id)
            if position is None:
                continue

            doc_info = meta_df.iloc[position]
            pr_score = pagerank_dict.get(doc_id)

            results.append(SearchResult(
                doc_id=doc_id,
                # A missing or NaN title/url must not be rendered as "nan".
                title=_optional_field(doc_info, 'title', str) or 'Untitled',
                url=_optional_field(doc_info, 'url', str) or '',
                snippet=get_snippet(doc_id, query_terms, max_length=300),
                score=round(score, 4),
                relevance_percentage=round(normalized_scores[doc_id], 2),
                image_url=get_image_url(doc_id),
                # Test for "missing" explicitly: a PageRank of 0.0 is a real
                # value and must not be reported as None.
                pagerank_score=round(float(pr_score), 6) if pd.notna(pr_score) else None,
                total_words=_optional_field(doc_info, 'total_words', int),
                unique_words=_optional_field(doc_info, 'unique_words', int),
                top_words=_optional_field(doc_info, 'top_words', str),
                top_tfidf=_optional_field(doc_info, 'top_tfidf', str),
                avg_tfidf=_optional_field(doc_info, 'avg_tfidf', lambda v: round(float(v), 6)),
                final_score=_optional_field(doc_info, 'final_score', lambda v: round(float(v), 6)),
            ))

        results.sort(key=lambda x: x.score, reverse=True)
        return results[:request.limit]

    except HTTPException:
        # Never rewrap an HTTP error raised on purpose as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Search error: {str(e)}") from e
443
+
444
+
445
@app.get("/stats")
async def get_stats():
    """Report counts and configured paths for the loaded search data.

    Raises:
        HTTPException: 503 when the metadata table has not been loaded.
    """
    if meta_df is None:
        raise HTTPException(status_code=503, detail="Index not initialized")

    # Count the .txt files currently on disk; 0 when the directory is absent.
    text_file_count = 0
    if os.path.exists(DATA_CLEAN_DIR):
        text_file_count = sum(
            1 for name in os.listdir(DATA_CLEAN_DIR) if name.endswith('.txt')
        )

    return {
        "total_documents": len(meta_df),
        "total_images": 0 if image_df is None else len(image_df),
        "text_files_available": text_file_count,
        "cached_documents": len(docs_cache),
        "pagerank_scores": len(pagerank_dict),
        "index_directory": INDEX_DIR,
        "data_clean_directory": DATA_CLEAN_DIR,
        "image_csv": IMAGE_CSV,
    }
467
+
468
+
469
@app.get("/document/{doc_id}")
async def get_document(doc_id: str):
    """Return the full text and metadata of one document.

    Raises:
        HTTPException: 503 when the metadata table has not been loaded, 404
            when no metadata row has the requested id.
    """
    if meta_df is None:
        raise HTTPException(status_code=503, detail="Index not initialized")

    doc_row = meta_df[meta_df['id'].astype(str) == doc_id]

    if doc_row.empty:
        raise HTTPException(status_code=404, detail="Document not found")

    doc_info = doc_row.iloc[0]
    content = load_document_content(doc_id)
    image_url = get_image_url(doc_id)

    def field(column, cast):
        # None when the column is absent or the cell is NaN.
        value = doc_info.get(column)
        return cast(value) if pd.notna(value) else None

    return {
        "doc_id": doc_id,
        # A missing or NaN title/url must not be rendered as "nan".
        "title": field('title', str) or 'Untitled',
        "url": field('url', str) or '',
        "content": content,
        "image_url": image_url,
        "pagerank": field('pagerank', float),
        "total_words": field('total_words', int),
        "unique_words": field('unique_words', int),
        "top_words": field('top_words', str),
        "top_tfidf": field('top_tfidf', str),
        "avg_tfidf": field('avg_tfidf', float),
        "final_score": field('final_score', float),
    }
498
+
499
+
500
if __name__ == "__main__":
    import uvicorn

    # Local entry point. The port defaults to 8000 and can be overridden with
    # the PORT environment variable (the Docker image starts uvicorn itself
    # on 7860).
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "8000")))
requirements.txt ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ alembic==1.17.2
2
+ annotated-doc==0.0.4
3
+ annotated-types==0.7.0
4
+ anyio==4.12.0
5
+ asttokens==3.0.0
6
+ attrs==25.4.0
7
+ bcrypt==5.0.0
8
+ beautifulsoup4==4.14.2
9
+ certifi==2025.10.5
10
+ cffi==2.0.0
11
+ charset-normalizer==3.4.4
12
+ click==8.3.0
13
+ colorama==0.4.6
14
+ colorlog==6.10.1
15
+ comm==0.2.3
16
+ contourpy==1.3.3
17
+ cryptography==46.0.3
18
+ cycler==0.12.1
19
+ datasketch==1.6.5
20
+ debugpy==1.8.17
21
+ decorator==5.2.1
22
+ ecdsa==0.19.1
23
+ emoji==2.15.0
24
+ executing==2.2.1
25
+ fastapi==0.128.0
26
+ filelock==3.20.0
27
+ fonttools==4.61.1
28
+ fsspec==2025.9.0
29
+ gensim==4.4.0
30
+ greenlet==3.3.0
31
+ h11==0.16.0
32
+ huggingface-hub==0.36.0
33
+ idna==3.11
34
+ ipykernel==7.0.1
35
+ ipython==9.6.0
36
+ ipython_pygments_lexers==1.1.1
37
+ jedi==0.19.2
38
+ Jinja2==3.1.6
39
+ joblib==1.5.2
40
+ jupyter_client==8.6.3
41
+ jupyter_core==5.9.1
42
+ kiwisolver==1.4.9
43
+ Mako==1.3.10
44
+ MarkupSafe==3.0.3
45
+ matplotlib==3.10.8
46
+ matplotlib-inline==0.1.7
47
+ mpmath==1.3.0
48
+ nest-asyncio==1.6.0
49
+ networkx==3.5
50
+ nltk==3.9.2
51
+ numpy==2.3.4
52
+ optuna==4.6.0
53
+ outcome==1.3.0.post0
54
+ packaging==25.0
55
+ pandas==2.3.3
56
+ parso==0.8.5
57
+ passlib==1.7.4
58
+ pillow==12.0.0
59
+ platformdirs==4.5.0
60
+ prompt_toolkit==3.0.52
61
+ protobuf==6.33.0
62
+ psutil==7.1.1
63
+ psycopg2-binary==2.9.11
64
+ pure_eval==0.2.3
65
+ py_vncorenlp==0.1.4
66
+ pyasn1==0.6.1
67
+ pycparser==2.23
68
+ pydantic==2.12.5
69
+ pydantic_core==2.41.5
70
+ Pygments==2.19.2
71
+ pyjnius==1.7.0
72
+ pyparsing==3.2.5
73
+ PySocks==1.7.1
74
+ python-crfsuite==0.9.11
75
+ python-dateutil==2.9.0.post0
76
+ python-dotenv==1.2.1
77
+ python-jose==3.5.0
78
+ pytrec_eval-terrier==0.5.10
79
+ pytz==2025.2
80
+ pyvi==0.1.1
81
+ PyYAML==6.0.3
82
+ pyzmq==27.1.0
83
+ regex==2025.10.23
84
+ requests==2.32.5
85
+ rsa==4.9.1
86
+ safetensors==0.7.0
87
+ scikit-learn==1.7.2
88
+ scipy==1.16.2
89
+ selenium==4.37.0
90
+ sentence-transformers==5.2.0
91
+ setuptools==80.9.0
92
+ six==1.17.0
93
+ sklearn-crfsuite==0.5.0
94
+ smart_open==7.5.0
95
+ sniffio==1.3.1
96
+ sortedcontainers==2.4.0
97
+ soupsieve==2.8
98
+ SQLAlchemy==2.0.45
99
+ stack-data==0.6.3
100
+ stanza==1.11.0
101
+ starlette==0.50.0
102
+ sympy==1.14.0
103
+ tabulate==0.9.0
104
+ threadpoolctl==3.6.0
105
+ tokenizers==0.22.1
106
+ torch==2.9.0
107
+ tornado==6.5.2
108
+ tqdm==4.67.1
109
+ traitlets==5.14.3
110
+ transformers==4.57.3
111
+ trio==0.31.0
112
+ trio-websocket==0.12.2
113
+ typing-inspection==0.4.2
114
+ typing_extensions==4.15.0
115
+ tzdata==2025.2
116
+ urllib3==2.5.0
117
+ uvicorn==0.40.0
118
+ wcwidth==0.2.14
119
+ websocket-client==1.9.0
120
+ Whoosh==2.7.4
121
+ wordcloud==1.9.5
122
+ wrapt==2.0.1
123
+ wsproto==1.2.0
retrieval/data/0.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/1.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/10.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/100.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/101.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/102.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/103.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/104.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/105.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/106.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/107.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/108.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/109.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/11.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/110.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/111.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/112.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/113.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/114.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/115.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/116.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/117.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/118.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/119.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/12.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/120.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/121.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/122.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/123.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/124.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/125.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/126.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/127.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/128.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/129.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/13.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/130.html ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ <html lang="en-US" dir="ltr"><head><title>Just a moment...</title><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=Edge"><meta name="robots" content="noindex,nofollow"><meta name="viewport" content="width=device-width,initial-scale=1"><style>*{box-sizing:border-box;margin:0;padding:0}html{line-height:1.15;-webkit-text-size-adjust:100%;color:#313131;font-family:system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji"}body{display:flex;flex-direction:column;height:100vh;min-height:100vh}.main-content{margin:8rem auto;padding-left:1.5rem;max-width:60rem}@media (width <= 720px){.main-content{margin-top:4rem}}.h2{line-height:2.25rem;font-size:1.5rem;font-weight:500}@media (width <= 720px){.h2{line-height:1.5rem;font-size:1.25rem}}#challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI0IyMEYwMyIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjQjIwRjAzIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+");background-repeat:no-repeat;background-size:contain;padding-left:34px}@media (prefers-color-scheme: dark){body{background-color:#222;color:#d9d9d9}}</style><meta http-equiv="refresh" content="360"><script 
src="/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9b19c72b8f6c85c1"></script><style>*{box-sizing:border-box;margin:0;padding:0}html{line-height:1.15;-webkit-text-size-adjust:100%;color:#313131;font-family:system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji"}button{font-family:system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji"}body{display:flex;flex-direction:column;height:100vh;min-height:100vh}body.theme-dark{background-color:#222;color:#d9d9d9}body.theme-dark a{color:#fff}body.theme-dark a:hover{text-decoration:underline;color:#ee730a}body.theme-dark .lds-ring div{border-color:#999 rgba(0,0,0,0) rgba(0,0,0,0)}body.theme-dark .font-red{color:#b20f03}body.theme-dark .ctp-button{background-color:#4693ff;color:#1d1d1d}body.theme-dark #challenge-success-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSIgdmlld0JveD0iMCAwIDI2IDI2Ij48cGF0aCBmaWxsPSIjZDlkOWQ5IiBkPSJNMTMgMGExMyAxMyAwIDEgMCAwIDI2IDEzIDEzIDAgMCAwIDAtMjZtMCAyNGExMSAxMSAwIDEgMSAwLTIyIDExIDExIDAgMCAxIDAgMjIiLz48cGF0aCBmaWxsPSIjZDlkOWQ5IiBkPSJtMTAuOTU1IDE2LjA1NS0zLjk1LTQuMTI1LTEuNDQ1IDEuMzg1IDUuMzcgNS42MSA5LjQ5NS05LjYtMS40Mi0xLjQwNXoiLz48L3N2Zz4")}body.theme-dark 
#challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI0IyMEYwMyIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjQjIwRjAzIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+")}body.theme-light{background-color:#fff;color:#313131}body.theme-light a{color:#0051c3}body.theme-light a:hover{text-decoration:underline;color:#ee730a}body.theme-light .lds-ring div{border-color:#595959 rgba(0,0,0,0) rgba(0,0,0,0)}body.theme-light .font-red{color:#fc574a}body.theme-light .ctp-button{border-color:#003681;background-color:#003681;color:#fff}body.theme-light #challenge-success-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSIgdmlld0JveD0iMCAwIDI2IDI2Ij48cGF0aCBmaWxsPSIjMzEzMTMxIiBkPSJNMTMgMGExMyAxMyAwIDEgMCAwIDI2IDEzIDEzIDAgMCAwIDAtMjZtMCAyNGExMSAxMSAwIDEgMSAwLTIyIDExIDExIDAgMCAxIDAgMjIiLz48cGF0aCBmaWxsPSIjMzEzMTMxIiBkPSJtMTAuOTU1IDE2LjA1NS0zLjk1LTQuMTI1LTEuNDQ1IDEuMzg1IDUuMzcgNS42MSA5LjQ5NS05LjYtMS40Mi0xLjQwNXoiLz48L3N2Zz4=")}body.theme-light 
#challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI2ZjNTc0YSIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjZmM1NzRhIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+")}a{transition:color 150ms ease;background-color:rgba(0,0,0,0);text-decoration:none;color:#0051c3}a:hover{text-decoration:underline;color:#ee730a}.main-content{margin:8rem auto;padding-right:1.5rem;padding-left:1.5rem;width:100%;max-width:60rem}.main-content .loading-verifying{height:76.391px}.spacer{margin:2rem 0}.spacer-top{margin-top:4rem}.spacer-bottom{margin-bottom:2rem}.heading-favicon{margin-right:.5rem;width:2rem;height:2rem}@media (width <= 720px){.main-content{margin-top:4rem}.heading-favicon{width:1.5rem;height:1.5rem}}.main-wrapper{display:flex;flex:1;flex-direction:column;align-items:center}.font-red{color:#b20f03}.h1{line-height:3.75rem;font-size:2.5rem;font-weight:500}.h2{line-height:2.25rem;font-size:1.5rem;font-weight:500}.core-msg{line-height:2.25rem;font-size:1.5rem;font-weight:400}.body-text{line-height:1.25rem;font-size:1rem;font-weight:400}@media (width <= 
720px){.h1{line-height:1.75rem;font-size:1.5rem}.h2{line-height:1.5rem;font-size:1.25rem}.core-msg{line-height:1.5rem;font-size:1rem}}#challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI2ZjNTc0YSIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjZmM1NzRhIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+");background-repeat:no-repeat;background-size:contain;padding-left:34px}#challenge-success-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSIgdmlld0JveD0iMCAwIDI2IDI2Ij48cGF0aCBmaWxsPSIjMzEzMTMxIiBkPSJNMTMgMGExMyAxMyAwIDEgMCAwIDI2IDEzIDEzIDAgMCAwIDAtMjZtMCAyNGExMSAxMSAwIDEgMSAwLTIyIDExIDExIDAgMCAxIDAgMjIiLz48cGF0aCBmaWxsPSIjMzEzMTMxIiBkPSJtMTAuOTU1IDE2LjA1NS0zLjk1LTQuMTI1LTEuNDQ1IDEuMzg1IDUuMzcgNS42MSA5LjQ5NS05LjYtMS40Mi0xLjQwNXoiLz48L3N2Zz4=");background-repeat:no-repeat;background-size:contain;padding-left:42px}.text-center{text-align:center}.ctp-button{transition-duration:200ms;transition-property:background-color,border-color,color;transition-timing-function:ease;margin:2rem 0;border:.063rem solid #0051c3;border-radius:.313rem;background-color:#0051c3;cursor:pointer;padding:.375rem 1rem;line-height:1.313rem;color:#fff;font-size:.875rem}.ctp-button:hover{border-color:#003681;background-color:#003681;cursor:pointer;color:#fff}.footer{margin:0 auto;padding-right:1.5rem;padding-left:1.5rem;width:100%;max-width:60rem;line-height:1.125rem;font-size:.75rem}.footer-inner{border-top:1px solid 
#d9d9d9;padding-top:1rem;padding-bottom:1rem}.clearfix::after{display:table;clear:both;content:""}.clearfix .column{float:left;padding-right:1.5rem;width:50%}.diagnostic-wrapper{margin-bottom:.5rem}.footer .ray-id{text-align:center}.footer .ray-id code{font-family:monaco,courier,monospace}.core-msg,.zone-name-title{overflow-wrap:break-word}@media (width <= 720px){.diagnostic-wrapper{display:flex;flex-wrap:wrap;justify-content:center}.clearfix::after{display:initial;clear:none;text-align:center;content:none}.column{padding-bottom:2rem}.clearfix .column{float:none;padding:0;width:auto;word-break:keep-all}.zone-name-title{margin-bottom:1rem}}.loading-verifying{height:76.391px}.lds-ring{display:inline-block;position:relative;width:1.875rem;height:1.875rem}.lds-ring div{box-sizing:border-box;display:block;position:absolute;border:.3rem solid #595959;border-radius:50%;border-color:#313131 rgba(0,0,0,0) rgba(0,0,0,0);width:1.875rem;height:1.875rem;animation:lds-ring 1.2s cubic-bezier(.5, 0, .5, 1) infinite}.lds-ring div:nth-child(1){animation-delay:-.45s}.lds-ring div:nth-child(2){animation-delay:-.3s}.lds-ring div:nth-child(3){animation-delay:-.15s}@keyframes lds-ring{0%{transform:rotate(0deg)}100%{transform:rotate(360deg)}}.rtl .heading-favicon{margin-right:0;margin-left:.5rem}.rtl #challenge-success-text{background-position:right;padding-right:42px;padding-left:0}.rtl #challenge-error-text{background-position:right;padding-right:34px;padding-left:0}.challenge-content .loading-verifying{height:76.391px}@media (prefers-color-scheme: dark){body{background-color:#222;color:#d9d9d9}body a{color:#fff}body a:hover{text-decoration:underline;color:#ee730a}body .lds-ring div{border-color:#999 rgba(0,0,0,0) rgba(0,0,0,0)}body .font-red{color:#b20f03}body .ctp-button{background-color:#4693ff;color:#1d1d1d}body 
#challenge-success-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSIgdmlld0JveD0iMCAwIDI2IDI2Ij48cGF0aCBmaWxsPSIjZDlkOWQ5IiBkPSJNMTMgMGExMyAxMyAwIDEgMCAwIDI2IDEzIDEzIDAgMCAwIDAtMjZtMCAyNGExMSAxMSAwIDEgMSAwLTIyIDExIDExIDAgMCAxIDAgMjIiLz48cGF0aCBmaWxsPSIjZDlkOWQ5IiBkPSJtMTAuOTU1IDE2LjA1NS0zLjk1LTQuMTI1LTEuNDQ1IDEuMzg1IDUuMzcgNS42MSA5LjQ5NS05LjYtMS40Mi0xLjQwNXoiLz48L3N2Zz4")}body #challenge-error-text{background-image:url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgZmlsbD0ibm9uZSI+PHBhdGggZmlsbD0iI0IyMEYwMyIgZD0iTTE2IDNhMTMgMTMgMCAxIDAgMTMgMTNBMTMuMDE1IDEzLjAxNSAwIDAgMCAxNiAzbTAgMjRhMTEgMTEgMCAxIDEgMTEtMTEgMTEuMDEgMTEuMDEgMCAwIDEtMTEgMTEiLz48cGF0aCBmaWxsPSIjQjIwRjAzIiBkPSJNMTcuMDM4IDE4LjYxNUgxNC44N0wxNC41NjMgOS41aDIuNzgzem0tMS4wODQgMS40MjdxLjY2IDAgMS4wNTcuMzg4LjQwNy4zODkuNDA3Ljk5NCAwIC41OTYtLjQwNy45ODQtLjM5Ny4zOS0xLjA1Ny4zODktLjY1IDAtMS4wNTYtLjM4OS0uMzk4LS4zODktLjM5OC0uOTg0IDAtLjU5Ny4zOTgtLjk4NS40MDYtLjM5NyAxLjA1Ni0uMzk3Ii8+PC9zdmc+")}}</style><script src="https://challenges.cloudflare.com/turnstile/v0/g/d39f91d70ce1/api.js?onload=REiSI4&amp;render=explicit" async="" defer="" crossorigin="anonymous"></script></head><body><div class="main-wrapper" role="main"><div class="main-content"><h1 class="zone-name-title h1">vinpearl.com</h1><p id="zutqc7" class="h2 spacer-bottom">Verifying you are human. 
This may take a few seconds.</p><div id="SoGDz7" style="display: grid;"><div><div><input type="hidden" name="cf-turnstile-response" id="cf-chl-widget-bzbso_response"></div></div></div><div id="UfiHG7" class="spacer loading-verifying" style="display: none; visibility: hidden;"><div class="lds-ring"><div></div><div></div><div></div><div></div></div></div><div id="NGtV8" class="core-msg spacer spacer-top">vinpearl.com needs to review the security of your connection before proceeding.</div><div id="TuMjR6" style="display: none;"><div id="challenge-success-text" class="h2">Verification successful</div><div class="core-msg spacer">Waiting for vinpearl.com to respond...</div></div><noscript><div class="h2"><span id="challenge-error-text">Enable JavaScript and cookies to continue</span></div></noscript></div></div><script>(function(){window._cf_chl_opt = {cvId: '3',cZone: 'vinpearl.com',cType: 'managed',cRay: '9b19c72b8f6c85c1',cH: 'iEdYnWyAIF4BRxyi.ybPoMX_JuiV3juldoI_TG7Tuyo-1766345258-1.2.1.1-iE2GMa9ivjpmYJWkY.pu7plkbUsnhanuJrG0U_sXNDM0WxbjaGNa0stwmasxRmkh',cUPMDTk:"\/vi\/du-lich-pho-co-hoi-an-review-day-du-va-chi-tiet-tu-a-z?__cf_chl_tk=zhEwxnR6r4Ghsmx2r.3u2Sm7y9dL5PVIHGhqIf0MWEs-1766345258-1.0.1.1-QM9oIdMWPcM9mJfu5GJYeOQMAJa8_vyLl02TYuikXy4",cFPWv: 'g',cITimeS: '1766345258',cTplC:0,cTplV:5,cTplB: '0',fa:"\/vi\/du-lich-pho-co-hoi-an-review-day-du-va-chi-tiet-tu-a-z?__cf_chl_f_tk=zhEwxnR6r4Ghsmx2r.3u2Sm7y9dL5PVIHGhqIf0MWEs-1766345258-1.0.1.1-QM9oIdMWPcM9mJfu5GJYeOQMAJa8_vyLl02TYuikXy4",md: 
'VSJHYv7_XFPKYpMzf5YBhtYFrxnMZYc2gLJHnf_cGRc-1766345258-1.2.1.1-zQglMRnSnLs9OofJjNR7Ajg9D1GSO_TS2RVDA7LOxxJehfc2lMG1fg0SLJap._RHREs9Wd65bBk0oOpcFp3wayAP3HpEvolccYlmYgYq_Z7EUdu7gQJBFJdAN.LjBQfaqsBA3JKOSqvN2KIkUzlwDfbLiJi4hr6akKvC1137Nvw4KsJo9JUDeOLFdY9Amzpmu_JRQTe821AMgonXkDaBptEA8OqG8bA4rbFV0gEhpaxkCUmOSXfjbk316Lnn6_wOLKrwNTj5EE65UTpJD3tIyQ4TWaBxLWKWWmokedfIp9ebVUjiOwmoXT79J0tVkLzazfXzVSDp8dHvO3gnun3GKM6HEVAr.DJrkVeJX62aO4FVNsrmrlcc8Z6CIOihz6w9y6Oq9fHNu.V3X9IDd.XTBEAluWOf8ko.QROsBCc.d3L2zgVFqTKCqGX3D2sv8S9yEabTB7.2PlVVeYZebSczPnYY2mgoXfQ62MSCVOSswx1toTyVYieUKFBoYjyAJ8M33gOqG1s38lwkajtyZci5ccxU8KtMcOXhYNUK9ALWgBotG807OqW9RflSwv.TeFHEvDTMQ87SmutmS_1B64qniQ16VhiQQHZacrx.STCsx114piAtPX1jE7mJr.6ot_oBR9EMYWk5091KGoZb1DGIucjuRbw0hnM8qSS0XYvAJxedKMaPFqurdRl9_XP53n4xYo75pk95JYi1MA7JjebIqqXW4vZ8bZs1uV6GYFTJRN_acEe2lXbtOtCFuvCvpBRMrD964iqXMdzKL9171WUd1Tbts5hwwNN9ooK1uP8tvrPictcpx60STN7CYY6Rv6FkbepMHWaKFjY6LdaqBcPJBKaGbX_bS21iX24E0VfyYjw7CLfoTwSUG1ugbmFF6zVtYyR0xaX5zGjVYSIeAjHzVIImOfSI0DPcg.RJIFX5PTOgmkcmLjNPSy9K0Wzkq1aZJi7liRfT0g5yJAoMSIl7kqRCcskhsfhWFwNU3Ivnkf2KrtcwR416IuKCn.2Tg4vqn01fEkqiw61ZemVIbEq2mNnCyIGhcIzj_Whyh1JMhViQZ_YCkroLiNj2vHYfv2bCxOAIQ_OCZVBcrnK82kEglLcm5AeW76_7qHYP5qqcASB4uYlmF.2dts6cQ72mwIEECKVc4MsA5G1dO8cH26mwgPjlZKgNXBPIInmZb69fSzk',mdrd: 
'qLUfM4KoVZQYNY1ga8ucVMMijBfoxxI3wrEGbsMOj2o-1766345258-1.2.1.1-08vC7l0z1V.bDD7ut0KojSrJ.qzo5l.ZjjzjrYKZivQUPRyh8g.E_OhjM5IOdTX.b3Y7yfYyAd_6S5jAZZeNTIigETrFkia12ugkYKlOulEjmsH29Uo41EYN4HMrShgUbA89QovTGvmgkHHgJZ30QYzS.1XpXBaRW2gInqp4s2i2x2XEeZ19UHcXFx2stLooWeEK.nhVG6dkjTn_Xxc4SNSMYx.2C.sZ8T_0yKez.8lE9AZ1Q.xaU_DpgS_pvRpO3l_ojWgNY0BVFciTwCqb.Ii4l7BrSBV6KwGJfu9zRG_cDDHU4m_ETmICSjegBINNKk1WVH32AcGNOmUdbEky6_nmxRwFdGw0FYTAJm_ZLqdqIr5.622al1KOqXlTHkkH5QUF_VUE86MkOZLzm9HOuyyhxSaxY47mGmzInUCcchgfz8XAHMLRryb89roXO7DExAznK36QjZk1HUjpIOUpYwoioG_faRiOM8uawZfIyibuSmQwJucuBdmxmfFxSCTiIwyT4qMxD3CYizM9bhBiaeA1.ZemUqv5ab86hy9Eq5j.UaE8GQTE8.qDAwJqJY3CPRqJjGKJnDQkNfXu8rw0icNbJY01OEj7zTTWWDl_j35XkKnYooUUh7X0Y5qovsnSWclRLgArL1pMoKsOxWjPrIMCJtQRXufJJGsrPSZv.j3rIjkYCeWR0Q5IdLh4LEH72cpExcnRJHkS1eG07ZHzpBnRRSZLcrDftmLPDw9oWFgwfZh1HZHE0BUxfFv92JGsW9f32wgTy4OebZLBYgF5KIIeo1T8_ZOz1IldDciQpIWseOVQGsOQRqlDEbHz6D3xGxJRcYQ0K8asJYaCrtmBbIaYimJvufK79pV44UpfFGrmrwj65s6byB0E9KT3FHik.ecwrBFs7Qv4COlns03Jy4rJaTia_wcihGj18pNN0nE3a7LdV.4GQ5VYJQ0oFMQE.5JVoQJVuYHVX214n7scwydI5I15GFUZOOUnYfsiHnyfwVTmvyOWetTz0__O.out7s_BlKbrrCEJNa10LLk2Xu_LfZ_.K4QBNBar0zyDY3nfvYibKTzDS1XAAl0mubKqSeGX_pSR7sEFVLtGvmAT5UH0Ol3SxZj32c6wqseB8lErdx8h0ChOlSywkv8hYGQ0PgFJc0wDcBNh2I9farUdZyMsmaJwAxDHE3gANc0DNwrVaJMfLcwPWhKLD0lMJ7lMJCXF49xs2va.74lahPJMjNdekxvPy3XIfC4.ducqU9kGUEd2KXRMAUbC2q0FpuN0qleJK4XDe7mSOGFNKxFzgOSqtFIA1bjOagpRSRq_k7CpQPeAxbyymY.IZrAEnEYEgA9Sd3gloz7zdL67QtlOCkzuRsUomWDdnjhMrLhOqiIENa9zu8lZjRtK4gcXx72n6V8YDeCkmq8CCp5ldY8s0RqeoHVKSpGwzrcLCrwf1kw6611GojhPjGqu9h7vZyLTrqUAFz6i_zsZ4SZVTYzmd9ewLwyByvr8dtRcdb2NXMGyfIFKXaTLzlxASkeEdmVW7PsLYFTDRW8_HduO8KUvAi.qXb4HrrlVKoR5AS9We8LAk2YEAawH6SosGfktztu0DtVZDsGY561IveChSfILYFxuUcTCogcP2TExZNlCrpjq7JFeAChrK3lk.5tuL1Np8CUznXq4RG84OPpduJtLmDNS0DCb9KG2ol31ik04Ntw8TiUO2NJLgn7EtBhkSjbl8vtNgwC_G49tZcs2IQI8fs_F.T26Nvt7XG3NbzH_eucovsfecbbncNwy6F0p4q6xGoXAOrzooaaw8Z0lWv6qguNjxsIEoTw.O96ovWcIvzLt_Qek.OIEu5FnOXtMTPiz24sKxzVa8KXKdirUy3nJP5HlF_yb64FDg7Nj.g7uT5BxxV1JJGWr8qTHCl0_Q8vBokIO4JoynszsyJJX
9wlvWSSuh2MVZurb5umorBFLprZhCcXyfJhjK32.3.xPfOxUrcRJVLl2kFGKDRF4TEfyZFXotqXabKxh5XIvgotZDsvKR0jjasdpCP2bL8KTmyZxhW3muCGtDcYxkXEIkcKEzXRbRlvtKjkv_MdJHsqwkXFevpBq3GD6ms.cYMbZxpYWnfJeWygpJ_6gXnAgyCf7pB_PWk1JhgPw9Taaga6dLNhK9q4NS3RY1hVcV.051hUqSWTAzN_hV1t6s6kI6ob7nPyw.hYIk6UUHP0L5b5RW57PASeK8JzM6awDc2Ak07XhqnIqOmkJixx6ORd22fymO2Eo_jOLU4e4jrQrrG.oEuD1aB6VmZ.8cYMnT2HCjLgF98JdADQL9sB6lzlIIcLvm5wEdU2.bAaeSAvPUCCDS27BcZbwKYkoasyGJLWthNjzM9tZ1DhPD_w4PDdXqWZSWw7nAVxRSNTb9FHIFs5z1OKf39Aq7BJ6hkGJlfPdr22seIRCJsPRWBRqfi0lUvHV0HtcxbY59_hXC_6Hm1SSEovfyS.GziU5C3JVmNpWTCaSRyqsKhYwLa0KRfjhaVRYK6GcgSlP6DKUBfwBaFeNelQY7R5jHJdlbsGpL9Y5k6bPXibM.c4zVenOTcM1pUZDQ23luTZ_fbrrDm.ux8EgSnrCewOW4ic_LeCFIPK03RyaZ1K_kbjb.vcb5M6v9vgSHMuSUZyWw38WKdjPs6XoY_g',};var a = document.createElement('script');a.src = '/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9b19c72b8f6c85c1';window._cf_chl_opt.cOgUHash = location.hash === '' && location.href.indexOf('#') !== -1 ? '#' : location.hash;window._cf_chl_opt.cOgUQuery = location.search === '' && location.href.slice(0, location.href.length - window._cf_chl_opt.cOgUHash.length).indexOf('?') !== -1 ? '?' 
: location.search;if (window.history && window.history.replaceState) {var ogU = location.pathname + window._cf_chl_opt.cOgUQuery + window._cf_chl_opt.cOgUHash;history.replaceState(null, null,"\/vi\/du-lich-pho-co-hoi-an-review-day-du-va-chi-tiet-tu-a-z?__cf_chl_rt_tk=zhEwxnR6r4Ghsmx2r.3u2Sm7y9dL5PVIHGhqIf0MWEs-1766345258-1.0.1.1-QM9oIdMWPcM9mJfu5GJYeOQMAJa8_vyLl02TYuikXy4"+ window._cf_chl_opt.cOgUHash);a.onload = function() {history.replaceState(null, null, ogU);}}document.getElementsByTagName('head')[0].appendChild(a);}());</script><script defer="" src="https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015" integrity="sha512-ZpsOmlRQV6y907TI0dKBHq9Md29nnaEIPlkf84rnaERnq6zvWvPUqr2ft8M1aS28oN72PdrCzSjY4U6VaAw1EQ==" data-cf-beacon="{&quot;rayId&quot;:&quot;9b19c72b8f6c85c1&quot;,&quot;version&quot;:&quot;2025.9.1&quot;,&quot;serverTiming&quot;:{&quot;name&quot;:{&quot;cfExtPri&quot;:true,&quot;cfEdge&quot;:true,&quot;cfOrigin&quot;:true,&quot;cfL4&quot;:true,&quot;cfSpeedBrain&quot;:true,&quot;cfCacheStatus&quot;:true}},&quot;token&quot;:&quot;65fee8db7c864d448cf726041d01eb37&quot;,&quot;b&quot;:1}" crossorigin="anonymous"></script>
2
+ <div class="footer" role="contentinfo"><div class="footer-inner"><div class="clearfix diagnostic-wrapper"><div class="ray-id">Ray ID: <code>9b19c72b8f6c85c1</code></div></div><div class="text-center" id="footer-text">Performance &amp; security by <a rel="noopener noreferrer" href="https://www.cloudflare.com?utm_source=challenge&amp;utm_campaign=m" target="_blank">Cloudflare</a></div></div></div></body></html>
retrieval/data/131.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/132.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/133.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/134.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/135.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/136.html ADDED
The diff for this file is too large to render. See raw diff
 
retrieval/data/137.html ADDED
The diff for this file is too large to render. See raw diff