DIVYA-NSHU99 committed
Commit 049099a · 1 Parent(s): de1718e

Initial deployment

Dockerfile ADDED
@@ -0,0 +1,33 @@
+ # Use official Python slim image
+ FROM python:3.10-slim
+
+ # ---- Set cache directories FIRST ----
+ # All model downloads will go to /tmp/.cache (writable runtime disk)
+ ENV HF_HOME=/tmp/.cache/huggingface \
+     TRANSFORMERS_CACHE=/tmp/.cache/huggingface \
+     SENTENCE_TRANSFORMERS_HOME=/tmp/.cache/sentence-transformers \
+     SPACY_DATA=/tmp/.cache/spacy \
+     NLTK_DATA=/tmp/.cache/nltk
+
+ # ---- Install system dependencies (if any) ----
+ # (none required for this project, but keep if needed)
+
+ # ---- Set working directory ----
+ WORKDIR /app
+
+ # ---- Copy requirements first (for Docker layer caching) ----
+ COPY requirements.txt .
+
+ # ---- Install Python dependencies ----
+ # This does NOT download any models (spacy model is NOT downloaded here)
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # ---- Copy the rest of the application ----
+ COPY . .
+
+ # ---- Expose the port Hugging Face Spaces expects ----
+ EXPOSE 7860
+
+ # ---- Start the FastAPI server ----
+ # Models will be downloaded automatically on first request to /tmp/.cache
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
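For local runs outside Docker, the same cache redirection can be reproduced in Python before any model library is imported. A minimal sketch mirroring the ENV block above (the paths are the ones the Dockerfile sets):

import os

# Mirror the Dockerfile ENV block so downloads land on a writable disk.
# setdefault keeps any values already exported in the environment.
for var, path in {
    "HF_HOME": "/tmp/.cache/huggingface",
    "TRANSFORMERS_CACHE": "/tmp/.cache/huggingface",
    "SENTENCE_TRANSFORMERS_HOME": "/tmp/.cache/sentence-transformers",
    "SPACY_DATA": "/tmp/.cache/spacy",
    "NLTK_DATA": "/tmp/.cache/nltk",
}.items():
    os.environ.setdefault(var, path)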
app/__init__.py ADDED
File without changes
app/data/descriptive_keywords.json ADDED
File without changes
app/main.py ADDED
@@ -0,0 +1,54 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from typing import Optional
+ import os
+
+ # Ensure models are cached on the writable runtime disk (matches the Dockerfile ENV)
+ os.environ.setdefault("HF_HOME", "/tmp/.cache/huggingface")
+
+ # Import your analyzer
+ from app.src.main import TrademarkAnalyzer
+
+ app = FastAPI(title="Trademark Descriptiveness API")
+
+ # Check that the data file exists (optional, but helpful)
+ data_path = "app/data/descriptive_keywords.json"
+ if not os.path.exists(data_path):
+     print(f"Warning: Data file not found at {data_path}. Keyword overlap will be disabled.")
+
+ # Initialize analyzer
+ analyzer = TrademarkAnalyzer(descriptive_keywords_path=data_path)
+
+ class AnalyzeRequest(BaseModel):
+     mark: str
+     goods: str
+     goods_class: Optional[str] = None
+
+ class AnalyzeResponse(BaseModel):
+     descriptive_score: float
+     generic_score: float
+     reasons: list[str]
+     explanation: str
+     details: dict
+
+ @app.get("/")
+ def read_root():
+     return {"message": "Trademark API is running"}
+
+ @app.get("/health")
+ def health_check():
+     return {"status": "ok"}
+
+ @app.post("/analyze", response_model=AnalyzeResponse)
+ def analyze(request: AnalyzeRequest):
+     try:
+         result = analyzer.analyze(
+             mark=request.mark,
+             goods=request.goods,
+             goods_class=request.goods_class
+         )
+         return AnalyzeResponse(**result)
+     except Exception as e:
+         # Log the error (optional)
+         print(f"Error during analysis: {e}")
+         raise HTTPException(status_code=500, detail=str(e))
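Once the container is up, the /analyze endpoint can be smoke-tested with a short client script. A sketch assuming the server is reachable on localhost:7860 and that requests is installed (it is not in requirements.txt); the mark/goods values are invented:

import requests

payload = {
    "mark": "Creamy Delight",                    # hypothetical trademark
    "goods": "Ice cream and frozen desserts.",   # hypothetical goods description
    "goods_class": "30",
}
# Generous timeout: the first request triggers the model downloads.
resp = requests.post("http://localhost:7860/analyze", json=payload, timeout=300)
resp.raise_for_status()
body = resp.json()
print(body["descriptive_score"], body["explanation"])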
app/src/__init__.py ADDED
File without changes
app/src/cross_encoder.py ADDED
@@ -0,0 +1,54 @@
+ from sentence_transformers import CrossEncoder
+ import nltk
+ from nltk import sent_tokenize
+ import numpy as np
+
+ # sent_tokenize needs the NLTK "punkt" data; fetch it on first import if missing
+ try:
+     nltk.data.find('tokenizers/punkt')
+ except LookupError:
+     nltk.download('punkt', quiet=True)
+
+ class CrossEncoderSimilarity:
+     """
+     Uses a cross-encoder to compute deep semantic similarity between mark and goods.
+     Supports sentence-level segmentation and reports the most relevant segment for explainability.
+     """
+
+     def __init__(self, model_name='cross-encoder/stsb-roberta-large'):
+         self.model = CrossEncoder(model_name, num_labels=1)  # regression output
+         # We'll store the last attention scores if needed (for explainability)
+         self.last_attention = None
+
+     def similarity(self, mark, goods, return_segments=False):
+         """
+         Returns a score between 0 and 1. If return_segments=True, also returns
+         the maximum segment score and the segment text.
+         """
+         if not goods:
+             return 0.0 if not return_segments else (0.0, None)
+         sentences = sent_tokenize(goods)
+         if not sentences:
+             return 0.0 if not return_segments else (0.0, None)
+
+         pairs = [(mark, sent) for sent in sentences]
+         scores = self.model.predict(pairs)
+         # stsb cross-encoders already output scores in roughly the 0-1 range;
+         # clip to [0, 1]. If using a different model, adjust normalization accordingly.
+         scores_norm = [float(min(1.0, max(0.0, s))) for s in scores]
+         max_score = max(scores_norm)
+         max_idx = int(np.argmax(scores_norm))
+
+         if return_segments:
+             return max_score, sentences[max_idx]
+         return max_score
+
+     def similarity_with_explanation(self, mark, goods):
+         """
+         Returns score and the most relevant sentence from goods, plus optionally attention.
+         For attention, we'd need a model that returns cross-attention; not all do.
+         This method provides a simple explanation.
+         """
+         max_score, best_sentence = self.similarity(mark, goods, return_segments=True)
+         explanation = f"Highest similarity with segment: '{best_sentence}' (score: {max_score:.2f})"
+         return max_score, explanation
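In isolation the class can be exercised as below. A sketch only (stsb-roberta-large is a large download on first use); the example strings are invented:

from app.src.cross_encoder import CrossEncoderSimilarity

ce = CrossEncoderSimilarity()
score, explanation = ce.similarity_with_explanation(
    "Creamy Delight",                                  # hypothetical mark
    "Ice cream. Frozen yogurt and dessert toppings.",  # hypothetical goods text
)
# The explanation names whichever sentence of the goods text scored highest.
print(f"{score:.2f} -> {explanation}")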
app/src/embeddings.py ADDED
@@ -0,0 +1,78 @@
+ from sentence_transformers import SentenceTransformer, util
+ import nltk
+ from nltk import sent_tokenize
+
+ # sent_tokenize needs the NLTK "punkt" data; fetch it on first import if missing
+ try:
+     nltk.data.find('tokenizers/punkt')
+ except LookupError:
+     nltk.download('punkt', quiet=True)
+
+ class EmbeddingSimilarity:
+     """
+     Uses a sentence-transformer model to compute semantic similarity between
+     the mark and a list of descriptive terms, and also between mark and goods.
+     """
+
+     def __init__(self, model_name='all-MiniLM-L6-v2'):
+         self.model = SentenceTransformer(model_name)
+         # Cache for pre-computed class centroids (optional)
+         self.class_centroids = {}
+
+     def encode(self, text):
+         """Return embedding for a single text."""
+         return self.model.encode(text, convert_to_tensor=True)
+
+     def similarity(self, emb1, emb2):
+         """Cosine similarity between two embeddings."""
+         return float(util.cos_sim(emb1, emb2)[0][0])
+
+     def max_similarity_to_terms(self, mark, descriptive_terms):
+         """
+         Compute the maximum cosine similarity between the mark embedding
+         and each individual descriptive term's embedding.
+         """
+         if not descriptive_terms:
+             return 0.0
+         mark_emb = self.encode(mark)
+         term_embs = self.encode(descriptive_terms)
+         sims = util.cos_sim(mark_emb, term_embs)[0]
+         return float(sims.max())
+
+     def similarity_to_class_centroid(self, mark, class_terms):
+         """
+         Pre-compute centroid for a class (average of all term embeddings)
+         and compare mark against it. (Useful for speed when class_terms are static.)
+         """
+         if not class_terms:
+             return 0.0
+         # For simplicity, compute on the fly; cache in self.class_centroids if needed.
+         term_embs = self.encode(class_terms)
+         centroid = term_embs.mean(dim=0)  # term_embs is a torch tensor
+         mark_emb = self.encode(mark)
+         return self.similarity(mark_emb, centroid)
+
+     def similarity_to_goods(self, mark, goods):
+         """
+         Compute similarity between mark and goods using the bi-encoder.
+         This is a fast alternative to the cross-encoder.
+         """
+         if not goods:
+             return 0.0
+         mark_emb = self.encode(mark)
+         goods_emb = self.encode(goods)
+         return self.similarity(mark_emb, goods_emb)
+
+     def similarity_to_goods_segments(self, mark, goods):
+         """
+         Split goods into sentences and take the maximum similarity.
+         """
+         if not goods:
+             return 0.0
+         sentences = sent_tokenize(goods)
+         if not sentences:
+             return 0.0
+         mark_emb = self.encode(mark)
+         sent_embs = self.encode(sentences)
+         sims = util.cos_sim(mark_emb, sent_embs)[0]
+         return float(sims.max())
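A quick sketch of the two bi-encoder paths (invented inputs; the MiniLM model downloads on first use):

from app.src.embeddings import EmbeddingSimilarity

emb = EmbeddingSimilarity()
# Max cosine similarity between the mark and individual descriptive terms
term_sim = emb.max_similarity_to_terms("Creamy Delight", ["creamy", "fresh", "tasty"])
# Sentence-level max similarity between the mark and the goods description
goods_sim = emb.similarity_to_goods_segments("Creamy Delight", "Ice cream. Frozen desserts.")
print(round(term_sim, 2), round(goods_sim, 2))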
app/src/heuristics.py ADDED
@@ -0,0 +1,85 @@
+ class DescriptivenessHeuristic:
+     """
+     Combines outputs from linguistic, embedding, and cross-encoder modules
+     to produce final descriptiveness and genericness scores.
+     """
+
+     def __init__(self, ling_analyzer, emb_similarity, cross_encoder, weights=None):
+         self.ling = ling_analyzer
+         self.emb = emb_similarity
+         self.cross = cross_encoder
+         # Default weights (can be tuned via validation)
+         self.weights = weights or {
+             'linguistic': 0.25,
+             'embedding_max_term': 0.25,
+             'embedding_goods': 0.20,
+             'cross_encoder': 0.30
+         }
+
+     def assess(self, mark, goods, goods_class=None, descriptive_terms=None):
+         """
+         Returns a dict with scores and reasons.
+         """
+         # 1. Linguistic features
+         ling_feat = self.ling.analyze(mark, goods, goods_class)
+
+         # Construct a linguistic score (weighted combination; terms sum to 1.0)
+         ling_score = (
+             (0.2 if ling_feat['pos'].get('adjective_count', 0) > 0 else 0) +
+             0.3 * ling_feat['dictionary_word_ratio'] +
+             0.2 * ling_feat['descriptive_keyword_overlap'] +
+             0.2 * ling_feat['ngram_overlap_with_goods'] +
+             (0.1 if ling_feat['has_descriptive_suffix'] else 0)
+         )
+         ling_score = min(1.0, ling_score)
+
+         # 2. Embedding similarity to descriptive terms (if provided)
+         emb_term_score = 0.0
+         if descriptive_terms:
+             emb_term_score = self.emb.max_similarity_to_terms(mark, descriptive_terms)
+
+         # 3. Embedding similarity to goods (bi-encoder)
+         emb_goods_score = self.emb.similarity_to_goods_segments(mark, goods)
+
+         # 4. Cross-encoder score
+         cross_score = self.cross.similarity(mark, goods)
+
+         # Weighted combination
+         descriptive_score = (
+             self.weights['linguistic'] * ling_score +
+             self.weights['embedding_max_term'] * emb_term_score +
+             self.weights['embedding_goods'] * emb_goods_score +
+             self.weights['cross_encoder'] * cross_score
+         )
+
+         # Genericness detection (simplified)
+         generic_score = 0.0
+         reasons = []
+
+         # If the mark is a dictionary word and highly similar to goods, it could be generic
+         if ling_feat['dictionary_word_ratio'] > 0.8 and cross_score > 0.7:
+             generic_score = 0.8
+             reasons.append("High similarity to goods and common word; potential genericness")
+         elif ling_feat['dictionary_word_ratio'] > 0.9:
+             generic_score = 0.4
+             reasons.append("All words are common dictionary terms")
+
+         # TODO: flag marks that are WordNet hyponyms of a goods category
+
+         # Build explanation
+         explanation = f"Descriptiveness score: {descriptive_score:.2f}. "
+         if reasons:
+             explanation += "Reasons: " + "; ".join(reasons)
+
+         return {
+             'descriptive_score': round(descriptive_score, 2),
+             'generic_score': round(generic_score, 2),
+             'reasons': reasons,
+             'explanation': explanation,
+             'details': {
+                 'linguistic': ling_feat,
+                 'embedding_term': emb_term_score,
+                 'embedding_goods': emb_goods_score,
+                 'cross_encoder': cross_score
+             }
+         }
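To make the weighted blend concrete, here is the arithmetic with hypothetical sub-scores (the weights are the defaults above):

# Hypothetical sub-scores for illustration only
ling_score, emb_term_score, emb_goods_score, cross_score = 0.6, 0.8, 0.5, 0.9

descriptive_score = (0.25 * ling_score +       # linguistic         -> 0.15
                     0.25 * emb_term_score +   # embedding_max_term -> 0.20
                     0.20 * emb_goods_score +  # embedding_goods    -> 0.10
                     0.30 * cross_score)       # cross_encoder      -> 0.27
print(round(descriptive_score, 2))  # 0.15 + 0.20 + 0.10 + 0.27 = 0.72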
app/src/linguistic.py ADDED
@@ -0,0 +1,179 @@
+ import spacy
+ import json
+ import os
+ import nltk
+ from nltk import word_tokenize
+ from nltk.corpus import wordnet
+
+ # word_tokenize and WordNet lookups need NLTK data; fetch it on first import if missing
+ for resource, path in [('punkt', 'tokenizers/punkt'), ('wordnet', 'corpora/wordnet')]:
+     try:
+         nltk.data.find(path)
+     except LookupError:
+         nltk.download(resource, quiet=True)
+
+ # Load spaCy model (the Docker build skips it, so download at runtime if absent)
+ try:
+     nlp = spacy.load("en_core_web_sm")
+ except OSError:
+     from spacy.cli import download
+     download("en_core_web_sm")
+     nlp = spacy.load("en_core_web_sm")
+
+ # Optional: load word frequency data (e.g., SUBTLEX frequency file)
+ # If not available, we use a simple fallback (all words equally frequent).
+ FREQ_DICT = {}
+ FREQ_PATH = os.path.join(os.path.dirname(__file__), '..', 'data', 'word_freq.json')
+ if os.path.exists(FREQ_PATH):
+     with open(FREQ_PATH, 'r') as f:
+         FREQ_DICT = json.load(f)
+
+ class LinguisticAnalyzer:
+     """
+     Extracts rich linguistic features from a trademark string.
+     Features include POS tags, dependency relations, dictionary membership,
+     word frequency, n-gram overlap with goods description, and named entities.
+     """
+
+     def __init__(self, descriptive_keywords_path=None):
+         self.descriptive_keywords = {}
+         if descriptive_keywords_path and os.path.exists(descriptive_keywords_path):
+             with open(descriptive_keywords_path, 'r', encoding='utf-8') as f:
+                 self.descriptive_keywords = json.load(f)  # e.g., {"class_030": ["fresh", "creamy"]}
+
+         # Common descriptive suffixes (e.g., -y, -er, -ing)
+         self.descriptive_suffixes = ('y', 'er', 'ing', 'ive', 'ous', 'al', 'ic')
+
+     def pos_tags(self, text):
+         """Return list of (token, POS, detailed tag) using spaCy."""
+         doc = nlp(text)
+         return [(token.text, token.pos_, token.tag_) for token in doc]
+
+     def dependency_relations(self, text):
+         """Extract adjective-noun and other modifier relations."""
+         doc = nlp(text)
+         modifiers = []
+         for token in doc:
+             # amod: adjectival modifier, nmod: nominal modifier
+             if token.dep_ in ('amod', 'nmod') and token.head.pos_ in ('NOUN', 'PROPN'):
+                 modifiers.append((token.text, token.head.text, token.dep_))
+         return modifiers
+
+     def is_dictionary_word(self, word):
+         """Check if word exists in WordNet."""
+         return bool(wordnet.synsets(word))
+
+     def word_frequency(self, word):
+         """
+         Return log frequency of word (if available). Higher = more common.
+         Defaults to 0 if not in frequency dictionary.
+         """
+         return FREQ_DICT.get(word.lower(), 0)
+
+     def extract_ngrams(self, text, n=2, use_words=True):
+         """Generate word n-grams or character n-grams."""
+         if use_words:
+             words = word_tokenize(text.lower())
+             ngrams = [' '.join(words[i:i+n]) for i in range(len(words)-n+1)]
+         else:
+             # character n-grams
+             text_clean = text.lower().replace(' ', '')
+             ngrams = [text_clean[i:i+n] for i in range(len(text_clean)-n+1)]
+         return ngrams
+
+     def ngram_overlap_with_goods(self, mark, goods, n=2):
+         """
+         Compute the fraction of mark word n-grams that appear verbatim in the goods description.
+         """
+         if not goods:
+             return 0.0
+         mark_ngrams = set(self.extract_ngrams(mark, n=n, use_words=True))
+         goods_ngrams = set(self.extract_ngrams(goods, n=n, use_words=True))
+         if not mark_ngrams:
+             return 0.0
+         overlap = mark_ngrams.intersection(goods_ngrams)
+         return len(overlap) / len(mark_ngrams)
+
+     def descriptive_keyword_overlap(self, mark, goods_class=None):
+         """
+         Return fraction of mark words that appear (as lemmas) in the descriptive list for the given class.
+         Uses lemmatization to catch inflected forms.
+         """
+         if not self.descriptive_keywords or not goods_class:
+             return 0.0
+         # Lemmatize mark words
+         doc = nlp(mark)
+         mark_lemmas = {token.lemma_.lower() for token in doc if token.is_alpha}
+         # JSON keys look like "class_030" (see above); accept "30", "030", or "class_030"
+         key = goods_class if str(goods_class).startswith('class_') else f"class_{str(goods_class).zfill(3)}"
+         desc_words = set(self.descriptive_keywords.get(key, []))
+         if not mark_lemmas or not desc_words:
+             return 0.0
+         overlap = mark_lemmas.intersection(desc_words)
+         return len(overlap) / len(mark_lemmas)
+
+     def has_descriptive_suffix(self, word):
+         """Check if word ends with a common descriptive suffix."""
+         return any(word.lower().endswith(suf) for suf in self.descriptive_suffixes)
+
+     def extract_entities(self, text):
+         """Return list of named entities (PERSON, ORG, GPE, etc.)."""
+         doc = nlp(text)
+         return [(ent.text, ent.label_) for ent in doc.ents]
+
+     def analyze(self, mark, goods=None, goods_class=None):
+         """
+         Main method: returns a dictionary of linguistic features.
+         """
+         doc = nlp(mark)
+         tokens = [token.text.lower() for token in doc if token.is_alpha]
+         if not tokens:
+             return {'pos': {}, 'modifiers': [], 'dictionary_word_ratio': 0, 'avg_word_freq': 0,
+                     'descriptive_keyword_overlap': 0, 'ngram_overlap_with_goods': 0,
+                     'has_descriptive_suffix': False, 'has_entity': False, 'ngrams': []}
+
+         # POS summary
+         pos_tags = [(token.text, token.pos_, token.tag_) for token in doc]
+         pos_summary = {
+             'adjective_count': sum(1 for _, pos, _ in pos_tags if pos == 'ADJ'),
+             'comparative_count': sum(1 for _, _, tag in pos_tags if tag in ('JJR', 'JJS')),
+             'noun_count': sum(1 for _, pos, _ in pos_tags if pos == 'NOUN'),
+             'verb_count': sum(1 for _, pos, _ in pos_tags if pos == 'VERB')
+         }
+
+         # Dependency modifiers
+         modifiers = self.dependency_relations(mark)
+
+         # Dictionary word ratio
+         dict_word_ratio = sum(1 for w in tokens if self.is_dictionary_word(w)) / len(tokens)
+
+         # Average word frequency (log)
+         avg_freq = sum(self.word_frequency(w) for w in tokens) / len(tokens)
+
+         # Overlap with goods n-grams
+         ngram_overlap = self.ngram_overlap_with_goods(mark, goods, n=2) if goods else 0.0
+
+         # Descriptive keyword overlap (lemma-based)
+         desc_overlap = self.descriptive_keyword_overlap(mark, goods_class)
+
+         # Suffix check (true if any word carries a descriptive suffix)
+         has_desc_suffix = any(self.has_descriptive_suffix(w) for w in tokens)
+
+         # Named entities
+         entities = self.extract_entities(mark)
+         has_entity = len(entities) > 0
+
+         # Word n-grams for later use
+         ngrams = self.extract_ngrams(mark, n=2, use_words=True)
+
+         return {
+             'pos': pos_summary,
+             'modifiers': modifiers,
+             'dictionary_word_ratio': dict_word_ratio,
+             'avg_word_freq': avg_freq,
+             'descriptive_keyword_overlap': desc_overlap,
+             'ngram_overlap_with_goods': ngram_overlap,
+             'has_descriptive_suffix': has_desc_suffix,
+             'has_entity': has_entity,
+             'ngrams': ngrams
+         }
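A sketch of the feature extractor on its own (spaCy and NLTK data download on first import; the inputs are invented):

from app.src.linguistic import LinguisticAnalyzer

la = LinguisticAnalyzer()  # no keyword file, so keyword overlap stays 0.0
features = la.analyze("Creamy Delight", goods="Ice cream and frozen desserts.")
print(features['pos'])                       # adjective/noun/verb counts
print(features['dictionary_word_ratio'])     # fraction of words found in WordNet
print(features['ngram_overlap_with_goods'])  # verbatim bigram overlap with goods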
app/src/main.py ADDED
@@ -0,0 +1,56 @@
+ import os
+ from .linguistic import LinguisticAnalyzer
+ from .embeddings import EmbeddingSimilarity
+ from .cross_encoder import CrossEncoderSimilarity
+ from .heuristics import DescriptivenessHeuristic
+
+ class TrademarkAnalyzer:
+     """
+     High-level API for trademark descriptiveness analysis.
+     Initializes all sub-modules and provides a unified analyze() method.
+     """
+
+     def __init__(self, descriptive_keywords_path=None):
+         """
+         Args:
+             descriptive_keywords_path: Path to JSON file with class-specific descriptive terms.
+         """
+         # Ensure models are cached on the runtime disk (matches the Dockerfile ENV)
+         os.environ.setdefault("HF_HOME", "/tmp/.cache/huggingface")
+
+         # Initialize sub-modules
+         self.linguistic = LinguisticAnalyzer(descriptive_keywords_path)
+         self.embedding = EmbeddingSimilarity()  # uses sentence-transformers
+         self.cross_encoder = CrossEncoderSimilarity()
+         self.heuristic = DescriptivenessHeuristic(
+             self.linguistic,
+             self.embedding,
+             self.cross_encoder
+         )
+
+     def analyze(self, mark, goods, goods_class=None):
+         """
+         Perform full descriptiveness analysis.
+
+         Args:
+             mark (str): The trademark text.
+             goods (str): Description of goods/services.
+             goods_class (str, optional): USPTO class (e.g., "30").
+
+         Returns:
+             dict: Contains descriptive_score, generic_score, reasons, explanation, details.
+         """
+         # Load descriptive terms for the class (if any)
+         descriptive_terms = None
+         if goods_class and self.linguistic.descriptive_keywords:
+             class_key = f"class_{goods_class.zfill(3)}"  # e.g., class_030
+             descriptive_terms = self.linguistic.descriptive_keywords.get(class_key, [])
+
+         # Run the heuristic assessment
+         result = self.heuristic.assess(
+             mark=mark,
+             goods=goods,
+             goods_class=goods_class,
+             descriptive_terms=descriptive_terms
+         )
+         return result
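End to end, the analyzer is then a two-liner. A sketch (the first call is slow while the models download; inputs are invented):

from app.src.main import TrademarkAnalyzer

analyzer = TrademarkAnalyzer(descriptive_keywords_path="app/data/descriptive_keywords.json")
result = analyzer.analyze(mark="Creamy Delight",
                          goods="Ice cream and frozen desserts.",
                          goods_class="30")
print(result["descriptive_score"], result["generic_score"])
print(result["explanation"])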
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ fastapi==0.115.0
+ uvicorn==0.30.0
+ spacy==3.7.2
+ nltk==3.8.1
+ sentence-transformers==3.0.1
+ transformers==4.41.0
+ torch==2.3.0
+ numpy==1.24.3
+ huggingface-hub==0.23.0
+ pydantic==2.7.0