Bases: LFBase
Labelling function that uses vector similarity to label examples.
Source code in src/annotation/similarity.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43 | class SimilarityLF(LFBase):
"""
Labelling function that uses vector similarity to label examples.
"""
def __init__(self, taxonomy: TaxonomyBase, embedding: AbstractEmbeddingModel):
super().__init__(taxonomy)
self.embedding = embedding
self.label_vecs = self.embed_labels()
self.content = False
def annotate(self, name: str, content: str) -> np.array:
content_vec = [self.embedding.get_embedding(content.lower())]
try:
sims = cosine_similarity(content_vec, self.label_vecs)
except ValueError:
logger.error(f"Error in {name}\nContent: {content}\nContent vec: {content_vec}")
sims = np.zeros(len(self.label_vecs)) - 1
# Adding 1 (-1 is the lowest value for cosine sim) to bring the vector in the range 0-1 when normalizing
sims = sims[0] + 1
norm = np.linalg.norm(sims)
node_labels = sims / norm if norm else sims
return node_labels
def embed_labels(self) -> List[np.array]:
res = []
for label in self.taxonomy:
res.append(self.embedding.get_embedding(label.name.lower()))
return res
|