Gensim w2v

W2VEmbedding

Bases: AbstractEmbeddingModel

Class for embedding models using a pretrained Word2Vec model.

Source code in src/embedding/gensim_w2v.py
class W2VEmbedding(AbstractEmbeddingModel):
    """
    Class for embedding models using a pretrained Word2Vec model.
    """

    def __init__(self, path: str, model: str = 'W2V-Unk', split_camel: bool = False):
        super().__init__(split_camel)
        self._name = f'{model}'
        # Load pretrained word vectors stored in the binary word2vec format.
        self.model = KeyedVectors.load_word2vec_format(path, binary=True)

    def get_embedding(self, text: str) -> np.ndarray:
        """
        Returns the embedding of the text.
        :param text: text to embed.
        :return: mean of the word vectors of the tokens in the text;
                 out-of-vocabulary tokens contribute zero vectors.
        """
        embeddings = []
        if not text:
            # Empty input: fall back to a single zero vector.
            embeddings.append(np.zeros(self.model.vector_size))
        for word in self.split(str(text)):
            if word in self.model:
                embeddings.append(self.model[word])
            else:
                # Out-of-vocabulary token: use a zero vector.
                embeddings.append(np.zeros(self.model.vector_size))
        return np.mean(embeddings, axis=0)
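
A minimal usage sketch follows. The vector file path, the model label, and the import path are hypothetical placeholders (the module lives at src/embedding/gensim_w2v.py, so the import below is assumed from that layout); `split_camel` presumably controls whether camelCase identifiers are split into separate tokens before lookup.

```python
# Minimal usage sketch (path, model label, and import path are assumptions).
from src.embedding.gensim_w2v import W2VEmbedding  # assumed import path

emb = W2VEmbedding(
    path='vectors/word2vec.bin',   # hypothetical pretrained binary word2vec file
    model='W2V-GoogleNews',        # illustrative model label
    split_camel=True,              # presumably splits camelCase tokens before lookup
)

vec = emb.get_embedding('getUserName')
print(vec.shape)  # (vector_size,) of the loaded vectors, e.g. (300,)
```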

get_embedding(text)

Returns the embedding of the text.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `text` | `str` | The text to embed. | *required* |

Returns:

| Type | Description |
|------|-------------|
| `ndarray` | The embedding of the text: the mean of its word vectors. |
Source code in src/embedding/gensim_w2v.py
def get_embedding(self, text: str) -> np.ndarray:
    """
    Returns the embedding of the text.
    :param text: text to embed.
    :return: mean of the word vectors of the tokens in the text;
             out-of-vocabulary tokens contribute zero vectors.
    """
    embeddings = []
    if not text:
        # Empty input: fall back to a single zero vector.
        embeddings.append(np.zeros(self.model.vector_size))
    for word in self.split(str(text)):
        if word in self.model:
            embeddings.append(self.model[word])
        else:
            # Out-of-vocabulary token: use a zero vector.
            embeddings.append(np.zeros(self.model.vector_size))
    return np.mean(embeddings, axis=0)
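
To make the averaging behavior concrete, the sketch below reproduces the same mean-of-word-vectors logic against a tiny hand-built `KeyedVectors` vocabulary. It does not use the class itself, since `AbstractEmbeddingModel.split` is defined elsewhere; plain whitespace splitting stands in for it here.

```python
import numpy as np
from gensim.models import KeyedVectors

# Tiny in-memory vocabulary, purely to illustrate the averaging logic.
kv = KeyedVectors(vector_size=3)
kv.add_vectors(['user', 'name'], [np.array([1.0, 0.0, 0.0]),
                                  np.array([0.0, 1.0, 0.0])])

def mean_embedding(text: str) -> np.ndarray:
    # Whitespace split stands in for AbstractEmbeddingModel.split here.
    embeddings = []
    if not text:
        embeddings.append(np.zeros(kv.vector_size))
    for word in str(text).split():
        if word in kv:
            embeddings.append(kv[word])
        else:
            # Out-of-vocabulary words contribute zero vectors, pulling the mean toward 0.
            embeddings.append(np.zeros(kv.vector_size))
    return np.mean(embeddings, axis=0)

print(mean_embedding('user name'))     # [0.5 0.5 0. ]
print(mean_embedding('user unknown'))  # [0.5 0.  0. ] -- OOV word averaged in as zeros
```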