Skip to content

Keyword

KeywordLF

Bases: LFBase

Labelling function that uses identifiers and keywords.

Source code in src/annotation/keyword.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class KeywordLF(LFBase):
    """
    Labelling function that uses identifiers and keywords.

    Scores each taxonomy label by the weighted multiplicity of its
    keywords found in the file content, then normalizes the scores
    into a probability vector.
    """

    def __init__(self, taxonomy: TaxonomyBase, key: str):
        super().__init__(taxonomy)
        self.key = key

    def annotate(self, name: str, content: str) -> np.ndarray:
        """
        Compute the probability for the file given the name and/or the content.

        :param name: Source file name (not used by this labelling function).
        :param content: Content of the file (usually identifiers),
            whitespace-separated.
        :return: Vector of length ``len(self.taxonomy)`` normalized to sum
            to 1, or all zeros when no keyword matches.
        """
        node_labels = np.zeros(len(self.taxonomy))
        # Build the content multiset once; it is invariant across labels.
        tokens = Multiset(content.split())
        for _label in self.taxonomy:
            label: KeywordLabel = _label
            # Multiset intersection preserves multiplicities, so repeated
            # occurrences of a keyword in the content are all counted.
            matches = Counter(label.keywords.intersection(tokens))
            node_labels[label.index] = sum(matches[k] * label.weights[k]
                                           for k in matches)

        norm = np.sum(node_labels)
        # Guard against division by zero when nothing matched.
        node_vec = node_labels / norm if norm > 0 else np.zeros(len(self.taxonomy))

        return node_vec

annotate(name, content)

Compute the probability for the file given the name and/or the content.

Parameters:

Name Type Description Default
name str

Source file name

required
content str

Content of the file (usually identifiers)

required

Returns:

Type Description
np.ndarray
Probability vector over the taxonomy labels, normalized to sum to 1 (all zeros when no keyword matches).
Source code in src/annotation/keyword.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def annotate(self, name: str, content: str) -> np.ndarray:
    """
    Compute the probability for the file given the name and/or the content.

    :param name: Source file name (not used by this labelling function).
    :param content: Content of the file (usually identifiers),
        whitespace-separated.
    :return: Vector of length ``len(self.taxonomy)`` normalized to sum
        to 1, or all zeros when no keyword matches.
    """
    node_labels = np.zeros(len(self.taxonomy))
    # Build the content multiset once; it is invariant across labels.
    tokens = Multiset(content.split())
    for _label in self.taxonomy:
        label: KeywordLabel = _label
        # Multiset intersection preserves multiplicities, so repeated
        # occurrences of a keyword in the content are all counted.
        matches = Counter(label.keywords.intersection(tokens))
        node_labels[label.index] = sum(matches[k] * label.weights[k]
                                       for k in matches)

    norm = np.sum(node_labels)
    # Guard against division by zero when nothing matched.
    node_vec = node_labels / norm if norm > 0 else np.zeros(len(self.taxonomy))

    return node_vec