Mentions légales du service

Skip to content
Snippets Groups Projects

Enh bert

Merged Gözükan Hande requested to merge hgozukan/cartolabe-data:ENH_bert into master
7 files
+ 638
− 1401
Compare changes
  • Side-by-side
  • Inline
Files
7
import logging
from pathlib import Path
import gzip
import numpy as np
@@ -52,7 +53,7 @@ class Neighbors(ScoringBase):
the base nature of calculation.
source: str
the source nature to calculate the score for.
- dir_mat: Path
+ dir_mat: str, Path
the path of the directory that contains the entity matrices.
dumps_dir: Path
the directory that contains x-dimensional projection matrices.
@@ -67,6 +68,9 @@ class Neighbors(ScoringBase):
evals: Evaluation
Returns the calculated scores and descriptive scores.
"""
dir_mat = Path(dir_mat)
dir_xD = Path(dir_xD)
scores_source = load_scores([source], dir_mat)[0]
matrix_source = load_matrices_from_dumps([source], "mat", dir_mat)[0]
@@ -123,6 +127,7 @@ class Neighbors(ScoringBase):
scores_nature=None, matrix_nature_xD=None,
min_score=20, recompute=True, sample_size=None,
n_neighbors=10, random_state=42):
dir_xD = Path(dir_xD)
logger.info(f"Calculating scores for {cls.KEY}_{nature}_{source}.")
@@ -135,7 +140,12 @@ class Neighbors(ScoringBase):
"random_state": random_state
})
assert neighbors_filename is not None
if neighbors_filename is None:
neighbors_filename = (
dir_xD / NEIGHBORS_FILENAME_FORMAT.format(
nature, nature
)
)
if recompute:
assert (dir_xD is not None and scores_nature is not None and
@@ -260,6 +270,9 @@ class Trustworthiness(ScoringBase):
def load_and_evaluate(cls, natures, key_nD, key_2D, dir_nD, dir_2D,
n_neighbors=5, metric="euclidean"):
dir_nD = Path(dir_nD)
dir_2D = Path(dir_2D)
matrices_nD = load_matrices_from_dumps(natures, key_nD, dir_nD)
matrices_2D = load_matrices_from_dumps(natures, key_2D, dir_2D)
@@ -333,6 +346,11 @@ class Clustering(ScoringBase):
dir_clus, iter_stab=2,
remove_stab=[0, .01, .03, .1, .25],
metric='euclidean', random_state=None):
dir_mat = Path(dir_mat)
dir_nD = Path(dir_nD)
dir_2D = Path(dir_2D)
dir_clus = Path(dir_clus)
# load necessary matrices and scores
matrices = load_matrices_from_dumps(natures, "mat", dir_mat)
scores = load_scores(natures, dir_mat)
Loading