# dblinker.py
# Author: Jérôme Euzenat
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from lib.fca import *
from lib.logger import *
from lib.linkkey import *
from operator import attrgetter
class FCADatabaseLinker:
    """Extract and score candidate link keys between two tuple sets via FCA.

    The wrapped ``db`` object is read through four attributes:
    ``attributes0`` / ``attributes1`` (sequences supporting ``.index``) and
    ``tuples0`` / ``tuples1`` (sequences of rows that are indexed by
    attribute position).
    """

    def __init__(self, db):
        """Keep a reference to the database whose two tuple sets are linked."""
        self._db = db

    @property
    def db(self):
        """Return the database given to the constructor (read-only)."""
        return self._db

    def build_fca_context(self):
        """Build the formal context relating tuple pairs to attribute pairs.

        Objects of the context are pairs ``(i, j)`` of row indexes, one into
        ``db.tuples0`` and one into ``db.tuples1``.  Attributes are pairs
        ``(a0, a1)`` drawn from ``db.attributes0`` x ``db.attributes1``.
        A relation is added whenever row ``i`` and row ``j`` hold equal
        values for ``a0`` and ``a1`` respectively.

        Returns:
            The populated ``FormalContext`` (named ``"DB FCA"``).
        """
        # Imported locally so the method does not depend on an ``it`` alias
        # being leaked into the module by one of its wildcard imports.
        from itertools import product

        attrs0 = self.db.attributes0
        attrs1 = self.db.attributes1
        tuples0 = self.db.tuples0
        tuples1 = self.db.tuples1

        attr_pairs = list(product(attrs0, attrs1))
        tuple_pairs = list(product(range(len(tuples0)), range(len(tuples1))))
        context = FormalContext("DB FCA", tuple_pairs, attr_pairs)

        # The column positions depend only on the attribute pair, so resolve
        # them once here instead of repeating two linear ``index`` scans for
        # every (tuple pair, attribute pair) combination.
        columns = [
            (attr_pair, attrs0.index(attr_pair[0]), attrs1.index(attr_pair[1]))
            for attr_pair in attr_pairs
        ]

        # Tuple pairs form the outer loop and attribute pairs the inner one,
        # so relations are added in the same order as a flat product would.
        for tuple_pair in tuple_pairs:
            row0 = tuples0[tuple_pair[0]]
            row1 = tuples1[tuple_pair[1]]
            for attr_pair, col0, col1 in columns:
                if row0[col0] == row1[col1]:
                    context.add_relation(tuple_pair, attr_pair)
        return context

    def measure_linkkey(self, concept):
        """Create the link key of *concept* and compute its quality measures.

        For a concept with a non-empty extent this sets, on the link key:

        * ``discrimi``: ``min(#distinct left rows, #distinct right rows)``
          divided by the number of pairs in the extent;
        * ``coverage``: ``#distinct left rows + #distinct right rows``
          divided by the total number of rows in both tuple sets;
        * ``fmeasure``: harmonic mean of the two values above.

        ``concept.fmeasure`` is set to the same f-measure.

        Args:
            concept: object exposing ``intent`` and ``extent``; the extent is
                iterated as pairs whose items 0 and 1 are read.

        Returns:
            The ``DatabaseLinkkey`` built from ``concept.intent``.  When the
            extent is empty no measure is assigned here, so the link key keeps
            whatever its constructor set up and ``concept.fmeasure`` is left
            untouched.
        """
        linkkey = DatabaseLinkkey(concept.intent)
        extent_size = len(concept.extent)
        if extent_size == 0:
            return linkkey

        instance_count = len(self.db.tuples0) + len(self.db.tuples1)
        linked0 = {pair[0] for pair in concept.extent}
        linked1 = {pair[1] for pair in concept.extent}

        linkkey.discrimi = min(len(linked0), len(linked1)) / extent_size
        linkkey.coverage = (len(linked0) + len(linked1)) / instance_count
        # Both measures are strictly positive for a non-empty extent, so the
        # harmonic-mean denominator cannot be zero.
        doubled_product = 2.0 * (linkkey.discrimi * linkkey.coverage)
        linkkey.fmeasure = doubled_product / (linkkey.discrimi + linkkey.coverage)
        concept.fmeasure = linkkey.fmeasure
        return linkkey

    def extract_linkkeys(self, exporter=None):
        """Run the full pipeline: context, lattice, scoring, best link key.

        Args:
            exporter: optional object passed to ``build_lattice``.  When it is
                not ``None`` its ``iteration`` counter is incremented and
                ``export_lattice`` is called with the lattice once all
                concepts have been measured.

        Returns:
            A ``(linkkeys, best_linkkey)`` tuple: every measured link key, and
            the one with the highest ``fmeasure``.

        Raises:
            ValueError: if the lattice yields no concept (``max`` of an empty
                sequence).
        """
        Logger.log('FCA database linker')
        Logger.log('Building formal context')
        context = self.build_fca_context()

        Logger.log('Building lattice')
        lattice = context.build_lattice(exporter)

        Logger.log('Determining candidate link keys')
        linkkeys = [self.measure_linkkey(concept) for concept in lattice]
        if exporter is not None:
            exporter.iteration += 1
            exporter.export_lattice(lattice)

        Logger.log('Searching best linkkey')
        # NOTE(review): link keys of empty-extent concepts never get an
        # ``fmeasure`` assigned by ``measure_linkkey``; this presumably relies
        # on a default provided by ``DatabaseLinkkey`` -- confirm.
        best_linkkey = max(linkkeys, key=attrgetter('fmeasure'))
        return linkkeys, best_linkkey