Commit 3041dbc5 authored by Mathieu Giraud

Merge branch 'feature-sc/3506-speedup-clonedb' into 'dev'

Speedup clonedb

Closes #3506

See merge request !316
parents 705baa45 67ca46d4
Pipeline #48391 passed with stages
in 6 minutes and 51 seconds
......@@ -36,37 +36,49 @@ def search_clonedb(sequences, sample_set_id):
results = []
parent_group = get_default_creation_group(auth)[1]
for sequence in sequences:
options = clonedb.build_grep_clones_options({'sequence': sequence+' -sample_set:%d' % sample_set_id,
auth.load_permissions(, 'sample_set')
auth.load_permissions(PermissionEnum.anon.value, 'sample_set')
options = clonedb.build_grep_clones_options({'sequence': sequences[0]+' -sample_set:%d' % sample_set_id,
'index': 'clonedb_{}'.format(parent_group)})
args = grep_clones.parser.parse_args(options)
occurrences = grep_clones.launch_search(args)
except ValueError:
return error_message('Are you sure your account has an enabled CloneDB?')
except Exception as e:
return error_message(e.message)
for occ in occurrences:
options += sequences[1:]
args = grep_clones.parser.parse_args(options)
occurrences = grep_clones.launch_search(args)
except ValueError:
return error_message('Are you sure your account has an enabled CloneDB?')
except Exception as e:
return error_message(e.message)
sample_set_ids = [ sid for occurrences_one_seq in occurrences for occ in occurrences_one_seq if 'tags' in occ and 'sample_set' in occ['tags'] for sid in occ['tags']['sample_set'] ]
sample_sets = SampleSets(sample_set_ids)
sample_names = sample_sets.get_names()
sample_tags = sample_sets.get_tag_names()
for occurrences_one_seq in occurrences:
for occ in occurrences_one_seq:
if 'tags' in occ and 'sample_set' in occ['tags']:
info = get_info_of_viewable_sample_set([int(sample_id) for sample_id in occ['tags']['sample_set']], int(occ['tags']['config_id'][0]))
info = get_info_of_viewable_sample_set([int(sample_id) for sample_id in occ['tags']['sample_set']], int(occ['tags']['config_id'][0]), sample_names, sample_tags)
occ['tags']['sample_set_viewable'] = info['viewable']
occ['tags']['sample_set_name'] = info['name']
occ['tags']['sample_tags'] = info['sample_tags']
config_db = db.config[occ['tags']['config_id'][0]]
occ['tags']['config_name'] = [ if config_db else None]
return response.json(results)
def get_info_of_viewable_sample_set(sample_sets, config):
def get_info_of_viewable_sample_set(sample_sets, config, sample_names, sample_tags):
info = {'viewable': [], 'name': [], 'sample_tags': []}
for sample_id in sample_sets:
viewable = auth.can_view_sample_set(sample_id, auth.user)
viewable = auth.can_view_sample_set(sample_id,
if viewable:
tags = get_sample_set_tags(sample_id)
for row in tags:
info['sample_tags'].append("#" +
tags = sample_tags.get(sample_id)
if tags:
for row in tags:
info['sample_tags'].append("#" +
return info
......@@ -260,7 +260,7 @@ def get_data():
if (sample_set.sample_type == defs.SET_TYPE_PATIENT):
for row in db( db.patient.sample_set_id == request.vars["sample_set_id"] ).select() :
log_reference_id =
patient_name = vidjil_utils.anon_ids(
patient_name = vidjil_utils.anon_ids([])[0]
data["dataFileName"] = patient_name + " (" + config_name + ")"
data["info"] = db.patient[].info
data["patient_id"] =
......@@ -401,7 +401,7 @@ def get_custom_data():
patient_run = db(db[sample_set.sample_type].sample_set_id ==
config_id = db.results_file[id].config_id
name = vidjil_utils.anon_ids( if sample_set.sample_type == defs.SET_TYPE_PATIENT else
name = vidjil_utils.anon_ids([])[0] if sample_set.sample_type == defs.SET_TYPE_PATIENT else
filename = db.sequence_file[sequence_file_id].filename
data["samples"]["original_names"].append(name + "_" + filename)
......@@ -13,7 +13,7 @@ if request.env.http_origin:
def anon_names(data):
for row in data:
# TODO use helper ? = vidjil_utils.anon_ids( = vidjil_utils.anon_ids([])[0]
return data
def get_data_list(table):
......@@ -72,7 +72,7 @@ def index():
for row in query:
if row.patient.first_name is not None:
row.names = vidjil_utils.anon_ids(row.user_log.record_id)
row.names = vidjil_utils.anon_ids([row.user_log.record_id])[0]
row.names =
return dict(query=query,
......@@ -162,7 +162,7 @@ def get_sample_name(sample_set_id):
if patient_or_run is None:
return None
if sample.sample_type == defs.SET_TYPE_PATIENT:
return vidjil_utils.anon_ids(
return vidjil_utils.anon_ids([])[0]
def get_set_group(sid):
from collections import defaultdict
import vidjil_utils
class SampleSets:
The SampleSets class represents many sample sets, possibly of different types.
This class makes it possible to retrieve information on sample sets more efficiently than
by retrieving information for each sample set separately.
ids = []
sample_types = set() # All the sample types we have in our IDs
sample_sets = []
def __init__(self, ids):
Build an instance for the given sample set IDs
self.ids = ids
self.sample_sets = db(,
self.sample_types = set([s.sample_type for s in self.sample_sets])
def get_names(self):
'''Returns the names of all the sample sets.
The function returns a dictionary whose keys are sample set
IDs and value is the name.
The number of queries is constant (which is true only if
permissions have been cached: anon_ids checks for permission)
results = {}
for stype in self.sample_types:
stype_ids = self.get_sample_type_ids(stype)
if stype == defs.SET_TYPE_PATIENT:
patient_ids = db(db[stype].sample_set_id.belongs(stype_ids)).select(db[stype].id, db[stype].sample_set_id)
names = vidjil_utils.anon_ids([ for p in patient_ids])
for i, p_id in enumerate(patient_ids):
results[p_id.sample_set_id] = names[i]
set_names = db(db[stype].sample_set_id.belongs(stype_ids)).select(db[stype].sample_set_id, db[stype].name)
for s in set_names:
results[s.sample_set_id] =
return results
def get_sample_type_ids(self, sample_type):
Return a list of all the sample set IDs corresponding to this sample type.
Performs no query on the database
return [ for s in self.sample_sets if s.sample_type == sample_type]
def get_tag_names(self):
'''Returns the names of the tags associated with all the sample sets.
The function returns a dictionary whose keys are sample set
IDs and values are a list of tag names.
The number of queries is constant.
# Getting tags
results = defaultdict(list)
for stype in self.sample_types:
stype_ids = self.get_sample_type_ids(stype)
tags = db((db[stype].sample_set_id.belongs(stype_ids))
& (db.tag_ref.record_id == db[stype].id)
& (db.tag_ref.table_name == stype)
& (db.tag_ref.tag_id ==\
.select(, db[stype].sample_set_id)
for t in tags:
return results
......@@ -446,14 +446,17 @@ class VidjilAuth(Auth):
or self.is_admin(user))
def can_view_sample_set(self, sample_set_id, user = None) :
perm = self.get_permission(, 'sample_set', sample_set_id, user)\
or self.is_admin(user)
if perm:
return perm
sample_set = db.sample_set[sample_set_id]
sample_type = sample_set.sample_type
if sample_set is None:
return False
perm = self.get_permission(, 'sample_set', sample_set_id, user)\
or self.is_admin(user)
for row in db( db[sample_type].sample_set_id == sample_set_id ).select() :
if self.can_view(sample_type,, user):
perm = True;
......@@ -87,14 +87,18 @@ def anon_birth(patient_id, user_id):
return age
def anon_ids(patient_id):
'''Anonymize patient name. Only users with the 'anon' access see the full patient name.'''
def anon_ids(patient_ids, can_view = None):
'''Anonymize patient names. Only users with the 'anon' access see the full patient name.
patient_ids is a list of patient IDs
db = current.db
patient = db.patient[patient_id]
patients = db(,
return display_names(patient.sample_set_id, patient.first_name, patient.last_name)
return [display_names(p.sample_set_id, p.first_name, p.last_name, can_view) for p in patients]
def anon_names(sample_set_id, first_name, last_name, can_view=None):
import unittest
class SamplesetsModel(unittest.TestCase):
def __init__(self, p):
global auth
unittest.TestCase.__init__(self, p)
def setUp(self):
# Load the to-be-tested file
execfile("applications/vidjil/models/", globals())
global auth
auth = VidjilAuth(globals(), db)
auth.login_bare("", "1234")
# We have the following sample sets
# fake_sample_set_id linked to fake_patient_id
# whose first and last name are plop
# permission_sample_set linked to permission_patient
# whose name is foo bar
def testGetNames(self):
samples = SampleSets([fake_sample_set_id, permission_sample_set])
names = samples.get_names()
self.assertEquals(names[fake_sample_set_id], "plo")
self.assertEquals(names[permission_sample_set], "bar")
def testGetTagNames(self):
samples = SampleSets([fake_sample_set_id, permission_sample_set])
names = samples.get_tag_names()
self.assertEquals('first_fake_tag' in names[fake_sample_set_id], True)
self.assertEquals('sec_fake_tag' in names[fake_sample_set_id], True)
self.assertEquals(permission_sample_set in names, False)
......@@ -63,6 +63,8 @@ for tablename in db.tables: # Copy tables!
db = test_db
auth.db = test_db
current.db = db
current.auth = auth
# build default database if it doesn't exist
vidjil_utils.init_db_helper(db, auth, True)
......@@ -90,6 +92,8 @@ user_id = db.auth_user.insert(
unique_group = db.auth_group.insert(role="user_"+str(user_id), description=" ")
db.auth_membership.insert(user_id=user_id, group_id=unique_group)
auth.user = db.auth_user[user_id]
# with admin privilege
group_id = 1 #admin group
db.auth_membership.insert(user_id=user_id, group_id=group_id)
......@@ -237,6 +241,13 @@ sec_fake_tag_id = db.tag.insert(name="sec_fake_tag")
db.group_tag.insert(group_id=unique_group, tag_id=first_fake_tag_id)
db.group_tag.insert(group_id=fake_group_id, tag_id=sec_fake_tag_id)
first_fake_tag_ref = db.tag_ref.insert(tag_id = first_fake_tag_id,
table_name = 'patient',
record_id = fake_sample_set_id)
sec_fake_tag_ref = db.tag_ref.insert(tag_id = sec_fake_tag_id,
table_name = 'patient',
record_id = fake_sample_set_id)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment