Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
vidjil
vidjil
Commits
1973f7e8
Commit
1973f7e8
authored
Apr 26, 2019
by
Mathieu Giraud
Browse files
Merge branch 'feature-a/some-optim' into 'dev'
Optimizations, including e-value computations, max12 See merge request
!468
parents
2ddbea56
eb8a268e
Pipeline
#75869
failed with stages
in 2 minutes and 31 seconds
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
algo/core/affectanalyser.cpp
View file @
1973f7e8
#include "affectanalyser.h"
#include <algorithm>
#include <unordered_map>
bool
operator
==
(
const
affect_infos
&
ai1
,
const
affect_infos
&
ai2
)
{
return
ai1
.
first_pos_max
==
ai2
.
first_pos_max
...
...
@@ -312,6 +314,33 @@ int KmerAffectAnalyser::last(const KmerAffect &affect) const{
}
pair
<
KmerAffect
,
KmerAffect
>
KmerAffectAnalyser
::
max12
(
const
set
<
KmerAffect
>
forbidden
)
const
{
pair
<
KmerAffect
,
int
>
max_counts
[
2
]
=
{
make_pair
(
KmerAffect
::
getUnknown
(),
-
1
),
make_pair
(
KmerAffect
::
getUnknown
(),
-
1
)};
std
::
unordered_map
<
KmerAffect
,
int
>
counts
;
for
(
KmerAffect
affect
:
affectations
)
{
if
(
forbidden
.
count
(
affect
)
==
0
)
{
if
(
counts
.
count
(
affect
)
>
0
)
counts
[
affect
]
++
;
else
counts
[
affect
]
=
1
;
}
}
for
(
auto
it
:
counts
)
{
if
(
it
.
second
>
max_counts
[
1
].
second
)
{
if
(
it
.
second
>
max_counts
[
0
].
second
)
{
max_counts
[
1
]
=
max_counts
[
0
];
max_counts
[
0
]
=
it
;
}
else
{
max_counts
[
1
]
=
it
;
}
}
}
return
make_pair
(
max_counts
[
0
].
first
,
max_counts
[
1
].
first
);
}
string
KmerAffectAnalyser
::
toString
()
const
{
string
kmer
;
for
(
size_t
i
=
0
;
i
<
affectations
.
size
();
i
++
)
{
...
...
@@ -349,12 +378,8 @@ CountKmerAffectAnalyser::CountKmerAffectAnalyser(IKmerStore<KmerAffect> &kms, co
CountKmerAffectAnalyser
::~
CountKmerAffectAnalyser
()
{
set
<
KmerAffect
>
affects
=
this
->
getDistinctAffectations
();
/* Initialize each key with a 0-integer array */
for
(
set
<
KmerAffect
>::
iterator
it
=
affects
.
begin
();
it
!=
affects
.
end
();
it
++
)
{
delete
[]
counts
[
*
it
];
for
(
auto
it
:
counts
)
{
delete
[]
it
.
second
;
}
}
...
...
@@ -390,38 +415,6 @@ KmerAffect CountKmerAffectAnalyser::max(const set<KmerAffect> forbidden) const {
return
max_affect
;
}
pair
<
KmerAffect
,
KmerAffect
>
CountKmerAffectAnalyser
::
max12
(
const
set
<
KmerAffect
>
forbidden
)
const
{
map
<
KmerAffect
,
int
*
>::
const_iterator
it
=
counts
.
begin
();
KmerAffect
max1_affect
=
KmerAffect
::
getUnknown
();
KmerAffect
max2_affect
=
KmerAffect
::
getUnknown
();
int
max1_count
=
-
1
;
int
max2_count
=
-
1
;
for
(;
it
!=
counts
.
end
();
it
++
)
{
if
(
forbidden
.
count
(
it
->
first
)
==
0
)
{
int
current_count
=
count
(
it
->
first
);
if
(
current_count
>
max1_count
)
{
max2_affect
=
max1_affect
;
max2_count
=
max1_count
;
max1_affect
=
it
->
first
;
max1_count
=
current_count
;
}
else
if
(
current_count
>
max2_count
)
{
max2_affect
=
it
->
first
;
max2_count
=
current_count
;
}
}
}
return
make_pair
(
max1_affect
,
max2_affect
);
}
int
CountKmerAffectAnalyser
::
countBefore
(
const
KmerAffect
&
affect
,
int
pos
)
const
{
if
(
pos
==
0
||
counts
.
count
(
affect
)
==
0
)
return
0
;
...
...
algo/core/affectanalyser.h
View file @
1973f7e8
...
...
@@ -121,6 +121,14 @@ class AffectAnalyser {
*/
virtual
int
last
(
const
KmerAffect
&
affect
)
const
=
0
;
/*
* @return the two affectations that are seen the most frequently in the sequence
* taken apart the forbidden ones.
* @complexity n + m log m where n is the input sequence length and m the number
* of affectations
*/
virtual
pair
<
KmerAffect
,
KmerAffect
>
max12
(
const
set
<
KmerAffect
>
forbidden
)
const
=
0
;
/**
* @return a string representation of the object
*/
...
...
@@ -222,6 +230,8 @@ class KmerAffectAnalyser: public AffectAnalyser {
int
last
(
const
KmerAffect
&
affect
)
const
;
pair
<
KmerAffect
,
KmerAffect
>
max12
(
const
set
<
KmerAffect
>
forbidden
)
const
;
string
toString
()
const
;
string
toStringValues
()
const
;
...
...
@@ -302,12 +312,6 @@ class CountKmerAffectAnalyser: public KmerAffectAnalyser {
*/
KmerAffect
max
(
const
set
<
KmerAffect
>
forbidden
=
set
<
KmerAffect
>
())
const
;
/*
* @return the two affectations that are seen the most frequently in the sequence
* taken apart the forbidden ones.
*/
pair
<
KmerAffect
,
KmerAffect
>
max12
(
const
set
<
KmerAffect
>
forbidden
)
const
;
/**
* Set the overlap allowed between two k-mers with two different affectations,
* when looking for the maximum.
...
...
algo/core/automaton.h
View file @
1973f7e8
...
...
@@ -24,6 +24,7 @@ class AbstractACAutomaton: public IKmerStore<Info> {
protected:
void
*
initialState
;
float
all_index_load
;
map
<
Info
,
size_t
>
kmers_inserted
;
public:
AbstractACAutomaton
();
...
...
algo/core/automaton.hpp
View file @
1973f7e8
...
...
@@ -15,16 +15,16 @@ void AbstractACAutomaton<Info>::finish_building() {
IKmerStore
<
Info
>::
finish_building
();
build_failure_functions
();
}
all_index_load
=
0
;
for
(
auto
iter
:
kmers_inserted
)
{
all_index_load
+=
getIndexLoad
(
iter
.
first
);
}
}
template
<
class
Info
>
float
AbstractACAutomaton
<
Info
>::
getIndexLoad
(
Info
kmer
)
const
{
float
load
=
0
;
if
(
kmers_inserted
.
count
(
kmer
)
==
0
)
{
for
(
auto
iter
:
kmers_inserted
)
{
load
+=
getIndexLoad
(
iter
.
first
);
}
return
(
kmer
.
isUnknown
())
?
1
-
load
:
load
;
return
(
kmer
.
isUnknown
())
?
1
-
all_index_load
:
all_index_load
;
}
else
{
return
kmers_inserted
.
at
(
kmer
)
/
pow
(
4.0
,
kmer
.
getLength
());
}
...
...
algo/core/bioreader.cpp
View file @
1973f7e8
...
...
@@ -126,6 +126,7 @@ BioReader::BioReader(bool virtualfasta, string name)
init
(
0
,
""
);
this
->
name
=
name
;
basename
=
extract_basename
(
name
);
filenames
.
push_back
(
this
->
name
);
}
BioReader
::
BioReader
(
int
extract_field
,
string
extract_separator
,
int
mark_pos
)
...
...
@@ -157,6 +158,7 @@ void BioReader::add(const string &filename, bool verbose) {
name
+=
filename
;
basename
+=
extract_basename
(
filename
);
filenames
.
push_back
(
name
);
if
(
verbose
)
cout
<<
" <== "
<<
filename
;
...
...
algo/core/bioreader.hpp
View file @
1973f7e8
...
...
@@ -189,6 +189,7 @@ public:
string
name
;
string
basename
;
list
<
string
>
filenames
;
int
size
()
const
;
size_t
totalSize
()
const
;
...
...
algo/core/kmeraffect.h
View file @
1973f7e8
...
...
@@ -174,6 +174,17 @@ bool operator>=(const KmerAffect &a1, const KmerAffect &a2);
bool
operator
!=
(
const
KmerAffect
&
a1
,
const
KmerAffect
&
a2
);
ostream
&
operator
<<
(
ostream
&
os
,
const
KmerAffect
&
kmer
);
namespace
std
{
template
<
>
struct
hash
<
KmerAffect
>
{
size_t
operator
()(
const
KmerAffect
&
affect
)
const
{
return
(
affect
.
getLabel
()[
0
]
<<
8
)
|
affect
.
getLength
();
}
};
}
#define AFFECT_NOT_UNKNOWN_SYMBOL "*"
#define AFFECT_AMBIGUOUS_SYMBOL "\0"
#define AFFECT_UNKNOWN_SYMBOL "\1"
...
...
algo/core/kmerstore.h
View file @
1973f7e8
...
...
@@ -15,6 +15,7 @@ using namespace std;
typedef
enum
{
KMER_INDEX
,
AC_AUTOMATON
}
IndexTypes
;
#define MAX_PRECOMPUTED_PROBA 500
/* Precompute 500 probabilities for each index load */
class
Kmer
{
public:
unsigned
int
count
;
...
...
@@ -74,6 +75,8 @@ protected:
size_t
nb_kmers_inserted
;
size_t
max_size_indexing
;
bool
finished_building
;
map
<
float
,
vector
<
double
>
>
precomputed_proba_with_system
,
precomputed_proba_without_system
;
public:
...
...
@@ -149,7 +152,7 @@ public:
/**
* @return probability that the number of kmers is 'at_least' or more in a sequence of length 'length'
*/
double
getProbabilityAtLeastOrAbove
(
T
kmer
,
int
at_least
,
int
length
)
const
;
double
getProbabilityAtLeastOrAbove
(
T
kmer
,
int
at_least
,
int
length
);
/**
* @return the value of k
...
...
@@ -199,6 +202,10 @@ public:
* @pre word.length() == this->k
*/
virtual
T
&
operator
[](
seqtype
&
word
)
=
0
;
protected:
void
precompute_proba
(
float
index_load
);
};
template
<
class
T
>
...
...
@@ -366,17 +373,45 @@ int IKmerStore<T>::atMostMaxSizeIndexing(int n) const {
}
template
<
class
T
>
double
IKmerStore
<
T
>::
getProbabilityAtLeastOrAbove
(
const
T
kmer
,
int
at_least
,
int
length
)
const
{
void
IKmerStore
<
T
>::
precompute_proba
(
float
index_load
)
{
precomputed_proba_with_system
[
index_load
]
=
vector
<
double
>
(
MAX_PRECOMPUTED_PROBA
);
precomputed_proba_without_system
[
index_load
]
=
vector
<
double
>
(
MAX_PRECOMPUTED_PROBA
);
vector
<
double
>
&
pproba_with
=
precomputed_proba_with_system
[
index_load
];
vector
<
double
>
&
pproba_without
=
precomputed_proba_without_system
[
index_load
];
pproba_with
[
0
]
=
1
;
pproba_without
[
0
]
=
1
;
for
(
int
i
=
1
;
i
<
MAX_PRECOMPUTED_PROBA
;
i
++
)
{
pproba_with
[
i
]
=
pproba_with
[
i
-
1
]
*
index_load
;
pproba_without
[
i
]
=
pproba_without
[
i
-
1
]
*
(
1
-
index_load
);
}
}
template
<
class
T
>
double
IKmerStore
<
T
>::
getProbabilityAtLeastOrAbove
(
const
T
kmer
,
int
at_least
,
int
length
)
{
if
(
at_least
==
0
)
return
1.0
;
// even if 'length' is very small
// n: number of kmers in the sequence
int
n
=
length
-
getS
()
+
1
;
float
index_load
=
getIndexLoad
(
kmer
)
;
if
(
!
precomputed_proba_without_system
.
count
(
index_load
))
{
precompute_proba
(
index_load
);
}
double
proba
=
0
;
double
probability_having_system
=
pow
(
index_load
,
at_least
);
double
probability_not_having_system
=
pow
(
1
-
index_load
,
n
-
at_least
);
double
probability_not_having_system
;
double
probability_having_system
;
if
(
precomputed_proba_with_system
.
at
(
index_load
).
size
()
>
(
size_t
)
at_least
)
probability_having_system
=
precomputed_proba_with_system
.
at
(
index_load
)[
at_least
];
else
probability_having_system
=
pow
(
index_load
,
at_least
);
if
(
precomputed_proba_without_system
.
at
(
index_load
).
size
()
>
(
size_t
)
n
-
at_least
)
probability_not_having_system
=
precomputed_proba_without_system
.
at
(
index_load
)[
n
-
at_least
];
else
probability_not_having_system
=
pow
(
1
-
index_load
,
n
-
at_least
);
for
(
int
i
=
at_least
;
i
<=
n
;
i
++
)
{
proba
+=
nChoosek
(
n
,
i
)
*
probability_having_system
*
probability_not_having_system
;
probability_having_system
*=
index_load
;
...
...
algo/core/segment.cpp
View file @
1973f7e8
...
...
@@ -442,7 +442,6 @@ KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold,
info_extra
=
"seed"
;
segmented
=
false
;
segmented_germline
=
germline
;
system
=
germline
->
code
;
// useful ?
reversed
=
false
;
because
=
NOT_PROCESSED
;
// Cause of unsegmentation
score
=
0
;
...
...
@@ -524,7 +523,7 @@ KmerSegmenter::KmerSegmenter(Sequence seq, Germline *germline, double threshold,
||
(
germline
->
seg_method
==
SEG_METHOD_MAX1U
))
{
// Pseudo-germline, MAX12 and MAX1U
pair
<
KmerAffect
,
KmerAffect
>
max12
;
Count
KmerAffectAnalyser
ckaa
(
*
(
germline
->
index
),
sequence
)
;
KmerAffectAnalyser
ckaa
=
*
kaa
;
set
<
KmerAffect
>
forbidden
;
...
...
algo/core/windowExtractor.cpp
View file @
1973f7e8
...
...
@@ -82,7 +82,7 @@ WindowsStorage *WindowExtractor::extract(OnlineBioReader *reads,
stats
[
TOTAL_SEG_AND_WINDOW
].
insert
(
read_length
)
;
if
(
seg
->
isJunctionChanged
())
stats
[
SEG_CHANGED_WINDOW
].
insert
(
read_length
);
stats_reads
[
seg
->
s
ystem
].
addScore
(
read_length
);
stats_reads
[
seg
->
s
egmented_germline
->
code
].
addScore
(
read_length
);
if
(
out_segmented
)
{
*
out_segmented
<<
*
seg
;
// KmerSegmenter output (V/N/J)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment