Commit ede7e98d authored by BENOIT Gaetan's avatar BENOIT Gaetan
Browse files

RepartitionAlgo: get sample from each bank when multi bank counting

parent 8779601e
......@@ -208,6 +208,7 @@ public:
_nbSuperKmersSeenSoFar ++;
if (_nbSuperKmersSeenSoFar > _nbSeqsToSee) // it's a bit of an approximation here (comparing # of superkmers to # of seqs)
{
//cout << _nbSuperKmersSeenSoFar << " " << _nbSeqsToSee << endl;
*_cancelIterator = true;
}
}
......@@ -405,40 +406,75 @@ void RepartitorAlgorithm<span>::computeRepartition (Repartitor& repartitor)
PartiInfo<5> sample_info (_config._nb_partitions, mmsize);
string bankShortName = System::file().getBaseName(_bank->getId());
CancellableIterator<Sequence>* cancellable_it = new CancellableIterator<Sequence> (*(_bank->iterator()));
LOCAL(cancellable_it);
// how many seqs we need to see
u_int64_t nbseq_sample = std::max ( u_int64_t (_config._estimateSeqNb * 0.05) ,u_int64_t( 1000000ULL) ) ;
/** We create a sequence iterator and give it a progress message */
Iterator<Sequence>* it_all_reads = createIterator<Sequence> (
cancellable_it,
_bank->getNbItems(),
Stringify::format (progressFormat0, bankShortName.c_str()).c_str()
);
LOCAL (it_all_reads);
BankStats bstatsDummy;
size_t currentPass = 0;
/** We compute a distribution of Superkmers from a part of the bank. */
SerialDispatcher serialDispatcher;
serialDispatcher.iterate (it_all_reads, SampleRepart<span> (
model,
_config,
1, // we don't care about the actual number of passes, we just use 1
0, // we don't care about the actual number of passes, the current one is 0
_config._nb_partitions,
NULL,
&(cancellable_it->_cancel), // will be set to true when iteration needs to be stopped
nbseq_sample, // how many sequences we need to see
bstatsDummy,
sample_info
));
// In case of multi bank counting, we get a sample from each bank
if(_bank->getCompositionNb() > 1){
SerialDispatcher serialDispatcher;
BankStats bstatsDummy;
u_int64_t nbseq_sample = (_config._estimateSeqNb / _config._nb_banks) * 0.02;
std::vector<Iterator<Sequence>*> itBanks = _bank->iterator()->getComposition();
bool dummyBoolean = false;
SampleRepart<span> sampleRepart (
model,
_config,
1, // we don't care about the actual number of passes, we just use 1
0, // we don't care about the actual number of passes, the current one is 0
_config._nb_partitions,
NULL,
&dummyBoolean, // will be set to true when iteration needs to be stopped
nbseq_sample, // how many sequences we need to see
bstatsDummy,
sample_info
);
for(size_t i=0; i<_config._nb_banks; i++){
TruncateIterator<Sequence>* it = new TruncateIterator<Sequence> (*(itBanks[i]), nbseq_sample);
LOCAL(it);
serialDispatcher.iterate (it, sampleRepart);
}
}
else{
CancellableIterator<Sequence>* cancellable_it = new CancellableIterator<Sequence> (*(_bank->iterator()));
LOCAL(cancellable_it);
// how many seqs we need to see
u_int64_t nbseq_sample = std::max ( u_int64_t (_config._estimateSeqNb * 0.05) ,u_int64_t( 1000000ULL) ) ;
cout << nbseq_sample << endl;
/** We create a sequence iterator and give it a progress message */
Iterator<Sequence>* it_all_reads = createIterator<Sequence> (
cancellable_it,
_bank->getNbItems(),
Stringify::format (progressFormat0, bankShortName.c_str()).c_str()
);
LOCAL (it_all_reads);
BankStats bstatsDummy;
size_t currentPass = 0;
/** We compute a distribution of Superkmers from a part of the bank. */
SerialDispatcher serialDispatcher;
serialDispatcher.iterate (it_all_reads, SampleRepart<span> (
model,
_config,
1, // we don't care about the actual number of passes, we just use 1
0, // we don't care about the actual number of passes, the current one is 0
_config._nb_partitions,
NULL,
&(cancellable_it->_cancel), // will be set to true when iteration needs to be stopped
nbseq_sample, // how many sequences we need to see
bstatsDummy,
sample_info
));
}
if (_config._minimizerType == 1)
{
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment