Commit df5023d0 authored by Rayan Chikhi
Browse files

small mphf bugfixes

parent 6a69a25c
......@@ -25,7 +25,7 @@
// We include required definitions
/********************************************************************************/
#define DEBUG(a) //a
#define DEBUG(a) a
// this is to control whether we instrument code for timing or not (shouldn't affect performance, in principle)
#define TIME(a) a
......@@ -120,7 +120,7 @@ void Simplifications<Node,Edge,GraphDataVariant>::simplify()
do
{
nbECRemovedPreviously = nbECRemoved;
nbECRemoved = removeErroneousConnections(); // now we're using bulges removal, not bubbles (to follow SPAdes)
//nbECRemoved = removeErroneousConnections(); // now we're using bulges removal, not bubbles (to follow SPAdes)
if (ECRemoval.size() != 0)
ECRemoval += " + ";
ECRemoval += to_string(nbECRemoved);
......@@ -128,6 +128,7 @@ void Simplifications<Node,Edge,GraphDataVariant>::simplify()
while (((nbECRemovedPreviously == 0 && nbECRemoved > 0 ) || nbECRemoved >= 10)
&& _nbECRemovalPasses < 20);
return; // FIXME!!!!!!!
nbECRemoved = 0; // reset EC removal counter
do
......@@ -191,7 +192,7 @@ double Simplifications<Node,Edge,GraphDataVariant>::getMeanAbundanceOfNeighbors(
meanNeighborsCoverage += simplePathCoverage;
nbNeighbors++;
DEBUG(cout << endl << "got simple path coverage for neighbor " << nbNeighbors << " : " << " meancoverage: " <<simplePathCoverage << " over " << pathLen << " kmers" << endl);
//DEBUG(cout << endl << "got simple path coverage for neighbor " << nbNeighbors << " : " << " meancoverage: " <<simplePathCoverage << " over " << pathLen << " kmers" << endl);
}
meanNeighborsCoverage /= nbNeighbors;
return meanNeighborsCoverage;
......@@ -482,7 +483,7 @@ unsigned long Simplifications<Node,Edge,GraphDataVariant>::removeTips()
// so mark the origin as non interesting! (big speed up)
if ( ! (isShortTopological || isShortRCTC) )
{
interestingNodes[index] = false; // unflag the original end-of-tip node // FIXME
interestingNodes[index] = false; // unflag the original end-of-tip node. // there was a fixme note here, i've removed it because i don't see why, but let's keep that in mind next time i investigate the algo
TIME(__sync_fetch_and_add(&timeSimplePathLong, diff_wtime(start_simplepath_t,end_simplepath_t)));
TIME(auto end_thread_t=get_wtime());
TIME(__sync_fetch_and_add(&timeAll, diff_wtime(start_thread_t,end_thread_t)));
......@@ -1076,8 +1077,8 @@ unsigned long Simplifications<Node,Edge,GraphDataVariant>::removeErroneousConnec
DEBUG(cout << endl << "putative EC node: " << _graph.toString (node) << endl);
__sync_fetch_and_add(&nbECCandidates,1);
/** We follow the outcoming simple paths (so, if it's outdegree 2, we follow the outcoming simple paths.
* to get their length and last neighbor */
/** We follow the outgoing simple paths
* (so, if it's outdegree 2, we follow them) to get their length and last neighbor */
typename GraphTemplate<Node,Edge,GraphDataVariant>::template Vector<Edge> neighbors = _graph.neighborsEdge(node, dir);
// do everything for each possible short simple path that is neighbor of that node
......@@ -1161,7 +1162,7 @@ unsigned long Simplifications<Node,Edge,GraphDataVariant>::removeErroneousConnec
DEBUG(cout << endl << "EC of length " << pathLen << " FOUND: " << _graph.toString (node) << endl);
for (typename vector<Node>::iterator itVecNodes = nodes.begin(); itVecNodes != nodes.end(); itVecNodes++)
{
//DEBUG(cout << endl << "deleting EC node: " << _graph.toString (*itVecNodes) << endl);
DEBUG(cout << endl << "deleting EC node: " << _graph.toString (*itVecNodes) << endl);
nodesDeleter.markToDelete(*itVecNodes); // parallel version
}
......
......@@ -92,7 +92,7 @@ public:
{
MPHFKind mphfKind = MPHF_BOOPHF; // TODO: test with emphf also
if (MPHFAlgorithm<>::AbundanceMap::enabled == false) { return; }
if (MPHFAlgorithm<>::AbundanceMap::enabled == false) { std::cout << "can't test mphf, it is disabled" << std::endl; return; }
size_t kmerSize = 11;
size_t nks = 1;
......@@ -114,16 +114,25 @@ public:
/** We launch DSK. */
sortingCount.execute();
if (sortingCount.getSolidCounts()->getNbItems() != (int)(strlen(seqs[0]) - kmerSize + 1))
std::cout << "problem with sortingcount nb items: " << sortingCount.getSolidCounts()->getNbItems() << " != " << (int)(strlen(seqs[0]) - kmerSize + 1) << std::endl;
CPPUNIT_ASSERT (sortingCount.getSolidCounts()->getNbItems() == (int)(strlen(seqs[0]) - kmerSize + 1) );
/** We get the storage instance. */
Storage* storage = sortingCount.getStorage();
/** We create a mphf instance. */
MPHFAlgorithm<> mphf (mphfKind, storage->getGroup("dsk"), "mphf", sortingCount.getSolidCounts(), sortingCount.getSolidKmers(), 1, true);
/** We actually execute the mphf construction. */
mphf.execute();
if (mphf.getAbundanceMap() == 0)
std::cout << "could not get abundance map" << std::endl;
CPPUNIT_ASSERT (mphf.getAbundanceMap() != 0);
......@@ -131,6 +140,9 @@ public:
// below are quick tests
if (theMap.size() != 130)
std::cout << "incorrect map size:" << theMap.size() << " != 130" << std::endl;
CPPUNIT_ASSERT (theMap.size() == 130);
typedef /*typename*/ Kmer<32>::ModelCanonical Model;
......@@ -141,6 +153,8 @@ public:
theMap[kmer.value()] = 4;
if (theMap[kmer.value()] != 4)
std::cout << "bad map value " << theMap[kmer.value()] << " != 4" << std::endl;
CPPUNIT_ASSERT (theMap[kmer.value()] == 4);
}
......
......@@ -725,7 +725,7 @@ we need this 2-functors scheme because HashFunctors won't work with unordered_ma
uint64_t lookup(elem_t elem)
{
auto hashes = _hasher(elem);
//auto hashes = _hasher(elem);
uint64_t non_minimal_hp,minimal_hp;
......@@ -810,7 +810,7 @@ we need this 2-functors scheme because HashFunctors won't work with unordered_ma
{
uint64_t idxl2 = __sync_fetch_and_add(& _idxLevelsetLevelFastmode,1);
// if the index exceeds the expected size of setLevelFastmode, fall back to slow mode; this should not happen if the hash is good and the odds are in our favor
if(idxl2> setLevelFastmode.size())
if(idxl2>= setLevelFastmode.size())
_fastmode = false;
else
setLevelFastmode[idxl2] = val; // create set for fast mode
......@@ -924,7 +924,7 @@ we need this 2-functors scheme because HashFunctors won't work with unordered_ma
is.read(reinterpret_cast<char *>(&final_hash_size), sizeof(size_t));
for(int ii=0; ii<final_hash_size; ii++)
for(unsigned int ii=0; ii<final_hash_size; ii++)
{
elem_t key;
uint64_t value;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment