Commit 223f9743 authored by Rayan Chikhi's avatar Rayan Chikhi
Browse files

this is THE big commit for GATB-core of november 2015. changelog is:

- Graph simplifications are moved from Minia to GATB-core. Call them using: graph.simplify()

- New data structure for faster neighborhood queries. Enable it using: graph.precomputeAdjacency()

- As a consequence of optimizations, it is now much faster to call indegree(node), outdegree(node) and
degree(node,size_t &in,size_t &out) rather than neighbors(node, direction).size() if you're only interested
in degrees.

- The LargeInt (the k-mer type) constructor has been removed, for speed reasons. Be warned that this might
break existing code that implicitly relies on 0-initialization of kmers, but such problems can hopefully be detected using valgrind.

- Graph becomes GraphTemplate<Node,Edge,GraphDataVariant>, and compatibility is preserved via typedefs

- This makes it possible to define GraphFast<span>, a graph object which only holds Nodes and Edges for a single k-mer size,
as opposed to a boost::variant over multiple k-mer sizes as before. It is faster.

- Graph API has been changed:

neighbors<Node> becomes neighbors,
neighbors<Edge> becomes neighborsEdge,
iterator<Node> becomes iterator,
iterator<BranchingNode> becomes iteratorBranching

- The change above was necessary, because it is difficult to specialize nested templates in C++. Actually, not all templated
graph functions have been un-templated (because some aren't used in conjunction with GraphFast). There is still work to do.

- Because Graph is now a template, the following classes have also been templatized: BranchingAlgorithm, all Frontline classes, all Terminator classes.
Typedefs have been created to preserve compatibility.

- For faster tools, it is now advised to follow Minia.cpp's functor technique and use GraphFast<span> instead of Graph. However,
Graph should still work and offer the same performance as before.

- GraphData is moved from Graph.cpp to Graph.hpp

- MPHF index of a node is now cached in the Node object

- because of that, 'const Node&' should now be just 'Node&', everywhere.

- added a function graph.disableNodeState() to disable recording node state (normal, deleted, marked).
Graph then avoids making MPHF queries when checking if a node exists (also involved in neighbors() queries).
This makes the bloom flavor of graphs faster, but once precomputeAdjacency() is called, it is not relevant anymore.

- added scripts/parse_gcc_output.py for visual inspection of gcc compilation/link errors involving graph templates

- slightly modified src/CMakeLists so that tools may set their own KSIZE_DEFAULT_LIST (e.g. Minia)

- speedup to LargeInt::hash1()

- a few unit tests have been added, as well as one benchmark significantly improved: bench_graph.cpp

- added minimizer stuff that was missing from last commit (some bugfixes, and also specialization to LargeInt<1>)

- LargeInt's are not instances of ArrayData anymore. Instead, ArrayData is a member. This is faster.
parent d6baaf4e
......@@ -133,11 +133,50 @@ if (use_new_cxx)
set (LIBRARY_COMPILE_DEFINITIONS "${LIBRARY_COMPILE_DEFINITIONS} -DWITH_LAMBDA_EXPRESSION ${CXX_STD_VERSION}")
endif()
# detect SSE
# from https://github.com/rurban/smhasher/blob/master/CMakeLists.txt
# i do not see much performance gain for now, but let's keep that code here, might be useful later.
# list of performance gain observed:
# popcount in Graph::countNeighbors
# Probe the host CPU feature list and set SSE2_TRUE / SSE42_TRUE accordingly.
# Each REGEX REPLACE reduces the whole feature dump to the bare feature name
# when it is present (and leaves the input unchanged otherwise), so the
# following COMPARE EQUAL yields true only if the feature was found.
# ${CPUINFO} is quoted so that an empty probe result (command failed or
# printed nothing) is passed as one empty argument instead of making
# STRING(REGEX REPLACE) fail for lack of an input argument.
IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO)
STRING(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE)
STRING(REGEX REPLACE "^.*(sse4_2).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "sse4_2" "${SSE_THERE}" SSE42_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE
CPUINFO)
# [^S] keeps "SSSE3"-style tokens from being mistaken for SSE2.
STRING(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE)
STRING(REGEX REPLACE "^.*(SSE4.2).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "SSE4.2" "${SSE_THERE}" SSE42_TRUE)
ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux")
# Publish the detection results as the cache variables SSE2_FOUND and
# SSE4_2_FOUND, which the rest of this file tests.
# NOTE(review): these are CACHE entries set without FORCE, so a value already
# present in CMakeCache.txt from an earlier configure run is not overwritten.
IF (SSE2_TRUE)
set(SSE2_FOUND true CACHE BOOL "SSE2 available")
ELSE (SSE2_TRUE)
set(SSE2_FOUND false CACHE BOOL "SSE2 not available")
ENDIF (SSE2_TRUE)
IF (SSE42_TRUE)
set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available")
ELSE (SSE42_TRUE)
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 not available")
ENDIF (SSE42_TRUE)
# I could use LIBRARY_COMPILE_DEFINITIONS, but it's actually passed to "add_definitions", which isn't made for passing compilation flags, only -D ones.
# When SSE4.2 was detected, enable the matching instruction sets for both the
# C and the C++ compiler. Each language's flags are extended from its own
# previous value: building CMAKE_CXX_FLAGS from CMAKE_C_FLAGS would discard
# any C++ flags set earlier and append the SSE options twice.
IF(SSE4_2_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse2 -msse4.2 -mpopcnt")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2 -msse4.2 -mpopcnt")
message ("-- SSE 4.2 detected")
ENDIF()
# When MPHF support is requested, add -DWITH_MPHF to the library compile
# definitions and print a banner so the choice is visible in the configure log.
if (use_mphf)
set (LIBRARY_COMPILE_DEFINITIONS "${LIBRARY_COMPILE_DEFINITIONS} -DWITH_MPHF ")
message ("-------------------------------------------------------------------------------------")
message ("-- WILL COMPILE MPHF! (COMPILER VERSION IS HIGH ENOUGH) ")
message ("-------------------------------------------------------------------------------------")
# EMPHF_USE_POPCOUNT is only defined when SSE4.2 was detected above.
# NOTE(review): presumably this lets the MPHF code use the hardware popcount
# enabled by -mpopcnt - confirm against the emphf sources.
if (SSE4_2_FOUND)
set (LIBRARY_COMPILE_DEFINITIONS "${LIBRARY_COMPILE_DEFINITIONS} -DEMPHF_USE_POPCOUNT")
endif()
endif()
# We also set a flag for TR1 management
......
#!/usr/bin/env python
#usage: make -j 4 2> MAKE ; cat MAKE | python parse_gcc_output.py | less -R
"""Shorten and colorize gcc/ld diagnostics that involve GATB graph templates.

Reads compiler/linker output from the files named on the command line (or
from stdin), replaces the fully expanded GATB template type names by short
colored aliases, highlights a few linker keywords, and writes the result to
stdout.
"""
import fileinput
import re
import sys


class bcolors:
    """ANSI escape sequences used to color the output."""
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


# (text to find, color, text to show instead), applied in this exact order.
# Order matters: the long Edge/Node spellings must be rewritten before the
# shorter names they contain (e.g. LargeInt<1>) are.
_SUBSTITUTIONS = [
    # default KSIZE_LIST
    ("gatb::core::debruijn::impl::Edge_t<gatb::core::debruijn::impl::Node_t<gatb::core::tools::math::IntegerTemplate<boost::mpl::vector4<mpl_::int_<32>, mpl_::int_<64>, mpl_::int_<96>, mpl_::int_<128> > > > >", bcolors.OKBLUE, "Edge"),
    # when compiled with KSIZE_LIST=32
    ("gatb::core::debruijn::impl::Edge_t<gatb::core::debruijn::impl::Node_t<gatb::core::tools::math::IntegerTemplate<boost::mpl::vector1<mpl_::int_<32> > > > >", bcolors.OKBLUE, "Edge"),
    ("gatb::core::debruijn::impl::Edge_t<gatb::core::debruijn::impl::Node_t<gatb::core::tools::math::LargeInt<1> > >", bcolors.OKBLUE, "EdgeFast<1>"),
    ("gatb::core::debruijn::impl::Node_t<gatb::core::tools::math::IntegerTemplate<boost::mpl::vector4<mpl_::int_<32>, mpl_::int_<64>, mpl_::int_<96>, mpl_::int_<128> > > >", bcolors.OKBLUE, "Node"),
    ("gatb::core::debruijn::impl::Node_t<gatb::core::tools::math::IntegerTemplate<boost::mpl::vector1<mpl_::int_<32> > > >", bcolors.OKBLUE, "Node"),
    ("gatb::core::debruijn::impl::Node_t<gatb::core::tools::math::LargeInt<1> >", bcolors.OKBLUE, "NodeFast<1>"),
    ("boost::variant<boost::detail::variant::over_sequence<boost::mpl::v_item<gatb::core::debruijn::impl::GraphData<128ul>, boost::mpl::v_item<gatb::core::debruijn::impl::GraphData<96ul>, boost::mpl::v_item<gatb::core::debruijn::impl::GraphData<64ul>, boost::mpl::v_item<gatb::core::debruijn::impl::GraphData<32ul>, boost::mpl::vector0<mpl_::na>, 0>, 0>, 0>, 0> >> ", bcolors.OKBLUE, "GraphDataVariant"),
    ("boost::variant<boost::detail::variant::over_sequence<boost::mpl::v_item<gatb::core::debruijn::impl::GraphData<32ul>, boost::mpl::vector0<mpl_::na>, 0> >>", bcolors.OKBLUE, "GraphDataVariant"),
    ("boost::variant<gatb::core::debruijn::impl::GraphData<32ul>>", bcolors.OKBLUE, "GraphDataVariantFast<1>"),
    ("boost::variant<gatb::core::debruijn::impl::GraphData<32ul> >", bcolors.OKBLUE, "GraphDataVariantFast<1>"),
    ("boost::variant<boost::detail::variant::over_sequence<boost::mpl::v_item<gatb::core::debruijn::impl::GraphData<128ul>, boost::mpl::v_item<gatb::core::debruijn::impl::GraphData<96ul>, boost::mpl::v_item<gatb::core::debruijn::impl::GraphData<64ul>, boost::mpl::v_item<gatb::core::debruijn::impl::GraphData<32ul>, boost::mpl::vector0<mpl_::na>, 0>, 0>, 0>, 0> > >", bcolors.OKBLUE, "GraphDataVariant"),
    ("gatb::core::tools::math::LargeInt<1>", bcolors.OKBLUE, "LargeInt<1>"),
    ("gatb::core::debruijn::impl::GraphTemplate", bcolors.OKBLUE, "GraphTemplate"),
    ("undefined reference to ", bcolors.FAIL, "undefined reference to "),
    ("In function", bcolors.WARNING, "In function"),
    ("gatb::core::tools::math::IntegerTemplate<boost::mpl::vector4<mpl_::int_<32>, mpl_::int_<64>, mpl_::int_<96>, mpl_::int_<128> > > ", bcolors.OKBLUE, "Integer"),
]

# Matches "<prefix>:(.text...)" so the section detail can be collapsed to "(..)".
_TEXT_SECTION_RE = re.compile(r"([^\:]*):\(.text.[^\)]*\)")


def colorize(line):
    """Return `line` with GATB template names shortened and colored.

    Applies every literal substitution of _SUBSTITUTIONS in order, then
    collapses "<prefix>:(.text...)" locations to a green "<prefix>:(..)".
    The line terminator, if any, is left untouched.
    """
    for needle, color, label in _SUBSTITUTIONS:
        line = line.replace(needle, color + label + bcolors.ENDC)
    return _TEXT_SECTION_RE.sub(bcolors.OKGREEN + r"\1:(..)" + bcolors.ENDC, line)


def main():
    """Filter the input files (or stdin) line by line onto stdout."""
    for line in fileinput.input():
        # Lines keep their own newline, so write them without adding one;
        # unlike the `print line,` statement this works on Python 2 and 3.
        sys.stdout.write(colorize(line))


if __name__ == "__main__":
    main()
......@@ -3,11 +3,13 @@
################################################################################
# We define the default values for kmer sizes.
#list (APPEND KSIZE_DEFAULT_LIST 32 64)
list (APPEND KSIZE_DEFAULT_LIST 32 64 96 128)
#list (APPEND KSIZE_DEFAULT_LIST 32 64 96 128 160 192 224 256)
if (NOT KSIZE_DEFAULT_LIST) # only if the tool cmake didn't override it
#list (APPEND KSIZE_DEFAULT_LIST 32 64)
list (APPEND KSIZE_DEFAULT_LIST 32 64 96 128)
#list (APPEND KSIZE_DEFAULT_LIST 32 64 96 128 160 192 224 256)
endif()
# We use a default list of kmer sizes if no one is provided by the user
# We use a default list of kmer sizes if none is provided by the user (via the cmake -DKSIZE_LIST option)
if (NOT KSIZE_LIST)
set (gatb-core-klist ${KSIZE_DEFAULT_LIST})
else()
......
......@@ -56,9 +56,9 @@ static const char* progressFormat2 = "Graph: nb branching found : %-9d ";
** RETURN :
** REMARKS :
*********************************************************************/
template<size_t span>
BranchingAlgorithm<span>::BranchingAlgorithm (
const Graph& graph,
template<size_t span, typename Node, typename Edge, typename GraphDataVariant>
BranchingAlgorithm<span, Node, Edge, GraphDataVariant>::BranchingAlgorithm (
const GraphTemplate<Node, Edge, GraphDataVariant>& graph,
tools::storage::impl::Storage& storage,
tools::misc::BranchingKind kind,
size_t nb_cores,
......@@ -77,8 +77,8 @@ BranchingAlgorithm<span>::BranchingAlgorithm (
** RETURN :
** REMARKS :
*********************************************************************/
template<size_t span>
BranchingAlgorithm<span>::BranchingAlgorithm (tools::storage::impl::Storage& storage)
template<size_t span, typename Node, typename Edge, typename GraphDataVariant>
BranchingAlgorithm<span, Node, Edge, GraphDataVariant>::BranchingAlgorithm (tools::storage::impl::Storage& storage)
: Algorithm("branching", 0, 0), _graph(0), _storage(storage), _branchingCollection(0)
{
setBranchingCollection (& storage("branching").getCollection<Count> ("nodes"));
......@@ -95,8 +95,8 @@ BranchingAlgorithm<span>::BranchingAlgorithm (tools::storage::impl::Storage& sto
** RETURN :
** REMARKS :
*********************************************************************/
template<size_t span>
BranchingAlgorithm<span>::~BranchingAlgorithm ()
template<size_t span, typename Node, typename Edge, typename GraphDataVariant>
BranchingAlgorithm<span, Node, Edge, GraphDataVariant>::~BranchingAlgorithm ()
{
setBranchingCollection(0);
}
......@@ -109,8 +109,8 @@ BranchingAlgorithm<span>::~BranchingAlgorithm ()
** RETURN :
** REMARKS :
*********************************************************************/
template<size_t span>
IOptionsParser* BranchingAlgorithm<span>::getOptionsParser ()
template<size_t span, typename Node, typename Edge, typename GraphDataVariant>
IOptionsParser* BranchingAlgorithm<span, Node, Edge, GraphDataVariant>::getOptionsParser ()
{
IOptionsParser* parser = new OptionsParser ("branching");
......@@ -139,26 +139,26 @@ struct FunctorData
map <InOut_t, size_t> topology;
};
template<typename Count, typename Type>
template<typename Count, typename Type, typename Node, typename Edge, typename GraphDataVariant>
struct FunctorNodes
{
const Graph* graph;
const GraphTemplate<Node, Edge, GraphDataVariant>* graph;
ThreadObject<FunctorData<Count,Type> >& functorData;
FunctorNodes (const Graph* graph, ThreadObject<FunctorData<Count,Type> >& functorData)
FunctorNodes (const GraphTemplate<Node, Edge, GraphDataVariant>* graph, ThreadObject<FunctorData<Count,Type> >& functorData)
: graph(graph), functorData(functorData) {}
void operator() (const Node& node)
void operator() (Node& node)
{
// We get branching nodes neighbors for the current branching node.
Graph::Vector<Node> successors = graph->successors <Node> (node);
Graph::Vector<Node> predecessors = graph->predecessors<Node> (node);
typename GraphTemplate<Node, Edge, GraphDataVariant>::template Vector<Node> successors = graph->template successors <Node> (node);
typename GraphTemplate<Node, Edge, GraphDataVariant>::template Vector<Node> predecessors = graph->template predecessors<Node> (node);
if ( ! (successors.size()==1 && predecessors.size()==1) )
{
FunctorData<Count,Type>& data = functorData();
data.branchingNodes.push_back (Count (node.kmer.get<Type>(), node.abundance));
data.branchingNodes.push_back (Count (node.template getKmer<Type>(), node.abundance));
data.topology [make_pair(predecessors.size(), successors.size())] ++;
}
......@@ -207,11 +207,11 @@ public:
/*********************************************************************/
template<size_t span>
void BranchingAlgorithm<span>::execute ()
template <size_t span, typename Node, typename Edge, typename GraphDataVariant>
void BranchingAlgorithm<span, Node, Edge, GraphDataVariant>::execute ()
{
/** We get an iterator over all graph nodes. */
Graph::Iterator<Node> itNodes = _graph->iterator<Node>();
typename GraphTemplate<Node, Edge, GraphDataVariant>::template Iterator<Node> itNodes = _graph->GraphTemplate<Node, Edge, GraphDataVariant>::iterator();
/** We create a custom listener that refines the finish method in order it does nothing...
* => we define our own 'finishPostponed' method that is called when all the information is ok. */
......@@ -232,7 +232,7 @@ void BranchingAlgorithm<span>::execute ()
/** We get a synchronized object on the data handled by functors. */
ThreadObject <FunctorData<Count,Type> > functorData;
FunctorNodes<Count,Type> functorNodes (this->_graph, functorData);
FunctorNodes<Count,Type, Node, Edge, GraphDataVariant> functorNodes (this->_graph, functorData);
/** We iterate the nodes. */
tools::dp::IDispatcher::Status status = getDispatcher()->iterate (iter, functorNodes);
......@@ -256,7 +256,7 @@ void BranchingAlgorithm<span>::execute ()
/** We use a cache to improve IO performances. */
CollectionCache<Count> branchingCache (*_branchingCollection, 16*1024, 0);
Type checksum = 0;
Type checksum; checksum.setVal( 0);
/** We initialize our priority queue. */
priority_queue <BranchingIteratorPair, vector<BranchingIteratorPair>, Compare<BranchingIteratorPair> > pq;
......
......@@ -50,7 +50,7 @@ namespace impl {
* Actually, this class is mainly used in the debruijn::impl::Graph class as a fourth step for
* the de Bruijn graph creation.
*/
template <size_t span=KMER_DEFAULT_SPAN>
template <size_t span=KMER_DEFAULT_SPAN, typename Node=Node_t<>, typename Edge=Edge_t<Node_t<>>, typename GraphDataVariant_t=GraphDataVariant>
class BranchingAlgorithm : public gatb::core::tools::misc::impl::Algorithm
{
public:
......@@ -68,7 +68,7 @@ public:
* \param[in] options : extra options
*/
BranchingAlgorithm (
const Graph& graph,
const GraphTemplate<Node, Edge, GraphDataVariant_t>& graph,
tools::storage::impl::Storage& storage,
tools::misc::BranchingKind kind,
size_t nb_cores = 0,
......@@ -98,7 +98,7 @@ public:
private:
const Graph* _graph;
const GraphTemplate<Node, Edge, GraphDataVariant_t>* _graph;
tools::storage::impl::Storage& _storage;
......
......@@ -40,18 +40,19 @@ namespace gatb { namespace core { namespace debruijn { namespace impl {
** REMARKS :
*********************************************************************/
// a frontline is a set of nodes having equal depth in the BFS
Frontline::Frontline (
template <typename Node, typename Edge, typename GraphDataVariant>
FrontlineTemplate<Node,Edge,GraphDataVariant>::FrontlineTemplate (
Direction direction,
const Graph& graph,
Terminator& terminator,
const Node& startingNode
const GraphTemplate<Node,Edge,GraphDataVariant>& graph,
TerminatorTemplate<Node,Edge,GraphDataVariant>& terminator,
Node& startingNode
) :
_direction(direction), _graph(graph), _terminator(terminator), _depth(0),
_all_involved_extensions(0)
{
_already_frontlined.insert (startingNode.kmer);
_frontline.push (NodeNt (startingNode, kmer::NUCL_UNKNOWN));
_frontline.push (NodeNt<Node>(startingNode, kmer::NUCL_UNKNOWN));
}
/*********************************************************************
......@@ -63,12 +64,13 @@ Frontline::Frontline (
** REMARKS :
*********************************************************************/
// a frontline is a set of nodes having equal depth in the BFS
Frontline::Frontline (
template <typename Node, typename Edge, typename GraphDataVariant>
FrontlineTemplate<Node,Edge,GraphDataVariant>::FrontlineTemplate (
Direction direction,
const Graph& graph,
Terminator& terminator,
const Node& startingNode,
const Node& previousNode,
const GraphTemplate<Node,Edge,GraphDataVariant>& graph,
TerminatorTemplate<Node,Edge,GraphDataVariant>& terminator,
Node& startingNode,
Node& previousNode,
std::set<Node>* all_involved_extensions
) :
_direction(direction), _graph(graph), _terminator(terminator), _depth(0),
......@@ -77,7 +79,7 @@ Frontline::Frontline (
_already_frontlined.insert (startingNode.kmer);
_already_frontlined.insert (previousNode.kmer);
_frontline.push (NodeNt (startingNode, kmer::NUCL_UNKNOWN));
_frontline.push (NodeNt<Node>(startingNode, kmer::NUCL_UNKNOWN));
}
/*********************************************************************
......@@ -88,29 +90,30 @@ Frontline::Frontline (
** RETURN :
** REMARKS :
*********************************************************************/
bool Frontline::go_next_depth()
template <typename Node, typename Edge, typename GraphDataVariant>
bool FrontlineTemplate<Node,Edge,GraphDataVariant>::go_next_depth()
{
// extend all nodes in this frontline simultaneously, creating a new frontline
stopped_reason=NONE;
queue_nodes new_frontline;
while (!_frontline.empty())
while (!this->_frontline.empty())
{
/** We get the first item of the queue and remove it from the queue. */
NodeNt current_node = _frontline.front();
NodeNt<Node> current_node = this->_frontline.front();
_frontline.pop();
/** We check whether we use this node or not. we always use the first node at depth 0 */
if (_depth > 0 && check(current_node.node) == false) { return false; }
/** We loop the neighbors edges of the current node. */
Graph::Vector<Edge> edges = _graph.neighbors<Edge> (current_node.node, _direction);
typename GraphTemplate<Node,Edge,GraphDataVariant>::template Vector<Edge> edges = _graph.neighborsEdge (current_node.node, _direction);
for (size_t i=0; i<edges.size(); i++)
{
/** Shortcuts. */
const Edge& edge = edges[i];
const Node& neighbor = edge.to;
Edge& edge = edges[i];
Node& neighbor = edge.to;
// test if that node hasn't already been explored
if (_already_frontlined.find (neighbor.kmer) != _already_frontlined.end()) { continue; }
......@@ -119,7 +122,7 @@ bool Frontline::go_next_depth()
//if (_terminator.isEnabled() && _terminator.is_branching (neighbor) && _terminator.is_marked_branching(neighbor)) // legacy, before MPHFTerminator
if (_terminator.isEnabled() && _terminator.is_marked(neighbor)) // to accomodate MPHFTerminator
{
stopped_reason=Frontline::MARKED;
stopped_reason=FrontlineTemplate<Node,Edge,GraphDataVariant>::MARKED;
return false;
}
......@@ -127,7 +130,7 @@ bool Frontline::go_next_depth()
kmer::Nucleotide from_nt = (current_node.nt == kmer::NUCL_UNKNOWN) ? edge.nt : current_node.nt;
/** We add the new node to the new front line. */
new_frontline.push (NodeNt (neighbor, from_nt));
new_frontline.push (NodeNt<Node> (neighbor, from_nt));
/** We memorize the new node. */
_already_frontlined.insert (neighbor.kmer);
......@@ -151,14 +154,15 @@ bool Frontline::go_next_depth()
** RETURN :
** REMARKS :
*********************************************************************/
FrontlineBranching::FrontlineBranching (
template <typename Node, typename Edge, typename GraphDataVariant>
FrontlineBranchingTemplate<Node,Edge,GraphDataVariant>::FrontlineBranchingTemplate (
Direction direction,
const Graph& graph,
Terminator& terminator,
const Node& startingNode,
const Node& previousNode,
const GraphTemplate<Node,Edge,GraphDataVariant>& graph,
TerminatorTemplate<Node,Edge,GraphDataVariant>& terminator,
Node& startingNode,
Node& previousNode,
std::set<Node>* all_involved_extensions
) : Frontline (direction,graph,terminator,startingNode,previousNode,all_involved_extensions)
) : FrontlineTemplate<Node,Edge,GraphDataVariant>(direction,graph,terminator,startingNode,previousNode,all_involved_extensions)
{
}
......@@ -170,12 +174,13 @@ FrontlineBranching::FrontlineBranching (
** RETURN :
** REMARKS :
*********************************************************************/
FrontlineBranching::FrontlineBranching (
template <typename Node, typename Edge, typename GraphDataVariant>
FrontlineBranchingTemplate<Node,Edge,GraphDataVariant>::FrontlineBranchingTemplate (
Direction direction,
const Graph& graph,
Terminator& terminator,
const Node& startingNode
) : Frontline(direction,graph,terminator,startingNode)
const GraphTemplate<Node,Edge,GraphDataVariant>& graph,
TerminatorTemplate<Node,Edge,GraphDataVariant>& terminator,
Node& startingNode
) : FrontlineTemplate<Node,Edge,GraphDataVariant>(direction,graph,terminator,startingNode)
{
}
......@@ -188,13 +193,14 @@ FrontlineBranching::FrontlineBranching (
** REMARKS :
*********************************************************************/
// new code, not in monument, to detect any in-branching longer than 3k
bool FrontlineBranching::check (const Node& node)
template <typename Node, typename Edge, typename GraphDataVariant>
bool FrontlineBranchingTemplate<Node,Edge,GraphDataVariant>::check (Node& node)
{
/** We reverse the node for the inbranching path. */
Node actual = _graph.reverse(node);
Node actual = this->_graph.reverse(node);
/** We loop the neighbors nodes of the current node. */
Graph::Vector<Node> neighbors = _graph.neighbors<Node> (actual, (_direction));
typename GraphTemplate<Node,Edge,GraphDataVariant>::template Vector<Node> neighbors = this->_graph.neighbors(actual, (this->_direction));
for (size_t i=0; i<neighbors.size(); i++)
{
......@@ -204,31 +210,31 @@ bool FrontlineBranching::check (const Node& node)
// only check in-branching from kmers not already frontlined
// which, for the first extension, includes the previously traversed kmer (previous_kmer)
// btw due to avance() invariant, previous_kmer is always within a simple path
if (_already_frontlined.find (neighbor.kmer) != _already_frontlined.end()) { continue; }
if (this->_already_frontlined.find (neighbor.kmer) != this->_already_frontlined.end()) { continue; }
// create a new frontline inside this frontline to check for large in-branching (i know, we need to go deeper, etc..)
Frontline frontline (_direction, _graph, _terminator, neighbor, actual, _all_involved_extensions);
FrontlineTemplate<Node,Edge,GraphDataVariant> frontline (this->_direction, this->_graph, this->_terminator, neighbor, actual, this->_all_involved_extensions);
do {
bool should_continue = frontline.go_next_depth();
if (!should_continue)
{
stopped_reason=Frontline::IN_BRANCHING_OTHER;
this->stopped_reason = FrontlineTemplate<Node,Edge,GraphDataVariant>::IN_BRANCHING_OTHER;
break;
}
// don't allow a depth > 3k
if (frontline.depth() > 3 * _graph.getKmerSize())
if (frontline.depth() > 3 * this->_graph.getKmerSize())
{
stopped_reason=Frontline::IN_BRANCHING_DEPTH;
this->stopped_reason = FrontlineTemplate<Node,Edge,GraphDataVariant>::IN_BRANCHING_DEPTH;
break;
}
// don't allow a breadth too large
if (frontline.size() > 10)
{
stopped_reason=Frontline::IN_BRANCHING_BREADTH;
this->stopped_reason = FrontlineTemplate<Node,Edge,GraphDataVariant>::IN_BRANCHING_BREADTH;
break;
}
......@@ -253,32 +259,34 @@ bool FrontlineBranching::check (const Node& node)
** RETURN :
** REMARKS :
*********************************************************************/
FrontlineReachable::FrontlineReachable(
template <typename Node, typename Edge, typename GraphDataVariant>
FrontlineReachableTemplate<Node,Edge,GraphDataVariant>::FrontlineReachableTemplate(
Direction direction,
const Graph& graph,
Terminator& terminator,
const Node& startingNode,
const Node& previousNode,
const GraphTemplate<Node,Edge,GraphDataVariant>& graph,
TerminatorTemplate<Node,Edge,GraphDataVariant>& terminator,
Node& startingNode,
Node& previousNode,
std::set<Node>* all_involved_extensions
) : Frontline (direction,graph,terminator,startingNode,previousNode,all_involved_extensions)
) : FrontlineTemplate<Node,Edge,GraphDataVariant> (direction,graph,terminator,startingNode,previousNode,all_involved_extensions)
{
}
bool FrontlineReachable::check (const Node& node)
template <typename Node, typename Edge, typename GraphDataVariant>
bool FrontlineReachableTemplate<Node,Edge,GraphDataVariant>::check (Node& node)
{
/** We reverse the node for the inbranching path. */
Node actual = _graph.reverse(node);
Node actual = this->_graph.reverse(node);
/** neighbors nodes of the current node. */
Graph::Vector<Node> neighbors = _graph.neighbors<Node> (actual, (_direction));
typename GraphTemplate<Node,Edge,GraphDataVariant>::template Vector<Node> neighbors = this->_graph.neighbors(actual, (this->_direction));
for (size_t i=0; i<neighbors.size(); i++)
{
/** Shortcut. */
Node& neighbor = neighbors[i];
if (_already_frontlined.find (neighbor.kmer) == _already_frontlined.end()) {
if (this->_already_frontlined.find (neighbor.kmer) == this->_already_frontlined.end()) {
checkLater.insert(neighbor);
//return false; // strict
}
......@@ -286,11 +294,12 @@ bool FrontlineReachable::check (const Node& node)
return true;
}
bool FrontlineReachable::isReachable()
template <typename Node, typename Edge, typename GraphDataVariant>
bool FrontlineReachableTemplate<Node,Edge,GraphDataVariant>::isReachable()
{
for (set<Node>::iterator itNode = checkLater.begin(); itNode != checkLater.end(); itNode++)
for (typename std::set<Node>::iterator itNode = checkLater.begin(); itNode != checkLater.end(); itNode++)
{
if (_already_frontlined.find((*itNode).kmer) == _already_frontlined.end())
if (this->_already_frontlined.find((*itNode).kmer) == this->_already_frontlined.end())
return false;
}
......
......@@ -34,6 +34,7 @@ namespace impl {
/********************************************************************************/
// types using in advanced traversal functions
template <typename Node>
struct NodeNt
{
Node node;
......@@ -53,30 +54,31 @@ struct NodeNt
/********************************************************************************/
// auxiliary class that is used by MonumentTraversal and deblooming
class Frontline
template <typename Node, typename Edge, typename GraphDataVariant>
class FrontlineTemplate
{
public:
/** Constructor. */
Frontline (
FrontlineTemplate (
Direction direction,
const Graph& graph,
Terminator& terminator,
const Node& startingNode,
const Node& previousNode,
const GraphTemplate<Node,Edge,GraphDataVariant>& graph,
TerminatorTemplate<Node,Edge,GraphDataVariant>& terminator,
Node& startingNode,
Node& previousNode,
std::set<Node>* all_involved_extensions = 0
);
/** Constructor. */
Frontline (
FrontlineTemplate (
Direction direction,
const Graph& graph,
Terminator& terminator,
const Node& startingNode
const GraphTemplate<Node,Edge,GraphDataVariant>& graph,
TerminatorTemplate<Node,Edge,GraphDataVariant>& terminator,
Node& startingNode
);
/** */
virtual ~Frontline() {}
virtual ~FrontlineTemplate() {}
/** */
bool go_next_depth();
......@@ -84,7 +86,7 @@ public:
size_t size () const { return _frontline.size(); }
size_t depth () const { return _depth; }