Index building with multiprocessing fails on OSX
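On an OSX install, building an index fails: multiprocessing launches workers through `spawn` there, which has to pickle the `Process` object and its arguments, and the local lambda used in `dna_iter` cannot be pickled (see the `AttributeError` at the end of the stack trace below).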
Stack trace
$ python3.13 -m vizitig index build mini_bcalm
INFO::2025-03-04 14:42:59,133::- ::0:00:00/0:00:00 :: start :: Index (RustIndex) Build (/Users/loup/.vizitig/index/mini_bcalm/RustIndex) from DNA.
WARNING::2025-03-04 14:42:59,133::- ::0:00:00/0:00:00 :: Index /Users/loup/.vizitig/index/mini_bcalm/RustIndex, erasing and rebuilding :: Index (RustIndex) Build (/Users/loup/.vizitig/index/mini_bcalm/RustIndex) from DNA.
INFO::2025-03-04 14:42:59,151::- ::0:00:00/0:00:00 :: done :: Index (RustIndex) Build (/Users/loup/.vizitig/index/mini_bcalm/RustIndex) from DNA.
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/Users/loup/inria/src/vizitig/vizitig/vizitig/__main__.py", line 15, in <module>
main()
~~~~^^
File "/Users/loup/inria/src/vizitig/vizitig/vizitig/__main__.py", line 11, in main
args.func(args)
~~~~~~~~~^^^^^^
File "/Users/loup/inria/src/vizitig/vizitig/vizitig/index/__init__.py", line 198, in main
build_kmer_index(args.name, args.type, args.shard_number, small_k=args.small_k)
~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/loup/inria/src/vizitig/vizitig/vizitig/index/__init__.py", line 133, in build_kmer_index
GraphIndex.build_dna(gname, IndexType, shard_number, small_k=small_k)
~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/loup/inria/src/vizitig/vizitig/vizitig/index/classes.py", line 519, in build_dna
return cls._build_dna(
~~~~~~~~~~~~~~^
path,
^^^^^
...<3 lines>...
k,
^^
)
^
File "/Users/loup/inria/src/vizitig/vizitig/vizitig/index/classes.py", line 347, in _build_dna
proc.start()
~~~~~~~~~~^^
File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
~~~~~~~~~~~^^^^^^
File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/context.py", line 289, in _Popen
return Popen(process_obj)
File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/popen_fork.py", line 20, in __init__
self._launch(process_obj)
~~~~~~~~~~~~^^^^^^^^^^^^^
File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^
AttributeError: Can't get local object 'GraphIndex.build_dna.<locals>.dna_iter.<locals>.<lambda>'
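Current workaround: build the shards sequentially (no multiprocessing) when `VIZITIG_PROC_NUMBER == 1`, and replace the local lambda with a module-level function: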
diff --git a/vizitig/index/classes.py b/vizitig/index/classes.py
index 3754d2c..73877c7 100644
--- a/vizitig/index/classes.py
+++ b/vizitig/index/classes.py
@@ -232,7 +232,7 @@ class KmerSetIndex(BaseKmerIndex, Set[Kmer]):
logger.warning(f"Index {path}, erasing and rebuilding")
shutil.rmtree(path)
path.mkdir()
-
+
process = []
for shard_index in range(shard_number):
shard_path = path / str(shard_index)
@@ -265,22 +265,28 @@ class KmerSetIndex(BaseKmerIndex, Set[Kmer]):
shutil.rmtree(path)
path.mkdir()
- process = []
- for shard_index in range(shard_number):
- shard_path = path / str(shard_index)
- proc = Process(
- target=index_type.build_dna,
- args=(shard_path, shard_index, shard_number, dna_iter(), k),
- )
- proc.start()
- process.append(proc)
- if (shard_index + 1) % VIZITIG_PROC_NUMBER == 0:
- for proc in process:
- proc.join()
- process = []
+ if VIZITIG_PROC_NUMBER == 1:
+ for shard_index in range(shard_number):
+ shard_path = path / str(shard_index)
+ index_type.build_dna(shard_path, shard_index, shard_number, dna_iter(), k)
- for proc in process:
- proc.join()
+ else:
+ process = []
+ for shard_index in range(shard_number):
+ shard_path = path / str(shard_index)
+ proc = Process(
+ target=index_type.build_dna,
+ args=(shard_path, shard_index, shard_number, dna_iter(), k),
+ )
+ proc.start()
+ process.append(proc)
+ if (shard_index + 1) % VIZITIG_PROC_NUMBER == 0:
+ for proc in process:
+ proc.join()
+ process = []
+
+ for proc in process:
+ proc.join()
return cls(path, index_type, k)
@@ -337,22 +343,28 @@ class KmerIndex(BaseKmerIndex, Mapping[Kmer, int]):
shutil.rmtree(path)
path.mkdir()
- process = []
- for shard_index in range(shard_number):
- shard_path = path / str(shard_index)
- proc = Process(
- target=index_type.build_dna,
- args=(shard_path, shard_index, shard_number, dna_iter(), k),
- )
- proc.start()
- process.append(proc)
- if (shard_index + 1) % VIZITIG_PROC_NUMBER == 0:
- for proc in process:
- proc.join()
- process = []
+ if VIZITIG_PROC_NUMBER == 1:
+ for shard_index in range(shard_number):
+ shard_path = path / str(shard_index)
+ index_type.build_dna(shard_path, shard_index, shard_number, dna_iter(), k)
- for proc in process:
- proc.join()
+ else:
+ process = []
+ for shard_index in range(shard_number):
+ shard_path = path / str(shard_index)
+ proc = Process(
+ target=index_type.build_dna,
+ args=(shard_path, shard_index, shard_number, dna_iter(), k),
+ )
+ proc.start()
+ process.append(proc)
+ if (shard_index + 1) % VIZITIG_PROC_NUMBER == 0:
+ for proc in process:
+ proc.join()
+ process = []
+
+ for proc in process:
+ proc.join()
return cls(path, index_type, k)
@@ -423,6 +435,9 @@ def smallk_graph_index_path(gname: str, small_k, index_type: Type[Shard]):
)
return index_path_name(gname) / "small_k" / str(small_k) / index_type.__name__
+def f(e):
+ return (DNA(e[1]), e[0])
+
class GraphIndex(KmerIndex):
_gname: str | None = None
@@ -511,7 +526,7 @@ class GraphIndex(KmerIndex):
def dna_iter():
it = G.nbunch_iter(data="sequence")
- return map(lambda e: (DNA(e[1]), e[0]), it)
+ return map(f, it)
with GraphLogger(
gname, f"Index ({index_type.__name__}) Build ({path}) from DNA"