Mentions légales du service

Skip to content

Index building with multiprocessing fails on OSX

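On an OSX install, trying to build the index fails when multiprocessing is used. Multiprocessing itself is available, but the stack trace shows that the worker process is started through `popen_spawn_posix`, which pickles the process object and its arguments, and the lambda defined locally inside `dna_iter` cannot be pickled.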

Stack trace

$ python3.13 -m vizitig index build mini_bcalm
INFO::2025-03-04 14:42:59,133::-        ::0:00:00/0:00:00       :: start        :: Index (RustIndex) Build (/Users/loup/.vizitig/index/mini_bcalm/RustIndex) from DNA.
WARNING::2025-03-04 14:42:59,133::-     ::0:00:00/0:00:00       :: Index /Users/loup/.vizitig/index/mini_bcalm/RustIndex, erasing and rebuilding  :: Index (RustIndex) Build (/Users/loup/.vizitig/index/mini_bcalm/RustIndex) from DNA.
INFO::2025-03-04 14:42:59,151::-        ::0:00:00/0:00:00       :: done         :: Index (RustIndex) Build (/Users/loup/.vizitig/index/mini_bcalm/RustIndex) from DNA.
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/loup/inria/src/vizitig/vizitig/vizitig/__main__.py", line 15, in <module>
    main()
    ~~~~^^
  File "/Users/loup/inria/src/vizitig/vizitig/vizitig/__main__.py", line 11, in main
    args.func(args)
    ~~~~~~~~~^^^^^^
  File "/Users/loup/inria/src/vizitig/vizitig/vizitig/index/__init__.py", line 198, in main
    build_kmer_index(args.name, args.type, args.shard_number, small_k=args.small_k)
    ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/loup/inria/src/vizitig/vizitig/vizitig/index/__init__.py", line 133, in build_kmer_index
    GraphIndex.build_dna(gname, IndexType, shard_number, small_k=small_k)
    ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/loup/inria/src/vizitig/vizitig/vizitig/index/classes.py", line 519, in build_dna
    return cls._build_dna(
           ~~~~~~~~~~~~~~^
        path,
        ^^^^^
    ...<3 lines>...
        k,
        ^^
    )
    ^
  File "/Users/loup/inria/src/vizitig/vizitig/vizitig/index/classes.py", line 347, in _build_dna
    proc.start()
    ~~~~~~~~~~^^
  File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/process.py", line 121, in start
    self._popen = self._Popen(self)
                  ~~~~~~~~~~~^^^^^^
  File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/context.py", line 224, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)
           ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/context.py", line 289, in _Popen
    return Popen(process_obj)
  File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/popen_spawn_posix.py", line 32, in __init__
    super().__init__(process_obj)
    ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/popen_fork.py", line 20, in __init__
    self._launch(process_obj)
    ~~~~~~~~~~~~^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/popen_spawn_posix.py", line 47, in _launch
    reduction.dump(process_obj, fp)
    ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.13/3.13.0_1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^
AttributeError: Can't get local object 'GraphIndex.build_dna.<locals>.dna_iter.<locals>.<lambda>'

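Current workaround: disable multiprocessing (build the shards sequentially when the process count is 1) and replace the local lambda in `dna_iter` with a module-level function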

diff --git a/vizitig/index/classes.py b/vizitig/index/classes.py
index 3754d2c..73877c7 100644
--- a/vizitig/index/classes.py
+++ b/vizitig/index/classes.py
@@ -232,7 +232,7 @@ class KmerSetIndex(BaseKmerIndex, Set[Kmer]):
             logger.warning(f"Index {path}, erasing and rebuilding")
             shutil.rmtree(path)
             path.mkdir()
-
+       
         process = []
         for shard_index in range(shard_number):
             shard_path = path / str(shard_index)
@@ -265,22 +265,28 @@ class KmerSetIndex(BaseKmerIndex, Set[Kmer]):
             shutil.rmtree(path)
         path.mkdir()
 
-        process = []
-        for shard_index in range(shard_number):
-            shard_path = path / str(shard_index)
-            proc = Process(
-                target=index_type.build_dna,
-                args=(shard_path, shard_index, shard_number, dna_iter(), k),
-            )
-            proc.start()
-            process.append(proc)
-            if (shard_index + 1) % VIZITIG_PROC_NUMBER == 0:
-                for proc in process:
-                    proc.join()
-                process = []
+        if VIZITIG_PROC_NB == 1:
+            for shard_index in range(shard_number):
+                shard_path = path / str(shard_index)
+                index_type.build_dna(shard_path, shard_index, shard_number, dna_iter(), k)
 
-        for proc in process:
-            proc.join()
+        else:
+            process = []
+            for shard_index in range(shard_number):
+                shard_path = path / str(shard_index)
+                proc = Process(
+                    target=index_type.build_dna,
+                    args=(shard_path, shard_index, shard_number, dna_iter(), k),
+                )
+                proc.start()
+                process.append(proc)
+                if (shard_index + 1) % VIZITIG_PROC_NUMBER == 0:
+                    for proc in process:
+                        proc.join()
+                    process = []
+
+            for proc in process:
+                proc.join()
 
         return cls(path, index_type, k)
 
@@ -337,22 +343,28 @@ class KmerIndex(BaseKmerIndex, Mapping[Kmer, int]):
             shutil.rmtree(path)
         path.mkdir()
 
-        process = []
-        for shard_index in range(shard_number):
-            shard_path = path / str(shard_index)
-            proc = Process(
-                target=index_type.build_dna,
-                args=(shard_path, shard_index, shard_number, dna_iter(), k),
-            )
-            proc.start()
-            process.append(proc)
-            if (shard_index + 1) % VIZITIG_PROC_NUMBER == 0:
-                for proc in process:
-                    proc.join()
-                process = []
+        if VIZITIG_PROC_NUMBER == 1:
+            for shard_index in range(shard_number):
+                shard_path = path / str(shard_index)
+                index_type.build_dna(shard_path, shard_index, shard_number, dna_iter(), k)
 
-        for proc in process:
-            proc.join()
+        else:
+            process = []
+            for shard_index in range(shard_number):
+                shard_path = path / str(shard_index)
+                proc = Process(
+                    target=index_type.build_dna,
+                    args=(shard_path, shard_index, shard_number, dna_iter(), k),
+                )
+                proc.start()
+                process.append(proc)
+                if (shard_index + 1) % VIZITIG_PROC_NUMBER == 0:
+                    for proc in process:
+                        proc.join()
+                    process = []
+
+            for proc in process:
+                proc.join()
 
         return cls(path, index_type, k)
 
@@ -423,6 +435,9 @@ def smallk_graph_index_path(gname: str, small_k, index_type: Type[Shard]):
     )
     return index_path_name(gname) / "small_k" / str(small_k) / index_type.__name__
 
+def f(e):
+    return (DNA(e[1]), e[0])
+
 
 class GraphIndex(KmerIndex):
     _gname: str | None = None
@@ -511,7 +526,7 @@ class GraphIndex(KmerIndex):
 
         def dna_iter():
             it = G.nbunch_iter(data="sequence")
-            return map(lambda e: (DNA(e[1]), e[0]), it)
+            return map(f, it)
 
         with GraphLogger(
             gname, f"Index ({index_type.__name__}) Build ({path}) from DNA"