Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
vidjil
vidjil
Commits
2ff77480
Commit
2ff77480
authored
Aug 07, 2018
by
Ryan Herbert
Browse files
Merge branch 'vidjil_parser_prototype' into 'dev'
Vidjil parser prototype See merge request
!201
parents
68fd3a75
38ae3783
Pipeline
#36289
canceled with stages
in 1 minute and 8 seconds
Changes
7
Pipelines
3
Hide whitespace changes
Inline
Side-by-side
tools/fuse.py
View file @
2ff77480
#!/usr/bin/env python
#!/usr/bin/env python
3
# -*- coding: utf-8 -*-
### fuse.py - Vidjil utility to parse and regroup list of clones of different timepoints or origins
...
...
@@ -36,7 +36,7 @@ import os
import
datetime
import
subprocess
import
tempfile
from
operator
import
itemgetter
from
operator
import
itemgetter
,
le
from
utils
import
*
from
defs
import
*
from
collections
import
defaultdict
...
...
@@ -336,32 +336,47 @@ class ListWindows(VidjilJson):
self
.
load_vidjil
(
file_path
,
*
args
,
**
kwargs
)
else
:
self
.
load_clntab
(
file_path
,
*
args
,
**
kwargs
)
def loads(self, string, *args, **kwargs):
    '''init listWindows with a json string

    The string and every extra positional/keyword argument are handed over
    unchanged to loads_vidjil().'''
    self.loads_vidjil(string, *args, **kwargs)
def init_data(self, data):
    '''Take over the content of an already parsed object and normalise it.

    data: object whose `.d` mapping becomes `self.d`.

    After the call `self.d` has a list under "clones", a Diversity object
    under "diversity" and a 'distribution' entry in the reads, and
    `self.id_lengths` counts the clones per id length.'''
    self.d = data.d

    # Be robust against 'null' values for clones
    if not self.d["clones"]:
        self.d["clones"] = []

    # Always wrap the diversity in a Diversity object, even when absent
    if "diversity" not in self.d.keys():
        self.d["diversity"] = Diversity()
    else:
        self.d["diversity"] = Diversity(self.d["diversity"])

    reads = self.d['reads'].d
    if 'distribution' not in reads:
        reads['distribution'] = {}

    # Histogram: length of clone id -> number of clones
    self.id_lengths = defaultdict(int)

    print("%%")
    for clone in self:
        id_length = len(clone.d['id'])
        self.id_lengths[id_length] += 1
    print("%% lengths .vidjil -> ", self.id_lengths)

    # Producer information is optional: print nothing when a key is missing
    try:
        samples = self.d["samples"].d
        producer = samples["producer"]
        run_timestamp = samples["run_timestamp"]
        print("%% run_v ->", producer, run_timestamp)
    except KeyError:
        pass
def
load_vidjil
(
self
,
file_path
,
pipeline
,
verbose
=
True
):
'''init listWindows with data file
Detects and selects the parser according to the file extension.'''
# name = file_path.split("/")[-1]
extension
=
file_path
.
split
(
'.'
)[
-
1
]
if
verbose
:
print
(
"<=="
,
file_path
,
"
\t
"
,
end
=
' '
)
with
open
(
file_path
,
"r"
)
as
f
:
tmp
=
json
.
load
(
f
,
object_hook
=
self
.
toPython
)
self
.
d
=
tmp
.
d
# Be robust against 'null' values for clones
if
not
self
.
d
[
"clones"
]:
self
.
d
[
"clones"
]
=
[]
self
.
check_version
(
file_path
)
if
"diversity"
in
self
.
d
.
keys
():
self
.
d
[
"diversity"
]
=
Diversity
(
self
.
d
[
"diversity"
])
else
:
self
.
d
[
"diversity"
]
=
Diversity
()
if
'distribution'
not
in
self
.
d
[
'reads'
].
d
:
self
.
d
[
'reads'
].
d
[
'distribution'
]
=
{}
self
.
init_data
(
json
.
load
(
f
,
object_hook
=
self
.
toPython
))
self
.
check_version
(
file_path
)
if
pipeline
:
# renaming, private pipeline
f
=
'/'
.
join
(
file_path
.
split
(
'/'
)[
2
:
-
1
])
...
...
@@ -372,20 +387,13 @@ class ListWindows(VidjilJson):
f
=
file_path
if
verbose
:
print
()
time
=
os
.
path
.
getmtime
(
file_path
)
self
.
d
[
"samples"
].
d
[
"timestamp"
]
=
[
datetime
.
datetime
.
fromtimestamp
(
time
).
strftime
(
"%Y-%m-%d %H:%M:%S"
)]
self
.
id_lengths
=
defaultdict
(
int
)
print
(
"%%"
)
for
clone
in
self
:
self
.
id_lengths
[
len
(
clone
.
d
[
'id'
])]
+=
1
print
(
"%% lengths .vidjil -> "
,
self
.
id_lengths
)
try
:
print
(
"%% run_v ->"
,
self
.
d
[
"samples"
].
d
[
"producer"
],
self
.
d
[
"samples"
].
d
[
"run_timestamp"
])
except
KeyError
:
pass
def loads_vidjil(self, string, pipeline, verbose=True):
    '''init listWindows with a json string

    The string is decoded with self.toPython as object hook and the result
    is given to init_data(). `pipeline` and `verbose` are accepted but not
    used here.'''
    parsed = json.loads(string, object_hook=self.toPython)
    self.init_data(parsed)
def
getTop
(
self
,
top
):
result
=
[]
...
...
@@ -687,6 +695,7 @@ def main():
group_options
.
add_argument
(
'--compress'
,
'-c'
,
action
=
'store_true'
,
help
=
'compress point names, removing common substrings'
)
group_options
.
add_argument
(
'--pipeline'
,
'-p'
,
action
=
'store_true'
,
help
=
'compress point names (internal Bonsai pipeline)'
)
group_options
.
add_argument
(
'--ijson'
,
action
=
'store_true'
,
help
=
'use the ijson vidjilparser'
)
group_options
.
add_argument
(
'--output'
,
'-o'
,
type
=
str
,
default
=
'fused.vidjil'
,
help
=
'output file (%(default)s)'
)
group_options
.
add_argument
(
'--top'
,
'-t'
,
type
=
int
,
default
=
50
,
help
=
'keep only clones in the top TOP of some point (%(default)s)'
)
...
...
@@ -716,17 +725,39 @@ def main():
#filtre
f
=
[]
if
args
.
ijson
:
from
vidjilparser
import
VidjilParser
vparser
=
VidjilParser
()
vparser
.
addPrefix
(
'clones.item'
,
'clones.item.top'
,
le
,
args
.
top
)
for
path_name
in
files
:
jlist
=
ListWindows
()
jlist
.
load
(
path_name
,
args
.
pipeline
)
f
+=
jlist
.
getTop
(
args
.
top
)
if
args
.
ijson
:
json_clones
=
vparser
.
extract
(
path_name
)
clones
=
json
.
loads
(
json_clones
)
if
clones
[
"clones"
]
is
not
None
:
f
+=
[
c
[
'id'
]
for
c
in
clones
[
"clones"
]]
else
:
jlist
=
ListWindows
()
jlist
.
load
(
path_name
,
args
.
pipeline
)
f
+=
jlist
.
getTop
(
args
.
top
)
f
=
sorted
(
set
(
f
))
if
args
.
ijson
:
vparser
.
reset
()
vparser
.
addPrefix
(
''
)
vparser
.
addPrefix
(
'clones.item'
,
'clones.item.id'
,
(
lambda
x
,
y
:
x
in
y
),
f
)
if
args
.
multi
:
for
path_name
in
files
:
jlist
=
ListWindows
()
jlist
.
load
(
path_name
,
args
.
pipeline
)
jlist
.
build_stat
()
if
args
.
ijson
:
json_reads
=
vparser
.
extract
(
path_name
)
jlist
.
loads
(
json_reads
,
args
.
pipeline
)
else
:
jlist
.
load
(
path_name
,
args
.
pipeline
)
jlist
.
build_stat
()
print
(
"
\t
"
,
jlist
,
end
=
' '
)
...
...
@@ -742,9 +773,13 @@ def main():
print
(
"### Read and merge input files"
)
for
path_name
in
files
:
jlist
=
ListWindows
()
jlist
.
load
(
path_name
,
args
.
pipeline
)
jlist
.
build_stat
()
jlist
.
filter
(
f
)
if
args
.
ijson
:
json_reads
=
vparser
.
extract
(
path_name
)
jlist
.
loads
(
json_reads
,
args
.
pipeline
)
else
:
jlist
.
load
(
path_name
,
args
.
pipeline
)
jlist
.
build_stat
()
jlist
.
filter
(
f
)
w1
=
Window
(
1
)
w2
=
Window
(
2
)
...
...
@@ -780,7 +815,7 @@ def main():
for
i
in
range
(
len
(
jlist_fused
.
d
[
"clones"
]))
:
fasta
+=
">>"
+
str
(
i
)
+
"
\n
"
fasta
+=
jlist_fused
.
d
[
"clones"
][
i
].
d
[
"id"
]
+
"
\n
"
fasta_file
=
tempfile
.
NamedTemporaryFile
(
delete
=
False
)
fasta_file
=
tempfile
.
NamedTemporaryFile
(
mode
=
"w"
,
delete
=
False
)
fasta_file
.
write
(
fasta
)
try
:
out
=
subprocess
.
check_output
([
TOOL_SIMILARITY
,
"-j"
,
fasta_file
.
name
])
...
...
tools/should.py
View file @
2ff77480
...
...
@@ -275,7 +275,7 @@ def populate_variables(var):
for
v
in
var
:
try
:
key
,
var
=
v
.
split
(
'='
)
variables
.
append
(
(
'$'
+
key
,
var
)
)
variables
=
[
(
'$'
+
key
,
var
)
]
+
variables
except
IOError
:
raise
ShouldException
(
'Error in parsing variable definition: '
+
v
)
...
...
@@ -681,7 +681,7 @@ class TestSuite():
# Directive -- Options
if
l
.
startswith
(
DIRECTIVE_OPTIONS
):
opts
,
unknown
=
options
.
parse_known_args
(
l
[
len
(
DIRECTIVE_OPTIONS
):].
split
())
self
.
variables
+
=
populate_variables
(
opts
.
var
)
self
.
variables
=
populate_variables
(
opts
.
var
)
+
self
.
variables
if
opts
.
mod
:
self
.
modifiers
+=
''
.
join
(
opts
.
mod
)
continue
...
...
tools/tests/Makefile
View file @
2ff77480
...
...
@@ -7,6 +7,9 @@ should-get-tests/fuse-doc.tap: should-get-tests/fuse-doc.should-get force
should
:
should-get-tests/fuse-doc.tap
python3 ../should.py should-get-tests/
*
.should-get
should-ijson
:
python3 ../should.py
--var
FUSE_OPTIONS
=
--ijson
should-get-tests/fuse-
*
.should-get
doctests
:
@
echo
"*** Launching python tests..."
python
-m
doctest
-v
../fuse.py
...
...
tools/tests/should-get-tests/fuse-three-files.should-get
View file @
2ff77480
!LAUNCH: python ../../fuse.py ../../../algo/tests/data/results-two-clones-1-2.vidjil ../../../algo/tests/data/results-two-clones-1-2.vidjil ../../../algo/tests/data/results-two-clones-1-2.vidjil ; cat fused.vidjil
!LAUNCH: python ../../fuse.py
$FUSE_OPTIONS
../../../algo/tests/data/results-two-clones-1-2.vidjil ../../../algo/tests/data/results-two-clones-1-2.vidjil ../../../algo/tests/data/results-two-clones-1-2.vidjil ; cat fused.vidjil
$ Fuse the three files
2: ListWindows: .1000, 1000, 1000.
...
...
tools/tests/should-get-tests/fuse-two-files-0.should-get
View file @
2ff77480
!LAUNCH: python ../../fuse.py ../../../algo/tests/data/no_clones.vidjil ../../../algo/tests/data/results-two-clones-1-2.vidjil -o fused_no_clones.vidjil; cat fused_no_clones.vidjil
!LAUNCH: python ../../fuse.py
$FUSE_OPTIONS
../../../algo/tests/data/no_clones.vidjil ../../../algo/tests/data/results-two-clones-1-2.vidjil -o fused_no_clones.vidjil; cat fused_no_clones.vidjil
$ Fuse the three files
2: ListWindows: .0, 1000.
...
...
tools/tests/should.cfg
View file @
2ff77480
...
...
@@ -11,3 +11,5 @@ rZb
--var
VIDJIL_DIR=../../../
--var
FUSE_OPTIONS=
tools/vidjilparser.py
0 → 100644
View file @
2ff77480
#!/usr/bin/python
import json
from enum import Enum

import ijson.backends.yajl2_cffi as ijson
from six import string_types
class MatchingEvent(Enum):
    """Pair each closing parser event with its opening event.

    The member *name* is the closing event and the member *value* is the
    opening event it closes, e.g. ``MatchingEvent['end_map'].value`` is
    ``'start_map'``.
    """

    end_map = "start_map"
    end_array = "start_array"
class VidjilWriter(object):
    """Turn a stream of (prefix, event, value) parser events back into JSON text.

    Fragments are either returned immediately by write() or, while
    buffering is on, collected in `self.buffer`. When buffering ends, the
    buffer is returned only if `conserveBuffer` was set in the meantime;
    otherwise it is discarded.
    """

    # Types treated as text; written without `six` so it works on 2 and 3.
    _TEXT_TYPES = (str, type(u''))

    def __init__(self, pretty=False):
        """pretty: when true, add tab padding and newlines to the output."""
        self.pretty = pretty
        self.buffer = []             # fragments collected while buffering
        self.buffering = False       # True between startBuffering/endBuffering
        self.conserveBuffer = False  # True when the buffer must be kept

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def write(self, prefix, event, value, previous):
        """Serialise one event.

        Returns the JSON fragment, or "" when the fragment was stored in
        the buffer instead.
        """
        res = self._write(prefix, event, value, previous)
        if self.buffering:
            self.buffer.append(res)
            return ""
        return res

    def _quote(self, value):
        """Return `value` as a JSON string literal, quotes included.

        json.dumps takes care of every character that must be escaped
        (quote, backslash, control characters), not only newlines.
        """
        if not isinstance(value, self._TEXT_TYPES):
            value = '{}'.format(value)
        return json.dumps(value, ensure_ascii=False)

    def _write(self, prefix, event, value, previous):
        """Build the JSON fragment for one event.

        prefix:   dotted path of the event, used for the pretty padding
        event:    event name ('start_map', 'map_key', 'string', ...)
        value:    value carried by the event (None for structural events)
        previous: name of the previously written event ('' at the start),
                  used to decide whether a separating comma is needed
        """
        end = '\n' if self.pretty else ''

        if event == 'start_map':
            text = '{'
        elif event == 'end_map':
            text = '}'
        elif event == 'start_array':
            text = '['
        elif event == 'end_array':
            text = ']'
        elif event == 'map_key':
            text = self._quote(value) + ':'
            # The value follows its key on the same line
            end = ''
        elif event == 'string':
            text = self._quote(value)
        else:
            if event == 'boolean':
                value = str(value).lower()
            if value is None:
                text = 'null'
            else:
                text = '{}'.format(value)

        # A comma separates siblings: not after an opening event or a key,
        # and not before a closing event.
        if previous not in ['', 'map_key', 'start_map', 'start_array'] \
                and event not in ['end_map', 'end_array']:
            text = ',' + text

        padding = ''
        if self.pretty and previous != 'map_key' and len(prefix) > 0:
            # One tab per component of the dotted prefix
            padding = '\t' * len(prefix.split('.'))

        return padding + text + end

    def purgeBuffer(self):
        """Drop the buffered fragments and clear the conserve flag."""
        self.buffer = []
        self.conserveBuffer = False

    def writeBuffer(self):
        """Return the buffered fragments as one string and empty the buffer."""
        try:
            return ''.join(self.buffer)
        finally:
            self.purgeBuffer()

    def startBuffering(self):
        """Start collecting fragments instead of returning them."""
        self.conserveBuffer = False
        self.buffering = True

    def endBuffering(self):
        """Stop buffering.

        Returns the buffered text when conserveBuffer is set, "" otherwise;
        the buffer is emptied in both cases.
        """
        self.buffering = False
        if self.conserveBuffer:
            self.conserveBuffer = False
            return self.writeBuffer()
        self.purgeBuffer()
        return ""
class VidjilFileWriter(VidjilWriter):
    """VidjilWriter that also writes everything it emits to a file.

    Use it as a context manager: the file is opened on entry and closed on
    exit.
    """

    def __init__(self, filepath=None, pretty=False):
        """filepath: path of the output file; pretty: see VidjilWriter."""
        # Name this class in super() so VidjilWriter.__init__ really runs
        # and sets up the buffer state; forward `pretty` instead of
        # dropping it.
        super(VidjilFileWriter, self).__init__(pretty)
        self._filepath = filepath
        self.file = None

    def __enter__(self):
        # Text mode: the fragments produced by the writer are str, not bytes
        self.file = open(self._filepath, 'w')
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.file.close()

    def write(self, prefix, event, value, previous):
        """Serialise one event, write the fragment to the file and return it.

        While buffering, the fragment is "" and nothing reaches the file.
        """
        res = super(VidjilFileWriter, self).write(prefix, event, value, previous)
        self.file.write(res)
        return res

    def writeBuffer(self):
        """Flush the buffered fragments to the file and return them."""
        res = super(VidjilFileWriter, self).writeBuffer()
        self.file.write(res)
        return res
class Predicate(object):
    """Condition on one field: the field name must match and, when a
    comparator is given, `comparator(candidate, value)` must hold."""

    def __init__(self, field, comparator, value):
        """field:      name (prefix) the predicate applies to
        comparator: callable taking (candidate, value), or None to test
                    the field name only
        value:      reference value given as second argument to comparator
        """
        self.comp = comparator
        self.field = field
        self.value = value

    def compare(self, field, other):
        """Tell whether `field` is the expected one and `other` satisfies
        the comparator.

        A comparator that raises an ordinary exception (for example on
        incomparable types) counts as a failed comparison. Exceptions that
        do not derive from Exception, such as KeyboardInterrupt or
        SystemExit, are not swallowed.
        """
        if self.comp is None:
            return field == self.field
        try:
            return field == self.field and self.comp(other, self.value)
        except Exception:
            return False
class VidjilParser(object):
    """Stream a JSON file with ijson and re-emit only selected parts.

    The parts to keep are declared with addPrefix(); extract() then returns
    the filtered JSON text produced by the writer.
    """

    def __init__(self, writer=None):
        """writer: object used to serialise events (default: a VidjilWriter)."""
        if writer is not None:
            self._writer = writer
        else:
            self._writer = VidjilWriter()
        self._model_prefixes = []  # distinct (prefix, event) pairs of the model
        self.prefixes = []         # (prefix, Predicate) pairs to extract

    def initModel(self, model_path):
        """Record every distinct (prefix, event) pair found in a model file."""
        with open(model_path, 'rb') as model:
            parser = ijson.parse(model)
            for prefix, event, value in parser:
                if (prefix, event) not in self._model_prefixes:
                    self._model_prefixes.append((prefix, event))

    def validate(self, filepath):
        """Return True when every (prefix, event) pair of the model occurs
        in the file at `filepath`."""
        with open(filepath, 'rb') as vfile:
            parser = ijson.parse(vfile)
            model = list(self._model_prefixes)
            for prefix, event, value in parser:
                pair = (prefix, event)
                if pair in model:
                    model.remove(pair)
            return len(model) == 0

    def writer(self):
        """Return the writer used to serialise the events."""
        return self._writer

    def addPrefix(self, prefix, conditional=None, comp=None, value=None):
        """Declare a prefix to extract.

        prefix:      dotted path of the element to keep
        conditional: field the condition applies to (default: the prefix)
        comp, value: comparator and reference value given to the Predicate;
                     with comp=None only the field name is tested
        """
        if conditional is None:
            conditional = prefix
        self.prefixes.append((prefix, Predicate(conditional, comp, value)))

    def reset(self):
        """Forget the declared prefixes and empty the writer's buffer."""
        self.prefixes = []
        self._writer.purgeBuffer()

    def extract(self, filepath):
        """Return the filtered JSON text extracted from the file at `filepath`."""
        # _extract() consumes the whole event stream before returning, so
        # the file can be closed as soon as it is done.
        with open(filepath, 'rb') as vidjilfile:
            parser = ijson.parse(vidjilfile)
            with self.writer() as writer:
                return self._extract(parser, writer)

    def isStartEvent(self, event):
        """True for the events that open a map or an array."""
        return event in ['start_map', 'start_array']

    def isEndEvent(self, event):
        """True for the closing events and for number/string/boolean values."""
        return event in ['end_map', 'end_array', 'number', 'string', 'boolean']

    def isMatching(self, mbuffer, other):
        """Tell whether the (prefix, event) pair `other` closes the element
        opened by the (prefix, event) pair `mbuffer`."""
        if other[1] not in MatchingEvent.__members__:
            return False
        return (mbuffer[0] == other[0]) and (mbuffer[1] == MatchingEvent[other[1]].value)

    def _extract(self, parser, writer):
        """Filter the events of `parser` and return the resulting JSON text.

        parser: iterable of (prefix, event, value) triples
        writer: writer returned by entering self.writer()
        """
        previous = ''
        chunks = []  # joined once at the end instead of repeated string +=
        bufferStart = (None, None)
        for prefix, event, value in parser:
            # Keep the event when it lies under a declared prefix, or when it
            # is on the way to one (the declared prefix starts with the
            # event's prefix and, if the event has a value, with that value).
            wanted = any(
                prefix.startswith(item[0])
                or (item[0].startswith(prefix)
                    and (value is None or item[0].startswith(str(value))))
                for item in self.prefixes)
            if not wanted:
                continue

            # An element exactly at a declared prefix is buffered until we
            # know whether its condition holds.
            bufferOn = any(prefix == item[0] for item in self.prefixes) \
                and self.isStartEvent(event)
            if bufferOn:
                bufferStart = (prefix, event)
                saved_previous = previous
                self._writer.startBuffering()

            if not self._writer.conserveBuffer \
                    and any(item[1].compare(prefix, value) for item in self.prefixes):
                self._writer.conserveBuffer = True

            chunks.append(writer.write(prefix, event, value, previous))
            previous = event

            # Stop buffering when the buffered element is closed, or as soon
            # as its condition is known to hold.
            if (self.writer().buffering
                    and (self.isEndEvent(event)
                         and self.isMatching(bufferStart, (prefix, event))
                         or self._writer.conserveBuffer)):
                if not self._writer.conserveBuffer:
                    # The element is dropped: act as if it was never seen
                    previous = saved_previous
                chunks.append(self._writer.endBuffering())
        return ''.join(chunks)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment