Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
declearn2
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Terraform modules
Monitor
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Magnet
DecLearn
declearn2
Commits
474f5447
Verified
Commit
474f5447
authored
1 year ago
by
ANDREY Paul
Browse files
Options
Downloads
Patches
Plain Diff
Expand 'InMemoryDataset' unit tests.
parent
d8d078ee
No related branches found
Branches containing commit
No related tags found
1 merge request
!57
Improve tests coverage and fix test-digged bugs
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
test/dataset/test_inmemory.py
+290
-2
290 additions, 2 deletions
test/dataset/test_inmemory.py
with
290 additions
and
2 deletions
test/dataset/test_inmemory.py
+
290
−
2
View file @
474f5447
...
@@ -15,13 +15,20 @@
...
@@ -15,13 +15,20 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
Unit tests
objects
for
'
declearn.dataset.InMemoryDataset
'"""
"""
Unit tests for
'
declearn.dataset.InMemoryDataset
'"""
import
json
import
os
import
os
import
numpy
as
np
import
pandas
as
pd
import
pytest
import
pytest
import
scipy.sparse
# type: ignore
import
sklearn.datasets
# type: ignore
from
declearn.dataset
import
InMemoryDataset
from
declearn.dataset
import
InMemoryDataset
from
declearn.dataset.utils
import
save_data_array
from
declearn.test_utils
import
make_importable
from
declearn.test_utils
import
make_importable
# relative imports from `dataset_testbase.py`
# relative imports from `dataset_testbase.py`
...
@@ -32,6 +39,9 @@ with make_importable(os.path.dirname(__file__)):
...
@@ -32,6 +39,9 @@ with make_importable(os.path.dirname(__file__)):
SEED
=
0
SEED
=
0
### Shared-tests-based tests, revolving around batches generation.
class
InMemoryDatasetTestToolbox
(
DatasetTestToolbox
):
class
InMemoryDatasetTestToolbox
(
DatasetTestToolbox
):
"""
Toolbox for InMemoryDataset
"""
"""
Toolbox for InMemoryDataset
"""
...
@@ -44,10 +54,288 @@ class InMemoryDatasetTestToolbox(DatasetTestToolbox):
...
@@ -44,10 +54,288 @@ class InMemoryDatasetTestToolbox(DatasetTestToolbox):
@pytest.fixture
(
name
=
"
toolbox
"
)
@pytest.fixture
(
name
=
"
toolbox
"
)
def
fixture_
dataset
()
->
DatasetTestToolbox
:
def
fixture_
toolbox
()
->
DatasetTestToolbox
:
"""
Fixture to access a InMemoryDatasetTestToolbox.
"""
"""
Fixture to access a InMemoryDatasetTestToolbox.
"""
return
InMemoryDatasetTestToolbox
()
return
InMemoryDatasetTestToolbox
()
class
TestInMemoryDataset
(
DatasetTestSuite
):
class
TestInMemoryDataset
(
DatasetTestSuite
):
"""
Unit tests for declearn.dataset.InMemoryDataset.
"""
"""
Unit tests for declearn.dataset.InMemoryDataset.
"""
### InMemoryDataset-specific unit tests.
@pytest.fixture(name="dataset")
def dataset_fixture() -> pd.DataFrame:
    """Build the small, seeded toy dataset shared by the tests below.

    Returns
    -------
    pd.DataFrame
        A 10-row frame with columns "col_a" (0..9 as float32), "col_b"
        (float32 normal draws), "col_y" (labels drawn from {0, 1, 2})
        and "col_w" (normal draws divided by their sum).
    """
    n_rows = 10
    rng = np.random.default_rng(seed=SEED)
    # Draw order (weights, then col_b, then col_y) is kept as-is so that
    # the seeded values remain the same.
    raw_weights = rng.normal(size=n_rows).astype("float32")
    col_a = np.arange(n_rows, dtype="float32")
    col_b = rng.normal(size=n_rows).astype("float32")
    col_y = rng.choice(3, size=n_rows, replace=True)
    col_w = raw_weights / sum(raw_weights)
    columns = {
        "col_a": col_a,
        "col_b": col_b,
        "col_y": col_y,
        "col_w": col_w,
    }
    return pd.DataFrame(columns)
class TestInMemoryDatasetInit:
    """Unit tests for 'declearn.dataset.InMemoryDataset' instantiation."""

    def test_from_inputs(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test instantiating from in-memory (x, y, w) data objects."""
        # Carve the frame into a Series, a numpy array and a sparse matrix.
        targets = dataset.pop("col_y")
        weights = dataset.pop("col_w").values
        features = scipy.sparse.coo_matrix(dataset.values)
        # The dataset is expected to reference these very objects.
        dset = InMemoryDataset(data=features, target=targets, s_wght=weights)
        assert dset.feats is features
        assert dset.target is targets
        assert dset.weights is weights

    def test_from_dataframe(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test instantiating from a pandas DataFrame and column names."""
        dset = InMemoryDataset(data=dataset, target="col_y", s_wght="col_w")
        # Without 'f_cols', both non-target, non-weight columns are features.
        expected_feats = dataset[["col_a", "col_b"]]
        assert np.allclose(dset.feats, expected_feats)
        assert np.allclose(dset.target, dataset["col_y"])  # type: ignore
        assert np.allclose(dset.weights, dataset["col_w"])  # type: ignore

    def test_from_dataframe_with_fcols_str(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test instantiating from a pandas DataFrame with string f_cols."""
        dset = InMemoryDataset(
            data=dataset,
            target="col_y",
            s_wght="col_w",
            f_cols=["col_a"],
        )
        expected_feats = dataset[["col_a"]]
        assert np.allclose(dset.feats, expected_feats)
        assert np.allclose(dset.target, dataset["col_y"])  # type: ignore
        assert np.allclose(dset.weights, dataset["col_w"])  # type: ignore

    def test_from_dataframe_with_fcols_int(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test instantiating from a pandas DataFrame with integer f_cols."""
        dset = InMemoryDataset(
            data=dataset,
            target="col_y",
            s_wght="col_w",
            f_cols=[1],
        )
        # Index 1 is expected to select the second column, "col_b".
        expected_feats = dataset[["col_b"]]
        assert np.allclose(dset.feats, expected_feats)
        assert np.allclose(dset.target, dataset["col_y"])  # type: ignore
        assert np.allclose(dset.weights, dataset["col_w"])  # type: ignore

    def test_from_csv_file(
        self,
        dataset: pd.DataFrame,
        tmp_path: str,
    ) -> None:
        """Test instantiating from a single csv file and column names."""
        # NOTE(review): pytest's 'tmp_path' is documented as a pathlib.Path;
        # the 'str' annotation is kept unchanged here - confirm intent.
        # Write the frame to disk, then build the dataset from that file.
        csv_path = os.path.join(tmp_path, "dataset.csv")
        dataset.to_csv(csv_path, index=False)
        dset = InMemoryDataset(data=csv_path, target="col_y", s_wght="col_w")
        # Compare the loaded contents with the source frame.
        expected_feats = dataset[["col_a", "col_b"]]
        assert np.allclose(dset.feats, expected_feats)
        assert np.allclose(dset.target, dataset["col_y"])  # type: ignore
        assert np.allclose(dset.weights, dataset["col_w"])  # type: ignore

    def test_from_csv_file_feats_only(
        self,
        dataset: pd.DataFrame,
        tmp_path: str,
    ) -> None:
        """Test instantiating from a single csv file without y nor w."""
        # Write the frame to disk, then build the dataset from that file.
        csv_path = os.path.join(tmp_path, "dataset.csv")
        dataset.to_csv(csv_path, index=False)
        dset = InMemoryDataset(data=csv_path)
        # Every column should be a feature; no target nor weights are set.
        assert np.allclose(dset.feats, dataset)
        assert dset.target is None
        assert dset.weights is None

    def test_from_data_files(
        self,
        dataset: pd.DataFrame,
        tmp_path: str,
    ) -> None:
        """Test instantiating from a collection of files."""
        # Carve the frame into a Series, a numpy array and a sparse matrix.
        targets = dataset.pop("col_y")
        weights = dataset.pop("col_w").values
        features = scipy.sparse.coo_matrix(dataset.values)
        # Dump each object to its own file.
        x_path = save_data_array(os.path.join(tmp_path, "data_x"), features)
        y_path = save_data_array(os.path.join(tmp_path, "data_y"), targets)
        w_path = save_data_array(os.path.join(tmp_path, "data_w"), weights)
        # Test that an InMemoryDataset can be instantiated from these files.
        dset = InMemoryDataset(data=x_path, target=y_path, s_wght=w_path)
        # Features: same sparse type and same values.
        assert isinstance(dset.feats, scipy.sparse.coo_matrix)
        assert np.allclose(dset.feats.toarray(), features.toarray())
        # Targets: same pandas type and same values.
        assert isinstance(dset.target, pd.Series)
        assert np.allclose(dset.target, targets)
        # Weights: same numpy type and same values.
        assert isinstance(dset.weights, np.ndarray)
        assert np.allclose(dset.weights, weights)  # type: ignore

    def test_from_svmlight(
        self,
        dataset: pd.DataFrame,
        tmp_path: str,
    ) -> None:
        """Test instantiating from a svmlight file."""
        svm_path = os.path.join(tmp_path, "dataset.svmlight")
        # Export the features (as a sparse matrix) and labels to svmlight.
        sklearn.datasets.dump_svmlight_file(
            scipy.sparse.coo_matrix(dataset[["col_a", "col_b"]].values),
            dataset["col_y"].values,
            svm_path,
        )
        dset = InMemoryDataset.from_svmlight(svm_path)
        # Features are expected as a CSR matrix with the original values.
        assert isinstance(dset.data, scipy.sparse.csr_matrix)
        expected_feats = dataset[["col_a", "col_b"]].values
        assert np.allclose(dset.data.toarray(), expected_feats)
        # Targets are expected as a numpy array; no weights are defined.
        assert isinstance(dset.target, np.ndarray)
        assert np.allclose(dset.target, dataset["col_y"].to_numpy())
        assert dset.weights is None
class TestInMemoryDatasetProperties:
    """Unit tests for 'declearn.dataset.InMemoryDataset' properties."""

    def test_classes_array(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test (authorized) classes access with numpy array targets."""
        labels = dataset["col_y"].values
        dset = InMemoryDataset(
            data=dataset,
            target=labels,
            expose_classes=True,
        )
        assert dset.classes == {0, 1, 2}

    def test_classes_series(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test (authorized) classes access with pandas Series targets."""
        dset = InMemoryDataset(
            data=dataset,
            target="col_y",
            expose_classes=True,
        )
        assert dset.classes == {0, 1, 2}

    def test_classes_dataframe(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test (authorized) classes access with pandas DataFrame targets."""
        labels = dataset[["col_y"]]
        dset = InMemoryDataset(
            data=dataset,
            target=labels,
            expose_classes=True,
        )
        assert dset.classes == {0, 1, 2}

    def test_classes_sparse(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test (authorized) classes access with scipy spmatrix targets."""
        # Labels are shifted by one, hence the expected {1, 2, 3} below
        # (presumably to avoid zero-valued labels in the sparse matrix).
        labels = scipy.sparse.coo_matrix(dataset[["col_y"]] + 1)
        dset = InMemoryDataset(
            data=dataset,
            target=labels,
            expose_classes=True,
        )
        assert dset.classes == {1, 2, 3}

    def test_data_type_dataframe(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test (authorized) data-type access with pandas DataFrame data."""
        features = dataset[["col_a", "col_b"]]
        dset = InMemoryDataset(data=features, expose_data_type=True)
        assert dset.data_type == "float32"

    def test_data_type_dataframe_mixed(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test that an exception is raised with a mixed-type DataFrame."""
        # Instantiation happens outside the 'raises' context: only the
        # property access is expected to fail.
        dset = InMemoryDataset(data=dataset, expose_data_type=True)
        with pytest.raises(ValueError):
            dset.data_type  # pylint: disable=pointless-statement

    def test_data_type_series(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test (authorized) data-type access with pandas Series data."""
        features = dataset["col_a"]
        dset = InMemoryDataset(data=features, expose_data_type=True)
        assert dset.data_type == "float32"

    def test_data_type_array(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test (authorized) data-type access with numpy array data."""
        features = dataset[["col_a", "col_b"]].values
        dset = InMemoryDataset(data=features, expose_data_type=True)
        assert dset.data_type == "float32"

    def test_data_type_sparse(
        self,
        dataset: pd.DataFrame,
    ) -> None:
        """Test (authorized) data-type access with scipy spmatrix data."""
        dense = dataset[["col_a", "col_b"]].values
        features = scipy.sparse.coo_matrix(dense)
        dset = InMemoryDataset(data=features, expose_data_type=True)
        assert dset.data_type == "float32"
class TestInMemoryDatasetSaveLoad:
    """Test JSON-file saving/loading features of InMemoryDataset."""

    def test_save_load_json(
        self,
        dataset: pd.DataFrame,
        tmp_path: str,
    ) -> None:
        """Test that a dataset can be saved to and loaded from JSON."""
        original = InMemoryDataset(dataset, target="col_y", s_wght="col_w")
        # Saving should produce a file at the requested location.
        json_path = os.path.join(tmp_path, "dataset.json")
        original.save_to_json(json_path)
        assert os.path.isfile(json_path)
        # Reloading should yield matching data arrays...
        reloaded = InMemoryDataset.load_from_json(json_path)
        assert np.allclose(original.data, reloaded.data)
        assert np.allclose(original.target, reloaded.target)  # type: ignore
        assert np.allclose(original.weights, reloaded.weights)  # type: ignore
        # ... and identical configuration attributes.
        for attr in ("f_cols", "expose_classes", "expose_data_type"):
            assert getattr(original, attr) == getattr(reloaded, attr)

    def test_load_json_malformed(
        self,
        tmp_path: str,
    ) -> None:
        """Test with a JSON file that has nothing to do with a dataset."""
        json_path = os.path.join(tmp_path, "dataset.json")
        payload = {"not-a-dataset": "at-all"}
        with open(json_path, "w", encoding="utf-8") as file:
            file.write(json.dumps(payload))
        # Loading the unrelated JSON content is expected to raise.
        with pytest.raises(KeyError):
            InMemoryDataset.load_from_json(json_path)

    def test_load_json_partial(
        self,
        tmp_path: str,
    ) -> None:
        """Test with a JSON file that contains a partial dataset config."""
        json_path = os.path.join(tmp_path, "dataset.json")
        # Only 'data' and 'target' are given under the 'config' key.
        payload = {"config": {"data": "mock", "target": "mock"}}
        with open(json_path, "w", encoding="utf-8") as file:
            file.write(json.dumps(payload))
        # Loading the incomplete config is expected to raise.
        with pytest.raises(KeyError):
            InMemoryDataset.load_from_json(json_path)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment