From a917a4e0208903d98c5113f9c4dfb6d0a1573bb9 Mon Sep 17 00:00:00 2001 From: BUI Van Tuan <buivantuan07@gmail.com> Date: Fri, 8 Dec 2023 01:12:27 +0100 Subject: [PATCH] upgrade jsonschema to 4.19 --- dnadna/schemas/dataset.yml | 4 +- dnadna/schemas/nets/cnn.yml | 2 +- dnadna/schemas/nets/custom_cnn.yml | 2 +- dnadna/schemas/nets/mlp.yml | 2 +- dnadna/schemas/param-set.yml | 10 +- dnadna/schemas/predict.yml | 4 +- dnadna/schemas/preprocessing.yml | 8 +- dnadna/schemas/simulation.yml | 6 +- dnadna/schemas/summary-statistics.yml | 6 +- dnadna/schemas/training-run.yml | 2 +- dnadna/schemas/training.yml | 18 +- dnadna/utils/config.py | 286 ++++++++++++- dnadna/utils/jsonschema.py | 569 +++++++++----------------- dnadna/utils/plugins.py | 2 +- dnadna/utils/torch_plugin_mixin.py | 6 +- environment-cpu.yml | 4 +- environment-cuda.yml | 4 +- tests/test_params.py | 9 +- tests/test_schemas.py | 7 +- tests/test_simulation.py | 4 +- tests/test_utils.py | 2 +- 21 files changed, 505 insertions(+), 452 deletions(-) diff --git a/dnadna/schemas/dataset.yml b/dnadna/schemas/dataset.yml index 146a7d12..0fac7a9b 100644 --- a/dnadna/schemas/dataset.yml +++ b/dnadna/schemas/dataset.yml @@ -46,7 +46,7 @@ properties: enum: ["dnadna"] required: ["format"] oneOf: - - {"$ref": "dataset_formats/dnadna.yml"} + - {"$ref": "py-pkgdata:dnadna.schemas/dataset_formats/dnadna.yml"} position_format: type: "object" @@ -111,7 +111,7 @@ properties: type: "boolean" default: false - dnadna_version: {"$ref": "definitions.yml#/definitions/version"} + dnadna_version: {"$ref": "py-pkgdata:dnadna.schemas/definitions.yml#/definitions/version"} required: - data_root diff --git a/dnadna/schemas/nets/cnn.yml b/dnadna/schemas/nets/cnn.yml index 64c6f93b..27114edd 100644 --- a/dnadna/schemas/nets/cnn.yml +++ b/dnadna/schemas/nets/cnn.yml @@ -2,4 +2,4 @@ $schema: "http://json-schema.org/draft-07/schema#" $id: "py-pkgdata:dnadna.schemas/nets/cnn.yml" description: "net parameters for CNN" allOf: - - "$ref": "base.yml#/definitions/fixed_inputs" + - "$ref": "py-pkgdata:dnadna.schemas/nets/base.yml#/definitions/fixed_inputs" diff --git a/dnadna/schemas/nets/custom_cnn.yml b/dnadna/schemas/nets/custom_cnn.yml index 319aeb1b..c8d3c022 100644 --- a/dnadna/schemas/nets/custom_cnn.yml +++ b/dnadna/schemas/nets/custom_cnn.yml @@ -2,7 +2,7 @@ $schema: "http://json-schema.org/draft-07/schema#" $id: "py-pkgdata:dnadna.schemas/nets/custom_cnn.yml" description: "net parameters for CustomCNN" allOf: - - "$ref": "base.yml#/definitions/fixed_inputs" + - "$ref": "py-pkgdata:dnadna.schemas/nets/base.yml#/definitions/fixed_inputs" - properties: n_snp: diff --git a/dnadna/schemas/nets/mlp.yml b/dnadna/schemas/nets/mlp.yml index 2720d0c0..b10cd95a 100644 --- a/dnadna/schemas/nets/mlp.yml +++ b/dnadna/schemas/nets/mlp.yml @@ -2,4 +2,4 @@ $schema: "http://json-schema.org/draft-07/schema#" $id: "py-pkgdata:dnadna.schemas/nets/mlp.yml" description: "net parameters for MLP" allOf: - - "$ref": "base.yml#/definitions/fixed_inputs" + - "$ref": "py-pkgdata:dnadna.schemas/nets/base.yml#/definitions/fixed_inputs" diff --git a/dnadna/schemas/param-set.yml b/dnadna/schemas/param-set.yml index b7dba8c9..8f33259e 100644 --- a/dnadna/schemas/param-set.yml +++ b/dnadna/schemas/param-set.yml @@ -9,9 +9,9 @@ description: >- single-element mappings); in the latter case the specified order of the parameters is preserved when mapping parameters to optimization targets oneOf: - - {"$ref": "#/definitions/parameters"} + - {"$ref": 
"py-pkgdata:dnadna.schemas/param-set.yml#/definitions/parameters"} - type: "array" - items: {"$ref": "#/definitions/parameters"} + items: {"$ref": "py-pkgdata:dnadna.schemas/param-set.yml#/definitions/parameters"} minItems: 1 errorMsg: minItems: at least one parameter must be declared in {property} @@ -50,7 +50,7 @@ definitions: # We use additionalProperties here because the property names are the # parameter names, which are arbitrary strings; hence every key/value # pair in this object is assumed to be a parameter definition - additionalProperties: {"$ref": "#/definitions/parameter"} + additionalProperties: {"$ref": "py-pkgdata:dnadna.schemas/param-set.yml#/definitions/parameter"} parameter: description: details about a single parameter @@ -122,10 +122,10 @@ definitions: properties: type: {"const": "classification"} loss_func: - "$ref": "#/definitions/loss_func" + "$ref": "py-pkgdata:dnadna.schemas/param-set.yml#/definitions/loss_func" default: "Cross Entropy" loss_weight: - "$ref": "#/definitions/loss_weight" + "$ref": "py-pkgdata:dnadna.schemas/param-set.yml#/definitions/loss_weight" default: 1 classes: description: >- diff --git a/dnadna/schemas/predict.yml b/dnadna/schemas/predict.yml index 0ace101b..2e12bb18 100644 --- a/dnadna/schemas/predict.yml +++ b/dnadna/schemas/predict.yml @@ -7,7 +7,7 @@ description: >- allOf: - properties: predict_transforms: - "$ref": "#/definitions/pred_transforms" + "$ref": "py-pkgdata:dnadna.schemas/predict.yml#/definitions/pred_transforms" default: [] # Inherits the preprocessing config format @@ -31,5 +31,5 @@ definitions: 'param1':, 'param2':, ...}, where the params map param names (specific to the transform) to their values type: "object" - patternProperties: {"^(?!.*(training|validation))[a-zA-Z0-9_]+": {"$ref": "#/definitions/transform_list"}} + patternProperties: {"^(?!.*(training|validation))[a-zA-Z0-9_]+": {"$ref": "py-pkgdata:dnadna.schemas/predict.yml#/definitions/transform_list"}} additionalProperties: false diff --git a/dnadna/schemas/preprocessing.yml b/dnadna/schemas/preprocessing.yml index 037fe4e4..b33c146a 100644 --- a/dnadna/schemas/preprocessing.yml +++ b/dnadna/schemas/preprocessing.yml @@ -8,7 +8,7 @@ description: >- properties: dataset: description: the dataset/simulation configuration - "$ref": "dataset.yml" + "$ref": "py-pkgdata:dnadna.schemas/dataset.yml" model_root: type: "string" @@ -28,7 +28,7 @@ properties: learned_params: description: >- description of the parameters the network will be trained on - "$ref": "param-set.yml" + "$ref": "py-pkgdata:dnadna.schemas/param-set.yml" dataset_splits: description: >- @@ -104,9 +104,9 @@ properties: minimum: 0 default: 0 - dnadna_version: {"$ref": "definitions.yml#/definitions/version"} + dnadna_version: {"$ref": "py-pkgdata:dnadna.schemas/definitions.yml#/definitions/version"} - plugins: {"$ref": "plugins.yml"} + plugins: {"$ref": "py-pkgdata:dnadna.schemas/plugins.yml"} required: - dataset diff --git a/dnadna/schemas/simulation.yml b/dnadna/schemas/simulation.yml index f515b1da..7f6a044c 100644 --- a/dnadna/schemas/simulation.yml +++ b/dnadna/schemas/simulation.yml @@ -5,10 +5,10 @@ description: >- JSON Schema (YAML-formatted) for basic properties of a simulation on which a model will be trained. 
allOf: - - {"$ref": "dataset.yml"} + - {"$ref": "py-pkgdata:dnadna.schemas/dataset.yml"} - properties: - plugins: {"$ref": "plugins.yml"} + plugins: {"$ref": "py-pkgdata:dnadna.schemas/plugins.yml"} simulator_name: type: "string" @@ -38,7 +38,7 @@ allOf: then the PRNG's default seeding method is used default: null - summary_statistics: {"$ref": "summary-statistics.yml#/definitions/summary_statistics"} + summary_statistics: {"$ref": "py-pkgdata:dnadna.schemas/summary-statistics.yml#/definitions/summary_statistics"} required: - n_scenarios diff --git a/dnadna/schemas/summary-statistics.yml b/dnadna/schemas/summary-statistics.yml index 7d882f34..ea0b263f 100644 --- a/dnadna/schemas/summary-statistics.yml +++ b/dnadna/schemas/summary-statistics.yml @@ -33,7 +33,7 @@ definitions: settings for calculating and outputting summary statistics on this simulation properties: - plugins: {"$ref": "plugins.yml"} + plugins: {"$ref": "py-pkgdata:dnadna.schemas/plugins.yml"} filename_format: type: "string" @@ -112,8 +112,8 @@ oneOf: - {"$ref": "#/definitions/summary_statistics"} - properties: - simulation: {"$ref": "simulation.yml"} + simulation: {"$ref": "py-pkgdata:dnadna.schemas/simulation.yml"} required: ["simulation"] - allOf: - - {"$ref": "simulation.yml"} + - {"$ref": "py-pkgdata:dnadna.schemas/simulation.yml"} - {"required": ["summary_statistics"]} diff --git a/dnadna/schemas/training-run.yml b/dnadna/schemas/training-run.yml index 9da8bf18..c45af60a 100644 --- a/dnadna/schemas/training-run.yml +++ b/dnadna/schemas/training-run.yml @@ -7,7 +7,7 @@ description: >- the config after running the `dnadna train` command, containing additional details about the training run allOf: - - { "$ref": "training.yml" } + - { "$ref": "py-pkgdata:dnadna.schemas/training.yml" } - properties: run_id: diff --git a/dnadna/schemas/training.yml b/dnadna/schemas/training.yml index 84ff97da..bf9d29d5 100644 --- a/dnadna/schemas/training.yml +++ b/dnadna/schemas/training.yml @@ -51,7 +51,7 @@ allOf: - {"$ref": "py-obj:dnadna.schemas.plugins.lr_scheduler"} dataset_transforms: - "$ref": "#/definitions/transforms" + "$ref": "py-pkgdata:dnadna.schemas/training.yml#/definitions/transforms" default: [] n_epochs: @@ -152,18 +152,18 @@ allOf: default: "run_{run_id}" train_mean: - "$ref": "#/definitions/param_stats" + "$ref": "py-pkgdata:dnadna.schemas/training.yml#/definitions/param_stats" description: >- mean of each regression parameter over the training set train_std: - "$ref": "#/definitions/param_stats" + "$ref": "py-pkgdata:dnadna.schemas/training.yml#/definitions/param_stats" description: >- standard deviation of each regression parameter over the training set # Inherits the preprocessing config format - - {"$ref": "preprocessing.yml"} + - {"$ref": "py-pkgdata:dnadna.schemas/preprocessing.yml"} @@ -183,13 +183,13 @@ definitions: 'param1':, 'param2':, ...}, where the params map param names (specific to the transform) to their values oneOf: - - "$ref": "#/definitions/transform_list" + - "$ref": "py-pkgdata:dnadna.schemas/training.yml#/definitions/transform_list" - type: "object" properties: - training: {"$ref": "#/definitions/transform_list"} - validation: {"$ref": "#/definitions/transform_list"} - test: {"$ref": "#/definitions/transform_list"} - patternProperties: {"^[a-zA-Z0-9_]+$": {"$ref": "#/definitions/transform_list"}} + training: {"$ref": "py-pkgdata:dnadna.schemas/training.yml#/definitions/transform_list"} + validation: {"$ref": "py-pkgdata:dnadna.schemas/training.yml#/definitions/transform_list"} + test: {"$ref": 
"py-pkgdata:dnadna.schemas/training.yml#/definitions/transform_list"} + patternProperties: {"^[a-zA-Z0-9_]+$": {"$ref": "py-pkgdata:dnadna.schemas/training.yml#/definitions/transform_list"}} additionalProperties: false param_stats: type: "object" diff --git a/dnadna/utils/config.py b/dnadna/utils/config.py index 922e8652..62247896 100644 --- a/dnadna/utils/config.py +++ b/dnadna/utils/config.py @@ -8,12 +8,16 @@ import os.path as pth import pathlib from collections.abc import Mapping, KeysView, ValuesView, ItemsView from itertools import chain +from urllib.parse import urlparse import jsonschema +from jsonschema.exceptions import best_match, relevance +from referencing.jsonschema import DRAFT7 +from referencing import Resource from .. import DEFAULTS_DIR from .decorators import cached_classproperty, lru_cache_with_dict -from .jsonschema import make_config_validator, normpath, SCHEMA_DIRS +from .jsonschema import make_config_validator, normpath, SCHEMA_DIRS, CustomValidationError, REGISTRY from .serializers import DictSerializer, JSONSerializer, YAMLSerializer from .yaml import CommentedMapping, CommentedYAMLDumper @@ -149,7 +153,7 @@ def save_dict_annotated(obj, filename, schema=None, validate=False, return serializer.save(obj, filename, **kwargs) # Build a CommentedMapping with comments extracted from the schema - def get_comment(schema): + def get_comment(schema, parent_schema): # Technically if a schema contains a $ref property, the ref'd schema is # supposed to completely override any other properties in the schema. # However, if we have a schema with some description properties *and* @@ -170,12 +174,21 @@ def save_dict_annotated(obj, filename, schema=None, validate=False, ref = schema.get('$ref') if comment is None and ref: # Try the ref'd schema - with validator.resolver.resolving(ref) as schema: - return get_comment(schema) + if not ref.startswith('#'): + schema = validator.registry.resolver().lookup(ref).contents + else: + resource = Resource.from_contents(parent_schema, DRAFT7) + # Resolver with a specific root resource in case '$ref' is relative + schema = validator.registry.resolver_with_root(resource).lookup(ref).contents + + if not ref.startswith('#'): + return get_comment(schema, schema) + else: + return get_comment(schema, parent_schema) return comment - comment = get_comment(schema) + comment = get_comment(schema, schema) comments = {} path = [] @@ -189,28 +202,37 @@ def save_dict_annotated(obj, filename, schema=None, validate=False, # # Perhaps it would be better to actually implement a special schema # validator that also extracts comments as it goes. 
-    def get_comments_recursive(schema):
+    def get_comments_recursive(schema, parent_schema):
         for key in ('allOf', 'anyOf', 'oneOf'):
             for subschema in schema.get(key, []):
-                get_comments_recursive(subschema)
+                get_comments_recursive(subschema, parent_schema)
 
         ref = schema.get('$ref')
         if ref:
             # resolve references
-            with validator.resolver.resolving(ref) as schema:
-                get_comments_recursive(schema)
-            return
+            if not ref.startswith('#'):
+                schema = validator.registry.resolver().lookup(ref).contents
+            else:
+                resource = Resource.from_contents(parent_schema, DRAFT7)
+                # Resolver with a specific root resource in case '$ref' is relative
+                schema = validator.registry.resolver_with_root(resource).lookup(ref).contents
+
+            if not ref.startswith('#'):
+                get_comments_recursive(schema, schema)
+            else:
+                get_comments_recursive(schema, parent_schema)
+            return
 
         for prop, subschema in schema.get('properties', {}).items():
             path.append(prop)
-            comment = get_comment(subschema)
+            comment = get_comment(subschema, schema)
             if comment is not None:
                 comments.setdefault(tuple(path), comment)
-            get_comments_recursive(subschema)
+            get_comments_recursive(subschema, schema)
             path.pop()
 
-    get_comments_recursive(schema)
+    get_comments_recursive(schema, schema)
 
     commented_obj = CommentedMapping(obj, comment=comment, comments=comments)
     serializer.save(commented_obj, filename, Dumper=CommentedYAMLDumper,
@@ -1464,9 +1486,9 @@ def _get_config_key_path(config, key):
     return '.'
 
 
-class ConfigValidator(make_config_validator(get_path=_get_config_key_path)):
+class ConfigValidator:
     """
-    A `jsonschema.IValidator` class which supports special validation
+    A custom validator wrapping the `jsonschema.Draft7Validator` class, which supports special validation
     functionality for DNADNA `Config` objects::
 
     * Recognizes `Config` objects as JSON ``object`` s.
@@ -1525,7 +1547,219 @@ class ConfigValidator(make_config_validator(get_path=_get_config_key_path)):
     Config({'abspath': '/bar/baz/qux', 'relpath': '/foo/bar/fred',
     'nonpath': 'barney', 'has_default_2': 'c', 'has_default_1': 'a'})
     """
+
+    def __init__(self, schema, *args, resolve_plugins=True,
+                 resolve_defaults=True, resolve_filenames=True,
+                 posixify_filenames=False, **kwargs):
+        self._resolve_plugins = resolve_plugins
+        self._resolve_defaults = resolve_defaults
+        self._resolve_filenames = resolve_filenames
+        self._posixify_filenames = posixify_filenames
+
+        # Create a new validator class that fully overrides the built-in
+        # validators of `jsonschema.Draft7Validator` by default
+        validator = make_config_validator(get_path=_get_config_key_path,
+                                          resolve_plugins=self._resolve_plugins,
+                                          resolve_defaults=self._resolve_defaults,
+                                          resolve_filenames=self._resolve_filenames,
+                                          posixify_filenames=self._posixify_filenames)
+
+        self.validator = validator(schema, registry=REGISTRY, format_checker=validator.FORMAT_CHECKER)
+        self.registry = REGISTRY
+
+
+    def iter_errors(self, config, *args, **kwargs):
+        return self.validator.iter_errors(config, *args, **kwargs)
+
+
+    @staticmethod
+    def relevance_with_const_select(error):
+        """
+        This implements a custom heuristic for choosing the best-match error
+        with `dnadna.utils.config.ConfigValidator.best_match`.
+
+        It prioritizes `CustomValidationError`\\s over other errors, so that
+        a schema with custom ``errorMsg`` properties can decide through that
+        means which errors are most important.  This can be especially useful
+        when using ``errorMsg`` in a ``oneOf`` suite, where the custom error
+        is perhaps more important than the default reason given for why none
+        of the sub-schemas matched.  Here's an example::
+
+            >>> schema = {
+            ...     'oneOf': [{
+            ...         'type': 'object',
+            ...         'minProperties': 1,
+            ...         'errorMsg': {
+            ...             'minProperties': 'must have at least 1 entry'
+            ...         }
+            ...     }, {
+            ...         'type': 'array',
+            ...         'minItems': 1,
+            ...         'errorMsg': {
+            ...             'minItems': 'must have at least 1 entry'
+            ...         }
+            ...     }]
+            ... }
+
+        This schema matches either an array or an object, which in either case
+        must have at least one property (in the object case) or item (in the
+        array case).  Without this custom relevance function, ``best_match``
+        will just choose one of the errors from one of the ``oneOf`` schemas
+        which caused it not to match.  In this case it happens to select the
+        type error from the first sub-schema::
+
+            >>> from jsonschema.exceptions import best_match
+            >>> from dnadna.utils.config import ConfigValidator
+            >>> validator = ConfigValidator(schema)
+            >>> errors = validator.iter_errors([])  # try an empty list
+            >>> best_match(errors)
+            <ValidationError: '[] is too short'>
+
+        Using this custom error ranking algorithm, the `CustomValidationError`
+        will be preferred::
+
+            >>> errors = validator.iter_errors([])  # try an empty list
+            >>> validator.best_match(errors,
+            ...     key=ConfigValidator.relevance_with_const_select)
+            <CustomValidationError: 'must have at least 1 entry'>
+
+        Otherwise it's the same as the default heuristic with extra support for
+        a common pattern where ``oneOf`` combined with ``const`` or ``enum`` is
+        used to select from a list of sub-schemas based on the value of a
+        single property.
+
+        For example::
+
+            >>> schema = {
+            ...     'required': ['type'],
+            ...     'oneOf': [{
+            ...         'properties': {
+            ...             'type': {'const': 'regression'},
+            ...         }
+            ...     }, {
+            ...         'properties': {
+            ...             'type': {'const': 'classification'},
+            ...             'classes': {'type': 'integer'},
+            ...         },
+            ...         'required': ['classes']
+            ...     }]
+            ... }
+            ...
+
+        The first schema in the ``oneOf`` list will match if and only if
+        the document contains ``{'type': 'regression'}``, and the second will
+        match if and only if ``{'type': 'classification'}``, with no ambiguity.
+
+        In this case, when ``type`` matches a specific sub-schema, the more
+        interesting errors will be the ones that occur within that sub-schema.
+        But the default heuristics are such that it will think the ``type``
+        error is more interesting.  For example::
+
+            >>> import jsonschema
+            >>> jsonschema.validate({'type': 'classification'}, schema)
+            Traceback (most recent call last):
+            ...
+            jsonschema.exceptions.ValidationError: 'regression' was expected
+            ...
+
+        Here the error that matched the heuristic happens to be the one
+        that caused the first sub-schema to be skipped over, because
+        ``properties.type.const`` did not match.  But the actual reason an
+        error was raised at all was that the second sub-schema didn't match
+        either, due to the required ``'classes'`` property being missing.
+        Under this use case, that would be the more interesting error.  This
+        heuristic solves that.  In order to demonstrate this, we have to call
+        ``best_match`` directly, since `jsonschema.validate` doesn't have an
+        option to pass down a different heuristic key::
+
+            >>> from dnadna.utils.config import ConfigValidator
+            >>> validator = ConfigValidator(schema)
+            >>> errors = validator.iter_errors({'type': 'classification'})
+            >>> raise validator.best_match(errors,
+            ...     key=ConfigValidator.relevance_with_const_select)
+            Traceback (most recent call last):
+            ...
+            jsonschema.exceptions.ValidationError: 'classes' is a required property
+            ...
+
+        This also supports a similar pattern (used by several plugins) where
+        instead of ``const`` being used to select a specific sub-schema,
+        ``enum`` is used with a unique list of values (in fact ``const`` is
+        just a special case of ``enum`` with only one value).  For example::
+
+            >>> schema = {
+            ...     'required': ['name'],
+            ...     'oneOf': [{
+            ...         'properties': {
+            ...             'name': {'enum': ['my-plugin', 'MyPlugin']},
+            ...         }
+            ...     }, {
+            ...         'properties': {
+            ...             'name': {'enum': ['my-plugin2', 'MyPlugin2']},
+            ...             'x': {'type': 'integer'},
+            ...         },
+            ...         'required': ['x']
+            ...     }]
+            ... }
+            ...
+            >>> validator = ConfigValidator(schema)
+            >>> errors = validator.iter_errors({'name': 'my-plugin2'})
+            >>> raise validator.best_match(errors,
+            ...     key=ConfigValidator.relevance_with_const_select)
+            Traceback (most recent call last):
+            ...
+            jsonschema.exceptions.ValidationError: 'x' is a required property
+            ...
+        """
+
+        # How it works: the default heuristic, `relevance`, returns a tuple
+        # of integers that are used to rank errors.
+        # When looping over the 'context' errors of a oneOf error, the
+        # error with the lowest ranking wins (see the docstring of best_match
+        # for an explanation)
+        #
+        # In this case we just extend that tuple with one more value, which
+        # always contains 1 *if* the error's parent is a oneOf and the error's
+        # validator is 'const'.  So this assumes somewhat implicitly that this
+        # oneOf+const select pattern is in use (most of the time the most
+        # likely reason to use const in a oneOf sub-schema)
+        rank = relevance(error)
+
+        # Also add True/False depending on whether the error is a
+        # CustomValidationError, so that these are weighted more heavily
+        # (remember, False is ranked higher in this case)
+        rank = (not isinstance(error, CustomValidationError),) + rank
+
+        if (error.parent and error.parent.validator == 'oneOf' and
+                error.validator in ('const', 'enum')):
+            # It looks like we are using the oneOf+const pattern
+            return (True,) + rank
+        else:
+            return (False,) + rank
+
+
+    def best_match(self, errors, key=relevance_with_const_select):
+        """
+        Wraps `jsonschema.exceptions.best_match` so that it can return a
+        `CustomValidationError`; see `relevance_with_const_select` above.
+        """
+
+        error = best_match(errors,
+                           key=key)
+
+        if error is None:
+            return
+        else:
+            errormsg = error.schema.get('errorMsg')
+            if isinstance(errormsg, dict):
+                # if errorMsg is a mapping of validator names to messages,
+                # fall back to the first message it defines
+                errormsg = list(errormsg.values())[0]
+            if errormsg is not None:
+                error = CustomValidationError.create_from(error)
+                error.message = errormsg
+            return error
+
+
     def validate(self, config, *args, **kwargs):
         """
         Validate the config against the schema and raise a `ConfigError` if
@@ -1625,8 +1859,7 @@ class ConfigValidator(make_config_validator(get_path=_get_config_key_path)):
         >>> validator.validate([1, 2])
         Traceback (most recent call last):
         ...
-        dnadna.utils.config.ConfigError: error in config at '0': must be an
-        array of at least 1 unique string
+        dnadna.utils.config.ConfigError: error in config at '0': 1 is not of type 'string'
         >>> validator.validate(['a', 'a'])
         Traceback (most recent call last):
         ...
@@ -1635,19 +1868,32 @@ class ConfigValidator(make_config_validator(get_path=_get_config_key_path)):
         >>> validator.validate([])
         Traceback (most recent call last):
         ...
-        dnadna.utils.config.ConfigError: error in config: array was empty
-        (it must have at least 1 item)
+        dnadna.utils.config.ConfigError: error in config: must be an array
+        of at least 1 unique string
         >>> validator.validate(['a', 'b', 'c'])
         """
 
         try:
-            return super().validate(config, *args, **kwargs)
+            error = best_match(self.iter_errors(config, *args, **kwargs),
+                               key=self.relevance_with_const_select)
+
+            if error is None:
+                return
+
+            raise error
         except jsonschema.ValidationError as exc:
             # if the error came from a sub-schema in a one/all/anyOf then the
             # full path must also include the parent schema path; this can
             # get a little confusing depending on the nesting, but in general
             # the longest path is always the best one it seems
             parent = exc
+            errormsg = parent.schema.get('errorMsg')
+            if isinstance(errormsg, dict):
+                errormsg = list(errormsg.values())[0]
+            if errormsg is not None:
+                exc = CustomValidationError.create_from(exc)
+                exc.message = errormsg
+
             path = ()
             while parent is not None:
                 if len(parent.path) > len(path):
diff --git a/dnadna/utils/jsonschema.py b/dnadna/utils/jsonschema.py
index 09e44bcc..cc376608 100644
--- a/dnadna/utils/jsonschema.py
+++ b/dnadna/utils/jsonschema.py
@@ -5,16 +5,20 @@
 import copy
 import os
 import os.path as pth
 import pathlib
-from collections import ChainMap
 from collections.abc import Mapping
 from datetime import datetime
-from functools import partial
 from urllib.parse import urlparse
 from urllib.request import url2pathname
 
 import jsonschema
-from jsonschema.exceptions import best_match, relevance
-from jsonschema_pyref import RefResolver
+import referencing.jsonschema
+from referencing.jsonschema import DRAFT7
+from referencing import Registry, Resource
+from .jsonschema_pyref import _resolve_url_py_obj
+from jsonschema import (
+    _keywords,
+    _legacy_keywords,
+)
 
 from .serializers import DictSerializer
@@ -111,367 +115,25 @@ class CustomValidationError(jsonschema.ValidationError):
         self.__dict__['message'] = value
 
 
-class ConfigValidatorMixin:
+def make_config_validator(validator_cls=jsonschema.Draft7Validator,
+                          get_path=None,
+                          resolve_plugins=True,
+                          resolve_defaults=True,
+                          resolve_filenames=True,
+                          posixify_filenames=False
+                          ):
     """
-    Mix-in class to combine with `jsonschema.IValidator` classes to add new
-    functionality provided by `make_config_validator`.
-
+    Creates a new validator class, fully overriding the built-in validators of
+    ``validator_cls`` (`jsonschema.Draft7Validator` by default), which supports
+    special functionality for DNADNA `.Config` objects, though it can be adapted
+    to other types.
+
     The new validator has additional options for controlling how to resolve
     relative filenames in the schema and how to handle the ``default``
     property in schemas.  It also allows specifying a default format checker.
 
-    See the implementation in `~dnadna.utils.config.ConfigValidator` class for
-    more details and example usage.
- """ - - format_checker = None - - def __init__(self, schema, *args, resolve_plugins=True, - resolve_defaults=True, resolve_filenames=True, - posixify_filenames=False, **kwargs): - self._resolve_plugins = resolve_plugins - kwargs.setdefault('format_checker', self.format_checker) - kwargs.setdefault('resolver', self.get_resolver_for(schema)) - super().__init__(schema, *args, **kwargs) - self._resolve_defaults = resolve_defaults - self._resolve_filenames = resolve_filenames - self._posixify_filenames = posixify_filenames - self._schema_stack = [self.schema] - - def evolve(self, **kwargs): - # Needed for jsonschema 4.0+ which uses attrs for the Validator class - # and frequently calls the evolve() method which makes a *copy* of the - # validator - # However, we've attached some custom attributes to the validator class - # which attrs doesn't know about. There is probably a "proper" way to - # do this using attrs, but since I also want to support jsonschema<4.0 - # an easier way to do this is to simply copy the desired attributes - # to the new instance returned by evolve() - # - # For jsonschema<4.0 this method is never called. - new = super().evolve(**kwargs) - - for attr in ('_resolve_plugins', '_resolve_defaults', - '_resolve_filenames', '_posixify_filenames', - '_schema_stack'): - setattr(new, attr, getattr(self, attr)) - - return new - - def iter_errors(self, instance, _schema=None): - # Support for jsonschema 4.0+: the _schema argument is deprecated, and - # instead there is a schema attribute on the validator - if _schema is None and hasattr(self, 'schema'): - _schema = self.schema - iter_errors = super().iter_errors - else: - iter_errors = partial(super().iter_errors, _schema=_schema) - - pop_schema_stack = False - if isinstance(_schema, dict) and '$ref' not in _schema: - self._schema_stack.append(_schema) - pop_schema_stack = True - - err_occurred = False - orig_instance = instance - - if isinstance(orig_instance, (dict, Mapping)): - # This is to work around a bug with applying default values from - # schemas, when that schema is a sub-schema in a oneOf/anyOf schema - # composition. - # - # Only defaults from the correct/matching sub-schema should be - # applied. If an error occurs in a sub-schema, then it is not - # applicable. - # - # To work around this we must first make a copy of the original - # instance, and then only apply any updates made to that instance - # if there were no errors. I don't think this is a perfect - # workaround, but it solves the test case in the test - # test_config_validation_defaults_nested_in_one_of() in - # tests/test_utils.py - orig_instance = copy.deepcopy(orig_instance) - - try: - for error in iter_errors(instance): - err_occurred = True - if not isinstance(error, CustomValidationError): - errormsg = self._find_applicable_errormsg(error) - if errormsg is not None: - error = CustomValidationError.create_from(error) - error.message = errormsg - yield error - finally: - if pop_schema_stack: - self._schema_stack.pop() - - if err_occurred and orig_instance is not instance: - # If an error occurred, clear any updates made to the instance - # and replace it with its original values. 
- if isinstance(instance, ChainMap): - # Special case for ChainMaps: doing a clear/update results - # in nested ChainMaps: It's better to clear and update the - # first map since this is the one that gets modified if any - # of the instance's values were modified (or new keys - # added) - instance.maps[0].clear() - instance.maps[0].update(orig_instance.maps[0]) - else: - # The general approach for dict to restore the same dict - # object to a previous value - instance.clear() - instance.update(orig_instance) - - def descend(self, instance, schema, path=None, schema_path=None): - # This is to work around a bug/oversight (?) in jsonschema that - # context errors in a oneOf/anyOf do not have their full paths filled - # out - for error in super().descend(instance, schema, path=path, - schema_path=schema_path): - for cerr in error.context: - if path is not None: - cerr.path.appendleft(path) - if schema_path is not None: - cerr.schema_path.appendleft(schema_path) - yield error - - def validate(self, *args, **kwargs): - """ - Passes all errors through `jsonschema.exceptions.best_match` with a - custom heuristic (see `relevance_with_const_select`) to raise the most - relevant validation error. - """ - - error = best_match(self.iter_errors(*args, **kwargs), - key=self.relevance_with_const_select) - if error is None: - return - - raise error - - @staticmethod - def relevance_with_const_select(error): - """ - This implements a custom heuristic for choose the best-match error - with `jsonschema.exceptions.best_match`. - - It prioritizes `CustomValidatonError`\\s over other errors, so that - a schema with custom ``errorMsg`` properties can decide through that - means which errors are most important. This can be especially useful - when using ``errorMsg`` in a ``oneOf`` suite, where the custom error - is perhaps more important than default reason given for why none of the - sub-schemas matched. Here's an example:: - - >>> schema = { - ... 'oneOf': [{ - ... 'type': 'object', - ... 'minProperties': 1, - ... 'errorMsg': { - ... 'minProperties': 'must have at least 1 entry' - ... } - ... }, { - ... 'type': 'array', - ... 'minItems': 1, - ... 'errorMsg': { - ... 'minItems': 'must have at least 1 entry' - ... } - ... }] - ... } - - This schema matches either an array or an object, which in either case - must have a least one property (in the object case) or item (in the - array case). Without this custom relevance function, ``best_match`` - will just choose one of the errors from one of the ``oneOf`` schemas - which caused it not to match. In this case it happens to select the - type error from the first sub-schema:: - - >>> from jsonschema.exceptions import best_match - >>> from dnadna.utils.jsonschema import make_config_validator - >>> Validator = make_config_validator() - >>> validator = Validator(schema) - >>> errors = validator.iter_errors([]) # try an empty list - >>> best_match(errors) - <ValidationError: "[] is not of type 'object'"> - - Using this custom error ranking algorithm, the `CustomValidationError` - will be preferred:: - - >>> errors = validator.iter_errors([]) # try an empty list - >>> best_match(errors, - ... key=ConfigValidatorMixin.relevance_with_const_select) - <CustomValidationError: 'must have at least 1 entry'> - - Otherwise it's the same as the default heuristic with extra support for - a common pattern where ``oneOf`` combined with ``const`` or ``enum`` is - used to select from a list of sub-schemas based on the value of a - single property. 
- - For example:: - - >>> schema = { - ... 'required': ['type'], - ... 'oneOf': [{ - ... 'properties': { - ... 'type': {'const': 'regression'}, - ... } - ... }, { - ... 'properties': { - ... 'type': {'const': 'classification'}, - ... 'classes': {'type': 'integer'}, - ... }, - ... 'required': ['classes'] - ... }] - ... } - ... - - The first schema in the `oneOf` list will match if and only if - the document contains ``{'type': 'regression'}`` and the second will - match if and only if ``{'type': 'classification'}`` with no ambiguity. - - In this case, when ``type`` matches a specific sub-schema, the more - interesting error will be errors that occur within the sub-schema. - But the default heuristics are such that it will think the ``type`` - error is more interesting. For example:: - - >>> import jsonschema - >>> jsonschema.validate({'type': 'classification'}, schema) - Traceback (most recent call last): - ... - jsonschema.exceptions.ValidationError: 'regression' was expected - ... - - Here the error that matched the heuristic happens to be the the one - that caused the first sub-schema to be skipped over, because - ``properties.type.const`` did not match. But actual reason an error - was raised at all was because the second sub-schema didn't match either - due to the required ``'classes'`` property being missing. Under this - use case, that would be the more interesting error. This heuristic - solves that. In order to demonstrate this, we have to call - ``best_match`` directly, since `jsonschema.validate` doesn't have an - option to pass down a different heuristic key:: - - >>> from dnadna.utils.jsonschema import ConfigValidatorMixin - >>> validator = jsonschema.Draft7Validator(schema) - >>> errors = validator.iter_errors({'type': 'classification'}) - >>> raise best_match(errors, - ... key=ConfigValidatorMixin.relevance_with_const_select) - Traceback (most recent call last): - ... - jsonschema.exceptions.ValidationError: 'classes' is a required property - ... - - This also supports a similar pattern (used by several plugins) where - instead of ``const`` being used to select a specific sub-schema, - ``enum`` is used with a unique list of values (in fact ``const`` is - just a special case of ``enum`` with only one value). For example:: - - >>> schema = { - ... 'required': ['name'], - ... 'oneOf': [{ - ... 'properties': { - ... 'name': {'enum': ['my-plugin', 'MyPlugin']}, - ... } - ... }, { - ... 'properties': { - ... 'name': {'enum': ['my-plugin2', 'MyPlugin2']}, - ... 'x': {'type': 'integer'}, - ... }, - ... 'required': ['x'] - ... }] - ... } - ... - >>> validator = jsonschema.Draft7Validator(schema) - >>> errors = validator.iter_errors({'name': 'my-plugin2'}) - >>> raise best_match(errors, - ... key=ConfigValidatorMixin.relevance_with_const_select) - Traceback (most recent call last): - ... - jsonschema.exceptions.ValidationError: 'x' is a required property - ... - """ - - # How it works: the default heuristic used "relevance" returns a tuple - # of integers that are used to rank errors - # When looping over the 'context' errors of a oneOf error, the - # error with the lowest ranking wins (see the docstring of best_match - # for explanation) - # - # In this case we just extend that tuple with one more value which - # always contains 1 *if* the error's parent is a oneOf and the error's - # validator is 'const'. 
So this assumes somewhat implicitly that this - # oneOf+const select pattern is in use (most of the time the most - # likely reason to use const in a oneOf sub-schema) - rank = relevance(error) - - # Also add True/False depending on whether the error is a - # CustomValidationError, so that these are weighted more heavily - # (remember, False is ranked higher in this case) - rank = (not isinstance(error, CustomValidationError),) + rank - - if (error.parent and error.parent.validator == 'oneOf' and - error.validator in ('const', 'enum')): - # It looks like we are using the oneOf+const pattern - return (True,) + rank - else: - return (False,) + rank - - def _find_applicable_errormsg(self, error): - # Walk up the current chain of schemas until we find one with an - # errorMsg - # If the errorMsg is a string, or a dict with a 'default' key, it - # applies to all errors at or below that sub-schema, otherwise if it - # is a dict it only applies if the relevant validator is in found in - # the dict - schema_path = list(error.schema_path) - for schema in self._schema_stack[::-1]: - for validator in schema_path[::-1]: - errormsg = schema.get('errorMsg') - if isinstance(errormsg, dict): - errormsg = errormsg.get(validator, errormsg.get('default')) - - if errormsg is not None: - return errormsg - - # No custom error message found anywhere on the schema path - return None - - @staticmethod - def _resolver_file_handler(uri): - """ - Custom ``file://`` handler for RefResolver that can load schemas from YAML - or JSON. - - Slightly hackish, but supported workaround to the larger issue discussed at - https://github.com/Julian/jsonschema/issues/420 - """ - - filename = url2pathname(urlparse(uri).path) - return DictSerializer.load(filename) - - @classmethod - def get_resolver_for(cls, schema): - """ - Return a RefResolver that can handle relative references to other - schemas installed in main schemas directory. - - This is somewhat inflexible at the moment but is all we need currently. - """ - - base_url = f'file:///{SCHEMA_DIRS[0].as_posix()}/' - handlers = {'file': cls._resolver_file_handler} - return RefResolver(base_url, schema, handlers=handlers) - - -def make_config_validator(validator_cls=jsonschema.Draft7Validator, - get_path=None): - """ - Creates a `jsonschema.IValidator` class based on the given - ``validator_cls`` (`jsonschema.Draft7Validator` by default) which supports - special functionality for DNADNA `.Config` objects, though it can be adapted - to other types. - See the `~dnadna.utils.config.ConfigValidator` class for more details and example usage. """ @@ -482,10 +144,23 @@ def make_config_validator(validator_cls=jsonschema.Draft7Validator, # level where we have access to the full dict instance. validate_properties = validator_cls.VALIDATORS['properties'] + # From the DeprecationWarning: "Subclassing validator classes is not intended to " + # "be part of their public API. A future version " + # "will make doing so an error, as the behavior of " + # "subclasses isn't guaranteed to stay the same " + # "between releases of jsonschema. Instead, prefer " + # "composition of validators, wrapping them in an object " + # "owned entirely by the downstream library." 
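    # The composition the warning recommends means owning a validator instance
    # rather than subclassing its class; roughly (an illustrative sketch only;
    # the real wrapper is `ConfigValidator` in dnadna/utils/config.py):
    #
    #     class WrappingValidator:
    #         def __init__(self, schema):
    #             self._inner = make_config_validator()(schema)
    #
    #         def iter_errors(self, instance):
    #             return self._inner.iter_errors(instance)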
+
+    # From now on, the ``resolve_*`` attributes are passed in by the
+    # ConfigValidator class that wraps the new Draft7Validator
     def resolve_filename(instance, prop, subschema, get_path=None,
                          posixify=False):
         errors = []
-
+
+        if subschema is True:
+            return []
+
         # Handle format: filename
         format_ = subschema.get('format')
         if (subschema.get('type') == 'string' and
@@ -516,45 +191,62 @@ def make_config_validator(validator_cls=jsonschema.Draft7Validator,
 
     def validate_config_properties(validator, properties, instance, schema):
         errors = []
-        if validator._resolve_filenames:
-            posixify = validator._posixify_filenames
+        if resolve_filenames:
+            posixify = posixify_filenames
             for prop, subschema in properties.items():
                 errors += resolve_filename(instance, prop, subschema, get_path,
                                            posixify)
 
         # Now run the default properties validator
-        errors += list(validate_properties(validator, properties, instance,
-                                           schema))
+        errors = validate_properties(validator, properties, instance,
+                                     schema)
+
+        err_occurred = False
+        orig_instance = copy.deepcopy(instance)
+
         # If there are no errors then the instance matches this schema or
         # sub-schema (in the case of a oneOf/allOf/anyOf); now we assign any
         # defaults so that defaults are only added from a schema that this
         # instance actually matches.  Then, if defaults were added we re-run
         # validation on the properties to ensure that the defaults are also
         # valid.
-        if not errors and validator._resolve_defaults:
+        if resolve_defaults:
             added_defaults = False
             for prop, subschema in properties.items():
+                if subschema is True:
+                    continue
+
                 # Fill missing props with their defaults
                 if (isinstance(instance, (dict, Mapping)) and
                         'default' in subschema and prop not in instance):
                     instance[prop] = subschema['default']
                     added_defaults = True
-
+
             if added_defaults:
                 # We only need to re-validate if any defaults were actually
                 # assigned
                 errors = validate_properties(validator, properties, instance,
                                              schema)
-
-        for error in errors:
-            yield error
+
+        try:
+            for error in errors:
+                err_occurred = True
+                yield error
+        finally:
+            # If an error occurred, clear any updates made to the instance
+            # and replace it with its original values.
+            # The general approach for dict to restore the same dict
+            # object to a previous value
+            if err_occurred:
+                instance.clear()
+                instance.update(orig_instance)
+
     # Create the format checker; filename and filename! are already handled
     # specially in validate_config_properties since it requires special support
     # for resolving relative filenames.  Other, simpler formats are checked
     # here
-    format_checker = copy.deepcopy(jsonschema.draft7_format_checker)
+    format_checker = copy.deepcopy(validator_cls.FORMAT_CHECKER)
 
     @format_checker.checks('python-module', raises=(ImportError,))
     def check_python_module(instance):
@@ -562,24 +254,135 @@ def make_config_validator(validator_cls=jsonschema.Draft7Validator,
         __import__(instance)
         return True
 
+
+    # treat any Mapping (including DNADNA `.Config` objects) as a JSON 'object'
+    def is_config(checker, instance):
+        return (
+            isinstance(instance, (dict, Mapping))
+        )
+    type_checker = validator_cls.TYPE_CHECKER.redefine(
+        "object", is_config,
+    )
+
+    # Create a new validator class, fully overriding the built-in validators
+    # of ``validator_cls`` (`jsonschema.Draft7Validator` by default).
+    # This is to work around the issue in
+    # https://github.com/python-jsonschema/jsonschema/issues/1197
+    validator_cls = jsonschema.validators.create(
+        meta_schema=make_config_meta_schema(validator_cls.META_SCHEMA),
+        validators={
+            "$dynamicRef": _keywords.dynamicRef,
+            "$ref": _keywords.ref,
+            "additionalProperties": _keywords.additionalProperties,
+            "allOf": _keywords.allOf,
+            "anyOf": _keywords.anyOf,
+            "const": _keywords.const,
+            "contains": _keywords.contains,
+            "dependentRequired": _keywords.dependentRequired,
+            "dependentSchemas": _keywords.dependentSchemas,
+            "enum": _keywords.enum,
+            "exclusiveMaximum": _keywords.exclusiveMaximum,
+            "exclusiveMinimum": _keywords.exclusiveMinimum,
+            "format": _keywords.format,
+            "if": _keywords.if_,
+            "items": _keywords.items,
+            "maxItems": _keywords.maxItems,
+            "maxLength": _keywords.maxLength,
+            "maxProperties": _keywords.maxProperties,
+            "maximum": _keywords.maximum,
+            "minItems": _keywords.minItems,
+            "minLength": _keywords.minLength,
+            "minProperties": _keywords.minProperties,
+            "minimum": _keywords.minimum,
+            "multipleOf": _keywords.multipleOf,
+            "not": _keywords.not_,
+            "oneOf": _keywords.oneOf,
+            "pattern": _keywords.pattern,
+            "patternProperties": _keywords.patternProperties,
+            "prefixItems": _keywords.prefixItems,
+            "properties": validate_config_properties,
+            "propertyNames": _keywords.propertyNames,
+            "required": _keywords.required,
+            "type": _keywords.type,
+            "unevaluatedItems": _keywords.unevaluatedItems,
+            "unevaluatedProperties": _keywords.unevaluatedProperties,
+            "uniqueItems": _keywords.uniqueItems,
+        },
+        type_checker=type_checker,
+        format_checker=format_checker,
+        version="draft7",
+        id_of=referencing.jsonschema.DRAFT7.id_of,
+        applicable_validators=_legacy_keywords.ignore_ref_siblings,
+    )
 
-    validator_cls = jsonschema.validators.extend(
-        validator_cls, {'properties': validate_config_properties})
+    return validator_cls
 
-    # Add the mix-in class
-    validator_cls = type(f'DNA{validator_cls.__name__}',
-                         (ConfigValidatorMixin, validator_cls),
-                         {'format_checker': format_checker})
 
-    # Create the extended meta-schema with support for errorMsg
-    validator_cls.META_SCHEMA = \
-        make_config_meta_schema(validator_cls.META_SCHEMA)
+def _resolver_file_handler(uri):
+    """
+    Custom ``file://`` handler for the registry's ``retrieve`` hook that can
+    load schemas from YAML or JSON.
 
-    # Support arbitrary Mapping types for 'object' in addition to dict; this
-    # will inclue Config objects
-    validator_cls.TYPE_CHECKER = validator_cls.TYPE_CHECKER.redefine('object',
-        lambda tc, obj: isinstance(obj, (dict, Mapping)))
-    return validator_cls
+    Slightly hackish, but supported workaround to the larger issue discussed at
+    https://github.com/Julian/jsonschema/issues/420
+    """
+
+    filename = url2pathname(urlparse(uri).path)
+    return DictSerializer.load(filename)
+
+
+def _retrieve(uri: str):
+    """
+    Dynamically retrieve schemas from files, or via the custom ``py-obj``
+    URI scheme, for any schema that is not already pre-loaded in memory.
+
+    Parameters
+    ----------
+    uri : str
+        A Uniform Resource Identifier (URI).
+
+    Returns
+    -------
+    `referencing.Resource`
+        A document (deserialized JSON) with a concrete interpretation under
+        the DRAFT7 specification.
+
+    """
+    base_url = f'file:///{SCHEMA_DIRS[0].as_posix()}/'
+
+    scheme = urlparse(uri).scheme
+    if not scheme:
+        uri = base_url + uri
+        scheme = urlparse(uri).scheme
+    if scheme == 'file':
+        try:
+            contents = _resolver_file_handler(uri)
+        except Exception:
+            contents = {}
+    elif scheme == 'py-obj':
+        contents = _resolve_url_py_obj(uri)
+    else:
+        contents = {}
+
+    if not contents.get('$id'):
+        contents['$id'] = 'urn:unknown-dialect'
+
+    return Resource.from_contents(contents, DRAFT7)
+
+
+# an immutable registry of in-memory schemas, extended on demand through the
+# ``retrieve`` callable for any schema that is not pre-loaded
+REGISTRY = Registry(retrieve=_retrieve)
+# pre-load every schema found under SCHEMA_DIRS into the registry,
+# making them available for use during validation.
+for schema_dir in SCHEMA_DIRS:
+    for curdir, dirs, files in os.walk(schema_dir):
+        for filename in files:
+            try:
+                contents = DictSerializer.load(pth.join(curdir, filename))
+                resource = Resource(contents=contents, specification=DRAFT7)
+                REGISTRY = resource @ REGISTRY
+            except NotImplementedError:
+                # Not a format recognized by the dict serializer
+                continue
 
 
 def normpath(path, relto='.', _posixify=False):
diff --git a/dnadna/utils/plugins.py b/dnadna/utils/plugins.py
index db981fd3..88afc7ff 100644
--- a/dnadna/utils/plugins.py
+++ b/dnadna/utils/plugins.py
@@ -568,7 +568,7 @@ class Pluggable:
         >>> validator.validate(config)
         Traceback (most recent call last):
         ...
-        dnadna.utils.config.ConfigError: error in config at 'transform.name':
+        dnadna.utils.config.ConfigError: error in config at 'name':
         'my_plugin2' is not one of ['my_plugin', 'MyPlugin']
 
     Cleanup:
diff --git a/dnadna/utils/torch_plugin_mixin.py b/dnadna/utils/torch_plugin_mixin.py
index 8e6d28d6..9f441c95 100644
--- a/dnadna/utils/torch_plugin_mixin.py
+++ b/dnadna/utils/torch_plugin_mixin.py
@@ -289,8 +289,10 @@ class TorchPluginMixin:
             param_schema = {'type': 'string'}
         elif isinstance(value, tuple):
             param_schema = {'type': 'array'}
-            param_schema['items'] = [cls._schema_from_default(v, False)
-                                     for v in value]
+            # param_schema['items'] = [cls._schema_from_default(v, False)
+            #                          for v in value]
+            # a single ``items`` schema now validates every item in the array, so derive it from the first element
+            param_schema['items'] = cls._schema_from_default(value[0], False)
             value = list(value)
         elif value is None:
             param_schema = {'type': 'null'}
diff --git a/environment-cpu.yml b/environment-cpu.yml
index f084d670..0c3be98c 100644
--- a/environment-cpu.yml
+++ b/environment-cpu.yml
@@ -14,7 +14,8 @@ dependencies:
   # must manually list it
   - future>=0.18.2
   # dnadna breaks after 4.6.2, see issue 145
-  - jsonschema>=3.0.2,<=4.6.2
+  # fixed as of jsonschema 4.19; see issue 152
+  - jsonschema>=4.19.2
   - matplotlib-base>=3.1.1
   # msprime version is pinned for now due to some trouble with newer versions'
   # dependency on openblas not taken into account; this can be fixed later
@@ -41,7 +42,6 @@ dependencies:
   # https://github.com/python/importlib_metadata/issues/298
   - importlib_metadata<3.9
   - pip:
-    - jsonschema-pyref
     - py-cpuinfo
     # - for tests
     - mistletoe
diff --git a/environment-cuda.yml b/environment-cuda.yml
index b64d30ea..6e11e868 100644
--- a/environment-cuda.yml
+++ b/environment-cuda.yml
@@ -16,7 +16,8 @@ dependencies:
   # must manually list it
   - future>=0.18.2
   # dnadna breaks after 4.6.2, see issue 145
-  - jsonschema>=3.0.2,<=4.6.2
+  # fixed as of jsonschema 4.19; see issue 152
+  - jsonschema>=4.19.2
   - matplotlib-base>=3.1.1
   # msprime version is pinned for now due to some trouble with newer versions'
   # dependency on openblas not taken into account; this can be fixed later
@@ -43,7 +44,6 @@ dependencies:
   # https://github.com/python/importlib_metadata/issues/298
   - importlib_metadata<3.9
   - pip:
-    - jsonschema-pyref
     - py-cpuinfo
     # - for tests
     - mistletoe
diff --git a/tests/test_params.py b/tests/test_params.py
index a74eec12..554cbdee 100644
--- a/tests/test_params.py
+++ b/tests/test_params.py
@@ -3,7 +3,9 @@
 
 import os.path as pth
 
-import jsonschema_pyref as jsonschema
+# validation now goes through ConfigValidator rather than jsonschema_pyref
+from dnadna.utils.config import ConfigValidator
+import jsonschema
 import pandas as pd
 
 import dnadna
@@ -28,8 +30,9 @@ class TestSchema:
     @classmethod
     def valid(cls, obj):
         try:
-            return jsonschema.validate(obj, cls.schema) is None
-        except jsonschema.ValidationError:
+            validator = ConfigValidator(cls.schema)
+            return validator.validate(obj) is None
+        except ValueError:
             return False
 
     def test_type(self):
diff --git a/tests/test_schemas.py b/tests/test_schemas.py
index d01e822b..26327ca2 100644
--- a/tests/test_schemas.py
+++ b/tests/test_schemas.py
@@ -120,7 +120,7 @@ def test_training_schema_validation_errors():
     with pytest.raises(ConfigError) as exc:
         config.validate(schema='training')
 
-    assert ("'learned_params.selection': must be an object like:"
+    assert ("'selection': must be an object like:"
             in str(exc.value))
 
     # missing classes for classification param
@@ -129,7 +129,7 @@ def test_training_schema_validation_errors():
     with pytest.raises(ConfigError) as exc:
         config.validate(schema='training')
 
-    assert ("'learned_params.selection': 'classes' is a required property"
+    assert ("'selection': 'classes' is a required property"
            in str(exc.value))
 
 
@@ -148,8 +148,7 @@ def test_empty_learned_params(empty_learned_params):
     with pytest.raises(ConfigError) as exc:
         config.validate(schema='training')
 
-    assert ("'learned_params': at least one parameter must be declared in "
-            "learned_params" in str(exc.value))
+    assert ("'learned_params': at least one parameter must be declared in " in str(exc.value))
 
 
 def test_simulation_schema_inheritance():
diff --git a/tests/test_simulation.py b/tests/test_simulation.py
index ceac5458..35b509ff 100644
--- a/tests/test_simulation.py
+++ b/tests/test_simulation.py
@@ -136,12 +136,12 @@ def test_simulator_config_validation_known_simulator():
     # If we set one of the values in the config to something invalid for the
     # OneEventSimulator's config schema ensure that validation fails (to ensure
     # the OneEventSimulator schema is in fact being validated)
-    config['tmax'] = 'blah blah'
+    config['ignore_missing_scenario'] = 'blah blah'
 
     with pytest.raises(ConfigError) as exc:
         config.validate(schema='simulation')
 
     assert str(exc.value) == (
-        "error in config at 'tmax': 'blah blah' is not of type 'integer'")
+        "error in config at 'ignore_missing_scenario': 'blah blah' is not of type 'boolean'")
 
 
 def test_simulator_config_validation_unknown_simulator():
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9efedd7c..37c884a1 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -535,4 +535,4 @@ def test_unknown_network_name_in_config():
         config.validate(schema)
 
     assert str(exc.value).startswith(
-        "error in config at 'name': must be one of cnn/CNN")
+        "error in config: must be one of cnn/CNN")
--
GitLab
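
A minimal usage sketch of the new wrapper introduced in dnadna/utils/config.py
(illustrative only, assuming this patch is applied; the schema literal is made
up):

    from dnadna.utils.config import ConfigValidator

    schema = {'type': 'object',
              'properties': {'n': {'type': 'integer', 'default': 1}}}
    validator = ConfigValidator(schema)
    config = {}
    validator.validate(config)  # fills in defaults from the matching schema
    assert config == {'n': 1}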