edq.util.json

This file standardizes how we write and read JSON. Specifically, we try to be flexible when reading (using JSON5), and strict when writing (using vanilla JSON).

  1"""
  2This file standardizes how we write and read JSON.
  3Specifically, we try to be flexible when reading (using JSON5),
  4and strict when writing (using vanilla JSON).
  5"""
  6
  7import enum
  8import gzip
  9import io
 10import json
 11import os
 12import typing
 13
 14import json5
 15
 16import edq.util.dirent
 17
 18class DictConverter():
 19    """
 20    A base class for class that can represent (serialize) and reconstruct (deserialize) themselves as/from a dict.
 21    The intention is that the dict can then be cleanly converted to/from JSON.
 22
 23    General (but inefficient) implementations of several core Python equality, comparison, and representation methods are provided.
 24    """
 25
 26    def to_dict(self) -> typing.Dict[str, typing.Any]:
 27        """
 28        Return a dict that can be used to represent this object.
 29        If the dict is passed to from_dict(), an identical object should be reconstructed.
 30
 31        A general (but inefficient) implementation is provided by default.
 32        """
 33
 34        return vars(self).copy()
 35
 36    @classmethod
 37    # Note that `typing.Self` is returned, but that is introduced in Python 3.12.
 38    def from_dict(cls, data: typing.Dict[str, typing.Any]) -> typing.Any:
 39        """
 40        Return an instance of this subclass created using the given dict.
 41        If the dict came from to_dict(), the returned object should be identical to the original.
 42
 43        A general (but inefficient) implementation is provided by default.
 44        """
 45
 46        return cls(**data)
 47
 48    def __eq__(self, other: object) -> bool:
 49        """
 50        Check for equality.
 51
 52        This check uses to_dict() and compares the results.
 53        This may not be complete or efficient depending on the child class.
 54        """
 55
 56        # Note the hard type check (done so we can keep this method general).
 57        if (type(self) != type(other)):  # pylint: disable=unidiomatic-typecheck
 58            return False
 59
 60        return bool(self.to_dict() == other.to_dict())  # type: ignore[attr-defined]
 61
 62    def __lt__(self, other: 'DictConverter') -> bool:
 63        return dumps(self) < dumps(other)
 64
 65    def __hash__(self) -> int:
 66        return hash(dumps(self))
 67
 68    def __str__(self) -> str:
 69        return dumps(self)
 70
 71    def __repr__(self) -> str:
 72        return dumps(self)
 73
 74def _custom_handle(value: typing.Any) -> typing.Union[typing.Dict[str, typing.Any], str]:
 75    """
 76    Handle objects that are not JSON serializable by default,
 77    e.g., calling vars() on an object.
 78    """
 79
 80    if (isinstance(value, DictConverter)):
 81        return value.to_dict()
 82
 83    if (isinstance(value, enum.Enum)):
 84        return str(value)
 85
 86    if (hasattr(value, '__dict__')):
 87        return dict(vars(value))
 88
 89    raise ValueError(f"Could not JSON serialize object: '{value}'.")
 90
 91def load(
 92        file_obj: typing.TextIO,
 93        strict: bool = False,
 94        gzipped: bool = False,
 95        encoding: str = edq.util.dirent.DEFAULT_ENCODING,
 96        **kwargs: typing.Any) -> typing.Any:
 97    """
 98    Load a file object/handler as JSON.
 99    If strict is set, then use standard Python JSON,
100    otherwise use JSON5.
101
102    If `gzipped` is set, the file object is treated as a gzipped bytes stream (e.g. `open('test.json.gz', 'rb')`).
103    """
104
105    if (gzipped):
106        binary_file_obj = gzip.GzipFile(fileobj = file_obj)  # type: ignore[call-overload]
107        file_obj = io.TextIOWrapper(binary_file_obj, encoding = encoding)
108
109    if (strict):
110        return json.load(file_obj, **kwargs)
111
112    return json5.load(file_obj, **kwargs)
113
def loads(text: str, strict: bool = False, **kwargs: typing.Any) -> typing.Any:
    """
    Parse JSON from a string.
    Standard Python JSON is used when `strict` is set,
    and JSON5 is used otherwise.

    Any additional keyword arguments are passed to the chosen parser.
    """

    parse = json.loads if (strict) else json5.loads
    return parse(text, **kwargs)
125
def load_path(
        path: str,
        strict: bool = False,
        gzipped: typing.Union[bool, None] = None,
        encoding: str = edq.util.dirent.DEFAULT_ENCODING,
        **kwargs: typing.Any) -> typing.Any:
    """
    Parse JSON from the file at the given path.
    Standard Python JSON is used when `strict` is set,
    and JSON5 is used otherwise.

    When `gzipped` is not set (None), gzip is assumed if and only if the path has a ".gz" extension.

    A FileNotFoundError is raised if the path does not exist,
    an IsADirectoryError is raised if the path is a directory,
    and any error raised while loading the opened file is re-raised as a ValueError naming the path.
    """

    if (not os.path.exists(path)):
        raise FileNotFoundError(f"File does not exist: '{path}'.")

    if (os.path.isdir(path)):
        raise IsADirectoryError(f"Cannot open JSON file, expected a file but got a directory at '{path}'.")

    if (gzipped is None):
        _, extension = os.path.splitext(path)
        gzipped = (extension == '.gz')

    # Both openers accept the same text mode and encoding arguments.
    opener: typing.Callable[..., typing.Any] = gzip.open if (gzipped) else open

    with opener(path, 'rt', encoding = encoding) as file:
        try:
            return load(file, strict = strict, **kwargs)
        except Exception as ex:
            raise ValueError(f"Failed to read JSON file '{path}'.") from ex
158
def loads_object(text: str, cls: typing.Type[DictConverter], **kwargs: typing.Any) -> DictConverter:
    """
    Parse a JSON string and build an object (of a DictConverter subclass) from it.

    The parsed JSON must be a dict (otherwise a ValueError is raised), and is passed to `cls.from_dict()`.
    Any additional keyword arguments are passed to loads().
    """

    parsed = loads(text, **kwargs)
    if (isinstance(parsed, dict)):
        return cls.from_dict(parsed)  # type: ignore[no-any-return]

    raise ValueError(f"JSON to load into an object is not a dict, found '{type(parsed)}'.")
167
def load_object_path(path: str, cls: typing.Type[DictConverter], **kwargs: typing.Any) -> DictConverter:
    """
    Parse a JSON file and build an object (of a DictConverter subclass) from it.

    The parsed JSON must be a dict (otherwise a ValueError is raised), and is passed to `cls.from_dict()`.
    Any additional keyword arguments are passed to load_path().
    """

    parsed = load_path(path, **kwargs)
    if (isinstance(parsed, dict)):
        return cls.from_dict(parsed)  # type: ignore[no-any-return]

    raise ValueError(f"JSON to load into an object is not a dict, found '{type(parsed)}'.")
176
def dump(
        data: typing.Any,
        file_obj: typing.TextIO,
        default: typing.Union[typing.Callable, None] = _custom_handle,
        sort_keys: bool = True,
        **kwargs: typing.Any) -> None:
    """
    Write the given data to an open file object as (vanilla) JSON.

    `default` and `sort_keys` are forwarded to json.dump(),
    along with any additional keyword arguments.
    """

    options: typing.Dict[str, typing.Any] = {'default': default, 'sort_keys': sort_keys, **kwargs}
    json.dump(data, file_obj, **options)
186
def dumps(
        data: typing.Any,
        default: typing.Union[typing.Callable, None] = _custom_handle,
        sort_keys: bool = True,
        **kwargs: typing.Any) -> str:
    """
    Convert the given data to a (vanilla) JSON string.

    `default` and `sort_keys` are forwarded to json.dumps(),
    along with any additional keyword arguments.
    """

    options: typing.Dict[str, typing.Any] = {'default': default, 'sort_keys': sort_keys, **kwargs}
    return json.dumps(data, **options)
195
def dump_path(
        data: typing.Any,
        path: str,
        default: typing.Union[typing.Callable, None] = _custom_handle,
        sort_keys: bool = True,
        gzipped: typing.Union[bool, None] = None,
        encoding: str = edq.util.dirent.DEFAULT_ENCODING,
        **kwargs: typing.Any) -> None:
    """
    Write the given data to the file at the given path as JSON.

    When `gzipped` is not set (None), gzip is used if and only if the path has a ".gz" extension.
    The file is opened in text mode using `encoding`,
    and the remaining arguments are passed to dump().
    """

    if (gzipped is None):
        _, extension = os.path.splitext(path)
        gzipped = (extension == '.gz')

    # Both openers accept the same text mode and encoding arguments.
    opener: typing.Callable[..., typing.Any] = gzip.open if (gzipped) else open

    with opener(path, 'wt', encoding = encoding) as file:
        dump(data, file, default = default, sort_keys = sort_keys, **kwargs)
class DictConverter:
class DictConverter:
    """
    A base class for classes that can represent (serialize) themselves as a dict
    and be reconstructed (deserialized) from one.
    The dict is intended to convert cleanly to/from JSON.

    General (but inefficient) implementations of equality, ordering, hashing,
    and string representation are provided.
    """

    def to_dict(self) -> typing.Dict[str, typing.Any]:
        """
        Build the dict that represents this object.
        Passing the result to from_dict() should reconstruct an identical object.

        This default implementation returns a shallow copy of the instance attributes.
        """

        attributes = vars(self)
        return attributes.copy()

    # Note that `typing.Self` is the precise return type, but it is only available in Python 3.11+.
    @classmethod
    def from_dict(cls, data: typing.Dict[str, typing.Any]) -> typing.Any:
        """
        Create an instance of this (sub)class from the given dict.
        A dict produced by to_dict() should yield an object identical to the original.

        This default implementation passes the dict entries to the constructor as keyword arguments.
        """

        instance = cls(**data)
        return instance

    def __eq__(self, other: object) -> bool:
        """
        Check for equality by comparing the to_dict() output of both objects.
        Objects of different types are never equal.

        This may not be complete or efficient depending on the child class.
        """

        # A hard type check keeps this method general (a subclass instance never equals a parent instance).
        if (type(self) != type(other)):  # pylint: disable=unidiomatic-typecheck
            return False

        own_dict = self.to_dict()
        other_dict = other.to_dict()  # type: ignore[attr-defined]
        return bool(own_dict == other_dict)

    def __lt__(self, other: 'DictConverter') -> bool:
        """ Order objects by comparing their JSON string forms. """

        own_text = dumps(self)
        other_text = dumps(other)
        return own_text < other_text

    def __hash__(self) -> int:
        """ Hash the JSON string form of this object. """

        text = dumps(self)
        return hash(text)

    def __str__(self) -> str:
        """ Use the JSON string form of this object as its string form. """

        text = dumps(self)
        return text

    def __repr__(self) -> str:
        """ Use the JSON string form of this object as its representation. """

        text = dumps(self)
        return text

A base class for classes that can represent (serialize) and reconstruct (deserialize) themselves as/from a dict. The intention is that the dict can then be cleanly converted to/from JSON.

General (but inefficient) implementations of several core Python equality, comparison, and representation methods are provided.

def to_dict(self) -> Dict[str, Any]:
27    def to_dict(self) -> typing.Dict[str, typing.Any]:
28        """
29        Return a dict that can be used to represent this object.
30        If the dict is passed to from_dict(), an identical object should be reconstructed.
31
32        A general (but inefficient) implementation is provided by default.
33        """
34
35        return vars(self).copy()

Return a dict that can be used to represent this object. If the dict is passed to from_dict(), an identical object should be reconstructed.

A general (but inefficient) implementation is provided by default.

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> Any:
37    @classmethod
38    # Note that `typing.Self` is returned, but that is introduced in Python 3.12.
39    def from_dict(cls, data: typing.Dict[str, typing.Any]) -> typing.Any:
40        """
41        Return an instance of this subclass created using the given dict.
42        If the dict came from to_dict(), the returned object should be identical to the original.
43
44        A general (but inefficient) implementation is provided by default.
45        """
46
47        return cls(**data)

Return an instance of this subclass created using the given dict. If the dict came from to_dict(), the returned object should be identical to the original.

A general (but inefficient) implementation is provided by default.

def load( file_obj: <class 'TextIO'>, strict: bool = False, gzipped: bool = False, encoding: str = 'utf-8', **kwargs: Any) -> Any:
 92def load(
 93        file_obj: typing.TextIO,
 94        strict: bool = False,
 95        gzipped: bool = False,
 96        encoding: str = edq.util.dirent.DEFAULT_ENCODING,
 97        **kwargs: typing.Any) -> typing.Any:
 98    """
 99    Load a file object/handler as JSON.
100    If strict is set, then use standard Python JSON,
101    otherwise use JSON5.
102
103    If `gzipped` is set, the file object is treated as a gzipped bytes stream (e.g. `open('test.json.gz', 'rb')`).
104    """
105
106    if (gzipped):
107        binary_file_obj = gzip.GzipFile(fileobj = file_obj)  # type: ignore[call-overload]
108        file_obj = io.TextIOWrapper(binary_file_obj, encoding = encoding)
109
110    if (strict):
111        return json.load(file_obj, **kwargs)
112
113    return json5.load(file_obj, **kwargs)

Load a file object/handler as JSON. If strict is set, then use standard Python JSON, otherwise use JSON5.

If gzipped is set, the file object is treated as a gzipped bytes stream (e.g. open('test.json.gz', 'rb')).

def loads(text: str, strict: bool = False, **kwargs: Any) -> Any:
def loads(text: str, strict: bool = False, **kwargs: typing.Any) -> typing.Any:
    """
    Parse JSON from a string.
    Standard Python JSON is used when `strict` is set,
    and JSON5 is used otherwise.

    Any additional keyword arguments are passed to the chosen parser.
    """

    parse = json.loads if (strict) else json5.loads
    return parse(text, **kwargs)

Load a string as JSON. If strict is set, then use standard Python JSON, otherwise use JSON5.

def load_path( path: str, strict: bool = False, gzipped: Optional[bool] = None, encoding: str = 'utf-8', **kwargs: Any) -> Any:
def load_path(
        path: str,
        strict: bool = False,
        gzipped: typing.Union[bool, None] = None,
        encoding: str = edq.util.dirent.DEFAULT_ENCODING,
        **kwargs: typing.Any) -> typing.Any:
    """
    Parse JSON from the file at the given path.
    Standard Python JSON is used when `strict` is set,
    and JSON5 is used otherwise.

    When `gzipped` is not set (None), gzip is assumed if and only if the path has a ".gz" extension.

    A FileNotFoundError is raised if the path does not exist,
    an IsADirectoryError is raised if the path is a directory,
    and any error raised while loading the opened file is re-raised as a ValueError naming the path.
    """

    if (not os.path.exists(path)):
        raise FileNotFoundError(f"File does not exist: '{path}'.")

    if (os.path.isdir(path)):
        raise IsADirectoryError(f"Cannot open JSON file, expected a file but got a directory at '{path}'.")

    if (gzipped is None):
        _, extension = os.path.splitext(path)
        gzipped = (extension == '.gz')

    # Both openers accept the same text mode and encoding arguments.
    opener: typing.Callable[..., typing.Any] = gzip.open if (gzipped) else open

    with opener(path, 'rt', encoding = encoding) as file:
        try:
            return load(file, strict = strict, **kwargs)
        except Exception as ex:
            raise ValueError(f"Failed to read JSON file '{path}'.") from ex

Load a file path as JSON. If strict is set, then use standard Python JSON, otherwise use JSON5.

If gzipped is not set, the behavior is guessed from the extension (".gz").

def loads_object( text: str, cls: Type[DictConverter], **kwargs: Any) -> DictConverter:
def loads_object(text: str, cls: typing.Type[DictConverter], **kwargs: typing.Any) -> DictConverter:
    """
    Parse a JSON string and build an object (of a DictConverter subclass) from it.

    The parsed JSON must be a dict (otherwise a ValueError is raised), and is passed to `cls.from_dict()`.
    Any additional keyword arguments are passed to loads().
    """

    parsed = loads(text, **kwargs)
    if (isinstance(parsed, dict)):
        return cls.from_dict(parsed)  # type: ignore[no-any-return]

    raise ValueError(f"JSON to load into an object is not a dict, found '{type(parsed)}'.")

Load a JSON string into an object (which is a subclass of DictConverter).

def load_object_path( path: str, cls: Type[DictConverter], **kwargs: Any) -> DictConverter:
def load_object_path(path: str, cls: typing.Type[DictConverter], **kwargs: typing.Any) -> DictConverter:
    """
    Parse a JSON file and build an object (of a DictConverter subclass) from it.

    The parsed JSON must be a dict (otherwise a ValueError is raised), and is passed to `cls.from_dict()`.
    Any additional keyword arguments are passed to load_path().
    """

    parsed = load_path(path, **kwargs)
    if (isinstance(parsed, dict)):
        return cls.from_dict(parsed)  # type: ignore[no-any-return]

    raise ValueError(f"JSON to load into an object is not a dict, found '{type(parsed)}'.")

Load a JSON file into an object (which is a subclass of DictConverter).

def dump( data: Any, file_obj: <class 'TextIO'>, default: Optional[Callable] = <function _custom_handle>, sort_keys: bool = True, **kwargs: Any) -> None:
def dump(
        data: typing.Any,
        file_obj: typing.TextIO,
        default: typing.Union[typing.Callable, None] = _custom_handle,
        sort_keys: bool = True,
        **kwargs: typing.Any) -> None:
    """
    Write the given data to an open file object as (vanilla) JSON.

    `default` and `sort_keys` are forwarded to json.dump(),
    along with any additional keyword arguments.
    """

    options: typing.Dict[str, typing.Any] = {'default': default, 'sort_keys': sort_keys, **kwargs}
    json.dump(data, file_obj, **options)

Dump an object as a JSON file object.

def dumps( data: Any, default: Optional[Callable] = <function _custom_handle>, sort_keys: bool = True, **kwargs: Any) -> str:
def dumps(
        data: typing.Any,
        default: typing.Union[typing.Callable, None] = _custom_handle,
        sort_keys: bool = True,
        **kwargs: typing.Any) -> str:
    """
    Convert the given data to a (vanilla) JSON string.

    `default` and `sort_keys` are forwarded to json.dumps(),
    along with any additional keyword arguments.
    """

    options: typing.Dict[str, typing.Any] = {'default': default, 'sort_keys': sort_keys, **kwargs}
    return json.dumps(data, **options)

Dump an object as a JSON string.

def dump_path( data: Any, path: str, default: Optional[Callable] = <function _custom_handle>, sort_keys: bool = True, gzipped: Optional[bool] = None, encoding: str = 'utf-8', **kwargs: Any) -> None:
def dump_path(
        data: typing.Any,
        path: str,
        default: typing.Union[typing.Callable, None] = _custom_handle,
        sort_keys: bool = True,
        gzipped: typing.Union[bool, None] = None,
        encoding: str = edq.util.dirent.DEFAULT_ENCODING,
        **kwargs: typing.Any) -> None:
    """
    Write the given data to the file at the given path as JSON.

    When `gzipped` is not set (None), gzip is used if and only if the path has a ".gz" extension.
    The file is opened in text mode using `encoding`,
    and the remaining arguments are passed to dump().
    """

    if (gzipped is None):
        _, extension = os.path.splitext(path)
        gzipped = (extension == '.gz')

    # Both openers accept the same text mode and encoding arguments.
    opener: typing.Callable[..., typing.Any] = gzip.open if (gzipped) else open

    with opener(path, 'wt', encoding = encoding) as file:
        dump(data, file, default = default, sort_keys = sort_keys, **kwargs)

Dump an object as a JSON file.

If gzipped is not set, the behavior is guessed from the extension (".gz").