fix: update simplejson

This commit is contained in:
TheSecEng 2020-04-14 14:09:14 -04:00
parent ae023947b3
commit 700a842f50
No known key found for this signature in database
GPG Key ID: A7C3BA459E8C5C4E
10 changed files with 3895 additions and 322 deletions

View File

@ -5,9 +5,8 @@ interchange format.
:mod:`simplejson` exposes an API familiar to users of the standard library
:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
version of the :mod:`json` library contained in Python 2.6, but maintains
compatibility with Python 2.4 and Python 2.5 and (currently) has
significant performance advantages, even without using the optional C
extension for speedups.
compatibility back to Python 2.5 and (currently) has significant performance
advantages, even without using the optional C extension for speedups.
Encoding basic Python object hierarchies::
@ -38,8 +37,7 @@ Compact encoding::
Pretty printing::
>>> import simplejson as json
>>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')
>>> print('\n'.join([l.rstrip() for l in s.splitlines()]))
>>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' '))
{
"4": 5,
"6": 7
@ -79,7 +77,8 @@ Specializing JSON object encoding::
>>> def encode_complex(obj):
... if isinstance(obj, complex):
... return [obj.real, obj.imag]
... raise TypeError(repr(o) + " is not JSON serializable")
... raise TypeError('Object of type %s is not JSON serializable' %
... obj.__class__.__name__)
...
>>> json.dumps(2 + 1j, default=encode_complex)
'[2.0, 1.0]'
@ -88,7 +87,6 @@ Specializing JSON object encoding::
>>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
'[2.0, 1.0]'
Using simplejson.tool from the shell to validate and pretty-print::
$ echo '{"json":"obj"}' | python -m simplejson.tool
@ -96,21 +94,44 @@ Using simplejson.tool from the shell to validate and pretty-print::
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -m simplejson.tool
Expecting property name: line 1 column 2 (char 2)
Expecting property name: line 1 column 3 (char 2)
Parsing multiple documents serialized as JSON lines (newline-delimited JSON)::
>>> import simplejson as json
>>> def loads_lines(docs):
... for doc in docs.splitlines():
... yield json.loads(doc)
...
>>> sum(doc["count"] for doc in loads_lines('{"count":1}\n{"count":2}\n{"count":3}\n'))
6
Serializing multiple objects to JSON lines (newline-delimited JSON)::
>>> import simplejson as json
>>> def dumps_lines(objs):
... for obj in objs:
... yield json.dumps(obj, separators=(',',':')) + '\n'
...
>>> ''.join(dumps_lines([{'count': 1}, {'count': 2}, {'count': 3}]))
'{"count":1}\n{"count":2}\n{"count":3}\n'
"""
from __future__ import absolute_import
__version__ = '3.0.7'
__version__ = '3.17.0'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
'OrderedDict', 'simple_first',
'OrderedDict', 'simple_first', 'RawJSON'
]
__author__ = 'Bob Ippolito <bob@redivi.com>'
from decimal import Decimal
from .decoder import JSONDecoder, JSONDecodeError
from .errors import JSONDecodeError
from .raw_json import RawJSON
from .decoder import JSONDecoder
from .encoder import JSONEncoder, JSONEncoderForHTML
def _import_OrderedDict():
import collections
@ -140,53 +161,53 @@ _default_encoder = JSONEncoder(
use_decimal=True,
namedtuple_as_object=True,
tuple_as_array=True,
iterable_as_array=False,
bigint_as_string=False,
item_sort_key=None,
for_json=False,
ignore_nan=False,
int_as_string_bitcount=None,
)
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
**kw):
allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
for_json=False, ignore_nan=False, int_as_string_bitcount=None,
iterable_as_array=False, **kw):
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object).
If ``skipkeys`` is true then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
If *skipkeys* is true then ``dict`` keys that are not basic types
(``str``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the some chunks written to ``fp``
may be ``unicode`` instances, subject to normal Python ``str`` to
``unicode`` coercion rules. Unless ``fp.write()`` explicitly
understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
to cause an error.
If *ensure_ascii* is false (default: ``True``), then the output may
contain non-ASCII characters, so long as they do not need to be escaped
by JSON. When it is true, all non-ASCII characters are escaped.
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to
If *allow_nan* is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
in strict compliance of the original JSON specification, instead of using
the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
*ignore_nan* for ECMA-262 compliant behavior.
If *indent* is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
for each level of nesting. ``None`` (the default) selects the most compact
representation without any newlines. For backwards compatibility with
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
representation without any newlines.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators.
``(',', ':')`` is the most compact JSON representation.
If specified, *separators* should be an
``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
compact JSON representation, you should specify ``(',', ':')`` to eliminate
whitespace.
``encoding`` is the character encoding for str instances, default is UTF-8.
*encoding* is the character encoding for str instances, default is UTF-8.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
*default(obj)* is a function that should return a serializable version
of obj or raise ``TypeError``. The default simply raises ``TypeError``.
If *use_decimal* is true (default: ``True``) then decimal.Decimal
will be natively serialized to JSON with full precision.
@ -198,12 +219,20 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
If *tuple_as_array* is true (default: ``True``),
:class:`tuple` (and subclasses) will be encoded as JSON arrays.
If *iterable_as_array* is true (default: ``False``),
any object not in the above table that implements ``__iter__()``
will be encoded as a JSON array.
If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher
or lower than -2**53 will be encoded as strings. This is to avoid the
rounding that happens in Javascript otherwise. Note that this is still a
lossy operation that will not round-trip correctly and should be used
sparingly.
If *int_as_string_bitcount* is a positive number (n), then int of size
greater than or equal to 2**n or lower than or equal to -2**n will be
encoded as strings.
If specified, *item_sort_key* is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than
in alphabetical order by key. This option takes precedence over
@ -212,9 +241,19 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
If *sort_keys* is true (default: ``False``), the output of dictionaries
will be sorted by item.
If *for_json* is true (default: ``False``), objects with a ``for_json()``
method will use the return value of that method for encoding as JSON
instead of the object.
If *ignore_nan* is true (default: ``False``), then out of range
:class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
``null`` in compliance with the ECMA-262 specification. If true, this will
override *allow_nan*.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg.
the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead
of subclassing whenever possible.
"""
# cached encoder
@ -222,8 +261,12 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
check_circular and allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array
and not bigint_as_string and not item_sort_key and not kw):
and namedtuple_as_object and tuple_as_array and not iterable_as_array
and not bigint_as_string and not sort_keys
and not item_sort_key and not for_json
and not ignore_nan and int_as_string_bitcount is None
and not kw
):
iterable = _default_encoder.iterencode(obj)
else:
if cls is None:
@ -234,9 +277,13 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
default=default, use_decimal=use_decimal,
namedtuple_as_object=namedtuple_as_object,
tuple_as_array=tuple_as_array,
iterable_as_array=iterable_as_array,
bigint_as_string=bigint_as_string,
sort_keys=sort_keys,
item_sort_key=item_sort_key,
for_json=for_json,
ignore_nan=ignore_nan,
int_as_string_bitcount=int_as_string_bitcount,
**kw).iterencode(obj)
# could accelerate with writelines in some versions of Python, at
# a debuggability cost
@ -245,20 +292,21 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
**kw):
allow_nan=True, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
for_json=False, ignore_nan=False, int_as_string_bitcount=None,
iterable_as_array=False, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``.
If ``skipkeys`` is false then ``dict`` keys that are not basic types
(``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
(``str``, ``int``, ``long``, ``float``, ``bool``, ``None``)
will be skipped instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the return value will be a
``unicode`` instance subject to normal Python ``str`` to ``unicode``
coercion rules instead of being escaped to an ASCII ``str``.
If *ensure_ascii* is false (default: ``True``), then the output may
contain non-ASCII characters, so long as they do not need to be escaped
by JSON. When it is true, all non-ASCII characters are escaped.
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
@ -276,11 +324,14 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators.
``(',', ':')`` is the most compact JSON representation.
If specified, ``separators`` should be an
``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
compact JSON representation, you should specify ``(',', ':')`` to eliminate
whitespace.
``encoding`` is the character encoding for str instances, default is UTF-8.
``encoding`` is the character encoding for bytes instances, default is
UTF-8.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
@ -295,10 +346,18 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
If *tuple_as_array* is true (default: ``True``),
:class:`tuple` (and subclasses) will be encoded as JSON arrays.
If *iterable_as_array* is true (default: ``False``),
any object not in the above table that implements ``__iter__()``
will be encoded as a JSON array.
If *bigint_as_string* is true (not the default), ints 2**53 and higher
or lower than -2**53 will be encoded as strings. This is to avoid the
rounding that happens in Javascript otherwise.
If *int_as_string_bitcount* is a positive number (n), then int of size
greater than or equal to 2**n or lower than or equal to -2**n will be
encoded as strings.
If specified, *item_sort_key* is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than
in alphabetical order by key. This option takes precendence over
@ -307,9 +366,19 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
If *sort_keys* is true (default: ``False``), the output of dictionaries
will be sorted by item.
If *for_json* is true (default: ``False``), objects with a ``for_json()``
method will use the return value of that method for encoding as JSON
instead of the object.
If *ignore_nan* is true (default: ``False``), then out of range
:class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
``null`` in compliance with the ECMA-262 specification. If true, this will
override *allow_nan*.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg.
the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing
whenever possible.
"""
# cached encoder
@ -317,9 +386,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
check_circular and allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array
and namedtuple_as_object and tuple_as_array and not iterable_as_array
and not bigint_as_string and not sort_keys
and not item_sort_key and not kw):
and not item_sort_key and not for_json
and not ignore_nan and int_as_string_bitcount is None
and not kw
):
return _default_encoder.encode(obj)
if cls is None:
cls = JSONEncoder
@ -330,9 +402,13 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
use_decimal=use_decimal,
namedtuple_as_object=namedtuple_as_object,
tuple_as_array=tuple_as_array,
iterable_as_array=iterable_as_array,
bigint_as_string=bigint_as_string,
sort_keys=sort_keys,
item_sort_key=item_sort_key,
for_json=for_json,
ignore_nan=ignore_nan,
int_as_string_bitcount=int_as_string_bitcount,
**kw).encode(obj)
@ -345,14 +421,11 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
**kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document) to a Python object.
a JSON document as `str` or `bytes`) to a Python object.
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding :class:`unicode` objects.
Note that currently only encodings that are a superset of ASCII work,
strings of other encodings should be passed in as :class:`unicode`.
`bytes` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding `str` objects.
*object_hook*, if specified, will be called with the result of every
JSON object decoded and its return value will be used in place of the
@ -387,7 +460,8 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg.
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
"""
return loads(fp.read(),
@ -404,11 +478,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
document) to a Python object.
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding :class:`unicode` objects.
Note that currently only encodings that are a superset of ASCII work,
strings of other encodings should be passed in as :class:`unicode`.
:class:`bytes` objects decoded by this instance (``'utf-8'`` by
default). It has no effect when decoding :class:`unicode` objects.
*object_hook*, if specified, will be called with the result of every
JSON object decoded and its return value will be used in place of the
@ -443,7 +514,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_float=decimal.Decimal for parity with ``dump``.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg.
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
"""
if (cls is None and encoding is None and object_hook is None and

3384
simplejson/_speedups.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -5,39 +5,30 @@ if sys.version_info[0] < 3:
PY3 = False
def b(s):
return s
def u(s):
return unicode(s, 'unicode_escape')
import cStringIO as StringIO
StringIO = BytesIO = StringIO.StringIO
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
BytesIO = StringIO
text_type = unicode
binary_type = str
string_types = (basestring,)
integer_types = (int, long)
unichr = unichr
reload_module = reload
def fromhex(s):
return s.decode('hex')
else:
PY3 = True
from imp import reload as reload_module
import codecs
if sys.version_info[:2] >= (3, 4):
from importlib import reload as reload_module
else:
from imp import reload as reload_module
def b(s):
return codecs.latin_1_encode(s)[0]
def u(s):
return s
import io
StringIO = io.StringIO
BytesIO = io.BytesIO
return bytes(s, 'latin1')
from io import StringIO, BytesIO
text_type = str
binary_type = bytes
string_types = (str,)
integer_types = (int,)
def unichr(s):
return u(chr(s))
def fromhex(s):
return bytes.fromhex(s)
unichr = chr
long_type = integer_types[-1]

View File

@ -4,8 +4,9 @@ from __future__ import absolute_import
import re
import sys
import struct
from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr
from .scanner import make_scanner
from .compat import PY3, unichr
from .scanner import make_scanner, JSONDecodeError
def _import_c_scanstring():
try:
from ._speedups import scanstring
@ -14,72 +15,23 @@ def _import_c_scanstring():
return None
c_scanstring = _import_c_scanstring()
# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
# compatibility, but it was never in the __all__
__all__ = ['JSONDecoder']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
def _floatconstants():
_BYTES = fromhex('7FF80000000000007FF0000000000000')
# The struct module in Python 2.4 would get frexp() out of range here
# when an endian is specified in the format string. Fixed in Python 2.5+
if sys.byteorder != 'big':
_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
nan, inf = struct.unpack('dd', _BYTES)
if sys.version_info < (2, 6):
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
nan, inf = struct.unpack('>dd', _BYTES)
else:
nan = float('nan')
inf = float('inf')
return nan, inf, -inf
NaN, PosInf, NegInf = _floatconstants()
class JSONDecodeError(ValueError):
"""Subclass of ValueError with the following additional properties:
msg: The unformatted error message
doc: The JSON document being parsed
pos: The start index of doc where parsing failed
end: The end index of doc where parsing failed (may be None)
lineno: The line corresponding to pos
colno: The column corresponding to pos
endlineno: The line corresponding to end (may be None)
endcolno: The column corresponding to end (may be None)
"""
def __init__(self, msg, doc, pos, end=None):
ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
self.msg = msg
self.doc = doc
self.pos = pos
self.end = end
self.lineno, self.colno = linecol(doc, pos)
if end is not None:
self.endlineno, self.endcolno = linecol(doc, end)
else:
self.endlineno, self.endcolno = None, None
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
# Note that this function is called from _speedups
lineno, colno = linecol(doc, pos)
if end is None:
#fmt = '{0}: line {1} column {2} (char {3})'
#return fmt.format(msg, lineno, colno, pos)
fmt = '%s: line %d column %d (char %d)'
return fmt % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
#fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
#return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
_CONSTANTS = {
'-Infinity': NegInf,
'Infinity': PosInf,
@ -88,14 +40,14 @@ _CONSTANTS = {
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
'"': u('"'), '\\': u('\u005c'), '/': u('/'),
'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
'"': u'"', '\\': u'\\', '/': u'/',
'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}
DEFAULT_ENCODING = "utf-8"
def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
_PY3=PY3, _maxunicode=sys.maxunicode):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
@ -119,8 +71,8 @@ def py_scanstring(s, end, encoding=None, strict=True,
content, terminator = chunk.groups()
# Content is contains zero or more unescaped string characters
if content:
if not _PY3 and not isinstance(content, text_type):
content = text_type(content, encoding)
if not _PY3 and not isinstance(content, unicode):
content = unicode(content, encoding)
_append(content)
# Terminator is the end of string, a literal control character,
# or a backslash denoting that an escape sequence follows
@ -128,8 +80,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
break
elif terminator != '\\':
if strict:
msg = "Invalid control character %r at" % (terminator,)
#msg = "Invalid control character {0!r} at".format(terminator)
msg = "Invalid control character %r at"
raise JSONDecodeError(msg, s, end)
else:
_append(terminator)
@ -144,45 +95,39 @@ def py_scanstring(s, end, encoding=None, strict=True,
try:
char = _b[esc]
except KeyError:
msg = "Invalid \\escape: " + repr(esc)
msg = "Invalid \\X escape sequence %r"
raise JSONDecodeError(msg, s, end)
end += 1
else:
# Unicode escape sequence
msg = "Invalid \\uXXXX escape sequence"
esc = s[end + 1:end + 5]
next_end = end + 5
if len(esc) != 4:
msg = "Invalid \\uXXXX escape"
raise JSONDecodeError(msg, s, end)
escX = esc[1:2]
if len(esc) != 4 or escX == 'x' or escX == 'X':
raise JSONDecodeError(msg, s, end - 1)
try:
uni = int(esc, 16)
except ValueError:
msg = "Invalid \\uXXXX escape"
raise JSONDecodeError(msg, s, end)
raise JSONDecodeError(msg, s, end - 1)
end += 5
# Check for surrogate pair on UCS-4 systems
if _maxunicode > 65535:
unimask = uni & 0xfc00
if unimask == 0xd800:
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
if not s[end + 5:end + 7] == '\\u':
raise JSONDecodeError(msg, s, end)
esc2 = s[end + 7:end + 11]
if len(esc2) != 4:
raise JSONDecodeError(msg, s, end)
# Note that this will join high/low surrogate pairs
# but will also pass unpaired surrogates through
if (_maxunicode > 65535 and
uni & 0xfc00 == 0xd800 and
s[end:end + 2] == '\\u'):
esc2 = s[end + 2:end + 6]
escX = esc2[1:2]
if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
try:
uni2 = int(esc2, 16)
except ValueError:
raise JSONDecodeError(msg, s, end)
if uni2 & 0xfc00 != 0xdc00:
msg = "Unpaired high surrogate"
raise JSONDecodeError(msg, s, end)
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
next_end += 6
elif unimask == 0xdc00:
msg = "Unpaired low surrogate"
raise JSONDecodeError(msg, s, end)
if uni2 & 0xfc00 == 0xdc00:
uni = 0x10000 + (((uni - 0xd800) << 10) |
(uni2 - 0xdc00))
end += 6
char = unichr(uni)
end = next_end
# Append the unescaped character
_append(char)
return _join(chunks), end
@ -246,10 +191,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
except IndexError:
pass
try:
value, end = scan_once(s, end)
except StopIteration:
raise JSONDecodeError("Expecting object", s, end)
value, end = scan_once(s, end)
pairs.append((key, value))
try:
@ -264,7 +206,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
if nextchar == '}':
break
elif nextchar != ',':
raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
try:
nextchar = s[end]
@ -301,12 +243,11 @@ def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
# Look-ahead for trivial empty array
if nextchar == ']':
return values, end + 1
elif nextchar == '':
raise JSONDecodeError("Expecting value or ']'", s, end)
_append = values.append
while True:
try:
value, end = scan_once(s, end)
except StopIteration:
raise JSONDecodeError("Expecting object", s, end)
value, end = scan_once(s, end)
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
@ -316,7 +257,7 @@ def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']':
break
elif nextchar != ',':
raise JSONDecodeError("Expecting ',' delimiter", s, end)
raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
try:
if s[end] in _ws:
@ -340,7 +281,7 @@ class JSONDecoder(object):
+---------------+-------------------+
| array | list |
+---------------+-------------------+
| string | unicode |
| string | str, unicode |
+---------------+-------------------+
| number (int) | int, long |
+---------------+-------------------+
@ -424,8 +365,8 @@ class JSONDecoder(object):
instance containing a JSON document)
"""
if _PY3 and isinstance(s, binary_type):
s = s.decode(self.encoding)
if _PY3 and isinstance(s, bytes):
s = str(s, self.encoding)
obj, end = self.raw_decode(s)
end = _w(s, end).end()
if end != len(s):
@ -443,10 +384,17 @@ class JSONDecoder(object):
have extraneous data at the end.
"""
if _PY3 and not isinstance(s, text_type):
if idx < 0:
# Ensure that raw_decode bails on negative indexes, the regex
# would otherwise mask this behavior. #98
raise JSONDecodeError('Expecting value', s, idx)
if _PY3 and not isinstance(s, str):
raise TypeError("Input string must be text, not bytes")
try:
obj, end = self.scan_once(s, idx=_w(s, idx).end())
except StopIteration:
raise JSONDecodeError("No JSON object could be decoded", s, idx)
return obj, end
# strip UTF-8 bom
if len(s) > idx:
ord0 = ord(s[idx])
if ord0 == 0xfeff:
idx += 1
elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
idx += 3
return self.scan_once(s, idx=_w(s, idx).end())

View File

@ -3,8 +3,9 @@
from __future__ import absolute_import
import re
from operator import itemgetter
from decimal import Decimal
from .compat import u, unichr, binary_type, string_types, integer_types, PY3
# Do not import Decimal directly to avoid reload issues
import decimal
from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3
def _import_speedups():
try:
from . import _speedups
@ -14,11 +15,9 @@ def _import_speedups():
c_encode_basestring_ascii, c_make_encoder = _import_speedups()
from .decoder import PosInf
from .raw_json import RawJSON
#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# This is required because u() will mangle the string and ur'' isn't valid
# python3 syntax
ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
ESCAPE = re.compile(r'[\x00-\x1f\\"]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
@ -33,21 +32,31 @@ ESCAPE_DCT = {
for i in range(0x20):
#ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
for i in [0x2028, 0x2029]:
ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,))
FLOAT_REPR = repr
def encode_basestring(s, _PY3=PY3, _q=u('"')):
def encode_basestring(s, _PY3=PY3, _q=u'"'):
"""Return a JSON representation of a Python string
"""
if _PY3:
if isinstance(s, binary_type):
s = s.decode('utf-8')
if isinstance(s, bytes):
s = str(s, 'utf-8')
elif type(s) is not str:
# convert an str subclass instance to exact str
# raise a TypeError otherwise
s = str.__str__(s)
else:
if isinstance(s, str) and HAS_UTF8.search(s) is not None:
s = s.decode('utf-8')
s = unicode(s, 'utf-8')
elif type(s) not in (str, unicode):
# convert an str subclass instance to exact str
# convert a unicode subclass instance to exact unicode
# raise a TypeError otherwise
if isinstance(s, str):
s = str.__str__(s)
else:
s = unicode.__getnewargs__(s)[0]
def replace(match):
return ESCAPE_DCT[match.group(0)]
return _q + ESCAPE.sub(replace, s) + _q
@ -58,11 +67,23 @@ def py_encode_basestring_ascii(s, _PY3=PY3):
"""
if _PY3:
if isinstance(s, binary_type):
s = s.decode('utf-8')
if isinstance(s, bytes):
s = str(s, 'utf-8')
elif type(s) is not str:
# convert an str subclass instance to exact str
# raise a TypeError otherwise
s = str.__str__(s)
else:
if isinstance(s, str) and HAS_UTF8.search(s) is not None:
s = s.decode('utf-8')
s = unicode(s, 'utf-8')
elif type(s) not in (str, unicode):
# convert an str subclass instance to exact str
# convert a unicode subclass instance to exact unicode
# raise a TypeError otherwise
if isinstance(s, str):
s = str.__str__(s)
else:
s = unicode.__getnewargs__(s)[0]
def replace(match):
s = match.group(0)
try:
@ -116,12 +137,14 @@ class JSONEncoder(object):
"""
item_separator = ', '
key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False,
item_sort_key=None):
check_circular=True, allow_nan=True, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False,
item_sort_key=None, for_json=False, ignore_nan=False,
int_as_string_bitcount=None, iterable_as_array=False):
"""Constructor for JSONEncoder, with sensible defaults.
If skipkeys is false, then it is a TypeError to attempt
@ -153,9 +176,10 @@ class JSONEncoder(object):
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
If specified, separators should be a (item_separator, key_separator)
tuple. The default is (', ', ': '). To get the most compact JSON
representation you should specify (',', ':') to eliminate whitespace.
If specified, separators should be an (item_separator, key_separator)
tuple. The default is (', ', ': ') if *indent* is ``None`` and
(',', ': ') otherwise. To get the most compact JSON representation,
you should specify (',', ':') to eliminate whitespace.
If specified, default is a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable
@ -165,7 +189,7 @@ class JSONEncoder(object):
transformed into unicode using that encoding prior to JSON-encoding.
The default is UTF-8.
If use_decimal is true (not the default), ``decimal.Decimal`` will
If use_decimal is true (default: ``True``), ``decimal.Decimal`` will
be supported directly by the encoder. For the inverse, decode JSON
with ``parse_float=decimal.Decimal``.
@ -175,13 +199,31 @@ class JSONEncoder(object):
If tuple_as_array is true (the default), tuple (and subclasses) will
be encoded as JSON arrays.
If *iterable_as_array* is true (default: ``False``),
any object not in the above table that implements ``__iter__()``
will be encoded as a JSON array.
If bigint_as_string is true (not the default), ints 2**53 and higher
or lower than -2**53 will be encoded as strings. This is to avoid the
rounding that happens in Javascript otherwise.
If int_as_string_bitcount is a positive number (n), then int of size
greater than or equal to 2**n or lower than or equal to -2**n will be
encoded as strings.
If specified, item_sort_key is a callable used to sort the items in
each dictionary. This is useful if you want to sort items other than
in alphabetical order by key.
If for_json is true (not the default), objects with a ``for_json()``
method will use the return value of that method for encoding as JSON
instead of the object.
If *ignore_nan* is true (default: ``False``), then out of range
:class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
as ``null`` in compliance with the ECMA-262 specification. If true,
this will override *allow_nan*.
"""
self.skipkeys = skipkeys
@ -192,8 +234,12 @@ class JSONEncoder(object):
self.use_decimal = use_decimal
self.namedtuple_as_object = namedtuple_as_object
self.tuple_as_array = tuple_as_array
self.iterable_as_array = iterable_as_array
self.bigint_as_string = bigint_as_string
self.item_sort_key = item_sort_key
self.for_json = for_json
self.ignore_nan = ignore_nan
self.int_as_string_bitcount = int_as_string_bitcount
if indent is not None and not isinstance(indent, string_types):
indent = indent * ' '
self.indent = indent
@ -223,7 +269,8 @@ class JSONEncoder(object):
return JSONEncoder.default(self, o)
"""
raise TypeError(repr(o) + " is not JSON serializable")
raise TypeError('Object of type %s is not JSON serializable' %
o.__class__.__name__)
def encode(self, o):
"""Return a JSON string representation of a Python data structure.
@ -237,7 +284,7 @@ class JSONEncoder(object):
if isinstance(o, binary_type):
_encoding = self.encoding
if (_encoding is not None and not (_encoding == 'utf-8')):
o = o.decode(_encoding)
o = text_type(o, _encoding)
if isinstance(o, string_types):
if self.ensure_ascii:
return encode_basestring_ascii(o)
@ -272,13 +319,13 @@ class JSONEncoder(object):
_encoder = encode_basestring_ascii
else:
_encoder = encode_basestring
if self.encoding != 'utf-8':
if self.encoding != 'utf-8' and self.encoding is not None:
def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
if isinstance(o, binary_type):
o = o.decode(_encoding)
o = text_type(o, _encoding)
return _orig_encoder(o)
def floatstr(o, allow_nan=self.allow_nan,
def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
_repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
# Check for specials. Note that this type of test is processor
# and/or platform-specific, so do tests which don't depend on
@ -291,17 +338,23 @@ class JSONEncoder(object):
elif o == _neginf:
text = '-Infinity'
else:
if type(o) != float:
# See #118, do not trust custom str/repr
o = float(o)
return _repr(o)
if not allow_nan:
if ignore_nan:
text = 'null'
elif not allow_nan:
raise ValueError(
"Out of range float values are not JSON compliant: " +
repr(o))
return text
key_memo = {}
int_as_string_bitcount = (
53 if self.bigint_as_string else self.int_as_string_bitcount)
if (_one_shot and c_make_encoder is not None
and self.indent is None):
_iterencode = c_make_encoder(
@ -309,18 +362,18 @@ class JSONEncoder(object):
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
self.bigint_as_string, self.item_sort_key,
self.encoding,
Decimal)
int_as_string_bitcount,
self.item_sort_key, self.encoding, self.for_json,
self.ignore_nan, decimal.Decimal, self.iterable_as_array)
else:
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
self.bigint_as_string, self.item_sort_key,
self.encoding,
Decimal=Decimal)
int_as_string_bitcount,
self.item_sort_key, self.encoding, self.for_json,
self.iterable_as_array, Decimal=decimal.Decimal)
try:
return _iterencode(o, 0)
finally:
@ -334,6 +387,11 @@ class JSONEncoderForHTML(JSONEncoder):
characters &, < and > should be escaped. They cannot be escaped
with the usual entities (e.g. &amp;) because they are not expanded
within <script> tags.
This class also escapes the line separator and paragraph separator
characters U+2028 and U+2029, irrespective of the ensure_ascii setting,
as these characters are not valid in JavaScript strings (see
http://timelessrepo.com/json-isnt-a-javascript-subset).
"""
def encode(self, o):
@ -351,18 +409,25 @@ class JSONEncoderForHTML(JSONEncoder):
chunk = chunk.replace('&', '\\u0026')
chunk = chunk.replace('<', '\\u003c')
chunk = chunk.replace('>', '\\u003e')
if not self.ensure_ascii:
chunk = chunk.replace(u'\u2028', '\\u2028')
chunk = chunk.replace(u'\u2029', '\\u2029')
yield chunk
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
_use_decimal, _namedtuple_as_object, _tuple_as_array,
_bigint_as_string, _item_sort_key, _encoding,
_int_as_string_bitcount, _item_sort_key,
_encoding,_for_json,
_iterable_as_array,
## HACK: hand-optimized bytecode; turn globals into locals
_PY3=PY3,
ValueError=ValueError,
string_types=string_types,
Decimal=Decimal,
Decimal=None,
dict=dict,
float=float,
id=id,
@ -371,12 +436,38 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
list=list,
str=str,
tuple=tuple,
iter=iter,
):
if _use_decimal and Decimal is None:
Decimal = decimal.Decimal
if _item_sort_key and not callable(_item_sort_key):
raise TypeError("item_sort_key must be None or callable")
elif _sort_keys and not _item_sort_key:
_item_sort_key = itemgetter(0)
if (_int_as_string_bitcount is not None and
(_int_as_string_bitcount <= 0 or
not isinstance(_int_as_string_bitcount, integer_types))):
raise TypeError("int_as_string_bitcount must be a positive integer")
def _encode_int(value):
skip_quoting = (
_int_as_string_bitcount is None
or
_int_as_string_bitcount < 1
)
if type(value) not in integer_types:
# See #118, do not trust custom str/repr
value = int(value)
if (
skip_quoting or
(-1 << _int_as_string_bitcount)
< value <
(1 << _int_as_string_bitcount)
):
return str(value)
return '"' + str(value) + '"'
def _iterencode_list(lst, _current_indent_level):
if not lst:
yield '[]'
@ -401,9 +492,12 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
first = False
else:
buf = separator
if (isinstance(value, string_types) or
(_PY3 and isinstance(value, binary_type))):
if isinstance(value, string_types):
yield buf + _encoder(value)
elif _PY3 and isinstance(value, bytes) and _encoding is not None:
yield buf + _encoder(value)
elif isinstance(value, RawJSON):
yield buf + value.encoded_json
elif value is None:
yield buf + 'null'
elif value is True:
@ -411,17 +505,17 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
elif value is False:
yield buf + 'false'
elif isinstance(value, integer_types):
yield ((buf + str(value))
if (not _bigint_as_string or
(-1 << 53) < value < (1 << 53))
else (buf + '"' + str(value) + '"'))
yield buf + _encode_int(value)
elif isinstance(value, float):
yield buf + _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield buf + str(value)
else:
yield buf
if isinstance(value, list):
for_json = _for_json and getattr(value, 'for_json', None)
if for_json and callable(for_json):
chunks = _iterencode(for_json(), _current_indent_level)
elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level)
else:
_asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
@ -436,18 +530,22 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
chunks = _iterencode(value, _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + (_indent * _current_indent_level)
yield ']'
if first:
# iterable_as_array misses the fast path at the top
yield '[]'
else:
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + (_indent * _current_indent_level)
yield ']'
if markers is not None:
del markers[markerid]
def _stringify_key(key):
if isinstance(key, string_types): # pragma: no cover
pass
elif isinstance(key, binary_type):
key = key.decode(_encoding)
elif _PY3 and isinstance(key, bytes) and _encoding is not None:
key = str(key, _encoding)
elif isinstance(key, float):
key = _floatstr(key)
elif key is True:
@ -457,13 +555,17 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
elif key is None:
key = 'null'
elif isinstance(key, integer_types):
if type(key) not in integer_types:
# See #118, do not trust custom str/repr
key = int(key)
key = str(key)
elif _use_decimal and isinstance(key, Decimal):
key = str(key)
elif _skipkeys:
key = None
else:
raise TypeError("key " + repr(key) + " is not a string")
raise TypeError('keys must be str, int, float, bool or None, '
'not %s' % key.__class__.__name__)
return key
def _iterencode_dict(dct, _current_indent_level):
@ -512,9 +614,12 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield item_separator
yield _encoder(key)
yield _key_separator
if (isinstance(value, string_types) or
(_PY3 and isinstance(value, binary_type))):
if isinstance(value, string_types):
yield _encoder(value)
elif _PY3 and isinstance(value, bytes) and _encoding is not None:
yield _encoder(value)
elif isinstance(value, RawJSON):
yield value.encoded_json
elif value is None:
yield 'null'
elif value is True:
@ -522,16 +627,16 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
elif value is False:
yield 'false'
elif isinstance(value, integer_types):
yield (str(value)
if (not _bigint_as_string or
(-1 << 53) < value < (1 << 53))
else ('"' + str(value) + '"'))
yield _encode_int(value)
elif isinstance(value, float):
yield _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield str(value)
else:
if isinstance(value, list):
for_json = _for_json and getattr(value, 'for_json', None)
if for_json and callable(for_json):
chunks = _iterencode(for_json(), _current_indent_level)
elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level)
else:
_asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
@ -554,9 +659,12 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
del markers[markerid]
def _iterencode(o, _current_indent_level):
if (isinstance(o, string_types) or
(_PY3 and isinstance(o, binary_type))):
if isinstance(o, string_types):
yield _encoder(o)
elif _PY3 and isinstance(o, bytes) and _encoding is not None:
yield _encoder(o)
elif isinstance(o, RawJSON):
yield o.encoded_json
elif o is None:
yield 'null'
elif o is True:
@ -564,38 +672,51 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
elif o is False:
yield 'false'
elif isinstance(o, integer_types):
yield (str(o)
if (not _bigint_as_string or
(-1 << 53) < o < (1 << 53))
else ('"' + str(o) + '"'))
yield _encode_int(o)
elif isinstance(o, float):
yield _floatstr(o)
elif isinstance(o, list):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
else:
_asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
if _asdict and callable(_asdict):
for chunk in _iterencode_dict(_asdict(), _current_indent_level):
for_json = _for_json and getattr(o, 'for_json', None)
if for_json and callable(for_json):
for chunk in _iterencode(for_json(), _current_indent_level):
yield chunk
elif (_tuple_as_array and isinstance(o, tuple)):
elif isinstance(o, list):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
elif isinstance(o, dict):
for chunk in _iterencode_dict(o, _current_indent_level):
yield chunk
elif _use_decimal and isinstance(o, Decimal):
yield str(o)
else:
if markers is not None:
markerid = id(o)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
for chunk in _iterencode(o, _current_indent_level):
yield chunk
if markers is not None:
del markers[markerid]
_asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
if _asdict and callable(_asdict):
for chunk in _iterencode_dict(_asdict(),
_current_indent_level):
yield chunk
elif (_tuple_as_array and isinstance(o, tuple)):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
elif isinstance(o, dict):
for chunk in _iterencode_dict(o, _current_indent_level):
yield chunk
elif _use_decimal and isinstance(o, Decimal):
yield str(o)
else:
while _iterable_as_array:
# Markers are not checked here because it is valid for
# an iterable to return self.
try:
o = iter(o)
except TypeError:
break
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
return
if markers is not None:
markerid = id(o)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
for chunk in _iterencode(o, _current_indent_level):
yield chunk
if markers is not None:
del markers[markerid]
return _iterencode

53
simplejson/errors.py Normal file
View File

@ -0,0 +1,53 @@
"""Error classes used by simplejson
"""
__all__ = ['JSONDecodeError']
def linecol(doc, pos):
lineno = doc.count('\n', 0, pos) + 1
if lineno == 1:
colno = pos + 1
else:
colno = pos - doc.rindex('\n', 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
lineno, colno = linecol(doc, pos)
msg = msg.replace('%r', repr(doc[pos:pos + 1]))
if end is None:
fmt = '%s: line %d column %d (char %d)'
return fmt % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
class JSONDecodeError(ValueError):
"""Subclass of ValueError with the following additional properties:
msg: The unformatted error message
doc: The JSON document being parsed
pos: The start index of doc where parsing failed
end: The end index of doc where parsing failed (may be None)
lineno: The line corresponding to pos
colno: The column corresponding to pos
endlineno: The line corresponding to end (may be None)
endcolno: The column corresponding to end (may be None)
"""
# Note that this exception is used from _speedups
def __init__(self, msg, doc, pos, end=None):
ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
self.msg = msg
self.doc = doc
self.pos = pos
self.end = end
self.lineno, self.colno = linecol(doc, pos)
if end is not None:
self.endlineno, self.endcolno = linecol(doc, end)
else:
self.endlineno, self.endcolno = None, None
def __reduce__(self):
return self.__class__, (self.msg, self.doc, self.pos, self.end)

View File

@ -5,17 +5,6 @@ http://code.activestate.com/recipes/576693/
"""
from UserDict import DictMixin
# Modified from original to support Python 2.4, see
# http://code.google.com/p/simplejson/issues/detail?id=53
try:
all
except NameError:
def all(seq):
for elem in seq:
if not elem:
return False
return True
class OrderedDict(dict, DictMixin):
def __init__(self, *args, **kwds):
@ -63,12 +52,7 @@ class OrderedDict(dict, DictMixin):
def popitem(self, last=True):
if not self:
raise KeyError('dictionary is empty')
# Modified from original to support Python 2.4, see
# http://code.google.com/p/simplejson/issues/detail?id=53
if last:
key = reversed(self).next()
else:
key = iter(self).next()
key = reversed(self).next() if last else iter(self).next()
value = self.pop(key)
return key, value

9
simplejson/raw_json.py Normal file
View File

@ -0,0 +1,9 @@
"""Implementation of RawJSON
"""
class RawJSON(object):
"""Wrap an encoded JSON document for direct embedding in the output
"""
def __init__(self, encoded_json):
self.encoded_json = encoded_json

View File

@ -1,20 +1,22 @@
"""JSON token scanner
"""
import re
from .errors import JSONDecodeError
def _import_c_make_scanner():
try:
from simplejson._speedups import make_scanner
from ._speedups import make_scanner
return make_scanner
except ImportError:
return None
c_make_scanner = _import_c_make_scanner()
__all__ = ['make_scanner']
__all__ = ['make_scanner', 'JSONDecodeError']
NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
def py_make_scanner(context):
parse_object = context.parse_object
parse_array = context.parse_array
@ -30,10 +32,11 @@ def py_make_scanner(context):
memo = context.memo
def _scan_once(string, idx):
errmsg = 'Expecting value'
try:
nextchar = string[idx]
except IndexError:
raise StopIteration
raise JSONDecodeError(errmsg, string, idx)
if nextchar == '"':
return parse_string(string, idx + 1, encoding, strict)
@ -64,9 +67,14 @@ def py_make_scanner(context):
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise StopIteration
raise JSONDecodeError(errmsg, string, idx)
def scan_once(string, idx):
if idx < 0:
# Ensure the same behavior as the C speedup, otherwise
# this would work for *some* negative string indices due
# to the behavior of __getitem__ for strings. #98
raise JSONDecodeError('Expecting value', string, idx)
try:
return _scan_once(string, idx)
finally:

View File

@ -10,6 +10,7 @@ Usage::
Expecting property name: line 1 column 2 (char 2)
"""
from __future__ import with_statement
import sys
import simplejson as json
@ -18,21 +19,23 @@ def main():
infile = sys.stdin
outfile = sys.stdout
elif len(sys.argv) == 2:
infile = open(sys.argv[1], 'rb')
infile = open(sys.argv[1], 'r')
outfile = sys.stdout
elif len(sys.argv) == 3:
infile = open(sys.argv[1], 'rb')
outfile = open(sys.argv[2], 'wb')
infile = open(sys.argv[1], 'r')
outfile = open(sys.argv[2], 'w')
else:
raise SystemExit(sys.argv[0] + " [infile [outfile]]")
try:
obj = json.load(infile,
object_pairs_hook=json.OrderedDict,
use_decimal=True)
except ValueError:
raise SystemExit(sys.exc_info()[1])
json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True)
outfile.write('\n')
with infile:
try:
obj = json.load(infile,
object_pairs_hook=json.OrderedDict,
use_decimal=True)
except ValueError:
raise SystemExit(sys.exc_info()[1])
with outfile:
json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True)
outfile.write('\n')
if __name__ == '__main__':