mirror of https://github.com/swig/swig
Add missing checks for failures in calls to PyUnicode_AsUTF8String.
Previously a segfault could occur when passing strings that cannot be encoded as UTF-8 (lone low surrogates), e.g. passing u"\udcff" to the C layer (Python 3).
This commit is contained in:
parent
069ce1f6e9
commit
b0e29fbdf3
|
@ -6,8 +6,14 @@ the issue number to the end of the URL: https://github.com/swig/swig/issues/
|
|||
|
||||
Version 4.0.0 (in progress)
|
||||
===========================
|
||||
|
||||
2017-12-04: wsfulton
|
||||
[Python] Add missing checks for failures in calls to PyUnicode_AsUTF8String. Previously a
|
||||
seg fault could occur when passing invalid UTF8 strings (low surrogates), eg passing
|
||||
u"\udcff" to the C layer (Python 3).
|
||||
|
||||
2017-11-24: joequant
|
||||
Fix github #1124 and return R_NilValue for null pointers
|
||||
Fix #1124 and return R_NilValue for null pointers
|
||||
|
||||
2017-11-29: wsfulton
|
||||
[Java] director exception handling improvements.
|
||||
|
|
|
@ -6521,14 +6521,16 @@ string that cannot be completely decoded as UTF-8:
|
|||
<div class="code"><pre>
|
||||
%module example
|
||||
|
||||
%include <std_string.i>
|
||||
|
||||
%inline %{
|
||||
|
||||
const char* non_utf8_c_str(void) {
|
||||
const char * non_utf8_c_str(void) {
|
||||
return "h\xe9llo w\xc3\xb6rld";
|
||||
}
|
||||
|
||||
void instring(const char *s) {
|
||||
...
|
||||
}
|
||||
|
||||
%}
|
||||
</pre></div>
|
||||
|
||||
|
@ -6590,6 +6592,20 @@ For more details about the <tt>surrogateescape</tt> error handler, please see
|
|||
<a href="https://www.python.org/dev/peps/pep-0383/">PEP 383</a>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
When Python 3 strings are passed to the C/C++ layer, they are likewise expected to be encodable as valid UTF-8.
|
||||
For example, when the <tt>instring</tt> function above is wrapped and called, any string that cannot be encoded as UTF-8 (such as one containing a lone surrogate)
|
||||
will result in a TypeError because the attempted conversion fails:
|
||||
</p>
|
||||
|
||||
<div class="targetlang"><pre>
|
||||
>>> example.instring('h\xe9llo')
|
||||
>>> example.instring('h\udce9llo')
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
TypeError: in method 'instring', argument 1 of type 'char const *'
|
||||
</pre></div>
|
||||
|
||||
<p>
|
||||
In some cases, users may wish to instead handle all byte strings as bytes
|
||||
objects in Python 3. This can be accomplished by adding
|
||||
|
|
|
@ -529,8 +529,11 @@ like this:
|
|||
SWIG_fail;
|
||||
}
|
||||
pystr = PyUnicode_AsUTF8String(pyobj);
|
||||
if (!pystr) {
|
||||
SWIG_fail;
|
||||
}
|
||||
str = strdup(PyBytes_AsString(pystr));
|
||||
Py_XDECREF(pystr);
|
||||
Py_DECREF(pystr);
|
||||
%#else
|
||||
if (!PyString_Check(pyobj)) {
|
||||
PyErr_SetString(PyExc_ValueError, "Expected a string");
|
||||
|
|
|
@ -39,7 +39,11 @@ extern int gcd(int x, int y);
|
|||
%#if PY_VERSION_HEX >= 0x03000000
|
||||
{
|
||||
PyObject *utf8str = PyUnicode_AsUTF8String(s);
|
||||
const char *cstr = PyBytes_AsString(utf8str);
|
||||
const char *cstr;
|
||||
if (!utf8str) {
|
||||
SWIG_fail;
|
||||
}
|
||||
cstr = PyBytes_AsString(utf8str);
|
||||
$2[i] = strdup(cstr);
|
||||
Py_DECREF(utf8str);
|
||||
}
|
||||
|
@ -72,6 +76,9 @@ extern int gcdmain(int argc, char *argv[]);
|
|||
SWIG_fail;
|
||||
}
|
||||
utf8str = PyUnicode_AsUTF8String($input);
|
||||
if (!utf8str) {
|
||||
SWIG_fail;
|
||||
}
|
||||
PyBytes_AsStringAndSize(utf8str, &cstr, &len);
|
||||
$1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
|
||||
$2 = (int)len;
|
||||
|
@ -105,6 +112,9 @@ extern int count(char *bytes, int len, char c);
|
|||
char *cstr;
|
||||
Py_ssize_t len;
|
||||
PyObject *utf8str = PyUnicode_AsUTF8String($input);
|
||||
if (!utf8str) {
|
||||
SWIG_fail;
|
||||
}
|
||||
PyBytes_AsStringAndSize(utf8str, &cstr, &len);
|
||||
$1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
|
||||
$2 = (int)len;
|
||||
|
|
|
@ -25,3 +25,13 @@ if sys.version_info[0:2] < (3, 0):
|
|||
check(unicode_strings.charstring(unicode("hello4")), "hello4")
|
||||
unicode_strings.charstring(u"hell\xb05")
|
||||
unicode_strings.charstring(u"hell\u00f66")
|
||||
|
||||
low_surrogate_string = u"\udcff"
|
||||
try:
|
||||
unicode_strings.instring(low_surrogate_string)
|
||||
# Will succeed with Python 2
|
||||
except TypeError, e:
|
||||
# Python 3 will fail the PyUnicode_AsUTF8String conversion resulting in a TypeError.
|
||||
# The real error is actually:
|
||||
# UnicodeEncodeError: 'utf-8' codec can't encode character '\udcff' in position 0: surrogates not allowed
|
||||
pass
|
||||
|
|
|
@ -23,8 +23,11 @@
|
|||
SWIG_fail;
|
||||
}
|
||||
pystr = PyUnicode_AsUTF8String(pyobj);
|
||||
if (!pystr) {
|
||||
SWIG_fail;
|
||||
}
|
||||
str = strdup(PyBytes_AsString(pystr));
|
||||
Py_XDECREF(pystr);
|
||||
Py_DECREF(pystr);
|
||||
%#else
|
||||
if (!PyString_Check(pyobj)) {
|
||||
PyErr_SetString(PyExc_ValueError, "Expected a string");
|
||||
|
|
|
@ -20,4 +20,6 @@ char *charstring(char *s) {
|
|||
return s;
|
||||
}
|
||||
|
||||
void instring(const char *s) {
|
||||
}
|
||||
%}
|
||||
|
|
|
@ -53,14 +53,17 @@ SWIG_Python_AddErrorMsg(const char* mesg)
|
|||
PyObject *value = 0;
|
||||
PyObject *traceback = 0;
|
||||
|
||||
if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback);
|
||||
if (PyErr_Occurred())
|
||||
PyErr_Fetch(&type, &value, &traceback);
|
||||
if (value) {
|
||||
char *tmp;
|
||||
PyObject *old_str = PyObject_Str(value);
|
||||
const char *tmp = SWIG_Python_str_AsChar(old_str);
|
||||
PyErr_Clear();
|
||||
Py_XINCREF(type);
|
||||
|
||||
PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
|
||||
if (tmp)
|
||||
PyErr_Format(type, "%s %s", tmp, mesg);
|
||||
else
|
||||
PyErr_Format(type, "%s", mesg);
|
||||
SWIG_Python_str_DelForPy3(tmp);
|
||||
Py_DECREF(old_str);
|
||||
Py_DECREF(value);
|
||||
|
|
|
@ -38,14 +38,16 @@ SWIGINTERN char*
|
|||
SWIG_Python_str_AsChar(PyObject *str)
|
||||
{
|
||||
#if PY_VERSION_HEX >= 0x03000000
|
||||
char *cstr;
|
||||
char *newstr;
|
||||
Py_ssize_t len;
|
||||
char *newstr = 0;
|
||||
str = PyUnicode_AsUTF8String(str);
|
||||
PyBytes_AsStringAndSize(str, &cstr, &len);
|
||||
newstr = (char *) malloc(len+1);
|
||||
memcpy(newstr, cstr, len+1);
|
||||
Py_XDECREF(str);
|
||||
if (str) {
|
||||
char *cstr;
|
||||
Py_ssize_t len;
|
||||
PyBytes_AsStringAndSize(str, &cstr, &len);
|
||||
newstr = (char *) malloc(len+1);
|
||||
memcpy(newstr, cstr, len+1);
|
||||
Py_XDECREF(str);
|
||||
}
|
||||
return newstr;
|
||||
#else
|
||||
return PyString_AsString(str);
|
||||
|
|
|
@ -84,10 +84,10 @@ swig_varlink_str(swig_varlinkobject *v) {
|
|||
|
||||
SWIGINTERN int
|
||||
swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) {
|
||||
char *tmp;
|
||||
PyObject *str = swig_varlink_str(v);
|
||||
const char *tmp = SWIG_Python_str_AsChar(str);
|
||||
fprintf(fp,"Swig global variables ");
|
||||
fprintf(fp,"%s\n", tmp = SWIG_Python_str_AsChar(str));
|
||||
fprintf(fp,"%s\n", tmp ? tmp : "Invalid global variable");
|
||||
SWIG_Python_str_DelForPy3(tmp);
|
||||
Py_DECREF(str);
|
||||
return 0;
|
||||
|
|
|
@ -1672,14 +1672,16 @@ SWIG_Python_AddErrMesg(const char* mesg, int infront)
|
|||
PyObject *traceback = 0;
|
||||
PyErr_Fetch(&type, &value, &traceback);
|
||||
if (value) {
|
||||
char *tmp;
|
||||
PyObject *old_str = PyObject_Str(value);
|
||||
const char *tmp = SWIG_Python_str_AsChar(old_str);
|
||||
if (!tmp)
|
||||
tmp = "Invalid error message";
|
||||
Py_XINCREF(type);
|
||||
PyErr_Clear();
|
||||
if (infront) {
|
||||
PyErr_Format(type, "%s %s", mesg, tmp = SWIG_Python_str_AsChar(old_str));
|
||||
PyErr_Format(type, "%s %s", mesg, tmp);
|
||||
} else {
|
||||
PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
|
||||
PyErr_Format(type, "%s %s", tmp, mesg);
|
||||
}
|
||||
SWIG_Python_str_DelForPy3(tmp);
|
||||
Py_DECREF(old_str);
|
||||
|
@ -1805,6 +1807,8 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) {
|
|||
Py_INCREF(name);
|
||||
} else {
|
||||
encoded_name = PyUnicode_AsUTF8String(name);
|
||||
if (!encoded_name)
|
||||
return -1;
|
||||
}
|
||||
PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name));
|
||||
Py_DECREF(encoded_name);
|
||||
|
|
|
@ -16,6 +16,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|||
%#endif
|
||||
{
|
||||
char *cstr; Py_ssize_t len;
|
||||
int ret = SWIG_OK;
|
||||
%#if PY_VERSION_HEX>=0x03000000
|
||||
%#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
|
||||
if (!alloc && cptr) {
|
||||
|
@ -26,7 +27,10 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|||
return SWIG_RuntimeError;
|
||||
}
|
||||
obj = PyUnicode_AsUTF8String(obj);
|
||||
if(alloc) *alloc = SWIG_NEWOBJ;
|
||||
if (!obj)
|
||||
return SWIG_TypeError;
|
||||
if (alloc)
|
||||
*alloc = SWIG_NEWOBJ;
|
||||
%#endif
|
||||
PyBytes_AsStringAndSize(obj, &cstr, &len);
|
||||
%#else
|
||||
|
@ -64,6 +68,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|||
%#endif
|
||||
%#else
|
||||
*cptr = SWIG_Python_str_AsChar(obj);
|
||||
if (!*cptr)
|
||||
ret = SWIG_TypeError;
|
||||
%#endif
|
||||
}
|
||||
}
|
||||
|
@ -71,7 +77,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|||
%#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
|
||||
Py_XDECREF(obj);
|
||||
%#endif
|
||||
return SWIG_OK;
|
||||
return ret;
|
||||
} else {
|
||||
%#if defined(SWIG_PYTHON_2_UNICODE)
|
||||
%#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
|
||||
|
@ -84,6 +90,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
|
|||
return SWIG_RuntimeError;
|
||||
}
|
||||
obj = PyUnicode_AsUTF8String(obj);
|
||||
if (!obj)
|
||||
return SWIG_TypeError;
|
||||
if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) {
|
||||
if (cptr) {
|
||||
if (alloc) *alloc = SWIG_NEWOBJ;
|
||||
|
|
Loading…
Reference in New Issue