Add missing checks for failures in calls to PyUnicode_AsUTF8String.

Previously a seg fault could occur when passing invalid UTF8 strings (low
surrogates), eg passing u"\udcff" to the C layer (Python 3).
This commit is contained in:
William S Fulton 2017-12-04 18:41:55 +00:00
parent 069ce1f6e9
commit b0e29fbdf3
12 changed files with 92 additions and 25 deletions

View File

@ -6,8 +6,14 @@ the issue number to the end of the URL: https://github.com/swig/swig/issues/
Version 4.0.0 (in progress)
===========================
2017-12-04: wsfulton
[Python] Add missing checks for failures in calls to PyUnicode_AsUTF8String. Previously a
seg fault could occur when passing invalid UTF8 strings (low surrogates), eg passing
u"\udcff" to the C layer (Python 3).
2017-11-24: joequant
Fix github #1124 and return R_NilValue for null pointers
Fix #1124 and return R_NilValue for null pointers
2017-11-29: wsfulton
[Java] director exception handling improvements.

View File

@ -6521,14 +6521,16 @@ string that cannot be completely decoded as UTF-8:
<div class="code"><pre>
%module example
%include &lt;std_string.i&gt;
%inline %{
const char* non_utf8_c_str(void) {
const char * non_utf8_c_str(void) {
return "h\xe9llo w\xc3\xb6rld";
}
void instring(const char *s) {
...
}
%}
</pre></div>
@ -6590,6 +6592,20 @@ For more details about the <tt>surrogateescape</tt> error handler, please see
<a href="https://www.python.org/dev/peps/pep-0383/">PEP 383</a>.
</p>
<p>
When Python 3 strings are passed to the C/C++ layer, they are expected to be valid UTF8 Unicode strings too.
For example, when the <tt>instring</tt> method above is wrapped and called, any invalid UTF8 Unicode code strings
will result in a TypeError because the attempted conversion fails:
</p>
<div class="targetlang"><pre>
&gt;&gt;&gt; example.instring('h\xe9llo')
&gt;&gt;&gt; example.instring('h\udce9llo')
Traceback (most recent call last):
File "&lt;stdin&gt;", line 1, in &lt;module&gt;
TypeError: in method 'instring', argument 1 of type 'char const *'
</pre></div>
<p>
In some cases, users may wish to instead handle all byte strings as bytes
objects in Python 3. This can be accomplished by adding

View File

@ -529,8 +529,11 @@ like this:
SWIG_fail;
}
pystr = PyUnicode_AsUTF8String(pyobj);
if (!pystr) {
SWIG_fail;
}
str = strdup(PyBytes_AsString(pystr));
Py_XDECREF(pystr);
Py_DECREF(pystr);
%#else
if (!PyString_Check(pyobj)) {
PyErr_SetString(PyExc_ValueError, "Expected a string");

View File

@ -39,7 +39,11 @@ extern int gcd(int x, int y);
%#if PY_VERSION_HEX >= 0x03000000
{
PyObject *utf8str = PyUnicode_AsUTF8String(s);
const char *cstr = PyBytes_AsString(utf8str);
const char *cstr;
if (!utf8str) {
SWIG_fail;
}
cstr = PyBytes_AsString(utf8str);
$2[i] = strdup(cstr);
Py_DECREF(utf8str);
}
@ -72,6 +76,9 @@ extern int gcdmain(int argc, char *argv[]);
SWIG_fail;
}
utf8str = PyUnicode_AsUTF8String($input);
if (!utf8str) {
SWIG_fail;
}
PyBytes_AsStringAndSize(utf8str, &cstr, &len);
$1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
$2 = (int)len;
@ -105,6 +112,9 @@ extern int count(char *bytes, int len, char c);
char *cstr;
Py_ssize_t len;
PyObject *utf8str = PyUnicode_AsUTF8String($input);
if (!utf8str) {
SWIG_fail;
}
PyBytes_AsStringAndSize(utf8str, &cstr, &len);
$1 = strncpy((char *)malloc(len+1), cstr, (size_t)len);
$2 = (int)len;

View File

@ -25,3 +25,13 @@ if sys.version_info[0:2] < (3, 0):
check(unicode_strings.charstring(unicode("hello4")), "hello4")
unicode_strings.charstring(u"hell\xb05")
unicode_strings.charstring(u"hell\u00f66")
low_surrogate_string = u"\udcff"
try:
unicode_strings.instring(low_surrogate_string)
# Will succeed with Python 2
except TypeError, e:
# Python 3 will fail the PyUnicode_AsUTF8String conversion resulting in a TypeError.
# The real error is actually:
# UnicodeEncodeError: 'utf-8' codec can't encode character '\udcff' in position 0: surrogates not allowed
pass

View File

@ -23,8 +23,11 @@
SWIG_fail;
}
pystr = PyUnicode_AsUTF8String(pyobj);
if (!pystr) {
SWIG_fail;
}
str = strdup(PyBytes_AsString(pystr));
Py_XDECREF(pystr);
Py_DECREF(pystr);
%#else
if (!PyString_Check(pyobj)) {
PyErr_SetString(PyExc_ValueError, "Expected a string");

View File

@ -20,4 +20,6 @@ char *charstring(char *s) {
return s;
}
void instring(const char *s) {
}
%}

View File

@ -53,14 +53,17 @@ SWIG_Python_AddErrorMsg(const char* mesg)
PyObject *value = 0;
PyObject *traceback = 0;
if (PyErr_Occurred()) PyErr_Fetch(&type, &value, &traceback);
if (PyErr_Occurred())
PyErr_Fetch(&type, &value, &traceback);
if (value) {
char *tmp;
PyObject *old_str = PyObject_Str(value);
const char *tmp = SWIG_Python_str_AsChar(old_str);
PyErr_Clear();
Py_XINCREF(type);
PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
if (tmp)
PyErr_Format(type, "%s %s", tmp, mesg);
else
PyErr_Format(type, "%s", mesg);
SWIG_Python_str_DelForPy3(tmp);
Py_DECREF(old_str);
Py_DECREF(value);

View File

@ -38,14 +38,16 @@ SWIGINTERN char*
SWIG_Python_str_AsChar(PyObject *str)
{
#if PY_VERSION_HEX >= 0x03000000
char *cstr;
char *newstr;
Py_ssize_t len;
char *newstr = 0;
str = PyUnicode_AsUTF8String(str);
PyBytes_AsStringAndSize(str, &cstr, &len);
newstr = (char *) malloc(len+1);
memcpy(newstr, cstr, len+1);
Py_XDECREF(str);
if (str) {
char *cstr;
Py_ssize_t len;
PyBytes_AsStringAndSize(str, &cstr, &len);
newstr = (char *) malloc(len+1);
memcpy(newstr, cstr, len+1);
Py_XDECREF(str);
}
return newstr;
#else
return PyString_AsString(str);

View File

@ -84,10 +84,10 @@ swig_varlink_str(swig_varlinkobject *v) {
SWIGINTERN int
swig_varlink_print(swig_varlinkobject *v, FILE *fp, int SWIGUNUSEDPARM(flags)) {
char *tmp;
PyObject *str = swig_varlink_str(v);
const char *tmp = SWIG_Python_str_AsChar(str);
fprintf(fp,"Swig global variables ");
fprintf(fp,"%s\n", tmp = SWIG_Python_str_AsChar(str));
fprintf(fp,"%s\n", tmp ? tmp : "Invalid global variable");
SWIG_Python_str_DelForPy3(tmp);
Py_DECREF(str);
return 0;

View File

@ -1672,14 +1672,16 @@ SWIG_Python_AddErrMesg(const char* mesg, int infront)
PyObject *traceback = 0;
PyErr_Fetch(&type, &value, &traceback);
if (value) {
char *tmp;
PyObject *old_str = PyObject_Str(value);
const char *tmp = SWIG_Python_str_AsChar(old_str);
if (!tmp)
tmp = "Invalid error message";
Py_XINCREF(type);
PyErr_Clear();
if (infront) {
PyErr_Format(type, "%s %s", mesg, tmp = SWIG_Python_str_AsChar(old_str));
PyErr_Format(type, "%s %s", mesg, tmp);
} else {
PyErr_Format(type, "%s %s", tmp = SWIG_Python_str_AsChar(old_str), mesg);
PyErr_Format(type, "%s %s", tmp, mesg);
}
SWIG_Python_str_DelForPy3(tmp);
Py_DECREF(old_str);
@ -1805,6 +1807,8 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) {
Py_INCREF(name);
} else {
encoded_name = PyUnicode_AsUTF8String(name);
if (!encoded_name)
return -1;
}
PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name));
Py_DECREF(encoded_name);

View File

@ -16,6 +16,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
%#endif
{
char *cstr; Py_ssize_t len;
int ret = SWIG_OK;
%#if PY_VERSION_HEX>=0x03000000
%#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
if (!alloc && cptr) {
@ -26,7 +27,10 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
return SWIG_RuntimeError;
}
obj = PyUnicode_AsUTF8String(obj);
if(alloc) *alloc = SWIG_NEWOBJ;
if (!obj)
return SWIG_TypeError;
if (alloc)
*alloc = SWIG_NEWOBJ;
%#endif
PyBytes_AsStringAndSize(obj, &cstr, &len);
%#else
@ -64,6 +68,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
%#endif
%#else
*cptr = SWIG_Python_str_AsChar(obj);
if (!*cptr)
ret = SWIG_TypeError;
%#endif
}
}
@ -71,7 +77,7 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
%#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
Py_XDECREF(obj);
%#endif
return SWIG_OK;
return ret;
} else {
%#if defined(SWIG_PYTHON_2_UNICODE)
%#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR)
@ -84,6 +90,8 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc)
return SWIG_RuntimeError;
}
obj = PyUnicode_AsUTF8String(obj);
if (!obj)
return SWIG_TypeError;
if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) {
if (cptr) {
if (alloc) *alloc = SWIG_NEWOBJ;