Improve handling of zero bytes in string literals

Many of the target languages don't handle this currently.  Those
that don't support this in their strings never will, but others
can probably be fixed.

Fixes #2996
Fixes #2997
This commit is contained in:
Olly Betts 2024-08-30 11:07:15 +12:00
parent 8611d1f435
commit 61e54ab054
10 changed files with 124 additions and 15 deletions

View File

@ -7,6 +7,14 @@ the issue number to the end of the URL: https://github.com/swig/swig/issues/
Version 4.3.0 (in progress)
===========================
2024-08-30: olly
#2996 Fix generic string literal handling to handle embedded zero
bytes. This allows such strings to work for C# (with %csconst), D
(with %dmanifestconst), Go and Java (with %javaconst). For other
target languages SWIG-generated wrappers still truncate such string
literals at a zero byte (which is probably the best we can do for
target languages where the native string can't contain zero bytes).
2024-08-23: wsfulton
[Java] #2991 Document solutions for mismatch in C++ access specifiers
and Java access modifiers in an inheritance hierarchy.

View File

@ -39,6 +39,9 @@ public class runme {
assert( typeof(char) == preproc_constants.CONST_CHAR.GetType() );
assert( typeof(string) == preproc_constants.CONST_STRING1.GetType() );
assert( typeof(string) == preproc_constants.CONST_STRING2.GetType() );
assert( typeof(string) == preproc_constants.CONST_STRING3.GetType() );
assert( typeof(string) == preproc_constants.CONST_STRING4.GetType() );
assert( preproc_constants.CONST_STRING4 == "zer\0zer\0" );
assert( typeof(int) == preproc_constants.INT_AND_BOOL.GetType() );
assert( typeof(int) == preproc_constants.INT_AND_CHAR.GetType() );

View File

@ -0,0 +1,9 @@
package main
import "swigtests/preproc_constants"
// main checks that the wrapped CONST_STRING4 constant keeps both embedded
// zero bytes (one in the middle, one at the end) rather than being truncated
// at the first one.
func main() {
	const want = "zer\x00zer\x00"
	if got := preproc_constants.CONST_STRING4; got != want {
		panic(0)
	}
}

View File

@ -0,0 +1,19 @@
import preproc_constants.*;
/**
 * Runtime test for the preproc_constants module: checks that a string
 * constant containing embedded zero bytes is wrapped without truncation.
 */
public class preproc_constants_runme {

  // Load the native wrapper library up front; exit with a pointer to the
  // documentation if it cannot be found.
  static {
    try {
      System.loadLibrary("preproc_constants");
    } catch (UnsatisfiedLinkError e) {
      System.err.println("Native code library failed to load. See the chapter on Dynamic Linking Problems in the SWIG Java documentation for help.\n" + e);
      System.exit(1);
    }
  }

  /**
   * Verifies CONST_STRING4 equals "zer\0zer\0" (zero bytes preserved).
   *
   * @param argv unused command line arguments
   * @throws RuntimeException if the constant has an unexpected value
   */
  public static void main(String argv[])
  {
    // Compare by content, not by reference: `!=` on String objects only
    // checks identity, which happens to work for interned compile-time
    // constants but would give a spurious failure for an equal string
    // obtained any other way.  Putting the literal first also makes the
    // comparison null-safe.
    if (!"zer\0zer\0".equals(preproc_constants.CONST_STRING4))
      throw new RuntimeException("Failed");
  }
}

View File

@ -0,0 +1,24 @@
import string_constants.*;
public class string_constants_runme {
static {
try {
System.loadLibrary("string_constants");
} catch (UnsatisfiedLinkError e) {
System.err.println("Native code library failed to load. See the chapter on Dynamic Linking Problems in the SWIG Java documentation for help.\n" + e);
System.exit(1);
}
}
public static void main(String argv[])
{
if (!string_constants.ES1.equals(""))
throw new RuntimeException("fail");
if (!string_constants.ES2.equals(""))
throw new RuntimeException("fail");
if (!string_constants.ZS1.equals("\000"))
throw new RuntimeException("fail");
if (!string_constants.ZS2.equals("\000"))
throw new RuntimeException("fail");
}
}

View File

@ -13,12 +13,24 @@ global CONST_DOUBLE3
global CONST_BOOL1
global CONST_CHAR
global CONST_STRING1
global CONST_STRING4
assert(CONST_INT1, 10)
assert(CONST_DOUBLE3, 12.3)
assert(CONST_BOOL1, true)
assert(CONST_CHAR, 'x')
assert(CONST_STRING1, "const string")
if (false)
# Currently SWIG/Octave truncates at a zero byte in a string constant.  Octave
# strings support embedded zero bytes so this ought to work, but is an
# uncommon case.
assert(CONST_STRING4, "zer\0zer\0")
else
# Test the current behaviour for now to ensure this testcase gets updated
# when this gets fixed, and also to check we don't mangle the value in some
# other way.
assert(CONST_STRING4, "zer")
endif
endfunction
test_global

View File

@ -34,6 +34,21 @@ check::equal(gettype(preproc_constants::CONST_BOOL2), "boolean", "preproc_consta
check::equal(gettype(preproc_constants::CONST_CHAR), "string", "preproc_constants.CONST_CHAR has unexpected type");
check::equal(gettype(preproc_constants::CONST_STRING1), "string", "preproc_constants.CONST_STRING1 has unexpected type");
check::equal(gettype(preproc_constants::CONST_STRING2), "string", "preproc_constants.CONST_STRING2 has unexpected type");
check::equal(gettype(preproc_constants::CONST_STRING3), "string", "preproc_constants.CONST_STRING3 has unexpected type");
check::equal(gettype(preproc_constants::CONST_STRING4), "string", "preproc_constants.CONST_STRING4 has unexpected type");
if (false) {
// Currently SWIG/PHP truncates at a zero byte in a string constant. PHP
// strings support embedded zero bytes so this ought to work, but is an
// uncommon case.
check::equal(preproc_constants::CONST_STRING4, "zer\0zer\0");
check::equal(CONST_STRING4, "zer\0zer\0");
} else {
// Test the current behaviour for now to ensure this testcase gets updated
// when this gets fixed, and also to check we don't mangle the value in some
// other way.
check::equal(preproc_constants::CONST_STRING4, "zer");
check::equal(CONST_STRING4, "zer");
}
check::equal(gettype(preproc_constants::INT_AND_BOOL), "integer", "preproc_constants.INT_AND_BOOL has unexpected type");
check::equal(gettype(preproc_constants::INT_AND_CHAR), "integer", "preproc_constants.INT_AND_CHAR has unexpected type");

View File

@ -1,5 +1,15 @@
%module preproc_constants
#ifdef SWIGCSHARP
%csconst(1) CONST_STRING4;
#endif
#ifdef SWIGD
%dmanifestconst CONST_STRING4;
#endif
#ifdef SWIGJAVA
%javaconst(1) CONST_STRING4;
#endif
%{
#if defined(__clang__)
//Suppress: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand]
@ -48,6 +58,9 @@
#define CONST_STRING1 "const string"
#define CONST_STRING2 "const" " string"
#define CONST_STRING3 "log-revprops"
// Ideally we shouldn't truncate at a zero byte in target languages where the
// native string type allows strings to contain a zero byte.
#define CONST_STRING4 "zer\0" "zer\0"
// Expressions - runtime tests check the type for any necessary type promotions of the expressions

View File

@ -377,11 +377,11 @@ static int yylook(void) {
return TYPE_RAW;
case SWIG_TOKEN_STRING:
yylval.id = Swig_copy_string(Char(Scanner_text(scan)));
yylval.str = NewString(Scanner_text(scan));
return STRING;
case SWIG_TOKEN_WSTRING:
yylval.id = Swig_copy_string(Char(Scanner_text(scan)));
yylval.str = NewString(Scanner_text(scan));
return WSTRING;
case SWIG_TOKEN_CHAR:

View File

@ -1710,7 +1710,7 @@ static String *add_qualifier_to_declarator(SwigType *type, SwigType *qualifier)
%token <id> ID
%token <str> HBLOCK
%token <id> POUND
%token <id> STRING WSTRING
%token <str> STRING WSTRING
%token <loc> INCLUDE IMPORT INSERT
%token <str> CHARCONST WCHARCONST
%token <dtype> NUM_INT NUM_DOUBLE NUM_FLOAT NUM_LONGDOUBLE NUM_UNSIGNED NUM_LONG NUM_ULONG NUM_LONGLONG NUM_ULONGLONG NUM_BOOL
@ -7577,27 +7577,33 @@ idcolontailnt : DCOLON identifier idcolontailnt[in] {
;
/* Concatenated strings */
string : string[in] STRING {
$$ = NewStringf("%s%s", $in, $STRING);
}
| STRING { $$ = NewString($STRING);}
;
string : string[in] STRING {
$$ = $in;
Append($$, $STRING);
Delete($STRING);
}
| STRING
;
wstring : wstring[in] WSTRING {
// Concatenated wide strings: L"str1" L"str2"
$$ = NewStringf("%s%s", $in, $WSTRING);
$$ = $in;
Append($$, $WSTRING);
Delete($WSTRING);
}
| wstring[in] STRING {
// Concatenated wide string and normal string literal: L"str1" "str2" (C++11).
$$ = NewStringf("%s%s", $in, $STRING);
$$ = $in;
Append($$, $STRING);
Delete($STRING);
}
| string[in] WSTRING {
// Concatenated normal string and wide string literal: "str1" L"str2" (C++11).
$$ = NewStringf("%s%s", $in, $WSTRING);
$$ = $in;
Append($$, $WSTRING);
Delete($WSTRING);
}
| WSTRING {
$$ = NewString($WSTRING);
}
;
| WSTRING
;
stringbrace : string
| LBRACE {