Add support for wchar_t * and std::wstring Unicode strings on Linux

Initial contributions for Linux provided in issue #1233, modified to work
on both Windows and Linux. Dual support is possible by detecting
the sizeof wchar_t which is different on each of these systems.
This commit is contained in:
William S Fulton 2022-04-29 18:13:21 +01:00
parent 05580ff1e7
commit ed42422d1c
8 changed files with 327 additions and 46 deletions

View File

@ -18,6 +18,9 @@ Version 4.1.0 (in progress)
Also add an example of using goin and godirectorin and add
a test for this situation.
2022-04-29: jason-daly, JerryJoyce, wsfulton
[C#] #1233 Add wchar_t * and std::wstring Unicode string support on Linux.
2022-04-11: robinst
#2257 Fix new Ruby 3.2 warning "undefining the allocator of T_DATA
class swig_runtime_data".

View File

@ -1,3 +1,6 @@
// This file has a BOM for UTF-8
// Notes for displaying UTF-8 properly in Windows: https://stackoverflow.com/questions/49476326/displaying-unicode-in-powershell
using System;
using li_std_wstringNamespace;
@ -5,17 +8,38 @@ public class runme
{
static private void check_equal(char a, char b)
{
if (a != b)
throw new Exception("char failed '" + a + "' != '" + b + "'");
if (a != b)
throw new Exception("char failed '" + a + "' != '" + b + "'");
}
// Debug helper: prints every UTF-16 code unit of s in upper-case hex,
// separated by spaces and enclosed in square brackets; prints "null" for a null string.
static private void display_bytes(string s)
{
    Console.Write("[");
    if (s == null)
    {
        Console.Write("null");
    }
    else
    {
        for (int i = 0; i < s.Length; i++)
        {
            int codeUnit = Convert.ToInt32(s[i]);
            Console.Write(codeUnit.ToString("X") + " ");
        }
    }
    Console.WriteLine("]");
}
static private void check_equal(string a, string b)
{
if (a != b)
throw new Exception("string failed '" + a + "' != '" + b + "'");
if (li_std_wstring.debug) {
Console.WriteLine("check_equal {0} {1}", a, b);
display_bytes(a);
display_bytes(b);
}
if (a != b)
throw new Exception("string failed '" + a + "' != '" + b + "'");
}
static void Main()
static void Main()
{
char h = 'h';
check_equal(li_std_wstring.test_wcvalue(h), h);
@ -30,6 +54,8 @@ public class runme
li_std_wstring.test_pointer(null);
li_std_wstring.test_const_pointer(null);
check_equal(li_std_wstring.test_ccvalue(null), null);
try {
li_std_wstring.test_value(null);
throw new Exception("NULL check failed");
@ -37,8 +63,8 @@ public class runme
}
try {
li_std_wstring.test_reference(null);
throw new Exception("NULL check failed");
li_std_wstring.test_reference(null);
throw new Exception("NULL check failed");
} catch (ArgumentNullException e) {
if (!e.Message.Contains("type is null"))
throw new Exception("Missing text " + e);
@ -54,13 +80,24 @@ public class runme
x = "hello";
check_equal(li_std_wstring.test_const_reference(x), x);
/* Postpone, tricky, std::wstring portability problem.
/* Tricky, std::wstring portability problem.
* std::wstring is 2 bytes on Windows, 4 bytes on Linux, LPWSTR is 2 bytes.
* .NET marshalling should work on Windows but not Linux.
string s = "abc";
if (!li_std_wstring.test_equal_abc(s))
throw new Exception("Not equal " + s);
*/
*/
string ss = "abc";
if (!li_std_wstring.test_equal_abc(ss))
throw new Exception("Not equal " + ss);
ss = "JP: 日本語";
if (!li_std_wstring.test_equal_jp(ss))
throw new Exception("Not equal " + ss);
ss = "DE: Kröpeliner Straße";
if (!li_std_wstring.test_equal_de(ss))
throw new Exception("Not equal " + ss);
ss = "RU: Война и мир";
if (!li_std_wstring.test_equal_ru(ss))
throw new Exception("Not equal " + ss);
try {
li_std_wstring.test_throw();
@ -81,15 +118,22 @@ public class runme
check_equal(s.wchar_t_member, h);
s.wchar_t_ptr_member = x;
check_equal(s.wchar_t_ptr_member, "abc");
s.wchar_t_ptr_member = ss;
check_equal(s.wchar_t_ptr_member, ss);
{
// Unicode strings
// Strings below are UTF8 in this file, but .NET holds them internally as UTF16
// DE: https://www.utf8-chartable.de/
// RU: https://www.utf8-chartable.de/unicode-utf8-table.pl?start=1024
string[] test_strings = {
"JP: 日本語", "DE: Kröpeliner Straße" , "RU: Война и мир", "EN: War and Peace"
};
foreach (string expected in test_strings)
{
if (li_std_wstring.debug)
Console.WriteLine("expected (C#): " + expected);
string received = li_std_wstring.test_value(expected);
check_equal(received, expected);
}

View File

@ -0,0 +1,5 @@
// This file has a BOM set to UTF-8, which is one way for Visual C++ to correctly interpret these strings
// Alternatively, the /utf-8 command line option could be used
#define JP_WSTRING L"JP: 日本語"
#define DE_WSTRING L"DE: Kröpeliner Straße"
#define RU_WSTRING L"RU: Война и мир"

View File

@ -3,44 +3,71 @@
// The languages below are yet to provide std_wstring.i
#if !(defined(SWIGD) || defined(SWIGGO) || defined(SWIGGUILE) || defined(SWIGJAVASCRIPT) || defined(SWIGLUA) || defined(SWIGMZSCHEME) || defined(SWIGOCAML) || defined(SWIGOCTAVE) || defined(SWIGPERL) || defined(SWIGPHP) || defined(SWIGR) || defined(SWIGSCILAB))
%warnfilter(SWIGWARN_TYPEMAP_WCHARLEAK_MSG) wchar_t_const_ptr_member; // Setting a const wchar_t * variable may leak memory.
%include <std_wstring.i>
// throw is invalid in C++17 and later, only SWIG to use it
#define TESTCASE_THROW1(T1) throw(T1)
%{
#define TESTCASE_THROW1(T1)
%}
%{
// Unicode strings are stored in li_std_wstring.h file which has the BOM appropriately set, primarily for Visual C++ to correctly interpret the wide strings
#include "li_std_wstring.h"
%}
%inline %{
#include <string>
#include <iostream>
bool debug = false;
/* Debug helper: dumps the raw storage of s to std::wcout, one hex value per byte,
 * so that size() * sizeof(wchar_t) values are printed between square brackets. */
void show_wstring_bytes(const std::wstring &s) {
  unsigned char *cursor = (unsigned char *)s.data();
  unsigned char *const stop = cursor + s.size()*sizeof(wchar_t);
  std::wcout << L"s: " << /*s <<*/ L"[";
  while (cursor != stop) {
    /* unsigned char is promoted to an integer for a wide stream, hence printed as a hex number */
    std::wcout << std::hex << *cursor << L" ";
    ++cursor;
  }
  std::wcout << L"]" << std::endl << std::flush;
}
wchar_t test_wcvalue(wchar_t x) {
return x;
return x;
}
const wchar_t* test_ccvalue(const wchar_t* x) {
return x;
return x;
}
wchar_t* test_cvalue(wchar_t* x) {
return x;
return x;
}
wchar_t* test_wchar_overload() {
return 0;
return 0;
}
wchar_t* test_wchar_overload(wchar_t *x) {
return x;
return x;
}
std::wstring test_value(std::wstring x) {
return x;
if (debug) {
std::wcout << "received(C++): " /*<< x */<< std::endl;
show_wstring_bytes(x);
}
return x;
}
const std::wstring& test_const_reference(const std::wstring &x) {
return x;
return x;
}
void test_pointer(std::wstring *x) {
@ -52,8 +79,28 @@ void test_const_pointer(const std::wstring *x) {
void test_reference(std::wstring &x) {
}
/* Returns true when the null-terminated wide string wcs has the same contents as s.
 * When the global debug flag is set, the raw bytes of both strings are printed first. */
bool test_equal(const wchar_t *wcs, const std::wstring& s) {
  if (debug) {
    show_wstring_bytes(wcs);
    show_wstring_bytes(s);
  }
  const bool same = (s.compare(wcs) == 0);
  return same;
}
bool test_equal_abc(const std::wstring &s) {
return L"abc" == s;
return test_equal(L"abc", s);
}
bool test_equal_jp(const std::wstring &s) {
return test_equal(JP_WSTRING, s);
}
bool test_equal_de(const std::wstring &s) {
return test_equal(DE_WSTRING, s);
}
bool test_equal_ru(const std::wstring &s) {
return test_equal(RU_WSTRING, s);
}
void test_throw() TESTCASE_THROW1(std::wstring){
@ -73,6 +120,8 @@ size_t size_wstring(const std::wstring& s) {
struct wchar_test_struct {
wchar_t wchar_t_member;
wchar_t* wchar_t_ptr_member;
const wchar_t* wchar_t_const_ptr_member;
wchar_test_struct() : wchar_t_member(), wchar_t_ptr_member(), wchar_t_const_ptr_member() {}
};
%}

View File

@ -15,6 +15,28 @@
#include <string>
%}
%fragment("Swig_csharp_UTF16ToWString", "header") %{
/* For converting from .NET UTF16 (2 byte unicode) strings. wchar_t is 2 bytes on Windows, 4 bytes on Linux.
 *
 * str points to a null-terminated sequence of 16-bit UTF-16 code units (it must not be null).
 * When wchar_t is 2 bytes the code units are used as-is. Otherwise each code unit is widened
 * to a wchar_t, and a valid high/low surrogate pair is combined into the single code point it
 * encodes so that characters outside the Basic Multilingual Plane are not split in two.
 * Unpaired surrogates are copied through unchanged. */
static std::wstring Swig_csharp_UTF16ToWString(const wchar_t *str) {
  if (sizeof(wchar_t) == 2) {
    return std::wstring(str);
  } else {
    const unsigned short *pBegin = (const unsigned short *)(str);
    const unsigned short *ptr = pBegin;
    while (*ptr != 0)
      ++ptr;
    std::wstring result;
    /* Number of code units is an upper bound on the number of code points */
    result.reserve(ptr - pBegin);
    while (pBegin != ptr) {
      unsigned long code = *pBegin++;
      if (code >= 0xD800 && code <= 0xDBFF && pBegin != ptr && *pBegin >= 0xDC00 && *pBegin <= 0xDFFF) {
        /* High surrogate followed by low surrogate: decode the supplementary code point */
        code = 0x10000 + ((code - 0xD800) << 10) + (unsigned long)(*pBegin++ - 0xDC00);
      }
      result.push_back((wchar_t)code);
    }
    return result;
  }
}
%}
namespace std {
%naturalvar wstring;
@ -31,22 +53,22 @@ class wstring;
%typemap(csdirectorin) wstring "$iminput"
%typemap(csdirectorout) wstring "$cscall"
%typemap(in, canthrow=1) wstring
%typemap(in, canthrow=1, fragment="Swig_csharp_UTF16ToWString") wstring
%{ if (!$input) {
SWIG_CSharpSetPendingExceptionArgument(SWIG_CSharpArgumentNullException, "null wstring", 0);
return $null;
}
$1.assign($input); %}
%typemap(out) wstring %{ $result = SWIG_csharp_wstring_callback($1.c_str()); %}
$1 = Swig_csharp_UTF16ToWString($input); %}
%typemap(out) wstring %{ $result = SWIG_csharp_wstring_with_length_callback($1.c_str(), (int)$1.size()); %}
%typemap(directorout, canthrow=1) wstring
%typemap(directorout, canthrow=1) wstring
%{ if (!$input) {
SWIG_CSharpSetPendingExceptionArgument(SWIG_CSharpArgumentNullException, "null wstring", 0);
return $null;
}
$result.assign($input); %}
%typemap(directorin) wstring %{ $input = SWIG_csharp_wstring_callback($1.c_str()); %}
%typemap(directorin) wstring %{ $input = SWIG_csharp_wstring_with_length_callback($1.c_str(), (int)$1.size()); %}
%typemap(csin) wstring "$csinput"
%typemap(csout, excode=SWIGEXCODE) wstring {
@ -72,14 +94,14 @@ class wstring;
%typemap(csdirectorin) const wstring & "$iminput"
%typemap(csdirectorout) const wstring & "$cscall"
%typemap(in, canthrow=1) const wstring &
%typemap(in, canthrow=1, fragment="Swig_csharp_UTF16ToWString") const wstring &
%{ if (!$input) {
SWIG_CSharpSetPendingExceptionArgument(SWIG_CSharpArgumentNullException, "null wstring", 0);
return $null;
}
std::wstring $1_str($input);
std::wstring $1_str(Swig_csharp_UTF16ToWString($input));
$1 = &$1_str; %}
%typemap(out) const wstring & %{ $result = SWIG_csharp_wstring_callback($1->c_str()); %}
%typemap(out) const wstring & %{ $result = SWIG_csharp_wstring_with_length_callback($1->c_str(), (int)$1->size()); %}
%typemap(csin) const wstring & "$csinput"
%typemap(csout, excode=SWIGEXCODE) const wstring & {
@ -97,7 +119,7 @@ class wstring;
$1_str = $input;
$result = &$1_str; %}
%typemap(directorin) const wstring & %{ $input = SWIG_csharp_wstring_callback($1.c_str()); %}
// $1 is a reference in this typemap, so it is accessed with '.' for both c_str() and size()
%typemap(directorin) const wstring & %{ $input = SWIG_csharp_wstring_with_length_callback($1.c_str(), (int)$1.size()); %}
%typemap(csvarin, excode=SWIGEXCODE2) const wstring & %{
set {

View File

@ -11,28 +11,49 @@
#if !defined(SWIG_CSHARP_NO_WSTRING_HELPER)
#if !defined(SWIG_CSHARP_WSTRING_HELPER_)
#define SWIG_CSHARP_WSTRING_HELPER_
%fragment("<wchar.h>"); // TODO: %fragment("<wchar.h>", "runtime");
%insert(runtime) %{
/* Callback for returning strings to C# without leaking memory */
typedef void * (SWIGSTDCALL* SWIG_CSharpWStringHelperCallback)(const wchar_t *);
static SWIG_CSharpWStringHelperCallback SWIG_csharp_wstring_callback = NULL;
typedef void * (SWIGSTDCALL* SWIG_CSharpWStringHelperCallback)(const wchar_t *, int length);
static SWIG_CSharpWStringHelperCallback SWIG_csharp_wstring_with_length_callback = NULL;
%}
%insert(header) %{
static void * SWIG_csharp_wstring_callback(const wchar_t *s) {
return SWIG_csharp_wstring_with_length_callback(s, (int)wcslen(s));
}
%}
%pragma(csharp) imclasscode=%{
protected class SWIGWStringHelper {
[return: global::System.Runtime.InteropServices.MarshalAs(global::System.Runtime.InteropServices.UnmanagedType.LPWStr)]
public delegate string SWIGWStringDelegate(global::System.IntPtr message);
static SWIGWStringDelegate wstringDelegate = new SWIGWStringDelegate(CreateWString);
public delegate string SWIGWStringDelegate(global::System.IntPtr message, int length);
static SWIGWStringDelegate wstringUTF16Delegate = new SWIGWStringDelegate(CreateWStringFromUTF16);
static SWIGWStringDelegate wstringUTF32Delegate = new SWIGWStringDelegate(CreateWStringFromUTF32);
[global::System.Runtime.InteropServices.DllImport("$dllimport", EntryPoint="SWIGRegisterWStringCallback_$module")]
public static extern void SWIGRegisterWStringCallback_$module(SWIGWStringDelegate wstringDelegate);
public static extern void SWIGRegisterWStringCallback_$module(SWIGWStringDelegate wstringUTF16Delegate, SWIGWStringDelegate wstringUTF32Delegate);
static string CreateWString([global::System.Runtime.InteropServices.MarshalAs(global::System.Runtime.InteropServices.UnmanagedType.LPWStr)]global::System.IntPtr cString) {
return global::System.Runtime.InteropServices.Marshal.PtrToStringUni(cString);
static string CreateWStringFromUTF16([global::System.Runtime.InteropServices.MarshalAs(global::System.Runtime.InteropServices.UnmanagedType.LPWStr)]global::System.IntPtr cString, int length) {
return global::System.Runtime.InteropServices.Marshal.PtrToStringUni(cString, length);
}
// Creates a .NET string from 'length' UTF-32 code units (4 bytes each) starting at cString.
// Returns the empty string when length is 0.
public static string CreateWStringFromUTF32([global::System.Runtime.InteropServices.MarshalAs(global::System.Runtime.InteropServices.UnmanagedType.LPWStr)]global::System.IntPtr cString, int length) {
  if (length == 0)
    return string.Empty;
  byte[] buffer = new byte[length * 4];
  global::System.Runtime.InteropServices.Marshal.Copy(cString, buffer, 0, buffer.Length);
  // Decode the UTF-32 bytes directly. Going via UTF-8 and Encoding.Default is only correct
  // when the platform default encoding happens to be UTF-8.
  return global::System.Text.Encoding.UTF32.GetString(buffer);
}
static SWIGWStringHelper() {
SWIGRegisterWStringCallback_$module(wstringDelegate);
SWIGRegisterWStringCallback_$module(wstringUTF16Delegate, wstringUTF32Delegate);
}
}
@ -43,8 +64,8 @@ static SWIG_CSharpWStringHelperCallback SWIG_csharp_wstring_callback = NULL;
#ifdef __cplusplus
extern "C"
#endif
SWIGEXPORT void SWIGSTDCALL SWIGRegisterWStringCallback_$module(SWIG_CSharpWStringHelperCallback callback) {
SWIG_csharp_wstring_callback = callback;
SWIGEXPORT void SWIGSTDCALL SWIGRegisterWStringCallback_$module(SWIG_CSharpWStringHelperCallback callback_utf16, SWIG_CSharpWStringHelperCallback callback_utf32) {
SWIG_csharp_wstring_with_length_callback = sizeof(wchar_t) == 2 ? callback_utf16 : callback_utf32;
}
%}
#endif // SWIG_CSHARP_WSTRING_HELPER_
@ -77,13 +98,60 @@ SWIGEXPORT void SWIGSTDCALL SWIGRegisterWStringCallback_$module(SWIG_CSharpWStri
%typemap(typecheck) wchar_t = char;
// wchar_t *
%typemap(ctype) wchar_t * "wchar_t *"
%typemap(imtype, inattributes="[global::System.Runtime.InteropServices.MarshalAs(global::System.Runtime.InteropServices.UnmanagedType.LPWStr)]", out="global::System.IntPtr" ) wchar_t * "string"
%fragment("Swig_csharp_UTF16ToWCharPtr", "header") %{
/* For converting from .NET UTF16 (2 byte unicode) strings. wchar_t is 2 bytes on Windows, 4 bytes on Linux.
 *
 * When wchar_t is 2 bytes, str itself is returned (no copy is made).
 * Otherwise a newly allocated, null-terminated wchar_t array is returned (0 when str is 0, or
 * when allocation fails in C); it must be released with Swig_csharp_UTF16ToWCharPtrFree.
 * Each 16-bit code unit is widened to a wchar_t and a valid high/low surrogate pair is combined
 * into the single code point it encodes. Unpaired surrogates are copied through unchanged. */
static wchar_t * Swig_csharp_UTF16ToWCharPtr(const wchar_t *str) {
  if (sizeof(wchar_t) == 2) {
    return (wchar_t *)str;
  } else {
    wchar_t *result = 0;
    if (str) {
      /* Plain '=' initialisation so that this also compiles as C */
      const unsigned short *pBegin = (const unsigned short *)(str);
      const unsigned short *pEnd = pBegin;
      wchar_t *ptr = 0;
      while (*pEnd != 0)
        ++pEnd;
      /* Number of code units + terminator is an upper bound on the space needed */
#ifdef __cplusplus
      result = ptr = new wchar_t[pEnd - pBegin + 1];
#else
      result = ptr = (wchar_t *)malloc(sizeof(wchar_t) * (pEnd - pBegin + 1));
#endif
      if (ptr) {
        while (pBegin != pEnd) {
          unsigned long code = *pBegin++;
          if (code >= 0xD800 && code <= 0xDBFF && pBegin != pEnd && *pBegin >= 0xDC00 && *pBegin <= 0xDFFF) {
            /* High surrogate followed by low surrogate: decode the supplementary code point */
            code = 0x10000 + ((code - 0xD800) << 10) + (unsigned long)(*pBegin++ - 0xDC00);
          }
          *ptr++ = (wchar_t)code;
        }
        *ptr = 0;
      }
    }
    return result;
  }
}
%}
%fragment("Swig_csharp_UTF16ToWCharPtrFree", "header") %{
/* Releases str with delete [] (C++) or free (C) unless wchar_t is 2 bytes,
 * in which case nothing is released. */
static void Swig_csharp_UTF16ToWCharPtrFree(wchar_t *str) {
  if (sizeof(wchar_t) == 2)
    return;
#ifdef __cplusplus
  delete [] str;
#else
  free(str);
#endif
}
%}
%typemap(ctype, out="void *") wchar_t * "wchar_t *"
%typemap(imtype,
inattributes="[global::System.Runtime.InteropServices.MarshalAs(global::System.Runtime.InteropServices.UnmanagedType.LPWStr)]",
outattributes="[return: global::System.Runtime.InteropServices.MarshalAs(global::System.Runtime.InteropServices.UnmanagedType.LPWStr)]"
) wchar_t * "string"
%typemap(cstype) wchar_t * "string"
%typemap(csin) wchar_t * "$csinput"
%typemap(csout, excode=SWIGEXCODE) wchar_t * {
string ret = global::System.Runtime.InteropServices.Marshal.PtrToStringUni($imcall);$excode
string ret = $imcall;$excode
return ret;
}
%typemap(csvarin, excode=SWIGEXCODE2) wchar_t * %{
@ -92,12 +160,100 @@ SWIGEXPORT void SWIGSTDCALL SWIGRegisterWStringCallback_$module(SWIG_CSharpWStri
} %}
%typemap(csvarout, excode=SWIGEXCODE2) wchar_t * %{
get {
string ret = global::System.Runtime.InteropServices.Marshal.PtrToStringUni($imcall);$excode
string ret = $imcall;$excode
return ret;
} %}
%typemap(in) wchar_t * %{ $1 = ($1_ltype)$input; %}
%typemap(out) wchar_t * %{ $result = (wchar_t *)$1; %}
%typemap(in, fragment="Swig_csharp_UTF16ToWCharPtr") wchar_t *
%{ $1 = Swig_csharp_UTF16ToWCharPtr($input); %}
%typemap(out) wchar_t * %{ $result = $1 ? SWIG_csharp_wstring_callback((wchar_t *)$1) : 0; %}
%typemap(freearg, fragment="Swig_csharp_UTF16ToWCharPtrFree") wchar_t *
%{ Swig_csharp_UTF16ToWCharPtrFree($1); %}
%typemap(typecheck) wchar_t * = char *;
/* Default typemap for handling wchar_t * members (based on char * in swig.swg) */
#ifdef __cplusplus
%typemap(memberin,fragment="<wchar.h>") wchar_t * {
delete [] $1;
if ($input && sizeof(wchar_t) == 2) {
$1 = ($1_type) (new wchar_t[wcslen((const wchar_t *)$input)+1]);
wcscpy((wchar_t *)$1, (const wchar_t *)$input);
} else {
$1 = $input;
$input = 0;
}
}
%typemap(memberin,warning=SWIGWARN_TYPEMAP_WCHARLEAK_MSG,fragment="<wchar.h>") const wchar_t * {
if ($input && sizeof(wchar_t) == 2) {
$1 = ($1_type) (new wchar_t[wcslen((const wchar_t *)$input)+1]);
wcscpy((wchar_t *)$1, (const wchar_t *)$input);
} else {
$1 = $input;
$input = 0;
}
}
%typemap(globalin,fragment="<wchar.h>") wchar_t * {
delete [] $1;
if ($input && sizeof(wchar_t) == 2) {
$1 = ($1_type) (new wchar_t[wcslen((const wchar_t *)$input)+1]);
wcscpy((wchar_t *)$1, (const wchar_t *)$input);
} else {
$1 = $input;
$input = 0;
}
}
%typemap(globalin,warning=SWIGWARN_TYPEMAP_WCHARLEAK_MSG,fragment="<wchar.h>") const wchar_t * {
if ($input && sizeof(wchar_t) == 2) {
$1 = ($1_type) (new wchar_t[wcslen((const wchar_t *)$input)+1]);
wcscpy((wchar_t *)$1, (const wchar_t *)$input);
} else {
$1 = $input;
$input = 0;
}
}
#else
/* C version: frees the old member value, then either stores a malloc'd copy of $input
 * (2 byte wchar_t) or stores the $input pointer itself and zeroes $input. */
%typemap(memberin,fragment="<wchar.h>") wchar_t * {
  free($1);
  if ($input && sizeof(wchar_t) == 2) {
    /* wcslen counts wchar_t elements, so the byte size must be scaled by sizeof(wchar_t) */
    $1 = ($1_type) malloc((wcslen((const wchar_t *)$input)+1) * sizeof(wchar_t));
    wcscpy((wchar_t *)$1, (const wchar_t *)$input);
  } else {
    $1 = $input;
    $input = 0;
  }
}
/* C version for const members: the old value is not freed (hence the leak warning).
 * Stores a malloc'd copy of $input (2 byte wchar_t) or the $input pointer itself, zeroing $input. */
%typemap(memberin,warning=SWIGWARN_TYPEMAP_WCHARLEAK_MSG,fragment="<wchar.h>") const wchar_t * {
  if ($input && sizeof(wchar_t) == 2) {
    /* wcslen counts wchar_t elements, so the byte size must be scaled by sizeof(wchar_t) */
    $1 = ($1_type) malloc((wcslen((const wchar_t *)$input)+1) * sizeof(wchar_t));
    wcscpy((wchar_t *)$1, (const wchar_t *)$input);
  } else {
    $1 = $input;
    $input = 0;
  }
}
/* C version: frees the old global value, then either stores a malloc'd copy of $input
 * (2 byte wchar_t) or stores the $input pointer itself and zeroes $input. */
%typemap(globalin,fragment="<wchar.h>") wchar_t * {
  free($1);
  if ($input && sizeof(wchar_t) == 2) {
    /* wcslen counts wchar_t elements, so the byte size must be scaled by sizeof(wchar_t) */
    $1 = ($1_type) malloc((wcslen((const wchar_t *)$input)+1) * sizeof(wchar_t));
    wcscpy((wchar_t *)$1, (const wchar_t *)$input);
  } else {
    $1 = $input;
    $input = 0;
  }
}
/* C version for const globals: the old value is not freed (hence the leak warning).
 * Stores a malloc'd copy of $input (2 byte wchar_t) or the $input pointer itself, zeroing $input. */
%typemap(globalin,warning=SWIGWARN_TYPEMAP_WCHARLEAK_MSG,fragment="<wchar.h>") const wchar_t * {
  if ($input && sizeof(wchar_t) == 2) {
    /* wcslen counts wchar_t elements, so the byte size must be scaled by sizeof(wchar_t) */
    $1 = ($1_type) malloc((wcslen((const wchar_t *)$input)+1) * sizeof(wchar_t));
    wcscpy((wchar_t *)$1, (const wchar_t *)$input);
  } else {
    $1 = $input;
    $input = 0;
  }
}
#endif

View File

@ -52,6 +52,7 @@
%define SWIGWARN_TYPEMAP_CHARLEAK_MSG "451:Setting a const char * variable may leak memory." %enddef
%define SWIGWARN_TYPEMAP_SWIGTYPELEAK_MSG "454:Setting a pointer/reference variable may leak memory." %enddef
%define SWIGWARN_TYPEMAP_WCHARLEAK_MSG "455:Setting a const wchar_t * variable may leak memory." %enddef
%define SWIGWARN_TYPEMAP_THREAD_UNSAFE_MSG "470:Thread/reentrant unsafe wrapping, consider returning by value instead." %enddef
%define SWIGWARN_TYPEMAP_DIRECTOROUT_PTR_MSG "473:Returning a pointer or reference in a director method is not recommended." %enddef
%define SWIGWARN_TYPEMAP_INITIALIZER_LIST_MSG "476:Initialization using std::initializer_list." %enddef

View File

@ -163,6 +163,7 @@
#define WARN_TYPEMAP_SWIGTYPE 452 /* No longer issued */
#define WARN_TYPEMAP_APPLY_UNDEF 453
#define WARN_TYPEMAP_SWIGTYPELEAK 454
#define WARN_TYPEMAP_WCHARLEAK 455
#define WARN_TYPEMAP_IN_UNDEF 460
#define WARN_TYPEMAP_OUT_UNDEF 461