Unicode literals

This commit is contained in:
Vladimir Kalinin 2013-01-16 08:01:27 +04:00 committed by William S Fulton
parent 1f0db4fb9e
commit 7d800a655d
9 changed files with 137 additions and 14 deletions

View File

@ -1,4 +1,5 @@
%module csharp_features
%include "wchar.i"
// SWIG gets the method modifiers wrong occasionally, like with private inheritance, %csmethodmodifiers can fix this
%csmethodmodifiers Derived::VirtualMethod() "public virtual"
@ -19,6 +20,9 @@ public:
// Test class deriving publicly from Derived. Besides a plain data member it
// declares two empty-bodied methods whose default arguments are wide literals.
class MoreDerived : public Derived {
public:
int variable;
// Test wide character literal support for the C# module: the defaults below
// are a wide string literal and a wide char literal, each using a hex escape.
void methodWithDefault1(const wchar_t* s = L"literal with escapes \x1234"){}
void methodWithDefault2(wchar_t c = L'\x1234'){}
};
%}

View File

@ -383,6 +383,10 @@ static int yylook(void) {
case SWIG_TOKEN_STRING:
yylval.id = Swig_copy_string(Char(Scanner_text(scan)));
return STRING;
case SWIG_TOKEN_WSTRING:
yylval.id = Swig_copy_string(Char(Scanner_text(scan)));
return WSTRING;
case SWIG_TOKEN_CHAR:
yylval.str = NewString(Scanner_text(scan));
@ -391,7 +395,15 @@ static int yylook(void) {
Printf(stdout,"%d\n", Len(Scanner_text(scan)));
}
return CHARCONST;
case SWIG_TOKEN_WCHAR:
  /* Wide character constant (L'c'): pass the scanner's token text to the
   * parser unchanged as the semantic value of WCHARCONST. */
  yylval.str = NewString(Scanner_text(scan));
  if (Len(yylval.str) == 0) {
    /* Report the empty constant through Swig_error only; nothing is written
     * to stdout. The token is still returned so parsing can continue. */
    Swig_error(cparse_file, cparse_line, "Empty character constant\n");
  }
  return WCHARCONST;
/* Numbers */
case SWIG_TOKEN_INT:

View File

@ -13,8 +13,8 @@
* some point. Beware.
* ----------------------------------------------------------------------------- */
%expect 6
%{
#define yylex yylex
char cvsroot_parser_y[] = "$Id$";
@ -1635,9 +1635,9 @@ static void tag_nodes(Node *n, const_String_or_char_ptr attrname, DOH *value) {
%token <id> ID
%token <str> HBLOCK
%token <id> POUND
%token <id> STRING
%token <id> STRING WSTRING
%token <loc> INCLUDE IMPORT INSERT
%token <str> CHARCONST
%token <str> CHARCONST WCHARCONST
%token <dtype> NUM_INT NUM_FLOAT NUM_UNSIGNED NUM_LONG NUM_ULONG NUM_LONGLONG NUM_ULONGLONG NUM_BOOL
%token <ivalue> TYPEDEF
%token <type> TYPE_INT TYPE_UNSIGNED TYPE_SHORT TYPE_LONG TYPE_FLOAT TYPE_DOUBLE TYPE_CHAR TYPE_WCHAR TYPE_VOID TYPE_SIGNED TYPE_BOOL TYPE_COMPLEX TYPE_TYPEDEF TYPE_RAW TYPE_NON_ISO_INT8 TYPE_NON_ISO_INT16 TYPE_NON_ISO_INT32 TYPE_NON_ISO_INT64
@ -1728,7 +1728,7 @@ static void tag_nodes(Node *n, const_String_or_char_ptr attrname, DOH *value) {
%type <decl> abstract_declarator direct_abstract_declarator ctor_end;
%type <tmap> typemap_type;
%type <str> idcolon idcolontail idcolonnt idcolontailnt idtemplate stringbrace stringbracesemi;
%type <id> string stringnum ;
%type <id> string stringnum wstring;
%type <tparms> template_parms;
%type <dtype> cpp_end cpp_vend;
%type <ivalue> rename_namewarn;
@ -6001,7 +6001,7 @@ definetype : { /* scanner_check_typedef(); */ } expr {
$$ = $2;
if ($$.type == T_STRING) {
$$.rawval = NewStringf("\"%(escape)s\"",$$.val);
} else if ($$.type != T_CHAR) {
} else if ($$.type != T_CHAR && $$.type != T_WSTRING && $$.type != T_WCHAR) {
$$.rawval = 0;
}
$$.bitfield = 0;
@ -6127,6 +6127,11 @@ valexpr : exprnum { $$ = $1; }
$$.type = T_ULONG;
}
| exprcompound { $$ = $1; }
| wstring {
$$.val = NewString($1);
$$.rawval = NewStringf("L\"%s\"", $$.val);
$$.type = T_WSTRING;
}
| CHARCONST {
$$.val = NewString($1);
if (Len($$.val)) {
@ -6139,6 +6144,18 @@ valexpr : exprnum { $$ = $1; }
$$.throws = 0;
$$.throwf = 0;
}
| WCHARCONST {
$$.val = NewString($1);
if (Len($$.val)) {
$$.rawval = NewStringf("L\'%s\'", $$.val);
} else {
$$.rawval = NewString("L'\\0'");
}
$$.type = T_WCHAR;
$$.bitfield = 0;
$$.throws = 0;
$$.throwf = 0;
}
/* grouping */
| LPAREN expr RPAREN %prec CAST {
@ -6661,6 +6678,21 @@ string : string STRING {
}
| STRING { $$ = $1;}
;
/* Wide string literals.
 *
 * A single WSTRING token is passed through unchanged. Adjacent wide literals,
 * e.g. L"str1" L"str2", are concatenated into a freshly malloc'ed buffer
 * holding the text of both tokens back to back. */
wstring : wstring WSTRING {
/* NOTE(review): the malloc result is used without a NULL check. */
/* NOTE(review): the two token texts are joined verbatim; if they still contain
   unprocessed escapes, an escape ending the first literal could merge with the
   start of the second (L"\x12" L"34") - TODO confirm. */
$$ = (char *) malloc(strlen($1)+strlen($2)+1);
strcpy($$,$1);
strcat($$,$2);
}
/* Concatenated wide string and normal string literal: L"str1" "str2" */
/* Not all compilers support this concatenation mode, so it is probably better to postpone it. */
/*| wstring STRING { here $2 arrives unescaped, so it has to be escaped back first via NewStringf("%(escape)s)"
$$ = (char *) malloc(strlen($1)+strlen($2)+1);
strcpy($$,$1);
strcat($$,$2);
}*/
| WSTRING { $$ = $1;}
;
stringbrace : string {
$$ = NewString($1);

View File

@ -274,7 +274,7 @@ int Swig_cargs(Wrapper *w, ParmList *p) {
Delete(defname);
Delete(defvalue);
}
} else if (!pvalue && ((tycode == T_POINTER) || (tycode == T_STRING))) {
} else if (!pvalue && ((tycode == T_POINTER) || (tycode == T_STRING) || (tycode == T_WSTRING))) {
pvalue = (String *) "0";
}
if (!altty) {

View File

@ -509,7 +509,6 @@ static int look(Scanner * s) {
state = 4; /* Possibly a SWIG directive */
/* Look for possible identifiers or unicode/delimiter strings */
else if ((isalpha(c)) || (c == '_') ||
(s->idstart && strchr(s->idstart, c))) {
state = 7;
@ -867,11 +866,15 @@ static int look(Scanner * s) {
break;
case 7: /* Identifier or true/false or unicode/custom delimiter string */
if (c=='R') { /* Possibly CUSTOM DELIMITER string */
if (c == 'R') { /* Possibly CUSTOM DELIMITER string */
state = 72;
break;
}
else if (c!='u' && c!='U' && c!='L') { /* Definitely an identifier */
else if (c == 'L') { /* Probably identifier but may be a wide string literal */
state = 77;
break;
}
else if (c != 'u' && c != 'U') { /* Definitely an identifier */
state = 70;
break;
}
@ -879,14 +882,14 @@ static int look(Scanner * s) {
if ((c = nextchar(s)) == 0) {
state = 76;
}
else if (c=='\"') { /* Definitely u, U or L string */
else if (c == '\"') { /* Definitely u, U or L string */
retract(s, 1);
state = 1000;
}
else if (c=='R') { /* Possibly CUSTOM DELIMITER u, U, L string */
else if (c == 'R') { /* Possibly CUSTOM DELIMITER u, U, L string */
state = 73;
}
else if (c=='8') { /* Possibly u8 string */
else if (c == '8') { /* Possibly u8 string */
state = 71;
}
else {
@ -950,6 +953,59 @@ static int look(Scanner * s) {
break;
case 77: /* Identifier or wide string/char literal: state 7 has just seen an 'L' */
if ((c = nextchar(s)) == 0)
return SWIG_TOKEN_ID; /* input ended right after the 'L': it is an identifier */
else if (c == '\"') {
/* L" opens a wide string literal. Record the line it starts on and empty the
   token text (presumably so that only the literal's contents are collected). */
s->start_line = s->line;
Clear(s->text);
state = 78;
}
else if (c == '\'') {
/* L' opens a wide character literal; same preparation as for a wide string. */
s->start_line = s->line;
Clear(s->text);
state = 79;
}
else if (isalnum(c) || (c == '_') || (c == '$'))
/* Still inside an identifier: hand this character back to state 7, which
   tests it for the R / L / u / U prefixes again. */
state = 7;
else {
/* Any other character ends the identifier; push it back for the next token. */
retract(s, 1);
return SWIG_TOKEN_ID;
}
break;
case 78: /* Processing a wide string literal (entered after L" has been read) */
if ((c = nextchar(s)) == 0) {
/* Input ended before the closing double quote. */
Swig_error(cparse_file, cparse_start_line, "Unterminated wide string\n");
return SWIG_TOKEN_ERROR;
}
if (c == '\"') {
/* Closing quote: remove the last item of the token text (presumably the quote
   just read) and return the literal. */
Delitem(s->text, DOH_END);
return SWIG_TOKEN_WSTRING;
} else if (c == '\\') {
/* Backslash: consume the following character unconditionally so that an
   escaped quote does not terminate the literal. The escape itself is not
   interpreted here. */
if ((c = nextchar(s)) == 0) {
Swig_error(cparse_file, cparse_start_line, "Unterminated wide string\n");
return SWIG_TOKEN_ERROR;
}
}
break;
case 79: /* Processing a wide char literal (entered after L' has been read) */
if ((c = nextchar(s)) == 0) {
/* Input ended before the closing single quote. */
Swig_error(cparse_file, cparse_start_line, "Unterminated character constant\n");
return SWIG_TOKEN_ERROR;
}
if (c == '\'') {
/* Closing quote: remove the last item of the token text (presumably the quote
   just read) and return the literal. */
Delitem(s->text, DOH_END);
return (SWIG_TOKEN_WCHAR);
} else if (c == '\\') {
/* Backslash: consume the following character unconditionally so that an
   escaped quote does not terminate the literal. */
if ((c = nextchar(s)) == 0) {
/* NOTE(review): this end-of-input path is worded differently from the one at
   the top of this state ("Unterminated character constant"). */
Swig_error(cparse_file, cparse_start_line, "Unterminated wide char literal\n");
return SWIG_TOKEN_ERROR;
}
}
break;
case 75: /* Special identifier $ */
if ((c = nextchar(s)) == 0)
return SWIG_TOKEN_DOLLAR;

View File

@ -131,12 +131,21 @@ SwigType *NewSwigType(int t) {
case T_UCHAR:
return NewString("unsigned char");
break;
case T_STRING:{
case T_STRING: {
SwigType *t = NewString("char");
SwigType_add_pointer(t);
return t;
break;
}
case T_WCHAR:
return NewString("wchar_t");
break;
case T_WSTRING: {
SwigType *t = NewString("wchar_t");
SwigType_add_pointer(t);
return t;
break;
}
case T_LONGLONG:
return NewString("long long");
break;

View File

@ -100,6 +100,9 @@ extern "C" {
#define T_SYMBOL 98
#define T_ERROR 99
/* wide string literal, may contain escaped wide chars like \x1234 as well as normal escape sequences */
#define T_WSTRING 39
/* --- File interface --- */

View File

@ -67,6 +67,9 @@ extern void Scanner_freeze_line(Scanner *s, int val);
#define SWIG_TOKEN_QUESTION 30 /* ? */
#define SWIG_TOKEN_COMMENT 31 /* C or C++ comment */
#define SWIG_TOKEN_BOOL 32 /* true or false */
#define SWIG_TOKEN_WSTRING 33 /* L"str" */
#define SWIG_TOKEN_WCHAR 34 /* L'c' */
#define SWIG_TOKEN_ILLEGAL 99
#define SWIG_TOKEN_ERROR -1

View File

@ -1194,6 +1194,8 @@ int SwigType_type(SwigType *t) {
if (strncmp(c, "p.", 2) == 0) {
if (SwigType_type(c + 2) == T_CHAR)
return T_STRING;
else if (SwigType_type(c + 2) == T_WCHAR)
return T_WSTRING;
else
return T_POINTER;
}
@ -1236,6 +1238,8 @@ int SwigType_type(SwigType *t) {
return T_SCHAR;
if (strcmp(c, "unsigned char") == 0)
return T_UCHAR;
if (strcmp(c, "wchar_t") == 0)
return T_WCHAR;
if (strcmp(c, "float") == 0)
return T_FLOAT;
if (strcmp(c, "double") == 0)