1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4
5 #include "ppport.h"
6
/*
 * Consume one EUC-JP character starting at *pos.
 *
 * pos  - in/out cursor into a NUL-terminated byte string; advanced past the
 *        bytes that were consumed.
 * byte - out: number of bytes consumed (0 only when *pos is at the NUL).
 *
 * Returns the visual width of the consumed character:
 *   0  at the terminating NUL (nothing consumed),
 *   2  for a lead byte 0xa1..0xfe followed by a trail byte 0xa1..0xfe, and
 *      for 0x8f followed by at least one trail byte where the byte after the
 *      optional first trail byte is also 0xa1..0xfe,
 *   1  for everything else (0x8e sequences, plain bytes, truncated or
 *      malformed sequences).
 */
int count_single_char_eucjp( const unsigned char** pos, int* byte ){
    const unsigned char *cur = *pos;
    const unsigned char lead = *cur;
    int width = 1;

    *byte = 0;
    if (lead == 0) {
        return 0;
    }

    /* The lead byte is always consumed, whatever follows it. */
    cur++;

    if (lead == 0x8e) {
        /* 0x8e prefix: take one trail byte if present; width stays 1. */
        if (*cur >= 0xa1 && *cur <= 0xfe) {
            cur++;
        }
    } else if (lead == 0x8f) {
        /* 0x8f prefix: up to two trail bytes; width 2 once the second
         * range check succeeds. */
        if (*cur >= 0xa1 && *cur <= 0xfe) {
            cur++;
        }
        if (*cur >= 0xa1 && *cur <= 0xfe) {
            cur++;
            width = 2;
        }
    } else if (lead >= 0xa1 && lead <= 0xfe) {
        /* Ordinary two-byte character. */
        if (*cur >= 0xa1 && *cur <= 0xfe) {
            cur++;
            width = 2;
        }
    }

    *byte = (int)(cur - *pos);
    *pos = cur;
    return width;
}
38
/*
 * Sum the visual widths reported by count_single_char_eucjp() for every
 * character in str's string buffer, stopping at the first NUL byte.
 *
 * Returns a new integer SV (newSViv) holding the total.
 */
SV* get_visualwidth_eucjp( SV* str ){
    const unsigned char *cursor = (const unsigned char *)SvPV_nolen(str);
    unsigned int total = 0;
    int consumed = 0;

    while (*cursor != 0) {
        total += count_single_char_eucjp(&cursor, &consumed);
    }
    return newSViv(total);
}
49
/*
 * Return a new SV holding the longest leading portion of str whose visual
 * width, as measured by count_single_char_eucjp(), does not exceed the
 * integer value of length_sv.
 *
 * str       - scalar whose string buffer is scanned up to its first NUL byte.
 * length_sv - scalar holding the maximum visual width (read with SvIV).
 *
 * A negative width is treated as 0, so the result is an empty string.  The
 * budget is kept in a UV instead of being narrowed to unsigned int, so a
 * negative or very large IV cannot wrap into an unrelated limit.
 *
 * Each unit delimited by count_single_char_eucjp() is either kept whole or
 * dropped; the result is built with newSVpvn() from the start of the buffer.
 */
SV* trim_visualwidth_eucjp( SV* str, SV* length_sv ){
    const IV requested = SvIV(length_sv);
    const UV limit = requested > 0 ? (UV)requested : 0;
    const unsigned char *start = (const unsigned char *)SvPV_nolen(str);
    const unsigned char *cursor = start;
    UV used = 0;
    STRLEN kept = 0;

    for (;;) {
        int consumed = 0;
        const int width = count_single_char_eucjp(&cursor, &consumed);

        /* consumed == 0 means the terminating NUL was reached; otherwise
         * stop as soon as the next character would exceed the budget. */
        if (consumed == 0 || used + (UV)width > limit) {
            break;
        }
        used += (UV)width;
        kept += (STRLEN)consumed;
    }
    return newSVpvn((const char *)start, kept);
}
71
/*
 * Consume one UTF-8 encoded character starting at *pos.
 *
 * pos  - in/out cursor into a NUL-terminated byte string; advanced past the
 *        bytes that were consumed.
 * byte - out: number of bytes consumed (0 only when *pos is at the NUL).
 *
 * Returns the visual width assigned to the consumed character:
 *   0  at the terminating NUL, and for the byte sequence EF BB BF (BOM),
 *   1  for a well-formed 2-byte sequence, for the 3-byte sequences
 *      EF BD A1..BF and EF BE 80..9F (treated as half-width), and for any
 *      byte that does not start one of the recognised sequences,
 *   2  for every other well-formed 3-byte sequence and for well-formed
 *      4-byte sequences.
 *
 * Continuation bytes are tested left to right with short-circuit evaluation,
 * so the terminating NUL is never read past.
 */
int count_single_char_utf8( const unsigned char** pos, int* byte ){
    const unsigned char *s = *pos;
    int consumed = 1;   /* fallback: a single byte ... */
    int width = 1;      /* ... counted as one column   */

    *byte = 0;
    if (s[0] == 0) {
        return 0;
    }

    if (s[0] == 0xef && s[1] == 0xbb && s[2] == 0xbf) {
        /* BOM: skipped, occupies no columns. */
        consumed = 3;
        width = 0;
    } else if ((s[0] & 0xe0) == 0xc0 && (s[1] & 0xc0) == 0x80) {
        consumed = 2;
        width = 1;
    } else if ((s[0] & 0xf0) == 0xe0
               && (s[1] & 0xc0) == 0x80
               && (s[2] & 0xc0) == 0x80) {
        const int halfwidth =
            s[0] == 0xef
            && ((s[1] == 0xbd && s[2] >= 0xa1 && s[2] <= 0xbf)
                || (s[1] == 0xbe && s[2] >= 0x80 && s[2] <= 0x9f));

        consumed = 3;
        width = halfwidth ? 1 : 2;
    } else if ((s[0] & 0xf8) == 0xf0
               && (s[1] & 0xc0) == 0x80
               && (s[2] & 0xc0) == 0x80
               && (s[3] & 0xc0) == 0x80) {
        consumed = 4;
        width = 2;
    }

    *pos = s + consumed;
    *byte = consumed;
    return width;
}
110
/*
 * Sum the visual widths reported by count_single_char_utf8() for every
 * character in str's string buffer, stopping at the first NUL byte.
 *
 * Returns a new integer SV (newSViv) holding the total.
 */
SV* get_visualwidth_utf8( SV* str ){
    const unsigned char *cursor = (const unsigned char *)SvPV_nolen(str);
    unsigned int total = 0;
    int consumed = 0;

    while (*cursor != 0) {
        total += count_single_char_utf8(&cursor, &consumed);
    }
    return newSViv(total);
}
121
/*
 * Return a new SV holding the longest leading portion of str whose visual
 * width, as measured by count_single_char_utf8(), does not exceed the
 * integer value of length_sv.
 *
 * str       - scalar whose string buffer is scanned up to its first NUL byte.
 * length_sv - scalar holding the maximum visual width (read with SvIV).
 *
 * A negative width is treated as 0, so the result is an empty string.  The
 * budget is kept in a UV instead of being narrowed to unsigned int, so a
 * negative or very large IV cannot wrap into an unrelated limit.
 *
 * Each unit delimited by count_single_char_utf8() is either kept whole or
 * dropped; zero-width units (the BOM sequence) are kept while the budget is
 * not exceeded.  The result is built with newSVpvn() from the start of the
 * buffer.
 */
SV* trim_visualwidth_utf8( SV* str, SV* length_sv ){
    const IV requested = SvIV(length_sv);
    const UV limit = requested > 0 ? (UV)requested : 0;
    const unsigned char *start = (const unsigned char *)SvPV_nolen(str);
    const unsigned char *cursor = start;
    UV used = 0;
    STRLEN kept = 0;

    for (;;) {
        int consumed = 0;
        const int width = count_single_char_utf8(&cursor, &consumed);

        /* consumed == 0 means the terminating NUL was reached; otherwise
         * stop as soon as the next character would exceed the budget. */
        if (consumed == 0 || used + (UV)width > limit) {
            break;
        }
        used += (UV)width;
        kept += (STRLEN)consumed;
    }
    return newSVpvn((const char *)start, kept);
}
143
144
145 MODULE = Text::VisualWidth PACKAGE = Text::VisualWidth::EUC_JP
146 PROTOTYPES: ENABLE
147
# XSUB glue: passes the scalar argument to the C function
# get_visualwidth_eucjp() and returns the SV it produces.
SV *
xs_get_visualwidth_eucjp( str )
SV * str
CODE:
RETVAL = get_visualwidth_eucjp(str);
OUTPUT:
RETVAL
155
# XSUB glue: passes the string scalar and the width scalar to the C function
# trim_visualwidth_eucjp() and returns the SV it produces.
SV *
xs_trim_visualwidth_eucjp( str, length_sv )
SV * str
SV * length_sv
CODE:
RETVAL = trim_visualwidth_eucjp(str, length_sv);
OUTPUT:
RETVAL
164
165 MODULE = Text::VisualWidth PACKAGE = Text::VisualWidth::UTF8
166 PROTOTYPES: ENABLE
167
# XSUB glue: passes the scalar argument to the C function
# get_visualwidth_utf8() and returns the SV it produces.
SV *
xs_get_visualwidth_utf8( str )
SV * str
CODE:
RETVAL = get_visualwidth_utf8(str);
OUTPUT:
RETVAL
175
# XSUB glue: passes the string scalar and the width scalar to the C function
# trim_visualwidth_utf8() and returns the SV it produces.
SV *
xs_trim_visualwidth_utf8( str, length_sv )
SV * str
SV * length_sv
CODE:
RETVAL = trim_visualwidth_utf8(str, length_sv);
OUTPUT:
RETVAL
184