1package FFI::Platypus::Type::WideString; 2 3use strict; 4use warnings; 5use 5.008004; 6use FFI::Platypus; 7use FFI::Platypus::Memory qw( memcpy ); 8use FFI::Platypus::Buffer qw( buffer_to_scalar scalar_to_pointer scalar_to_buffer ); 9use Encode qw( decode encode find_encoding ); 10use Carp (); 11 12# ABSTRACT: Platypus custom type for Unicode "wide" strings 13our $VERSION = '1.56'; # VERSION 14 15 16my @stack; # To keep buffer alive. 17 18sub _compute_wide_string_encoding 19{ 20 foreach my $need (qw( wcslen wcsnlen )) 21 { 22 die "This type plugin needs $need from libc, and cannot find it" 23 unless FFI::Platypus::Memory->can("_$need"); 24 } 25 26 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] ); 27 28 my $size = eval { $ffi->sizeof('wchar_t') }; 29 die 'no wchar_t' if $@; 30 31 my %orders = ( 32 join('', 1..$size) => 'BE', 33 join('', reverse 1..$size) => 'LE', 34 ); 35 36 my $byteorder = join '', @{ $ffi->cast( "wchar_t*", "uint8[$size]", \hex(join '', map { "0$_" } 1..$size) ) }; 37 38 my $encoding; 39 40 if($size == 2) 41 { 42 $encoding = 'UTF-16'; 43 } 44 elsif($size == 4) 45 { 46 $encoding = 'UTF-32'; 47 } 48 else 49 { 50 die "not sure what encoding to use for size $size"; 51 } 52 53 if(defined $orders{$byteorder}) 54 { 55 $encoding .= $orders{$byteorder}; 56 } 57 else 58 { 59 die "odd byteorder $byteorder not (yet) supported"; 60 } 61 62 die "Perl doesn't recognize $encoding as an encoding" 63 unless find_encoding($encoding); 64 65 return ($encoding, $size); 66} 67 68sub ffi_custom_type_api_1 69{ 70 my %args = @_; 71 72 # TODO: it wold be nice to allow arbitrary encodings, but we are 73 # relying on a couple of wcs* functions to compute the string, so 74 # we will leave that for future development. 75 my($encoding, $width) = __PACKAGE__->_compute_wide_string_encoding(); 76 77 # it is hard to come up with a default size for write buffers 78 # but 2048 is a multiple of 1024 that is large enough to fit 79 # any Windows PATH (260*4)+2 = 1042 80 # 81 # (assuming all characters in the PATH are in the BMP, which is 82 # admitedly unlikely, possilby impossible (?) and and a null 83 # termination of two bytes). 84 # 85 # it is arbitrary and based on a platform specific windows 86 # thing, but windows is where wide strings are most likely 87 # to be found, so seems good as anything. 88 my $size = $args{size} || 2048; 89 my $access = $args{access} || 'read'; 90 91 my %ct = ( 92 native_type => 'opaque', 93 ); 94 95 $ct{native_to_perl} = sub { 96 return undef unless defined $_[0]; 97 return decode($encoding, 98 buffer_to_scalar( 99 $_[0], 100 FFI::Platypus::Memory::_wcslen($_[0])*$width, 101 ) 102 ); 103 }; 104 105 if($access eq 'read') 106 { 107 $ct{perl_to_native} = sub { 108 if(defined $_[0]) 109 { 110 my $buf = encode($encoding, $_[0]."\0"); 111 push @stack, \$buf; 112 return scalar_to_pointer $buf; 113 } 114 else 115 { 116 push @stack, undef; 117 return undef; 118 } 119 }; 120 121 $ct{perl_to_native_post} = sub { 122 pop @stack; 123 return; 124 }; 125 126 } 127 elsif($access eq 'write') 128 { 129 my @stack; 130 131 $ct{perl_to_native} = sub { 132 my $ref = shift; 133 if(ref($ref) eq 'ARRAY') 134 { 135 ${ $ref->[0] } = "\0" x $size unless defined ${ $ref->[0] }; 136 my $ptr = scalar_to_pointer ${ $ref->[0] }; 137 if(defined $ref->[0]) 138 { 139 my $init = encode($encoding, $ref->[1]); 140 my($sptr, $ssize) = scalar_to_buffer($init); 141 memcpy($ptr, $sptr, $ssize); 142 } 143 push @stack, \${ $ref->[0] }; 144 return $ptr; 145 } 146 elsif(ref($ref) eq 'SCALAR') 147 { 148 push @stack, $ref; 149 $$ref = "\0" x $size unless defined $$ref; 150 return scalar_to_pointer $$ref; 151 } 152 else 153 { 154 push @stack, $ref; 155 return undef; 156 } 157 }; 158 159 $ct{perl_to_native_post} = sub { 160 my $ref = pop @stack; 161 return unless defined $ref; 162 my $len = length $$ref; 163 $len = FFI::Platypus::Memory::_wcsnlen($$ref, $len); 164 $$ref = decode($encoding, substr($$ref, 0, $len*$width)); 165 }; 166 167 } 168 else 169 { 170 Carp::croak("Unknown access type $access"); 171 } 172 173 return \%ct; 174} 175 1761; 177 178__END__ 179 180=pod 181 182=encoding UTF-8 183 184=head1 NAME 185 186FFI::Platypus::Type::WideString - Platypus custom type for Unicode "wide" strings 187 188=head1 VERSION 189 190version 1.56 191 192=head1 SYNOPSIS 193 194 use FFI::Platypus 1.00; 195 196 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] ); 197 $ffi->load_custom_type('::WideString' => 'wstring', access => 'read' ); 198 $ffi->load_custom_type('::WideString' => 'wstring_w', access => 'write' ); 199 200 # call function that takes a constant wide string 201 $ffi->attach( wcscmp => ['wstring', 'wstring'] => 'int' ); 202 my $diff = wcscmp("I ❤ perl + Platypus", "I ❤ perl + Platypus"); # returns 0 203 204 # call a function that takes a wide string for writing 205 $ffi->attach( wcscpy => ['wstring_w', 'wstring'] ); 206 my $buf; 207 wcscpy(\$buf, "I ❤ perl + Platypus"); 208 print $buf, "\n"; # prints "I ❤ perl + Platypus" 209 210 # call a function that takes a wide string for modification 211 $ffi->attach( wcscat => ['wstring_w', 'wstring'] ); 212 my $buf; 213 wcscat( [ \$buf, "I ❤ perl" ], " + Platypus"); 214 print $buf, "\n"; # prints "I ❤ perl + Platypus" 215 216On Windows use with C<LPCWSTR>: 217 218 use FFI::Platypus 1.00; 219 220 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] ); 221 222 # define some custom Win32 Types 223 # to get these automatically see FFI::Platypus::Lang::Win32 224 $ffi->load_custom_type('::WideString' => 'LPCWSTR', access => 'read' ); 225 $ffi->type('opaque' => 'HWND'); 226 $ffi->type('uint' => 'UINT'); 227 228 use constant MB_OK => 0x00000000; 229 use constant MB_DEFAULT_DESKTOP_ONLY => 0x00020000; 230 231 $ffi->attach( [MessageBoxW => 'MessageBox'] => [ 'HWND', 'LPCWSTR', 'LPCWSTR', 'UINT'] => 'int' ); 232 233 MessageBox(undef, "I ❤️ Platypus", "Confession", MB_OK|MB_DEFAULT_DESKTOP_ONLY); 234 235=head1 DESCRIPTION 236 237This custom type plugin for L<FFI::Platypus> provides support for the native 238"wide" string type on your platform, if it is available. 239 240Wide strings are made of up wide characters (C<wchar_t>, also known as C<WCHAR> 241on Windows) and have enough bits to represent character sets that require 242larger than the traditional one byte C<char>. 243 244These strings are most commonly used on Windows where they are referred to as 245C<LPWSTR> and C<LPCWSTR> (The former for read/write buffers and the latter for 246const read-only strings), where they are encoded as C<UTF-16LE>. 247 248They are also supported by libc on many modern Unix systems where they are usually 249C<UTF-32> of the native byte-order of the system. APIs on Unix systems more 250commonly use UTF-8 which provides some compatibility with ASCII, but you may 251occasionally find APIs that talk in wide strings. (libarchive, for example, 252can work in both). 253 254This plugin will detect the native wide string format for you and transparently 255convert Perl strings, which are typically encoded internally as UTF-8. If for 256some reason it cannot detect the correct encoding, or if your platform is 257currently supported, an exception will be thrown (please open a ticket if this 258is the case). It can be used either for read/write buffers, for const read-only 259strings, and for return values. It supports these options: 260 261Options: 262 263=over 4 264 265=item access 266 267Either C<read> or C<write> depending on if you are using a read/write buffer 268or a const read-only string. 269 270=item size 271 272For read/write buffer, the size of the buffer to create, if not provided by 273the caller. 274 275=back 276 277=head2 read-only 278 279Read-only strings are the easiest of all, are converted to the native wide 280string format in a buffer and are freed after that function call completes. 281 282 $ffi->load_custom_type('::WideString' => 'wstring' ); 283 $ffi->function( wprintf => [ 'wstring' ] => [ 'wstring' ] => 'int' ) 284 ->call("I %s perl + Platypus", "❤"); 285 286This is the mode that you want to use when you are calling a function that 287takes a C<const wchar_t*> or a C<LPCWSTR>. 288 289=head2 return value 290 291For return values the C<access> and C<size> options are ignored. The string 292is simply copied into a Perl native string. 293 294 $ffi->load_custom_type('::WideString' => 'wstring' ); 295 # see note below in CAVEATS about wcsdup 296 my $str = $ffi->function( wcsdup => [ 'wstring' ] => 'wstring' ) 297 ->call("I ❤ perl + Platypus"); 298 299This is the mode that you want to use when you are calling a function that 300returns a C<const wchar_t*>, C<wchar_t>, C<LPWSTR> or C<LPCWSTR>. 301 302=head2 read/write 303 304Read/write strings can be passed in one of two ways. Which you choose 305depends on if you want to initialize the read/write buffer or not. 306 307=over 4 308 309=item default buffer size 310 311The simplest way is to fallback on the default buffer size, which can 312be specified using the C<size> option when creating the custom type. 313 314 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] ); 315 $ffi->load_custom_type('::WideString' => 'wstring', access => 'read' ); 316 $ffi->load_custom_type('::WideString' => 'wstring_w', access => 'write', size => 512 ); 317 318 $ffi->attach( wcscpy => ['wstring_w', 'wstring'] ); 319 my $buf; 320 wcscpy(\$buf, "I ❤ perl + Platypus"); 321 print $buf, "\n"; # prints "I ❤ perl + Platypus" 322 323B<Discussion>: This is the most sensical approach when the exact size of the 324buffer is known for all usages of the string type. It can also be sensical 325if the buffer size is larger than any possible output, though care should 326be taken since this may be hard to determine reliably. 327 328The default size if none is specified when creating the custom type is 2048, 329which is probably large enough for many uses, but also probably wastes 330memory for many of them. 331 332=item allocate your buffer of a specific size 333 334The safest and most memory efficient method is of course to allocate exactly 335the amount of memory that you need. 336 337 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] ); 338 $ffi->load_custom_type('::WideString' => 'wstring', access => 'read' ); 339 $ffi->load_custom_type('::WideString' => 'wstring_w', access => 'write' ); 340 341 $ffi->attach( wcscpy => ['wstring_w', 'wstring'] ); 342 my $width = $ffi->sizeof('wchar_t'); 343 my $buf = "\0" x ( (length ("I ❤ perl + Platypus") + 1)*$width); 344 wcscpy(\$buf, "I ❤ perl + Platypus"); 345 print $buf, "\n"; # prints "I ❤ perl + Platypus" 346 347B<Discussion>: By assigning C<$buf> to a string of null characters the 348length of the source string, plus one (for the null at the end) and then 349multiplying that by the size of C<wchar_t>, you get the exact number of 350bytes needed for the destination buffer. 351 352Note that although we pass in a reference to a buffer, what comes back 353is converted to a Perl string, which will be internally UTF-8, not stored 354at the original buffer location. This is slightly awkward, but what you 355need most of the time. 356 357=item initialize the read/write buffer 358 359Some functions don't expect empty null padded buffers though, in this 360case you will want to initialize the buffer. 361 362 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] ); 363 $ffi->load_custom_type('::WideString' => 'wstring', access => 'read' ); 364 $ffi->load_custom_type('::WideString' => 'wstring_w', access => 'write' ); 365 366 $ffi->attach( wcscat => ['wstring_w', 'wstring'] ); 367 my $buf; 368 wcscat( [ \$buf, "I ❤ perl" ], " + Platypus"); 369 print $buf, "\n"; # prints "I ❤ perl + Platypus" 370 371B<Discussion>: To initialize we pass in an array reference instead of a 372scalar reference. The first element is a scalar reference to the buffer 373(which can be pre-allocated or not; if it is not allocated then it will 374be allocated to the default size for the type). The second argument is 375what the buffer should be initialized to before the underlying C function 376is called. The Perl string is encoded into wide string format before 377being used to initialize the buffer. 378 379As before a reference to the translated string is returned, and the 380buffer that was used to pass in is freed. 381 382=item allocate memory using C<malloc> or C<wcsdup> etc. 383 384You can also allocate memory using C<malloc> or C<wcsdup> to return 385an opaque type and manipulate it using the libc C<wcs*> functions. 386It will still probably be useful to use this plugin to cast the 387opaque back to a Perl string. The CAVEATS section below includes 388several examples. 389 390=back 391 392This is the mode that you want to use when you are calling a function that 393takes a <wchar_t*> or a C<LPWSTR>. 394 395=head1 CAVEATS 396 397As with the Platypus built in C<string> type, return values are copied into 398a Perl scalar. This is usually what you want anyway, but some APIs expect 399the caller to take responsibility for freeing the pointer to the wide string 400that it returns. For example, C<wcsdup> works in this way. The workaround 401is to return an opaque pointer, cast it from a wide string and free the 402pointer. 403 404 use FFI::Platypus::Memory qw( free ); 405 $ffi->load_custom_type('::WideString' => 'wstring' ); 406 my $ptr = $ffi->function( wcsdup => [ 'wstring' ] => 'opaque' ) 407 ->call("I ❤ perl + Platypus"); 408 my $str = $ffi->cast('opaque', 'wstring', $ptr); 409 free $ptr; 410 411Because of the order in which objects are freed you cannot return a wide 412string if it is also a wide string argument to a function. For example 413C<wcscpy> may crash if you specify the return value as a wide string: 414 415 # wchar_t *wcscpy(wchar_t *dest, const wchar_t *src); 416 $ffi->attach( wcscpy => [ 'wstring_w', 'wstring' ] => 'wstring' ); # no 417 my $str; 418 wcscpy( \$str, "I ❤ perl + Platypus"); # may crash on memory error 419 420This is because the order in which things are done here are 1. C<$str> is allocated 4212. C<$str> is re-encoded as utf and the old buffer is freed 3. the return value 422is computed based on the C<$str> buffer that was freed. 423 424If you look at C<wcscpy> though you don't actually need the return value. 425To make this code work, you can just ignore the return value: 426 427 $ffi->attach( wcscpy => [ 'wstring_w', 'wstring' ] => 'void' ); # yes 428 my $str; 429 wcscpy( \$str, "I ❤ perl + Platypus"); # good! 430 431On the other hand you do care about the return value from C<wcschr>, which returns 432a pointer to the first occurrence of a character in an argument string: 433 434 # wchar_t *wcschr(const wchar_t *wcs, wchar_t wc); 435 $ffi->attach( wcschr => [ 'wstring', 'wchar_t' ] => 'wstring' ); # no 436 # this may crash on memory error or return the wrong value 437 my $str = wcschr("I ❤ perl + Platypus", ord("❤")); 438 439Instead you need to work with pointers and casts to use this function: 440 441 use FFI::Platypus 1.00; 442 use FFI::Platypus::Memory qw( free ); 443 444 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] ); 445 446 $ffi->attach( wcsdup => ['wstring'] => 'opaque' ); 447 $ffi->attach( strchr => [ opaque', 'wchar_t' ] => 'wstring' ); 448 449 # create a wcs string in memory using wcsdup 450 my $haystack = wcsdup("I ❤ perl + Platypus"); 451 # find the heart and return as a wide string 452 my $needle = strchr($haystack, ord("❤")); 453 # safe to free the pointer to the larger string now 454 free $haystack; 455 456=head1 SEE ALSO 457 458=over 4 459 460=item L<FFI::Platypus> 461 462Core Platypus documentation. 463 464=item L<FFI::Platypus::Type> 465 466Includes documentation on handling "normal" 8 bit C strings among others. 467 468=item L<FFI::Platypus::Lang::Win32> 469 470Documentation for using Platypus with C<LPWSTR> and C<LPCWSTR> types on 471Microsoft Windows. These types are just aliases for the standard C wide 472strings. 473 474=back 475 476=head1 AUTHOR 477 478Author: Graham Ollis E<lt>plicease@cpan.orgE<gt> 479 480Contributors: 481 482Bakkiaraj Murugesan (bakkiaraj) 483 484Dylan Cali (calid) 485 486pipcet 487 488Zaki Mughal (zmughal) 489 490Fitz Elliott (felliott) 491 492Vickenty Fesunov (vyf) 493 494Gregor Herrmann (gregoa) 495 496Shlomi Fish (shlomif) 497 498Damyan Ivanov 499 500Ilya Pavlov (Ilya33) 501 502Petr Písař (ppisar) 503 504Mohammad S Anwar (MANWAR) 505 506Håkon Hægland (hakonhagland, HAKONH) 507 508Meredith (merrilymeredith, MHOWARD) 509 510Diab Jerius (DJERIUS) 511 512Eric Brine (IKEGAMI) 513 514szTheory 515 516José Joaquín Atria (JJATRIA) 517 518Pete Houston (openstrike, HOUSTON) 519 520=head1 COPYRIGHT AND LICENSE 521 522This software is copyright (c) 2015,2016,2017,2018,2019,2020 by Graham Ollis. 523 524This is free software; you can redistribute it and/or modify it under 525the same terms as the Perl 5 programming language system itself. 526 527=cut 528