1package FFI::Platypus::Type::WideString;
2
3use strict;
4use warnings;
5use 5.008004;
6use FFI::Platypus;
7use FFI::Platypus::Memory qw( memcpy );
8use FFI::Platypus::Buffer qw( buffer_to_scalar scalar_to_pointer scalar_to_buffer );
9use Encode qw( decode encode find_encoding );
10use Carp ();
11
12# ABSTRACT: Platypus custom type for Unicode "wide" strings
13our $VERSION = '1.56'; # VERSION
14
15
16my @stack;  # To keep buffer alive.
17
18sub _compute_wide_string_encoding
19{
20  foreach my $need (qw( wcslen wcsnlen ))
21  {
22    die "This type plugin needs $need from libc, and cannot find it"
23      unless FFI::Platypus::Memory->can("_$need");
24  }
25
26  my $ffi = FFI::Platypus->new( api => 1, lib => [undef] );
27
28  my $size = eval { $ffi->sizeof('wchar_t') };
29  die 'no wchar_t' if $@;
30
31  my %orders = (
32    join('', 1..$size)         => 'BE',
33    join('', reverse 1..$size) => 'LE',
34  );
35
36  my $byteorder = join '', @{ $ffi->cast( "wchar_t*", "uint8[$size]", \hex(join '', map { "0$_" } 1..$size) ) };
37
38  my $encoding;
39
40  if($size == 2)
41  {
42    $encoding = 'UTF-16';
43  }
44  elsif($size == 4)
45  {
46    $encoding = 'UTF-32';
47  }
48  else
49  {
50    die "not sure what encoding to use for size $size";
51  }
52
53  if(defined $orders{$byteorder})
54  {
55    $encoding .= $orders{$byteorder};
56  }
57  else
58  {
59    die "odd byteorder $byteorder not (yet) supported";
60  }
61
62  die "Perl doesn't recognize $encoding as an encoding"
63    unless find_encoding($encoding);
64
65  return ($encoding, $size);
66}
67
68sub ffi_custom_type_api_1
69{
70  my %args = @_;
71
72  # TODO: it wold be nice to allow arbitrary encodings, but we are
73  # relying on a couple of wcs* functions to compute the string, so
74  # we will leave that for future development.
75  my($encoding, $width) = __PACKAGE__->_compute_wide_string_encoding();
76
77  # it is hard to come up with a default size for write buffers
78  # but 2048 is a multiple of 1024 that is large enough to fit
79  # any Windows PATH (260*4)+2 = 1042
80  #
81  # (assuming all characters in the PATH are in the BMP, which is
82  #  admitedly unlikely, possilby impossible (?) and and a null
83  #  termination of two bytes).
84  #
85  # it is arbitrary and based on a platform specific windows
86  # thing, but windows is where wide strings are most likely
87  # to be found, so seems good as anything.
88  my $size   = $args{size} || 2048;
89  my $access = $args{access} || 'read';
90
91  my %ct = (
92    native_type    => 'opaque',
93  );
94
95  $ct{native_to_perl} = sub {
96    return undef unless defined $_[0];
97    return decode($encoding,
98      buffer_to_scalar(
99        $_[0],
100        FFI::Platypus::Memory::_wcslen($_[0])*$width,
101      )
102    );
103  };
104
105  if($access eq 'read')
106  {
107    $ct{perl_to_native} = sub {
108      if(defined $_[0])
109      {
110        my $buf = encode($encoding, $_[0]."\0");
111        push @stack, \$buf;
112        return scalar_to_pointer $buf;
113      }
114      else
115      {
116        push @stack, undef;
117        return undef;
118      }
119    };
120
121    $ct{perl_to_native_post} = sub {
122      pop @stack;
123      return;
124    };
125
126  }
127  elsif($access eq 'write')
128  {
129    my @stack;
130
131    $ct{perl_to_native} = sub {
132      my $ref = shift;
133      if(ref($ref) eq 'ARRAY')
134      {
135        ${ $ref->[0] } = "\0" x $size unless defined ${ $ref->[0] };
136        my $ptr = scalar_to_pointer ${ $ref->[0] };
137        if(defined $ref->[0])
138        {
139          my $init = encode($encoding, $ref->[1]);
140          my($sptr, $ssize) = scalar_to_buffer($init);
141          memcpy($ptr, $sptr, $ssize);
142        }
143        push @stack, \${ $ref->[0] };
144        return $ptr;
145      }
146      elsif(ref($ref) eq 'SCALAR')
147      {
148        push @stack, $ref;
149        $$ref = "\0" x $size unless defined $$ref;
150        return scalar_to_pointer $$ref;
151      }
152      else
153      {
154        push @stack, $ref;
155        return undef;
156      }
157    };
158
159    $ct{perl_to_native_post} = sub {
160      my $ref = pop @stack;
161      return unless defined $ref;
162      my $len = length $$ref;
163      $len = FFI::Platypus::Memory::_wcsnlen($$ref, $len);
164      $$ref = decode($encoding, substr($$ref, 0, $len*$width));
165    };
166
167  }
168  else
169  {
170    Carp::croak("Unknown access type $access");
171  }
172
173  return \%ct;
174}
175
1761;
177
178__END__
179
180=pod
181
182=encoding UTF-8
183
184=head1 NAME
185
186FFI::Platypus::Type::WideString - Platypus custom type for Unicode "wide" strings
187
188=head1 VERSION
189
190version 1.56
191
192=head1 SYNOPSIS
193
194 use FFI::Platypus 1.00;
195
196 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] );
197 $ffi->load_custom_type('::WideString' => 'wstring', access => 'read' );
198 $ffi->load_custom_type('::WideString' => 'wstring_w', access => 'write' );
199
200 # call function that takes a constant wide string
201 $ffi->attach( wcscmp => ['wstring', 'wstring'] => 'int' );
202 my $diff = wcscmp("I ❤ perl + Platypus", "I ❤ perl + Platypus"); # returns 0
203
204 # call a function that takes a wide string for writing
205 $ffi->attach( wcscpy => ['wstring_w', 'wstring'] );
206 my $buf;
207 wcscpy(\$buf, "I ❤ perl + Platypus");
208 print $buf, "\n";  # prints "I ❤ perl + Platypus"
209
210 # call a function that takes a wide string for modification
211 $ffi->attach( wcscat => ['wstring_w', 'wstring'] );
212 my $buf;
213 wcscat( [ \$buf, "I ❤ perl" ], " + Platypus");
214 print $buf, "\n";  # prints "I ❤ perl + Platypus"
215
216On Windows use with C<LPCWSTR>:
217
218 use FFI::Platypus 1.00;
219
220 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] );
221
222 # define some custom Win32 Types
223 # to get these automatically see FFI::Platypus::Lang::Win32
224 $ffi->load_custom_type('::WideString' => 'LPCWSTR', access => 'read' );
225 $ffi->type('opaque' => 'HWND');
226 $ffi->type('uint'   => 'UINT');
227
228 use constant MB_OK                   => 0x00000000;
229 use constant MB_DEFAULT_DESKTOP_ONLY => 0x00020000;
230
231 $ffi->attach( [MessageBoxW => 'MessageBox'] => [ 'HWND', 'LPCWSTR', 'LPCWSTR', 'UINT'] => 'int' );
232
233 MessageBox(undef, "I ❤️ Platypus", "Confession", MB_OK|MB_DEFAULT_DESKTOP_ONLY);
234
235=head1 DESCRIPTION
236
237This custom type plugin for L<FFI::Platypus> provides support for the native
238"wide" string type on your platform, if it is available.
239
240Wide strings are made of up wide characters (C<wchar_t>, also known as C<WCHAR>
241on Windows) and have enough bits to represent character sets that require
242larger than the traditional one byte C<char>.
243
244These strings are most commonly used on Windows where they are referred to as
245C<LPWSTR> and C<LPCWSTR> (The former for read/write buffers and the latter for
246const read-only strings), where they are encoded as C<UTF-16LE>.
247
248They are also supported by libc on many modern Unix systems where they are usually
249C<UTF-32> of the native byte-order of the system.  APIs on Unix systems more
250commonly use UTF-8 which provides some compatibility with ASCII, but you may
251occasionally find APIs that talk in wide strings.  (libarchive, for example,
252can work in both).
253
254This plugin will detect the native wide string format for you and transparently
255convert Perl strings, which are typically encoded internally as UTF-8.  If for
256some reason it cannot detect the correct encoding, or if your platform is
257currently supported, an exception will be thrown (please open a ticket if this
258is the case).  It can be used either for read/write buffers, for const read-only
259strings, and for return values.  It supports these options:
260
261Options:
262
263=over 4
264
265=item access
266
267Either C<read> or C<write> depending on if you are using a read/write buffer
268or a const read-only string.
269
270=item size
271
272For read/write buffer, the size of the buffer to create, if not provided by
273the caller.
274
275=back
276
277=head2 read-only
278
279Read-only strings are the easiest of all, are converted to the native wide
280string format in a buffer and are freed after that function call completes.
281
282 $ffi->load_custom_type('::WideString' => 'wstring' );
283 $ffi->function( wprintf => [ 'wstring' ] => [ 'wstring' ] => 'int' )
284      ->call("I %s perl + Platypus", "❤");
285
286This is the mode that you want to use when you are calling a function that
287takes a C<const wchar_t*> or a C<LPCWSTR>.
288
289=head2 return value
290
291For return values the C<access> and C<size> options are ignored.  The string
292is simply copied into a Perl native string.
293
294 $ffi->load_custom_type('::WideString' => 'wstring' );
295 # see note below in CAVEATS about wcsdup
296 my $str = $ffi->function( wcsdup => [ 'wstring' ] => 'wstring' )
297               ->call("I ❤ perl + Platypus");
298
299This is the mode that you want to use when you are calling a function that
300returns a C<const wchar_t*>, C<wchar_t>, C<LPWSTR> or C<LPCWSTR>.
301
302=head2 read/write
303
304Read/write strings can be passed in one of two ways.  Which you choose
305depends on if you want to initialize the read/write buffer or not.
306
307=over 4
308
309=item default buffer size
310
311The simplest way is to fallback on the default buffer size, which can
312be specified using the C<size> option when creating the custom type.
313
314 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] );
315 $ffi->load_custom_type('::WideString' => 'wstring',   access => 'read' );
316 $ffi->load_custom_type('::WideString' => 'wstring_w', access => 'write', size => 512 );
317
318 $ffi->attach( wcscpy => ['wstring_w', 'wstring'] );
319 my $buf;
320 wcscpy(\$buf, "I ❤ perl + Platypus");
321 print $buf, "\n";  # prints "I ❤ perl + Platypus"
322
323B<Discussion>: This is the most sensical approach when the exact size of the
324buffer is known for all usages of the string type.  It can also be sensical
325if the buffer size is larger than any possible output, though care should
326be taken since this may be hard to determine reliably.
327
328The default size if none is specified when creating the custom type is 2048,
329which is probably large enough for many uses, but also probably wastes
330memory for many of them.
331
332=item allocate your buffer of a specific size
333
334The safest and most memory efficient method is of course to allocate exactly
335the amount of memory that you need.
336
337 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] );
338 $ffi->load_custom_type('::WideString' => 'wstring',   access => 'read'  );
339 $ffi->load_custom_type('::WideString' => 'wstring_w', access => 'write' );
340
341 $ffi->attach( wcscpy => ['wstring_w', 'wstring'] );
342 my $width = $ffi->sizeof('wchar_t');
343 my $buf = "\0" x ( (length ("I ❤ perl + Platypus") + 1)*$width);
344 wcscpy(\$buf, "I ❤ perl + Platypus");
345 print $buf, "\n";  # prints "I ❤ perl + Platypus"
346
347B<Discussion>: By assigning C<$buf> to a string of null characters the
348length of the source string, plus one (for the null at the end) and then
349multiplying that by the size of C<wchar_t>, you get the exact number of
350bytes needed for the destination buffer.
351
352Note that although we pass in a reference to a buffer, what comes back
353is converted to a Perl string, which will be internally UTF-8, not stored
354at the original buffer location.  This is slightly awkward, but what you
355need most of the time.
356
357=item initialize the read/write buffer
358
359Some functions don't expect empty null padded buffers though, in this
360case you will want to initialize the buffer.
361
362 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] );
363 $ffi->load_custom_type('::WideString' => 'wstring',   access => 'read'  );
364 $ffi->load_custom_type('::WideString' => 'wstring_w', access => 'write' );
365
366 $ffi->attach( wcscat => ['wstring_w', 'wstring'] );
367 my $buf;
368 wcscat( [ \$buf, "I ❤ perl" ], " + Platypus");
369 print $buf, "\n";  # prints "I ❤ perl + Platypus"
370
371B<Discussion>: To initialize we pass in an array reference instead of a
372scalar reference.  The first element is a scalar reference to the buffer
373(which can be pre-allocated or not; if it is not allocated then it will
374be allocated to the default size for the type).  The second argument is
375what the buffer should be initialized to before the underlying C function
376is called.  The Perl string is encoded into wide string format before
377being used to initialize the buffer.
378
379As before a reference to the translated string is returned, and the
380buffer that was used to pass in is freed.
381
382=item allocate memory using C<malloc> or C<wcsdup> etc.
383
384You can also allocate memory using C<malloc> or C<wcsdup> to return
385an opaque type and manipulate it using the libc C<wcs*> functions.
386It will still probably be useful to use this plugin to cast the
387opaque back to a Perl string.  The CAVEATS section below includes
388several examples.
389
390=back
391
392This is the mode that you want to use when you are calling a function that
393takes a <wchar_t*> or a C<LPWSTR>.
394
395=head1 CAVEATS
396
397As with the Platypus built in C<string> type, return values are copied into
398a Perl scalar.  This is usually what you want anyway, but some APIs expect
399the caller to take responsibility for freeing the pointer to the wide string
400that it returns.  For example, C<wcsdup> works in this way.  The workaround
401is to return an opaque pointer, cast it from a wide string and free the
402pointer.
403
404 use FFI::Platypus::Memory qw( free );
405 $ffi->load_custom_type('::WideString' => 'wstring' );
406 my $ptr = $ffi->function( wcsdup => [ 'wstring' ] => 'opaque' )
407               ->call("I ❤ perl + Platypus");
408 my $str = $ffi->cast('opaque', 'wstring', $ptr);
409 free $ptr;
410
411Because of the order in which objects are freed you cannot return a wide
412string if it is also a wide string argument to a function.  For example
413C<wcscpy> may crash if you specify the return value as a wide string:
414
415 # wchar_t *wcscpy(wchar_t *dest, const wchar_t *src);
416 $ffi->attach( wcscpy => [ 'wstring_w', 'wstring' ] => 'wstring' ); # no
417 my $str;
418 wcscpy( \$str, "I ❤ perl + Platypus");  # may crash on memory error
419
420This is because the order in which things are done here are 1. C<$str> is allocated
4212. C<$str> is re-encoded as utf and the old buffer is freed 3. the return value
422is computed based on the C<$str> buffer that was freed.
423
424If you look at C<wcscpy> though you don't actually need the return value.
425To make this code work, you can just ignore the return value:
426
427 $ffi->attach( wcscpy => [ 'wstring_w', 'wstring' ] => 'void' ); # yes
428 my $str;
429 wcscpy( \$str, "I ❤ perl + Platypus"); # good!
430
431On the other hand you do care about the return value from C<wcschr>, which returns
432a pointer to the first occurrence of a character in an argument string:
433
434 # wchar_t *wcschr(const wchar_t *wcs, wchar_t wc);
435 $ffi->attach( wcschr => [ 'wstring', 'wchar_t' ] => 'wstring' ); # no
436 # this may crash on memory error or return the wrong value
437 my $str = wcschr("I ❤ perl + Platypus", ord("❤"));
438
439Instead you need to work with pointers and casts to use this function:
440
441 use FFI::Platypus 1.00;
442 use FFI::Platypus::Memory qw( free );
443
444 my $ffi = FFI::Platypus->new( api => 1, lib => [undef] );
445
446 $ffi->attach( wcsdup => ['wstring'] => 'opaque' );
447 $ffi->attach( strchr => [ opaque', 'wchar_t' ] => 'wstring' );
448
449 # create a wcs string in memory using wcsdup
450 my $haystack = wcsdup("I ❤ perl + Platypus");
451 # find the heart and return as a wide string
452 my $needle = strchr($haystack, ord("❤"));
453 # safe to free the pointer to the larger string now
454 free $haystack;
455
456=head1 SEE ALSO
457
458=over 4
459
460=item L<FFI::Platypus>
461
462Core Platypus documentation.
463
464=item L<FFI::Platypus::Type>
465
466Includes documentation on handling "normal" 8 bit C strings among others.
467
468=item L<FFI::Platypus::Lang::Win32>
469
470Documentation for using Platypus with C<LPWSTR> and C<LPCWSTR> types on
471Microsoft Windows.  These types are just aliases for the standard C wide
472strings.
473
474=back
475
476=head1 AUTHOR
477
478Author: Graham Ollis E<lt>plicease@cpan.orgE<gt>
479
480Contributors:
481
482Bakkiaraj Murugesan (bakkiaraj)
483
484Dylan Cali (calid)
485
486pipcet
487
488Zaki Mughal (zmughal)
489
490Fitz Elliott (felliott)
491
492Vickenty Fesunov (vyf)
493
494Gregor Herrmann (gregoa)
495
496Shlomi Fish (shlomif)
497
498Damyan Ivanov
499
500Ilya Pavlov (Ilya33)
501
502Petr Písař (ppisar)
503
504Mohammad S Anwar (MANWAR)
505
506Håkon Hægland (hakonhagland, HAKONH)
507
508Meredith (merrilymeredith, MHOWARD)
509
510Diab Jerius (DJERIUS)
511
512Eric Brine (IKEGAMI)
513
514szTheory
515
516José Joaquín Atria (JJATRIA)
517
518Pete Houston (openstrike, HOUSTON)
519
520=head1 COPYRIGHT AND LICENSE
521
522This software is copyright (c) 2015,2016,2017,2018,2019,2020 by Graham Ollis.
523
524This is free software; you can redistribute it and/or modify it under
525the same terms as the Perl 5 programming language system itself.
526
527=cut
528