1/** Implementation of GNUSTEP string class
2   Copyright (C) 1995-2012 Free Software Foundation, Inc.
3
4   Written by:  Andrew Kachites McCallum <mccallum@gnu.ai.mit.edu>
5   Date: January 1995
6
7   Unicode implementation by Stevo Crvenkovski <stevo@btinternet.com>
8   Date: February 1997
9
10   Optimisations by Richard Frith-Macdonald <richard@brainstorm.co.uk>
11   Date: October 1998 - 2000
12
13   This file is part of the GNUstep Base Library.
14
15   This library is free software; you can redistribute it and/or
16   modify it under the terms of the GNU Lesser General Public
17   License as published by the Free Software Foundation; either
18   version 2 of the License, or (at your option) any later version.
19
20   This library is distributed in the hope that it will be useful,
21   but WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   Lesser General Public License for more details.
24
25   You should have received a copy of the GNU Lesser General Public
26   License along with this library; if not, write to the Free
27   Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
28   Boston, MA 02110 USA.
29
30   <title>NSString class reference</title>
31   $Date$ $Revision$
32*/
33
34/* Caveats:
35
36   Some implementations will need to be changed.
37   Does not support all justification directives for `%@' in format strings
38   on non-GNU-libc systems.
39*/
40
41/*
42   Locales somewhat supported.
43   Limited choice of default encodings.
44*/
45
46#define GS_UNSAFE_REGEX 1
47#import "common.h"
48#include <stdio.h>
49
50#import "Foundation/NSAutoreleasePool.h"
51#import "Foundation/NSCalendarDate.h"
52#import "Foundation/NSDecimal.h"
53#import "Foundation/NSArray.h"
54#import "Foundation/NSCharacterSet.h"
55#import "Foundation/NSException.h"
56#import "Foundation/NSValue.h"
57#import "Foundation/NSDictionary.h"
58#import "Foundation/NSFileManager.h"
59#import "Foundation/NSPortCoder.h"
60#import "Foundation/NSPathUtilities.h"
61#import "Foundation/NSRange.h"
62#import "Foundation/NSRegularExpression.h"
63#import "Foundation/NSException.h"
64#import "Foundation/NSData.h"
65#import "Foundation/NSURL.h"
66#import "Foundation/NSMapTable.h"
67#import "Foundation/NSLocale.h"
68#import "Foundation/NSLock.h"
69#import "Foundation/NSNotification.h"
70#import "Foundation/NSUserDefaults.h"
71#import "Foundation/FoundationErrors.h"
72// For private method _decodePropertyListForKey:
73#import "Foundation/NSKeyedArchiver.h"
74#import "GNUstepBase/GSMime.h"
75#import "GNUstepBase/NSString+GNUstepBase.h"
76#import "GNUstepBase/NSMutableString+GNUstepBase.h"
77#import "GSPrivate.h"
78#import "GSPThread.h"
79#include <sys/stat.h>
80#include <sys/types.h>
81
82#if	defined(HAVE_SYS_FCNTL_H)
83#  include	<sys/fcntl.h>
84#elif	defined(HAVE_FCNTL_H)
85#  include	<fcntl.h>
86#endif
87
88#include <stdio.h>
89#include <wchar.h>
90
91#ifdef HAVE_MALLOC_H
92#  ifndef __OpenBSD__
93#    include <malloc.h>
94#  endif
95#endif
96#ifdef HAVE_ALLOCA_H
97#include <alloca.h>
98#endif
99#if	defined(HAVE_UNICODE_UCOL_H)
100# include <unicode/ucol.h>
101#endif
102#if	defined(HAVE_UNICODE_UNORM2_H)
103# include <unicode/unorm2.h>
104#endif
105#if     defined(HAVE_UNICODE_USTRING_H)
106# include <unicode/ustring.h>
107#endif
108#if     defined(HAVE_UNICODE_USEARCH_H)
109# include <unicode/usearch.h>
110#endif
111
112/* Create local inline versions of key functions for case-insensitive operations
113 */
114#import "Additions/unicode/caseconv.h"
/* Look ch up in the two-level upper case table (high byte selects the
 * page, low byte the entry).  A zero entry means 'no mapping', in which
 * case ch is returned unchanged.
 */
static inline unichar
uni_toupper(unichar ch)
{
  unichar mapped = gs_toupper_map[ch >> 8][ch & 0xff];

  if (mapped != 0)
    {
      return mapped;
    }
  return ch;
}
/* Look ch up in the two-level lower case table (high byte selects the
 * page, low byte the entry).  A zero entry means 'no mapping', in which
 * case ch is returned unchanged.
 */
static inline unichar
uni_tolower(unichar ch)
{
  unichar mapped = gs_tolower_map[ch >> 8][ch & 0xff];

  if (mapped != 0)
    {
      return mapped;
    }
  return ch;
}
127
128#import "GNUstepBase/Unicode.h"
129
/* Declared extern here; the definition is not in this part of the file. */
extern BOOL GSScanDouble(unichar*, unsigned, double*);

/* Forward declarations of the private string classes and the collection
 * classes referenced by this file.
 */
@class	GSString;
@class	GSMutableString;
@class	GSPlaceholderString;
@interface GSPlaceholderString : NSString	// Help the compiler
@end
@class	GSMutableArray;
@class	GSMutableDictionary;

/*
 * Cache classes and method implementations for speed.
 * The class variables below are filled in by +initialize.
 */
static Class	NSDataClass;
static Class	NSStringClass;
static Class	NSMutableStringClass;

static Class	GSStringClass;
static Class	GSMutableStringClass;
static Class	GSPlaceholderStringClass;

/* Placeholder returned by +allocWithZone: for the default zone. */
static GSPlaceholderString	*defaultPlaceholderString;
/* Maps a non-default NSZone to the placeholder allocated in that zone. */
static NSMapTable		*placeholderMap;
/* Taken around accesses to placeholderMap in +allocWithZone: and also
 * around the lazy creation of the sets in pathSeps().
 */
static pthread_mutex_t          placeholderLock = PTHREAD_MUTEX_INITIALIZER;


/* Selector, character set and cached method implementation used by
 * uni_isnonsp() to test for non-base characters; all three are set
 * in +initialize.
 */
static SEL	                cMemberSel = 0;
static NSCharacterSet	        *nonBase = nil;
static BOOL                     (*nonBaseImp)(id, SEL, unichar) = 0;
159
/* Macro to return the receiver if it is already immutable, but an
 * autoreleased copy otherwise.  Used where we have to return an
 * immutable string, but we don't want to change the parameter from
 * a mutable string to an immutable one.
 */
#define	IMMUTABLE(S)	AUTORELEASE([(S) copyWithZone: NSDefaultMallocZone()])

/* Evaluates to 1 if bit i of a is set and to 0 otherwise. */
#define IS_BIT_SET(a,i) ((((a) & (1<<(i)))) > 0)

/* Inverse of the whitespace-and-newline set; created by setupNonspace(). */
static NSCharacterSet	*nonspace = nil;
/* Bitmap representation of the whitespace set, and a pointer to its bytes;
 * both are created by setupWhitespace().
 */
static NSData           *whitespaceBitmap;
static unsigned const char *whitespaceBitmapRep = NULL;
/* Tests character X against the bitmap (one bit per character, eight
 * characters per byte).  whitespaceBitmapRep is dereferenced directly,
 * so setupWhitespace() must have been called before this is used.
 */
#define GS_IS_WHITESPACE(X) IS_BIT_SET(whitespaceBitmapRep[(X)/8], (X) % 8)
173
/* Lazily create the cached 'nonspace' set (the inverse of the whitespace
 * and newline character set).  Does nothing once the set exists.
 */
static void setupNonspace(void)
{
  if (nonspace != nil)
    {
      return;		// Already set up.
    }
  nonspace = [[[NSCharacterSet whitespaceAndNewlineCharacterSet]
    invertedSet] retain];
}
184
/* Lazily build the whitespace bitmap used by GS_IS_WHITESPACE().
 * The retained bitmap data is kept in whitespaceBitmap and a pointer to
 * its bytes in whitespaceBitmapRep; a non-NULL whitespaceBitmapRep means
 * the work has already been done.
 */
static void setupWhitespace(void)
{
  NSCharacterSet *spaces;

  if (whitespaceBitmapRep != NULL)
    {
      return;
    }

  /* [NSCharacterSet whitespaceAndNewlineCharacterSet] is deliberately not
   * used here: it reads in a property list, which would lead back into
   * this code and recurse.  The set is built from an explicit list of
   * characters instead.
   */
  spaces = [NSCharacterSet characterSetWithCharactersInString:
    @" \t\r\n\f\b"];
  whitespaceBitmap = RETAIN([spaces bitmapRepresentation]);
  whitespaceBitmapRep = [whitespaceBitmap bytes];
}
202
203/* A non-spacing character is one which is part of a 'user-perceived character'
204 * where the user perceived character consists of a base character followed
205 * by a sequence of non-spacing characters.  Non-spacing characters do not
206 * exist in isolation.
207 * eg. an accented 'a' might be represented as the 'a' followed by the accent.
208 */
/* Returns YES if u is to be treated as a non-spacing character.
 * Code units in the range 0xdc00-0xdfff (the second half of a UTF-16
 * surrogate pair) are always reported as non-spacing, which conveniently
 * keeps a surrogate pair together as one composed sequence.  Anything
 * else is looked up in the cached non-base character set, so +initialize
 * must have run before this is called.
 */
inline BOOL
uni_isnonsp(unichar u)
{
  if (u < 0xdc00 || u > 0xdfff)
    {
      return (*nonBaseImp)(nonBase, cMemberSel, u);
    }
  return YES;
}
220
/*
 *	Include sequence handling code with instructions to generate search
 *	and compare functions for NSString objects.
 *	The macros below name the generated functions (strCompNsNs and
 *	strRangeNsNs) and select GSEQ_NS for both GSEQ_O and GSEQ_S.
 *	NOTE(review): presumably GSEQ_O and GSEQ_S choose the representation
 *	of the two operands - confirm against GSeq.h.
 */
#define	GSEQ_STRCOMP	strCompNsNs
#define	GSEQ_STRRANGE	strRangeNsNs
#define	GSEQ_O	GSEQ_NS
#define	GSEQ_S	GSEQ_NS
#include "GSeq.h"
230
231/*
232 * The path handling mode.
233 */
/* Current mode; changed by GSPathHandling() and read through the
 * GSPathHandlingRight/Unix/Windows() macros.
 */
static enum {
  PH_DO_THE_RIGHT_THING,	/* Both slash and backslash separate.	*/
  PH_UNIX,			/* Only slash is a separator.		*/
  PH_WINDOWS			/* Only backslash is a separator.	*/
} pathHandling = PH_DO_THE_RIGHT_THING;
239
240/**
241 * This function is intended to be called at startup (before anything else
242 * which needs to use paths, such as reading config files and user defaults)
243 * to allow a program to control the style of path handling required.<br />
244 * Almost all programs should avoid using this.<br />
245 * Changing the path handling mode is not thread-safe.<br />
246 * If mode is "windows" this sets path handling to be windows specific,<br />
247 * If mode is "unix" it sets path handling to be unix specific,<br />
 * Any other non-null string sets do-the-right-thing mode.<br />
249 * The function returns a C String describing the old mode.
250 */
const char*
GSPathHandling(const char *mode)
{
  const char	*previous;

  /* Work out the name of the mode in force on entry; that is what we
   * return, whether or not the mode gets changed below.
   */
  switch (pathHandling)
    {
      case PH_WINDOWS:
	previous = "windows";
	break;
      case PH_UNIX:
	previous = "unix";
	break;
      default:
	previous = "right";
	break;
    }

  /* A null mode is a pure query ... nothing is changed. */
  if (mode == 0)
    {
      return previous;
    }

  /* Mode names are matched without regard to case; anything which is
   * not recognised selects do-the-right-thing mode.
   */
  if (strcasecmp(mode, "windows") == 0)
    {
      pathHandling = PH_WINDOWS;
    }
  else if (strcasecmp(mode, "unix") == 0)
    {
      pathHandling = PH_UNIX;
    }
  else
    {
      pathHandling = PH_DO_THE_RIGHT_THING;
    }
  return previous;
}
278
/* Predicates on the current path handling mode.  Each evaluates to YES
 * when pathHandling has the corresponding value and to NO otherwise.
 */
#define	GSPathHandlingRight()	\
  ((pathHandling == PH_DO_THE_RIGHT_THING) ? YES : NO)
#define	GSPathHandlingUnix()	\
  ((pathHandling == PH_UNIX) ? YES : NO)
#define	GSPathHandlingWindows()	\
  ((pathHandling == PH_WINDOWS) ? YES : NO)
285
286/*
287 * The pathSeps character set is used for parsing paths ... it *must*
288 * contain the '/' character, which is the internal path separator,
 * and *may* contain additional system specific separators.
290 *
291 * We can't have a 'pathSeps' variable initialized in the +initialize
292 * method because that would cause recursion.
293 */
/* Returns the character set of path separators for the current path
 * handling mode:  slash and backslash in do-the-right-thing mode, slash
 * only in unix mode, backslash only in windows mode (the same characters
 * that pathSepMember() accepts in each mode).
 * Each set is created on first use and cached in a function-level static.
 */
static NSCharacterSet*
pathSeps(void)
{
  static NSCharacterSet	*wPathSeps = nil;
  static NSCharacterSet	*uPathSeps = nil;
  static NSCharacterSet	*rPathSeps = nil;
  if (GSPathHandlingRight())
    {
      /* The cache is tested once without the lock (the common case once
       * the set exists) and again with placeholderLock held, so that the
       * set is created only once.
       */
      if (rPathSeps == nil)
	{
	  (void)pthread_mutex_lock(&placeholderLock);
	  if (rPathSeps == nil)
	    {
	      rPathSeps
		= [NSCharacterSet characterSetWithCharactersInString: @"/\\"];
	      /* NOTE(review): presumably +leakAt: retains the object so it
	       * outlives the autorelease pool and records it for cleanup
	       * at exit - confirm against its implementation.
	       */
              rPathSeps = [NSObject leakAt: &rPathSeps];
	    }
	  (void)pthread_mutex_unlock(&placeholderLock);
	}
      return rPathSeps;
    }
  if (GSPathHandlingUnix())
    {
      /* Same lazy creation as above, for the unix set. */
      if (uPathSeps == nil)
	{
	  (void)pthread_mutex_lock(&placeholderLock);
	  if (uPathSeps == nil)
	    {
	      uPathSeps
		= [NSCharacterSet characterSetWithCharactersInString: @"/"];
              uPathSeps = [NSObject leakAt: &uPathSeps];
	    }
	  (void)pthread_mutex_unlock(&placeholderLock);
	}
      return uPathSeps;
    }
  if (GSPathHandlingWindows())
    {
      /* Same lazy creation as above, for the windows set. */
      if (wPathSeps == nil)
	{
	  (void)pthread_mutex_lock(&placeholderLock);
	  if (wPathSeps == nil)
	    {
	      wPathSeps
		= [NSCharacterSet characterSetWithCharactersInString: @"\\"];
              wPathSeps = [NSObject leakAt: &wPathSeps];
	    }
	  (void)pthread_mutex_unlock(&placeholderLock);
	}
      return wPathSeps;
    }
  /* pathHandling holds none of the three known values ... reset it to
   * the default mode and try again.
   */
  pathHandling = PH_DO_THE_RIGHT_THING;
  return pathSeps();
}
348
/* Returns YES if c is a path separator in the current path handling mode.
 * A slash is a separator unless windows handling is in force, and a
 * backslash is a separator unless unix handling is in force.  No other
 * character is ever a separator.
 */
inline static BOOL
pathSepMember(unichar c)
{
  switch (c)
    {
      case '/':
	return GSPathHandlingWindows() ? NO : YES;
      case '\\':
	return GSPathHandlingUnix() ? NO : YES;
      default:
	return NO;
    }
}
368
369/* For cross-platform portability we always use slash as the separator
370 * when building paths ... unless specific windows path handling is
371 * required.
372 * This ensures that standardised paths and anything built by adding path
 * components to them use a consistent separator character and can be
374 * compared readily using standard string comparisons.
375 */
/* The separator character used when building paths:  a backslash when
 * windows path handling is in force and a slash in every other mode.
 */
inline static unichar
pathSepChar()
{
  return GSPathHandlingWindows() ? '\\' : '/';
}
385
386/*
387 * For cross-platform portability we always use slash as the separator
388 * when building paths ... unless specific windows path handling is
389 * required.
390 */
/* The separator (as a string) used when building paths:  a backslash when
 * windows path handling is in force and a slash in every other mode.
 */
inline static NSString*
pathSepString()
{
  return GSPathHandlingWindows() ? @"\\" : @"/";
}
400
401/*
402 * Find the end of 'root' sequence in a string.  Characters before this
403 * point in the string cannot be split into path components/extensions.
404 * This usage of the term 'root' is slightly different from the usual in
405 * that it includes the first part of any relative path.  The more normal
406 * usage of 'root' elsewhere is to indicate the first part of an absolute
407 * path.
408
409 * Possible roots are -
410 *
411 * '/'			absolute root on unix
412 * ''			if entire path is empty string
413 * 'C:/'		absolute root for a drive on windows
414 * 'C:'			if entire path is 'C:' or 'C:relativepath'
415 * '//host/share/'	absolute root for a host and share on windows
416 * '~/'			home directory for user
417 * '~'			if entire path is '~'
418 * '~username/'		home directory for user
419 * '~username'		if entire path is '~username'
420 *
421 * Most roots are terminated in '/' (or '\') unless the root is the entire
422 * path.  The exception is for windows drive-relative paths, where the root
423 * may be a drive letter followed by a colon, but there may still be path
424 * components after the root with no path separator.
425 *
426 * The presence of any non-empty root indicates an absolute path except -
427 * 1. A windows drive-relative path is not absolute unless the root
428 * ends with a path separator, since the path part on the drive is relative.
429 * 2. On windows, a root consisting of a single path separator indicates
430 * a drive-relative path with no drive ... so the path is relative.
431 */
/* Returns the index of the first character after the 'root' of the path
 * in s (zero if there is no root).
 * The argument l is used as the number of characters in s which may be
 * examined (presumably [s length] - confirm against callers).
 */
static unsigned rootOf(NSString *s, unsigned l)
{
  unsigned	root = 0;

  if (l > 0)
    {
      unichar	c = [s characterAtIndex: 0];

      if (c == '~')
	{
	  /* Home directory form:  the root runs up to and including the
	   * first separator after the tilde, or covers the whole string
	   * if there is no separator.
	   */
	  NSRange	range = NSMakeRange(1, l-1);

	  range = [s rangeOfCharacterFromSet: pathSeps()
				     options: NSLiteralSearch
				       range: range];
	  if (range.length == 0)
	    {
	      root = l;			// ~ or ~name
	    }
	  else
	    {
	      root = NSMaxRange(range);	// ~/... or ~name/...
	    }
	}
      else
	{
	  /* A leading separator is a root of length one (this may be
	   * extended below if it turns out to start a UNC path).
	   */
	  if (pathSepMember(c))
	    {
	      root++;
	    }
	  /* Drive letters and UNC paths are only recognised when we are
	   * not doing unix specific path handling.
	   */
	  if (GSPathHandlingUnix() == NO)
	    {
	      if (root == 0 && l > 1
		&& ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
		&& [s characterAtIndex: 1] == ':')
		{
		  // Got a drive relative path ... see if it's absolute.
		  root = 2;
		  if (l > 2 && pathSepMember([s characterAtIndex: 2]))
		    {
		      root++;
		    }
		}
	      else if (root == 1
		&& l > 4 && pathSepMember([s characterAtIndex: 1]))
		{
		  /* Two leading separators and at least five characters
		   * ... this could be '//host/share/'.  Look for the
		   * separator which ends the host name.
		   */
		  NSRange	range = NSMakeRange(2, l-2);

		  range = [s rangeOfCharacterFromSet: pathSeps()
					     options: NSLiteralSearch
					       range: range];
		  /* The location must be beyond index 2, otherwise the
		   * host name would be empty.
		   */
		  if (range.length > 0 && range.location > 2)
		    {
		      unsigned pos = NSMaxRange(range);

		      // Found end of UNC host perhaps ... look for share
		      if (pos < l)
			{
			  range = NSMakeRange(pos, l - pos);
			  range = [s rangeOfCharacterFromSet: pathSeps()
						     options: NSLiteralSearch
						       range: range];
			  if (range.length > 0)
			    {
			      /*
			       * Found another slash ...  but if it comes
			       * immediately after the last one this can't
			       * be a UNC path as it's '//host//' rather
			       * than '//host/share'
			       */
			      if (range.location > pos)
				{
				  /* OK ... we have the '//host/share/'
				   * format, so this is a valid UNC path.
				   */
				  root = NSMaxRange(range);
				}
			    }
			}
		    }
		  /* In every other case root is left at 1 (just the
		   * leading separator).
		   */
		}
	    }
	}
    }
  return root;
}
518
519
520@implementation NSString
521//  NSString itself is an abstract class which provides factory
522//  methods to generate objects of unspecified subclasses.
523
/* Default C string encoding; set in +initialize from
 * GSPrivateDefaultCStringEncoding().
 */
static NSStringEncoding _DefaultStringEncoding;
/* Result of GSPrivateIsByteEncoding() for the default encoding; set in
 * +initialize.
 */
static BOOL		_ByteEncodingOk;
/* The unicode byte order mark, and the value it appears to have when
 * read with the two bytes swapped.
 */
static const unichar byteOrderMark = 0xFEFF;
static const unichar byteOrderMarkSwapped = 0xFFFE;
528
529#ifdef HAVE_REGISTER_PRINTF_FUNCTION
530#include <stdio.h>
531#include <printf.h>
532
533/* <sattler@volker.cs.Uni-Magdeburg.DE>, with libc-5.3.9 thinks this
534   flag PRINTF_ATSIGN_VA_LIST should be 0, but for me, with libc-5.0.9,
535   it crashes.  -mccallum
536
537   Apparently GNU libc 2.xx needs this to be 0 also, along with Linux
538   libc versions 5.2.xx and higher (including libc6, which is just GNU
539   libc). -chung */
540#if defined(_LINUX_C_LIB_VERSION_MINOR)	\
541  && _LINUX_C_LIB_VERSION_MAJOR <= 5	\
542  && _LINUX_C_LIB_VERSION_MINOR < 2
543#define PRINTF_ATSIGN_VA_LIST	1
544#else
545#define PRINTF_ATSIGN_VA_LIST	0
546#endif
547
#if ! PRINTF_ATSIGN_VA_LIST
/* Argument information callback registered for the '%@' conversion.
 * It tells printf that the conversion consumes exactly one argument and
 * that the argument is a pointer.
 * info is unused.  n is the number of entries available in argtypes;
 * nothing is stored unless there is room for at least one entry (the
 * return value still reports that one argument is required).
 * The size argument (only present with register_printf_specifier) is
 * left untouched.
 */
static int
arginfo_func (const struct printf_info *info, size_t n, int *argtypes
#if     defined(HAVE_REGISTER_PRINTF_SPECIFIER)
, int *size
#endif
)
{
  if (n > 0)
    {
      argtypes[0] = PA_POINTER;
    }
  return 1;
}
#endif /* !PRINTF_ATSIGN_VA_LIST */
560
/* Output handler registered with printf for the '%@' conversion.
 * Fetches the object argument, sends it -description, and writes the
 * resulting string to stream, honouring the field width and the left
 * justification flag from info.  Returns the value returned by the
 * fprintf()/fwprintf() call that did the output.
 * The type of the last parameter depends on the C library in use, which
 * is why the signature is completed by the preprocessor.
 */
static int
handle_printf_atsign (FILE *stream,
		      const struct printf_info *info,
#if PRINTF_ATSIGN_VA_LIST
		      va_list *ap_pointer)
#elif defined(_LINUX_C_LIB_VERSION_MAJOR)       \
     && _LINUX_C_LIB_VERSION_MAJOR < 6
                      const void **const args)
#else /* GNU libc needs the following. */
                      const void *const *args)
#endif
{
#if ! PRINTF_ATSIGN_VA_LIST
  const void *ptr = *args;
#endif
  id string_object;
  int len;

  /* xxx This implementation may not pay attention to as much
     of printf_info as it should. */

  /* Get hold of the object to be printed, either directly from the
   * va_list or through the pointer to the argument which we were given.
   */
#if PRINTF_ATSIGN_VA_LIST
  string_object = va_arg (*ap_pointer, id);
#else
  string_object = *((id*) ptr);
#endif
  string_object = [string_object description];

#if HAVE_WIDE_PRINTF_FUNCTION
  if (info->wide)
    {
      if (sizeof(wchar_t) == 4)
        {
	  /* wchar_t is wider than unichar, so copy the characters one
	   * at a time into a nul terminated on-stack wchar_t buffer
	   * (sized by the length of the description).
	   */
	  unsigned	length = [string_object length];
	  wchar_t	buf[length + 1];
	  unsigned	i;

	  for (i = 0; i < length; i++)
	    {
	      buf[i] = [string_object characterAtIndex: i];
	    }
	  buf[i] = 0;
          len = fwprintf(stream, L"%*ls",
	    (info->left ? - info->width : info->width), buf);
        }
      else
        {
	  /* Otherwise the unicode encoded C string is passed to
	   * fwprintf() as it stands.
	   */
          len = fwprintf(stream, L"%*ls",
	    (info->left ? - info->width : info->width),
	    [string_object cStringUsingEncoding: NSUnicodeStringEncoding]);
	}
    }
  else
#endif	/* HAVE_WIDE_PRINTF_FUNCTION */
    {
      /* Narrow output ... a negative width asks for left justification. */
      len = fprintf(stream, "%*s",
	(info->left ? - info->width : info->width),
	[string_object lossyCString]);
    }
  return len;
}
622#endif /* HAVE_REGISTER_PRINTF_FUNCTION */
623
/* Registers handle_printf_atsign() with the C library as the handler for
 * the '%@' conversion, using register_printf_specifier() where that is
 * available and register_printf_function() otherwise.  The argument
 * information callback is arginfo_func(), or 0 when the handler takes
 * its argument from a va_list.
 * Raises NSGenericException if the registration call returns non-zero.
 * Does nothing if neither registration function is available.
 */
static void
register_printf_atsign ()
{
#if     defined(HAVE_REGISTER_PRINTF_SPECIFIER)
      if (register_printf_specifier ('@', handle_printf_atsign,
#if PRINTF_ATSIGN_VA_LIST
				    0))
#else
	                            arginfo_func))
#endif
	[NSException raise: NSGenericException
		     format: @"register printf handling of %%@ failed"];
#elif   defined(HAVE_REGISTER_PRINTF_FUNCTION)
      if (register_printf_function ('@', handle_printf_atsign,
#if PRINTF_ATSIGN_VA_LIST
				    0))
#else
	                            arginfo_func))
#endif
	[NSException raise: NSGenericException
		     format: @"register printf handling of %%@ failed"];
#endif
}
647
648
649#if GS_USE_ICU == 1
650/**
651 * Returns an ICU collator for the given locale and options, or returns
652 * NULL if a collator couldn't be created or the GNUstep comparison code
653 * should be used instead.
654 */
static UCollator *
GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
{
  UErrorCode status = U_ZERO_ERROR;
  const char *localeCString;
  UCollator *coll;

  /* Literal comparisons are always done by the GNUstep code. */
  if (mask & NSLiteralSearch)
    {
      return NULL;
    }

  if (NO == [locale isKindOfClass: [NSLocale class]])
    {
      if (nil == locale)
        {
          /* See comments below about the posix locale.
           * It's bad for case insensitive search, but needed for numeric
           */
          if (mask & NSNumericSearch)
            {
              locale = [NSLocale systemLocale];
            }
          else
            {
              /* A nil locale should trigger POSIX collation (i.e. 'A'-'Z' sort
               * before 'a'), and support for this was added in ICU 4.6 under the
               * locale name en_US_POSIX, but it doesn't fit our requirements
               * (e.g. 'e' and 'E' don't compare as equal with case insensitive
               * comparison.) - so return NULL to indicate that the GNUstep
               * comparison code should be used.
               */
              return NULL;
            }
        }
      else
        {
          /* Some non-nil object which is not a locale was supplied ...
           * use the current locale.
           */
          locale = [NSLocale currentLocale];
        }
    }

  localeCString = [[locale localeIdentifier] UTF8String];

  /* An empty identifier is passed to ICU as NULL. */
  if (localeCString != NULL && strcmp("", localeCString) == 0)
    {
      localeCString = NULL;
    }

  coll = ucol_open(localeCString, &status);

  if (U_SUCCESS(status))
    {
      /* Map the insensitivity options onto an ICU collation strength.
       * The test for BOTH options must require both bits to be set;
       * testing 'mask & (A | B)' would be true when either one is set
       * and would prevent the single-option cases from being reached.
       */
      if ((mask & NSCaseInsensitiveSearch)
        && (mask & NSDiacriticInsensitiveSearch))
	{
	  /* Ignore both case and diacritics:  base letters only. */
	  ucol_setStrength(coll, UCOL_PRIMARY);
	}
      else if (mask & NSCaseInsensitiveSearch)
	{
	  /* Ignore case only:  diacritics are still significant. */
	  ucol_setStrength(coll, UCOL_SECONDARY);
	}
      else if (mask & NSDiacriticInsensitiveSearch)
	{
	  /* Ignore diacritics only:  base letters plus a case level. */
	  ucol_setStrength(coll, UCOL_PRIMARY);
	  ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
	}

      if (mask & NSNumericSearch)
	{
	  ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
	}

      if (U_SUCCESS(status))
	{
	  return coll;
	}
    }

  /* Opening or configuring the collator failed ... clean up and tell the
   * caller to use the GNUstep comparison code.
   */
  ucol_close(coll);
  return NULL;
}
735
736#if defined(HAVE_UNICODE_UNORM2_H)
/* Returns an autoreleased copy of the receiver normalised by libicu.
 * The normalization argument names the ICU normalization data and mode
 * selects the operation; both are passed unchanged to
 * unorm2_getInstance().
 * An empty receiver yields the empty string.
 * Raises NSCharacterConversionException if libicu reports a failure, and
 * NSMallocException if a working buffer can not be allocated.
 */
- (NSString *) _normalizedICUStringOfType: (const char*)normalization
                                     mode: (UNormalization2Mode)mode
{
  UErrorCode            err;
  const UNormalizer2    *normalizer;
  int32_t               length;
  int32_t               newLength;
  NSString              *newString = nil;

  length = (uint32_t)[self length];
  if (0 == length)
    {
      return @"";       // Simple case ... empty string
    }

  err = 0;
  normalizer = unorm2_getInstance(NULL, normalization, mode, &err);
  if (U_FAILURE(err))
    {
      [NSException raise: NSCharacterConversionException
                  format: @"libicu unorm2_getInstance() failed"];
    }

  if (length < 200)
    {
      unichar   src[length];
      unichar   dst[length*3];

      /* For a short string, it's very efficient to just use on-stack
       * buffers for the libicu work, and then let the standard string
       * initialiser convert that to an inline string.
       */
      [self getCharacters: (unichar *)src range: NSMakeRange(0, length)];
      err = 0;
      newLength = unorm2_normalize(normalizer, (UChar*)src, length,
        (UChar*)dst, length*3, &err);
      if (U_SUCCESS(err))
        {
          newString = [[NSString alloc] initWithCharacters: dst
                                                    length: newLength];
        }
      else if (U_BUFFER_OVERFLOW_ERROR != err)
        {
          [NSException raise: NSCharacterConversionException
                      format: @"precompose/decompose failed"];
        }
      /* On buffer overflow (some characters expand to more than three
       * UTF-16 units when decomposed) we leave newString as nil so that
       * the heap based code below, which asks libicu for the exact size
       * needed, does the work instead of failing.
       */
    }

  if (nil == newString)
    {
      unichar   *src;
      unichar   *dst;

      /* For longer strings, we copy the source into a buffer on the heap
       * for the libicu operation, determine the length needed for the
       * output buffer, then do the actual conversion to build the string.
       */
      src = (unichar*)malloc(length * sizeof(unichar));
      if (NULL == src)
        {
          [NSException raise: NSMallocException
                      format: @"precompose/decompose out of memory"];
        }
      [self getCharacters: (unichar*)src range: NSMakeRange(0, length)];
      err = 0;
      /* Preflight with no destination buffer:  libicu reports an overflow
       * and returns the length which is required.
       */
      newLength = unorm2_normalize(normalizer, (UChar*)src, length,
        0, 0, &err);
      if (U_BUFFER_OVERFLOW_ERROR != err)
        {
          free(src);
          [NSException raise: NSCharacterConversionException
                      format: @"precompose/decompose length check failed"];
        }
      dst = NSZoneMalloc(NSDefaultMallocZone(), newLength * sizeof(unichar));
      err = 0;
      unorm2_normalize(normalizer, (UChar*)src, length,
        (UChar*)dst, newLength, &err);
      free(src);
      if (U_FAILURE(err))
        {
          NSZoneFree(NSDefaultMallocZone(), dst);
          [NSException raise: NSCharacterConversionException
                      format: @"precompose/decompose failed"];
        }
      /* The new string takes over ownership of the dst buffer. */
      newString = [[NSString alloc] initWithCharactersNoCopy: dst
                                                      length: newLength
                                                freeWhenDone: YES];
    }

  return AUTORELEASE(newString);
}
818#endif
819#endif
820
/* Exit-time cleanup (the class is registered for this at the end of
 * +initialize):  releases the map of per-zone placeholder strings.
 */
+ (void) atExit
{
  DESTROY(placeholderMap);
}
825
/* One-time setup of the file-level caches used by the string code.
 * Only acts when the receiver is NSString itself and only the first time
 * it is called.
 */
+ (void) initialize
{
  /*
   * Flag required as we call this method explicitly from GSBuildStrings()
   * to ensure that NSString is initialised properly.
   */
  static BOOL	beenHere = NO;

  if (self == [NSString class] && beenHere == NO)
    {
      beenHere = YES;
      /* Cache the selectors which are used for fast method lookup. */
      cMemberSel = @selector(characterIsMember:);
      caiSel = @selector(characterAtIndex:);
      gcrSel = @selector(getCharacters:range:);
      ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:);

      /* Cache the non-base character set and the implementation of its
       * membership test, for use by uni_isnonsp().
       */
      nonBase = [NSCharacterSet nonBaseCharacterSet];
      nonBase = [NSObject leakAt: &nonBase];
      nonBaseImp
        = (BOOL(*)(id,SEL,unichar))[nonBase methodForSelector: cMemberSel];

      _DefaultStringEncoding = GSPrivateDefaultCStringEncoding();
      _ByteEncodingOk = GSPrivateIsByteEncoding(_DefaultStringEncoding);

      /* Cache the classes used throughout the string code. */
      NSStringClass = self;
      [self setVersion: 1];
      NSMutableStringClass = [NSMutableString class];
      NSDataClass = [NSData class];
      GSPlaceholderStringClass = [GSPlaceholderString class];
      GSStringClass = [GSString class];
      GSMutableStringClass = [GSMutableString class];

      /*
       * Set up infrastructure for placeholder strings.
       * The map is keyed by zone pointer and does not retain the
       * placeholders stored in it.
       */
      defaultPlaceholderString = (GSPlaceholderString*)
	[GSPlaceholderStringClass allocWithZone: NSDefaultMallocZone()];
      placeholderMap = NSCreateMapTable(NSNonOwnedPointerMapKeyCallBacks,
	NSNonRetainedObjectMapValueCallBacks, 0);
      /* Teach the C library printf functions about '%@' (where the
       * library supports that) and arrange for +atExit to be called.
       */
      register_printf_atsign();
      [self registerAtExit];
    }
}
869
/* Allocation for the NSString class cluster.
 * For NSString itself this returns a shared placeholder object (one per
 * zone) which is converted to a real string by its initialiser.
 * For GSString and its subclasses (the private concrete classes) it
 * raises NSInternalInconsistencyException, as instances of those classes
 * must not be created this way.
 * For any other subclass it allocates a normal instance in zone z.
 */
+ (id) allocWithZone: (NSZone*)z
{
  if (self == NSStringClass)
    {
      /*
       * For NSString itself, we return a placeholder object that can
       * be converted to a real object when its initialisation method
       * is called.
       */
      if (z == NSDefaultMallocZone() || z == 0)
	{
	  /*
	   * As a special case, we can return a placeholder for a string
	   * in the default zone extremely efficiently.
	   */
	  return defaultPlaceholderString;
	}
      else
	{
	  id	obj;

	  /*
	   * For anything other than the default zone, we need to
	   * locate the correct placeholder in the (lock protected)
	   * table of placeholders.
	   */
	  (void)pthread_mutex_lock(&placeholderLock);
	  obj = (id)NSMapGet(placeholderMap, (void*)z);
	  if (obj == nil)
	    {
	      /*
	       * There is no placeholder object for this zone, so we
	       * create a new one and use that.
	       */
	      obj = (id)[GSPlaceholderStringClass allocWithZone: z];
	      NSMapInsert(placeholderMap, (void*)z, (void*)obj);
	    }
	  (void)pthread_mutex_unlock(&placeholderLock);
	  return obj;
	}
    }
  else if ([self isSubclassOfClass: GSStringClass] == YES)
    {
      /* The receiver is a class, so the class hierarchy has to be tested
       * with +isSubclassOfClass: ... sending -isKindOfClass: to a class
       * object asks about the class object itself rather than about its
       * instances and would never detect the private classes.
       */
      [NSException raise: NSInternalInconsistencyException
		  format: @"Called +allocWithZone: on private string class"];
      return nil;	/* NOT REACHED */
    }
  else
    {
      /*
       * For user provided strings, we simply allocate an object of
       * the given class.
       */
      return NSAllocateObject (self, 0, z);
    }
}
926
927/**
928 * Return the class used to store constant strings (those ascii strings
929 * placed in the source code using the @"this is a string" syntax).<br />
930 * Use this method to obtain the constant string class rather than
931 * using the obsolete name <em>NXConstantString</em> in your code ...
932 * with more recent compiler versions the name of this class is variable
933 * (and will automatically be changed by GNUstep to avoid conflicts
934 * with the default implementation in the Objective-C runtime library).
935 */
+ (Class) constantStringClass
{
  /* Whatever class the compiler gives a literal string is, by
   * definition, the constant string class.
   */
  NSString	*literal = @"";

  return [literal class];
}
940
941/**
942 * Create an empty string.
943 */
+ (id) string
{
  id	empty;

  /* Allocate in the default zone, initialise with no content, and hand
   * the result over to the autorelease pool.
   */
  empty = [self allocWithZone: NSDefaultMallocZone()];
  empty = [empty init];
  return AUTORELEASE(empty);
}
948
949/**
950 * Create a copy of aString.
951 */
+ (id) stringWithString: (NSString*)aString
{
  NSString	*placeholder;

  /* A nil argument is a programming error ... refuse it. */
  if (aString == NULL)
    {
      [NSException raise: NSInvalidArgumentException
	format: @"[NSString+stringWithString:]: NULL string"];
    }
  placeholder = [self allocWithZone: NSDefaultMallocZone()];
  return AUTORELEASE([placeholder initWithString: aString]);
}
963
964/**
965 * Create a string of unicode characters.
966 */
+ (id) stringWithCharacters: (const unichar*)chars
		     length: (NSUInteger)length
{
  NSString	*placeholder = [self allocWithZone: NSDefaultMallocZone()];

  /* The initialiser does all the work; we just autorelease its result. */
  return AUTORELEASE([placeholder initWithCharacters: chars length: length]);
}
976
977/**
978 * Create a string based on the given C (char[]) string, which should be
979 * null-terminated and encoded in the default C string encoding.  (Characters
980 * will be converted to unicode representation internally.)
981 */
+ (id) stringWithCString: (const char*)byteString
{
  NSString	*placeholder;

  /* A null pointer can not be a C string ... refuse it. */
  if (byteString == NULL)
    {
      [NSException raise: NSInvalidArgumentException
	format: @"[NSString+stringWithCString:]: NULL cString"];
    }
  placeholder = [self allocWithZone: NSDefaultMallocZone()];
  return AUTORELEASE([placeholder initWithCString: byteString]);
}
993
994/**
995 * Create a string based on the given C (char[]) string, which should be
996 * null-terminated and encoded in the specified C string encoding.
997 * Characters may be converted to unicode representation internally.
998 */
+ (id) stringWithCString: (const char*)byteString
		encoding: (NSStringEncoding)encoding
{
  NSString	*placeholder;

  /* A null pointer can not be a C string ... refuse it. */
  if (byteString == NULL)
    {
      [NSException raise: NSInvalidArgumentException
	format: @"[NSString+stringWithCString:encoding:]: NULL cString"];
    }
  placeholder = [self allocWithZone: NSDefaultMallocZone()];
  return AUTORELEASE([placeholder initWithCString: byteString
					 encoding: encoding]);
}
1011
1012/**
1013 * Create a string based on the given C (char[]) string, which may contain
1014 * null bytes and should be encoded in the default C string encoding.
1015 * (Characters will be converted to unicode representation internally.)
1016 */
+ (id) stringWithCString: (const char*)byteString
		  length: (NSUInteger)length
{
  NSString	*placeholder = [self allocWithZone: NSDefaultMallocZone()];

  /* No check is made on byteString here; it is passed straight on to
   * the initialiser along with the length.
   */
  return AUTORELEASE([placeholder initWithCString: byteString
					   length: length]);
}
1026
1027/**
1028 * Create a string based on the given UTF-8 string, null-terminated.<br />
1029 * Raises NSInvalidArgumentException if given NULL pointer.
1030 */
+ (id) stringWithUTF8String: (const char *)bytes
{
  NSString	*placeholder;

  if (bytes == NULL)
    {
      [NSException raise: NSInvalidArgumentException
		  format: @"[NSString+stringWithUTF8String:]: NULL cString"];
    }

  /* For NSString itself we can use the default zone placeholder directly
   * rather than going through +allocWithZone: to find it.
   */
  placeholder = (self == NSStringClass)
    ? (NSString*)defaultPlaceholderString
    : (NSString*)[self allocWithZone: NSDefaultMallocZone()];
  return AUTORELEASE([placeholder initWithUTF8String: bytes]);
}
1049
1050/**
1051 * Load contents of file at path into a new string.  Will interpret file as
1052 * containing direct unicode if it begins with the unicode byte order mark,
1053 * else converts to unicode using default C string encoding.
1054 */
1055+ (id) stringWithContentsOfFile: (NSString *)path
1056{
1057  NSString	*obj;
1058
1059  obj = [self allocWithZone: NSDefaultMallocZone()];
1060  obj = [obj initWithContentsOfFile: path];
1061  return AUTORELEASE(obj);
1062}
1063
1064/**
1065 * Load contents of file at path into a new string using the
1066 * -initWithContentsOfFile:usedEncoding:error: method.
1067 */
1068+ (id) stringWithContentsOfFile: (NSString *)path
1069                   usedEncoding: (NSStringEncoding*)enc
1070                          error: (NSError**)error
1071{
1072  NSString	*obj;
1073
1074  obj = [self allocWithZone: NSDefaultMallocZone()];
1075  obj = [obj initWithContentsOfFile: path usedEncoding: enc error: error];
1076  return AUTORELEASE(obj);
1077}
1078
1079/**
1080 * Load contents of file at path into a new string using the
1081 * -initWithContentsOfFile:encoding:error: method.
1082 */
1083+ (id) stringWithContentsOfFile: (NSString*)path
1084                       encoding: (NSStringEncoding)enc
1085                          error: (NSError**)error
1086{
1087   NSString	*obj;
1088
1089  obj = [self allocWithZone: NSDefaultMallocZone()];
1090  obj = [obj initWithContentsOfFile: path encoding: enc error: error];
1091  return AUTORELEASE(obj);
1092}
1093
1094/**
1095 * Load contents of given URL into a new string.  Will interpret contents as
1096 * containing direct unicode if it begins with the unicode byte order mark,
1097 * else converts to unicode using default C string encoding.
1098 */
1099+ (id) stringWithContentsOfURL: (NSURL *)url
1100{
1101  NSString	*obj;
1102
1103  obj = [self allocWithZone: NSDefaultMallocZone()];
1104  obj = [obj initWithContentsOfURL: url];
1105  return AUTORELEASE(obj);
1106}
1107
/**
 * Returns an autoreleased string holding the contents of the given URL,
 * as produced by -initWithContentsOfURL:usedEncoding:error: (the enc and
 * error arguments are passed through unchanged).
 */
+ (id) stringWithContentsOfURL: (NSURL*)url
                  usedEncoding: (NSStringEncoding*)enc
                         error: (NSError**)error
{
  NSString	*fresh = [self allocWithZone: NSDefaultMallocZone()];

  fresh = [fresh initWithContentsOfURL: url
                          usedEncoding: enc
                                 error: error];
  return AUTORELEASE(fresh);
}
1118
/**
 * Returns an autoreleased string holding the contents of the given URL,
 * as produced by -initWithContentsOfURL:encoding:error: (the enc and
 * error arguments are passed through unchanged).
 */
+ (id) stringWithContentsOfURL: (NSURL*)url
                      encoding: (NSStringEncoding)enc
                         error: (NSError**)error
{
  NSString	*fresh = [self allocWithZone: NSDefaultMallocZone()];

  fresh = [fresh initWithContentsOfURL: url
                              encoding: enc
                                 error: error];
  return AUTORELEASE(fresh);
}
1129
1130/**
1131 * Creates a new string using C printf-style formatting.  First argument should
1132 * be a constant format string, like '<code>@"float val = %f"</code>', remaining
1133 * arguments should be the variables to print the values of, comma-separated.
1134 */
1135+ (id) stringWithFormat: (NSString*)format,...
1136{
1137  va_list ap;
1138  NSString	*obj;
1139
1140  if (NULL == format)
1141    [NSException raise: NSInvalidArgumentException
1142      format: @"[NSString+stringWithFormat:]: NULL format"];
1143  va_start(ap, format);
1144  obj = [self allocWithZone: NSDefaultMallocZone()];
1145  obj = [obj initWithFormat: format arguments: ap];
1146  va_end(ap);
1147  return AUTORELEASE(obj);
1148}
1149
1150
1151/**
1152 * <p>In MacOS-X class clusters do not have designated initialisers,
1153 * and there is a general rule that -init is treated as the designated
 * initialiser of the class cluster, but that other initialisers
1155 * may not work as expected and would need to be individually overridden
1156 * in any subclass.
1157 * </p>
1158 * <p>GNUstep tries to make it easier to subclass a class cluster,
1159 * by making class clusters follow the same convention as normal
1160 * classes, so the designated initialiser is the <em>richest</em>
1161 * initialiser.  This means that all other initialisers call the
1162 * documented designated initialiser (which calls -init only for
1163 * MacOS-X compatibility), and anyone writing a subclass only needs
1164 * to override that one initialiser in order to have all the other
1165 * ones work.
1166 * </p>
1167 * <p>For MacOS-X compatibility, you may also need to override various
1168 * other initialisers.  Exactly which ones, you will need to determine
1169 * by trial on a MacOS-X system ... and may vary between releases of
1170 * MacOS-X.  So to be safe, on MacOS-X you probably need to re-implement
1171 * <em>all</em> the class cluster initialisers you might use in conjunction
1172 * with your subclass.
1173 * </p>
1174 * <p>NB. The GNUstep designated initialiser for the NSString class cluster
1175 * has changed to -initWithBytesNoCopy:length:encoding:freeWhenDone:
1176 * from -initWithCharactersNoCopy:length:freeWhenDone: and older code
1177 * subclassing NSString will need to be updated.
1178 * </p>
1179 */
- (id) init
{
  /* Nothing to set up at this level; defer entirely to the superclass. */
  return [super init];
}
1185
1186/**
1187 * Initialises the receiver with a copy of the supplied length of bytes,
1188 * using the specified encoding.<br />
1189 * For NSUnicodeStringEncoding and NSUTF8String encoding, a Byte Order
1190 * Marker (if present at the start of the data) is removed automatically.<br />
1191 * If the data can not be interpreted using the encoding, the receiver
1192 * is released and nil is returned.
1193 */
1194- (id) initWithBytes: (const void*)bytes
1195	      length: (NSUInteger)length
1196	    encoding: (NSStringEncoding)encoding
1197{
1198  if (length == 0)
1199    {
1200      return [self initWithBytesNoCopy: (void *)0
1201				length: 0
1202			      encoding: encoding
1203			  freeWhenDone: NO];
1204    }
1205  else
1206    {
1207      void	*buf;
1208
1209      buf = NSZoneMalloc([self zone], length);
1210      memcpy(buf, bytes, length);
1211      return [self initWithBytesNoCopy: buf
1212				length: length
1213			      encoding: encoding
1214			  freeWhenDone: YES];
1215    }
1216}
1217
1218/** <init /> <override-subclass />
1219 * Initialises the receiver with the supplied length of bytes, using the
1220 * specified encoding.<br />
 * For NSUnicodeStringEncoding and NSUTF8StringEncoding, a Byte Order
1222 * Marker (if present at the start of the data) is removed automatically.<br />
1223 * If the data is not in a format which can be used internally unmodified,
1224 * it is copied, otherwise it is used as is.  If the data is not copied
1225 * the flag determines whether the string will free it when it is no longer
1226 * needed (ie whether the new NSString instance 'owns' the memory).<br />
1227 * In the case of non-owned memory, it is the caller's responsibility to
1228 * ensure that the data continues to exist and is not modified until the
1229 * receiver is deallocated.<br />
1230 * If the data can not be interpreted using the encoding, the receiver
1231 * is released and nil is returned.
1232 * <p>Note, this is the most basic initialiser for strings.
1233 * In the GNUstep implementation, your subclasses may override
1234 * this initialiser in order to have all other functionality.</p>
1235 */
- (id) initWithBytesNoCopy: (void*)bytes
		    length: (NSUInteger)length
		  encoding: (NSStringEncoding)encoding
	      freeWhenDone: (BOOL)flag
{
  /*
   * This base implementation ignores all four arguments and simply
   * invokes -init.
   */
  return [self init];
}
1244
1245/**
1246 * <p>Initialize with given unicode chars up to length, regardless of presence
1247 *  of null bytes.  Does not copy the string.  If flag, frees its storage when
1248 *  this instance is deallocated.</p>
1249 * See -initWithBytesNoCopy:length:encoding:freeWhenDone: for more details.
1250 */
1251- (id) initWithCharactersNoCopy: (unichar*)chars
1252			 length: (NSUInteger)length
1253		   freeWhenDone: (BOOL)flag
1254{
1255  return [self initWithBytesNoCopy: chars
1256			    length: length * sizeof(unichar)
1257			  encoding: NSUnicodeStringEncoding
1258		      freeWhenDone: flag];
1259}
1260
1261/**
1262 * <p>Initialize with given unicode chars up to length, regardless of presence
1263 *  of null bytes.  Copies the string and frees copy when deallocated.</p>
1264 */
1265- (id) initWithCharacters: (const unichar*)chars
1266		   length: (NSUInteger)length
1267{
1268  return [self initWithBytes: chars
1269		      length: length * sizeof(unichar)
1270		    encoding: NSUnicodeStringEncoding];
1271}
1272
1273/**
1274 * <p>Initialize with given C string byteString up to length, regardless of
1275 *  presence of null bytes.  Characters converted to unicode based on the
1276 *  default C encoding.  Does not copy the string.  If flag, frees its storage
1277 *  when this instance is deallocated.</p>
1278 * See -initWithBytesNoCopy:length:encoding:freeWhenDone: for more details.
1279 */
1280- (id) initWithCStringNoCopy: (char*)byteString
1281		      length: (NSUInteger)length
1282		freeWhenDone: (BOOL)flag
1283{
1284  return [self initWithBytesNoCopy: byteString
1285			    length: length
1286			  encoding: _DefaultStringEncoding
1287		      freeWhenDone: flag];
1288}
1289
1290/**
1291 * <p>Initialize with given C string byteString up to first nul byte.
1292 * Characters converted to unicode based on the specified C encoding.
1293 * Copies the string.</p>
1294 */
1295- (id) initWithCString: (const char*)byteString
1296	      encoding: (NSStringEncoding)encoding
1297{
1298  if (NULL == byteString)
1299    [NSException raise: NSInvalidArgumentException
1300      format: @"[NSString-initWithCString:encoding:]: NULL cString"];
1301  return [self initWithBytes: byteString
1302		      length: strlen(byteString)
1303		    encoding: encoding];
1304}
1305
1306/**
1307 * <p>Initialize with given C string byteString up to length, regardless of
1308 *  presence of null bytes.  Characters converted to unicode based on the
1309 *  default C encoding.  Copies the string.</p>
1310 */
1311- (id) initWithCString: (const char*)byteString  length: (NSUInteger)length
1312{
1313  return [self initWithBytes: byteString
1314		      length: length
1315		    encoding: _DefaultStringEncoding];
1316}
1317
1318/**
1319 * <p>Initialize with given C string byteString, which should be
1320 * null-terminated.  Characters are converted to unicode based on the default
1321 * C encoding.  Copies the string.</p>
1322 */
1323- (id) initWithCString: (const char*)byteString
1324{
1325  if (NULL == byteString)
1326    [NSException raise: NSInvalidArgumentException
1327      format: @"[NSString-initWithCString:]: NULL cString"];
1328  return [self initWithBytes: byteString
1329		      length: strlen(byteString)
1330		    encoding: _DefaultStringEncoding];
1331}
1332
1333/**
1334 * Initialize to be a copy of the given string.
1335 */
1336- (id) initWithString: (NSString*)string
1337{
1338  unsigned	length = [string length];
1339
1340  if (NULL == string)
1341    [NSException raise: NSInvalidArgumentException
1342      format: @"[NSString-initWithString:]: NULL string"];
1343  if (length > 0)
1344    {
1345      unichar	*s = NSZoneMalloc([self zone], sizeof(unichar)*length);
1346
1347      [string getCharacters: s range: ((NSRange){0, length})];
1348      self = [self initWithCharactersNoCopy: s
1349				     length: length
1350			       freeWhenDone: YES];
1351    }
1352  else
1353    {
1354      self = [self initWithCharactersNoCopy: (unichar*)0
1355				     length: 0
1356			       freeWhenDone: NO];
1357    }
1358  return self;
1359}
1360
1361/**
1362 * Initialize based on given null-terminated UTF-8 string bytes.
1363 */
1364- (id) initWithUTF8String: (const char *)bytes
1365{
1366  if (NULL == bytes)
1367    [NSException raise: NSInvalidArgumentException
1368		format: @"[NSString-initWithUTF8String:]: NULL cString"];
1369  return [self initWithBytes: bytes
1370		      length: strlen(bytes)
1371		    encoding: NSUTF8StringEncoding];
1372}
1373
1374/**
1375 * Invokes -initWithFormat:locale:arguments: with a nil locale.
1376 */
1377- (id) initWithFormat: (NSString*)format,...
1378{
1379  va_list ap;
1380  va_start(ap, format);
1381  self = [self initWithFormat: format locale: nil arguments: ap];
1382  va_end(ap);
1383  return self;
1384}
1385
1386/**
1387 * Invokes -initWithFormat:locale:arguments:
1388 */
1389- (id) initWithFormat: (NSString*)format
1390               locale: (NSDictionary*)locale, ...
1391{
1392  va_list ap;
1393  va_start(ap, locale);
1394  self = [self initWithFormat: format locale: locale arguments: ap];
1395  va_end(ap);
1396  return self;
1397}
1398
1399/**
1400 * Invokes -initWithFormat:locale:arguments: with a nil locale.
1401 */
1402- (id) initWithFormat: (NSString*)format
1403            arguments: (va_list)argList
1404{
1405  return [self initWithFormat: format locale: nil arguments: argList];
1406}
1407
1408/**
1409 * Initialises the string using the specified format and locale
1410 * to format the following arguments.
1411 */
1412- (id) initWithFormat: (NSString*)format
1413               locale: (NSDictionary*)locale
1414            arguments: (va_list)argList
1415{
1416  unsigned char	buf[2048];
1417  GSStr		f;
1418  unichar	fbuf[1024];
1419  unichar	*fmt = fbuf;
1420  size_t	len;
1421
1422  if (NULL == format)
1423    [NSException raise: NSInvalidArgumentException
1424      format: @"[NSString-initWithFormat:locale:arguments:]: NULL format"];
1425  /*
1426   * First we provide an array of unichar characters containing the
1427   * format string.  For performance reasons we try to use an on-stack
1428   * buffer if the format string is small enough ... it almost always
1429   * will be.
1430   */
1431  len = [format length];
1432  if (len >= 1024)
1433    {
1434      fmt = NSZoneMalloc(NSDefaultMallocZone(), (len+1)*sizeof(unichar));
1435    }
1436  [format getCharacters: fmt range: ((NSRange){0, len})];
1437  fmt[len] = '\0';
1438
1439  /*
1440   * Now set up 'f' as a GSMutableString object whose initial buffer is
1441   * allocated on the stack.  The GSPrivateFormat function can write into it.
1442   */
1443  f = (GSStr)alloca(class_getInstanceSize(GSMutableStringClass));
1444  object_setClass(f, GSMutableStringClass);
1445  f->_zone = NSDefaultMallocZone();
1446  f->_contents.c = buf;
1447  f->_capacity = sizeof(buf);
1448  f->_count = 0;
1449  f->_flags.wide = 0;
1450  f->_flags.owned = 0;
1451  f->_flags.unused = 0;
1452  f->_flags.hash = 0;
1453  GSPrivateFormat(f, fmt, argList, locale);
1454  GSPrivateStrExternalize(f);
1455  if (fmt != fbuf)
1456    {
1457      NSZoneFree(NSDefaultMallocZone(), fmt);
1458    }
1459
1460  /*
1461   * Don't use noCopy because f->_contents.u may be memory on the stack,
1462   * and even if it wasn't f->_capacity may be greater than f->_count so
1463   * we could be wasting quite a bit of space.  Better to accept a
1464   * performance hit due to copying data (and allocating/deallocating
1465   * the temporary buffer) for large strings.  For most strings, the
1466   * on-stack memory will have been used, so we will get better performance.
1467   */
1468  if (f->_flags.wide == 1)
1469    {
1470      self = [self initWithCharacters: f->_contents.u length: f->_count];
1471    }
1472  else
1473    {
1474      self = [self initWithCString: (char*)f->_contents.c length: f->_count];
1475    }
1476
1477  /*
1478   * If the string had to grow beyond the initial buffer size, we must
1479   * release any allocated memory.
1480   */
1481  if (f->_flags.owned == 1)
1482    {
1483      NSZoneFree(f->_zone, f->_contents.c);
1484    }
1485  return self;
1486}
1487
1488/**
1489 * Initialises the receiver with the supplied data, using the
1490 * specified encoding.<br />
1491 * For NSUnicodeStringEncoding and NSUTF8String encoding, a Byte Order
1492 * Marker (if present at the start of the data) is removed automatically.<br />
1493 * If the data can not be interpreted using the encoding, the receiver
1494 * is released and nil is returned.
1495 */
1496- (id) initWithData: (NSData*)data
1497	   encoding: (NSStringEncoding)encoding
1498{
1499  return [self initWithBytes: [data bytes]
1500		      length: [data length]
1501		    encoding: encoding];
1502}
1503
1504/**
1505 * <p>Initialises the receiver with the contents of the file at path.
1506 * </p>
1507 * <p>Invokes [NSData-initWithContentsOfFile:] to read the file, then
1508 * examines the data to infer its encoding type, and converts the
1509 * data to a string using -initWithData:encoding:
1510 * </p>
1511 * <p>The encoding to use is determined as follows ... if the data begins
1512 * with the 16-bit unicode Byte Order Marker, then it is assumed to be
1513 * unicode data in the appropriate ordering and converted as such.<br />
1514 * If it begins with a UTF8 representation of the BOM, the UTF8 encoding
1515 * is used.<br />
1516 * Otherwise, the default C String encoding is used.
1517 * </p>
1518 * <p>Releases the receiver and returns nil if the file could not be read
1519 * and converted to a string.
1520 * </p>
1521 */
1522- (id) initWithContentsOfFile: (NSString*)path
1523{
1524  NSStringEncoding	enc = _DefaultStringEncoding;
1525  NSData		*d;
1526  unsigned int		len;
1527  const unsigned char	*data_bytes;
1528
1529  d = [[NSDataClass alloc] initWithContentsOfFile: path];
1530  if (d == nil)
1531    {
1532      DESTROY(self);
1533      return nil;
1534    }
1535  len = [d length];
1536  if (len == 0)
1537    {
1538      RELEASE(d);
1539      DESTROY(self);
1540      return @"";
1541    }
1542  data_bytes = [d bytes];
1543  if ((data_bytes != NULL) && (len >= 2))
1544    {
1545      const unichar *data_ucs2chars = (const unichar *)(void*) data_bytes;
1546      if ((data_ucs2chars[0] == byteOrderMark)
1547	|| (data_ucs2chars[0] == byteOrderMarkSwapped))
1548	{
1549	  /* somebody set up us the BOM! */
1550	  enc = NSUnicodeStringEncoding;
1551	}
1552      else if (len >= 3
1553	&& data_bytes[0] == 0xEF
1554	&& data_bytes[1] == 0xBB
1555	&& data_bytes[2] == 0xBF)
1556	{
1557	  enc = NSUTF8StringEncoding;
1558	}
1559    }
1560  self = [self initWithData: d encoding: enc];
1561  RELEASE(d);
1562  if (self == nil)
1563    {
1564      NSWarnMLog(@"Contents of file '%@' are not string data using %@",
1565        path, [NSString localizedNameOfStringEncoding: enc]);
1566    }
1567  return self;
1568}
1569
1570/**
1571 * <p>Initialises the receiver with the contents of the file at path.
1572 * </p>
1573 * <p>Invokes [NSData-initWithContentsOfFile:] to read the file, then
1574 * examines the data to infer its encoding type, and converts the
1575 * data to a string using -initWithData:encoding:
1576 * </p>
1577 * <p>The encoding to use is determined as follows ... if the data begins
1578 * with the 16-bit unicode Byte Order Marker, then it is assumed to be
1579 * unicode data in the appropriate ordering and converted as such.<br />
1580 * If it begins with a UTF8 representation of the BOM, the UTF8 encoding
1581 * is used.<br />
1582 * Otherwise, the default C String encoding is used.
1583 * </p>
1584 * <p>Releases the receiver and returns nil if the file could not be read
1585 * and converted to a string.
1586 * </p>
1587 */
1588- (id) initWithContentsOfFile: (NSString*)path
1589                 usedEncoding: (NSStringEncoding*)enc
1590                        error: (NSError**)error
1591{
1592  NSData		*d;
1593  unsigned int		len;
1594  const unsigned char	*data_bytes;
1595
1596  d = [[NSDataClass alloc] initWithContentsOfFile: path];
1597  if (nil == d)
1598    {
1599      DESTROY(self);
1600      if (error != 0)
1601        {
1602          *error = [NSError errorWithDomain: NSCocoaErrorDomain
1603                                       code: NSFileReadUnknownError
1604                                   userInfo: nil];
1605        }
1606      return nil;
1607    }
1608  *enc = _DefaultStringEncoding;
1609  len = [d length];
1610  if (len == 0)
1611    {
1612      RELEASE(d);
1613      DESTROY(self);
1614      return @"";
1615    }
1616  data_bytes = [d bytes];
1617  if ((data_bytes != NULL) && (len >= 2))
1618    {
1619      const unichar *data_ucs2chars = (const unichar *)(void*) data_bytes;
1620      if ((data_ucs2chars[0] == byteOrderMark)
1621	|| (data_ucs2chars[0] == byteOrderMarkSwapped))
1622	{
1623	  /* somebody set up us the BOM! */
1624	  *enc = NSUnicodeStringEncoding;
1625	}
1626      else if (len >= 3
1627	&& data_bytes[0] == 0xEF
1628	&& data_bytes[1] == 0xBB
1629	&& data_bytes[2] == 0xBF)
1630	{
1631	  *enc = NSUTF8StringEncoding;
1632	}
1633    }
1634  self = [self initWithData: d encoding: *enc];
1635  RELEASE(d);
1636  if (nil == self)
1637    {
1638      if (error != 0)
1639        {
1640          *error = [NSError errorWithDomain: NSCocoaErrorDomain
1641                                       code: NSFileReadCorruptFileError
1642                                   userInfo: nil];
1643        }
1644    }
1645  return self;
1646}
1647
/**
 * <p>Initialises the receiver with the contents of the file at path,
 * interpreting the data using the encoding enc.
 * </p>
 * <p>Invokes [NSData-initWithContentsOfFile:] to read the file and
 * converts the data to a string using -initWithData:encoding:
 * </p>
 * <p>Releases the receiver and returns nil if the file could not be read
 * and converted to a string.  In that case, if error is not NULL, it is
 * set to an NSCocoaErrorDomain error with the code NSFileReadUnknownError
 * (file unreadable) or NSFileReadCorruptFileError (data not convertible).
 * An empty file releases the receiver and produces the constant empty
 * string.
 * </p>
 */
- (id) initWithContentsOfFile: (NSString*)path
                     encoding: (NSStringEncoding)enc
                        error: (NSError**)error
{
  NSData		*d;
  NSUInteger		len;

  d = [[NSDataClass alloc] initWithContentsOfFile: path];
  if (d == nil)
    {
      DESTROY(self);
      /*
       * Report the failure to the caller, as is done by
       * -initWithContentsOfFile:usedEncoding:error:
       */
      if (error != 0)
        {
          *error = [NSError errorWithDomain: NSCocoaErrorDomain
                                       code: NSFileReadUnknownError
                                   userInfo: nil];
        }
      return nil;
    }
  len = [d length];
  if (len == 0)
    {
      RELEASE(d);
      DESTROY(self);
      return @"";
    }
  self = [self initWithData: d encoding: enc];
  RELEASE(d);
  if (self == nil)
    {
      if (error != 0)
        {
          *error = [NSError errorWithDomain: NSCocoaErrorDomain
                                       code: NSFileReadCorruptFileError
                                   userInfo: nil];
        }
    }
  return self;
}
1681
1682/**
1683 * <p>Initialises the receiver with the contents of the given URL.
1684 * </p>
1685 * <p>Invokes [NSData+dataWithContentsOfURL:] to read the contents, then
1686 * examines the data to infer its encoding type, and converts the
1687 * data to a string using -initWithData:encoding:
1688 * </p>
1689 * <p>The encoding to use is determined as follows ... if the data begins
1690 * with the 16-bit unicode Byte Order Marker, then it is assumed to be
1691 * unicode data in the appropriate ordering and converted as such.<br />
1692 * If it begins with a UTF8 representation of the BOM, the UTF8 encoding
1693 * is used.<br />
1694 * Otherwise, the default C String encoding is used.
1695 * </p>
1696 * <p>Releases the receiver and returns nil if the URL contents could not be
1697 * read and converted to a string.
1698 * </p>
1699 */
1700- (id) initWithContentsOfURL: (NSURL*)url
1701{
1702  NSStringEncoding	enc = _DefaultStringEncoding;
1703  NSData		*d = [NSDataClass dataWithContentsOfURL: url];
1704  unsigned int		len = [d length];
1705  const unsigned char	*data_bytes;
1706
1707  if (d == nil)
1708    {
1709      NSWarnMLog(@"Contents of URL '%@' are not readable", url);
1710      DESTROY(self);
1711      return nil;
1712    }
1713  if (len == 0)
1714    {
1715      DESTROY(self);
1716      return @"";
1717    }
1718  data_bytes = [d bytes];
1719  if ((data_bytes != NULL) && (len >= 2))
1720    {
1721      const unichar *data_ucs2chars = (const unichar *)(void*) data_bytes;
1722      if ((data_ucs2chars[0] == byteOrderMark)
1723	|| (data_ucs2chars[0] == byteOrderMarkSwapped))
1724	{
1725	  enc = NSUnicodeStringEncoding;
1726	}
1727      else if (len >= 3
1728	&& data_bytes[0] == 0xEF
1729	&& data_bytes[1] == 0xBB
1730	&& data_bytes[2] == 0xBF)
1731	{
1732	  enc = NSUTF8StringEncoding;
1733	}
1734    }
1735  self = [self initWithData: d encoding: enc];
1736  if (self == nil)
1737    {
1738      NSWarnMLog(@"Contents of URL '%@' are not string data using %@",
1739        url, [NSString localizedNameOfStringEncoding: enc]);
1740    }
1741  return self;
1742}
1743
/**
 * <p>Initialises the receiver with the contents of the given URL,
 * reporting the encoding which was used.
 * </p>
 * <p>Invokes [NSData+dataWithContentsOfURL:] to read the contents, then
 * examines the data to infer its encoding type, and converts the
 * data to a string using -initWithData:encoding:
 * </p>
 * <p>The encoding to use is determined as follows ... if the data begins
 * with the 16-bit unicode Byte Order Marker, then it is assumed to be
 * unicode data in the appropriate ordering and converted as such.<br />
 * If it begins with a UTF8 representation of the BOM, the UTF8 encoding
 * is used.<br />
 * Otherwise, the default C String encoding is used.
 * </p>
 * <p>If enc is not NULL, the chosen encoding is stored in it once the
 * contents have been read (even if the conversion then fails).  A NULL
 * enc is accepted and simply means the caller is not interested.
 * </p>
 * <p>Releases the receiver and returns nil if the URL contents could not
 * be read and converted to a string.  In that case, if error is not NULL,
 * it is set to an NSCocoaErrorDomain error with the code
 * NSFileReadUnknownError (contents unreadable) or
 * NSFileReadCorruptFileError (data not convertible).
 * Empty contents release the receiver and produce the constant empty
 * string.
 * </p>
 */
- (id) initWithContentsOfURL: (NSURL*)url
                usedEncoding: (NSStringEncoding*)enc
                       error: (NSError**)error
{
  NSStringEncoding	used = _DefaultStringEncoding;
  NSData		*d;
  NSUInteger		len;
  const unsigned char	*data_bytes;

  d = [NSDataClass dataWithContentsOfURL: url];
  if (d == nil)
    {
      DESTROY(self);
      /*
       * Report the failure to the caller, as is done by
       * -initWithContentsOfFile:usedEncoding:error:
       */
      if (error != 0)
        {
          *error = [NSError errorWithDomain: NSCocoaErrorDomain
                                       code: NSFileReadUnknownError
                                   userInfo: nil];
        }
      return nil;
    }
  len = [d length];
  if (len == 0)
    {
      /* Nothing to inspect; report the default encoding. */
      if (enc != 0)
        {
          *enc = used;
        }
      DESTROY(self);
      return @"";
    }
  data_bytes = [d bytes];
  if ((data_bytes != NULL) && (len >= 2))
    {
      const unichar *data_ucs2chars = (const unichar *)(void*) data_bytes;
      if ((data_ucs2chars[0] == byteOrderMark)
	|| (data_ucs2chars[0] == byteOrderMarkSwapped))
	{
	  /* A 16-bit BOM (in either byte order) means unicode data. */
	  used = NSUnicodeStringEncoding;
	}
      else if (len >= 3
	&& data_bytes[0] == 0xEF
	&& data_bytes[1] == 0xBB
	&& data_bytes[2] == 0xBF)
	{
	  /* The three byte UTF8 form of the BOM. */
	  used = NSUTF8StringEncoding;
	}
    }
  /*
   * The encoding is tracked in a local variable and only written back
   * through enc when the caller supplied somewhere to put it.
   */
  if (enc != 0)
    {
      *enc = used;
    }
  self = [self initWithData: d encoding: used];
  if (self == nil)
    {
      if (error != 0)
        {
          *error = [NSError errorWithDomain: NSCocoaErrorDomain
                                       code: NSFileReadCorruptFileError
                                   userInfo: nil];
        }
    }
  return self;
}
1795
/**
 * <p>Initialises the receiver with the contents of the given URL,
 * interpreting the data using the encoding enc.
 * </p>
 * <p>Invokes [NSData+dataWithContentsOfURL:] to read the contents and
 * converts the data to a string using -initWithData:encoding:
 * </p>
 * <p>Releases the receiver and returns nil if the URL contents could not
 * be read and converted to a string.  In that case, if error is not NULL,
 * it is set to an NSCocoaErrorDomain error with the code
 * NSFileReadUnknownError (contents unreadable) or
 * NSFileReadCorruptFileError (data not convertible).
 * Empty contents release the receiver and produce the constant empty
 * string.
 * </p>
 */
- (id) initWithContentsOfURL: (NSURL*)url
                    encoding: (NSStringEncoding)enc
                       error: (NSError**)error
{
  NSData		*d;
  NSUInteger		len;

  d = [NSDataClass dataWithContentsOfURL: url];
  if (d == nil)
    {
      DESTROY(self);
      /*
       * Report the failure to the caller, as is done by
       * -initWithContentsOfFile:usedEncoding:error:
       */
      if (error != 0)
        {
          *error = [NSError errorWithDomain: NSCocoaErrorDomain
                                       code: NSFileReadUnknownError
                                   userInfo: nil];
        }
      return nil;
    }
  len = [d length];
  if (len == 0)
    {
      DESTROY(self);
      return @"";
    }
  self = [self initWithData: d encoding: enc];
  if (self == nil)
    {
      if (error != 0)
        {
          *error = [NSError errorWithDomain: NSCocoaErrorDomain
                                       code: NSFileReadCorruptFileError
                                   userInfo: nil];
        }
    }
  return self;
}
1827
1828/**
1829 * Returns the number of Unicode characters in this string, including the
1830 * individual characters of composed character sequences,
1831 */
1832- (NSUInteger) length
1833{
1834  [self subclassResponsibility: _cmd];
1835  return 0;
1836}
1837
1838// Accessing Characters
1839
1840/**
1841 * Returns unicode character at index.  <code>unichar</code> is an unsigned
1842 * short.  Thus, a 16-bit character is returned.
1843 */
1844- (unichar) characterAtIndex: (NSUInteger)index
1845{
1846  [self subclassResponsibility: _cmd];
1847  return (unichar)0;
1848}
1849
/**
 * Returns the receiver normalised using a compatibility decomposition.<br />
 * When built with ICU (and its unorm2 header) the work is done by
 * -_normalizedICUStringOfType:mode: using the "nfkc" normalisation data
 * in decompose mode (NOTE(review): in ICU terms that combination should
 * be normalisation form KD - confirm against unorm2_getInstance()).<br />
 * Without ICU the method is reported as not implemented.
 */
- (NSString *) decomposedStringWithCompatibilityMapping
{
#if (GS_USE_ICU == 1) && defined(HAVE_UNICODE_UNORM2_H)
  return [self _normalizedICUStringOfType: "nfkc" mode: UNORM2_DECOMPOSE];
#else
  return [self notImplemented: _cmd];
#endif
}
1858
/**
 * Returns the receiver normalised using a canonical decomposition.<br />
 * When built with ICU (and its unorm2 header) the work is done by
 * -_normalizedICUStringOfType:mode: using the "nfc" normalisation data
 * in decompose mode (NOTE(review): in ICU terms that combination should
 * be normalisation form D - confirm against unorm2_getInstance()).<br />
 * Without ICU the method is reported as not implemented.
 */
- (NSString *) decomposedStringWithCanonicalMapping
{
#if (GS_USE_ICU == 1) && defined(HAVE_UNICODE_UNORM2_H)
  return [self _normalizedICUStringOfType: "nfc" mode: UNORM2_DECOMPOSE];
#else
  return [self notImplemented: _cmd];
#endif
}
1867
1868/**
1869 * Returns this string as an array of 16-bit <code>unichar</code> (unsigned
1870 * short) values.  buffer must be preallocated and should be capable of
1871 * holding -length shorts.
1872 */
1873// Inefficient.  Should be overridden
1874- (void) getCharacters: (unichar*)buffer
1875{
1876  [self getCharacters: buffer range: ((NSRange){0, [self length]})];
1877  return;
1878}
1879
1880/**
1881 * Returns aRange of string as an array of 16-bit <code>unichar</code>
1882 * (unsigned short) values.  buffer must be preallocated and should be capable
1883 * of holding a sufficient number of shorts.
1884 */
1885// Inefficient.  Should be overridden
1886- (void) getCharacters: (unichar*)buffer
1887		 range: (NSRange)aRange
1888{
1889  unsigned	l = [self length];
1890  unsigned	i;
1891  unichar	(*caiImp)(NSString*, SEL, NSUInteger);
1892
1893  GS_RANGE_CHECK(aRange, l);
1894
1895  caiImp = (unichar (*)(NSString*,SEL,NSUInteger))
1896    [self methodForSelector: caiSel];
1897
1898  for (i = 0; i < aRange.length; i++)
1899    {
1900      buffer[i] = (*caiImp)(self, caiSel, aRange.location + i);
1901    }
1902}
1903
/**
 * Returns a new string in which every byte of the UTF-8 representation
 * of the receiver is replaced by a percent escape ('%' followed by two
 * upper case hexadecimal digits) unless it is a 7-bit ASCII character
 * which is a member of aSet.<br />
 * Members of aSet outside the 7-bit ASCII range are ignored: the bytes
 * of a multi-byte UTF-8 sequence are always escaped, so the result is
 * always plain ASCII.<br />
 * Returns nil if the receiver can not be converted to UTF-8 data.
 */
- (NSString *) stringByAddingPercentEncodingWithAllowedCharacters: (NSCharacterSet *)aSet
{
  static const char	hexDigits[] = "0123456789ABCDEF";
  NSData	*data = [self dataUsingEncoding: NSUTF8StringEncoding];
  NSString	*s = nil;

  if (data != nil)
    {
      const unsigned char	*src = (const unsigned char*)[data bytes];
      NSUInteger	slen = [data length];
      unsigned char	*dst;
      NSUInteger	spos = 0;
      NSUInteger	dpos = 0;

      /*
       * Worst case every byte becomes a three character escape.  The extra
       * byte just avoids a zero sized allocation for the empty string.
       */
      dst = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), slen * 3 + 1);
      while (spos < slen)
	{
	  unsigned char	c = src[spos++];

	  /*
	   * Only bytes below 128 are characters in their own right; a byte
	   * with the top bit set is part of a multi-byte sequence and must
	   * not be looked up in the set as if it were U+0080 to U+00FF
	   * (copying it through could leave malformed UTF-8 behind).
	   */
	  if (c < 128 && [aSet characterIsMember: (unichar)c])
	    {
	      dst[dpos++] = c;
	    }
	  else
	    {
	      dst[dpos++] = '%';
	      dst[dpos++] = hexDigits[(c >> 4) & 0x0f];
	      dst[dpos++] = hexDigits[c & 0x0f];
	    }
	}
      s = [[NSString alloc] initWithBytes: dst
				   length: dpos
				 encoding: NSUTF8StringEncoding];
      NSZoneFree(NSDefaultMallocZone(), dst);
      IF_NO_GC([s autorelease];)
    }
  return s;
}
1945
/**
 * Returns a new string made by replacing each percent escape ('%'
 * followed by two hexadecimal digits, in either case) in the UTF-8
 * representation of the receiver with the byte it encodes, and then
 * interpreting the resulting bytes as UTF-8.<br />
 * A '%' which is not followed by two hexadecimal digits is copied
 * through unchanged.<br />
 * Returns nil if the decoded bytes can not be interpreted as UTF-8.
 */
- (NSString *) stringByRemovingPercentEncoding
{
  NSData	*data = [self dataUsingEncoding: NSUTF8StringEncoding];
  const uint8_t	*s = [data bytes];
  NSUInteger	length = [data length];
  uint8_t	*o;
  NSUInteger	used = 0;
  NSUInteger	index;
  NSString	*result;

  /*
   * The output can never be longer than the input.  The extra byte just
   * avoids a zero sized allocation for the empty string.
   */
  o = (uint8_t *)NSZoneMalloc(NSDefaultMallocZone(), length + 1);

  for (index = 0; index < length; index++)
    {
      uint8_t	c = s[index];

      /*
       * An escape needs two more bytes after the '%'.  The test is
       * phrased as (length - index) because index is always less than
       * length here, so the subtraction can not wrap round even when
       * the input is shorter than three bytes.
       */
      if ('%' == c && length - index >= 3)
	{
	  uint8_t	hi = s[index+1];
	  uint8_t	lo = s[index+2];

	  /* Both digits may be 0-9, A-F or a-f. */
	  if (isxdigit(hi) && isxdigit(lo))
	    {
	      uint8_t	hiValue;
	      uint8_t	loValue;

	      if (hi <= '9')
		{
		  hiValue = hi - '0';
		}
	      else if (hi <= 'F')
		{
		  hiValue = hi - 'A' + 10;
		}
	      else
		{
		  hiValue = hi - 'a' + 10;
		}
	      if (lo <= '9')
		{
		  loValue = lo - '0';
		}
	      else if (lo <= 'F')
		{
		  loValue = lo - 'A' + 10;
		}
	      else
		{
		  loValue = lo - 'a' + 10;
		}
	      c = (uint8_t)((hiValue << 4) | loValue);
	      index += 2;
	    }
	}
      o[used++] = c;
    }

  /*
   * Build the result from an explicit byte count so that a decoded nul
   * byte (from "%00") does not cut the string short.
   */
  result = [[NSString alloc] initWithBytes: o
				    length: used
				  encoding: NSUTF8StringEncoding];
  NSZoneFree(NSDefaultMallocZone(), o);

  return AUTORELEASE(result);
}
2005
2006/**
2007 * Constructs a new ASCII string which is a representation of the receiver
2008 * in which characters are escaped where necessary in order to produce a
2009 * version of the string legal for inclusion within a URL.<br />
2010 * The original string is converted to bytes using the specified encoding
2011 * and then those bytes are escaped unless they correspond to 'legal'
2012 * ASCII characters.  The byte values escaped are any below 32 and any
2013 * above 126 as well as 32 (space), 34 ("), 35 (#), 37 (%), 60 (&lt;),
2014 * 62 (&gt;), 91 ([), 92 (\), 93 (]), 94 (^), 96 (~), 123 ({), 124 (|),
2015 * and 125 (}).<br />
2016 * Returns nil if the receiver cannot be represented using the specified
2017 * encoding.<br />
2018 * NB. This behavior is MacOS-X (4.2) compatible, and it should be noted
2019 * that it does <em>not</em> produce a string suitable for use as a field
2020 * value in a url-encoded form as it does <strong>not</strong> escape the
2021 * '+', '=' and '&amp;' characters used in such forms.  If you need to
2022 * add a string as a form field value (or name) you must add percent
2023 * escapes for those characters yourself.
2024 */
2025- (NSString*) stringByAddingPercentEscapesUsingEncoding: (NSStringEncoding)e
2026{
2027  NSData	*data = [self dataUsingEncoding: e];
2028  NSString	*s = nil;
2029
2030  if (data != nil)
2031    {
2032      unsigned char	*src = (unsigned char*)[data bytes];
2033      unsigned int	slen = [data length];
2034      unsigned char	*dst;
2035      unsigned int	spos = 0;
2036      unsigned int	dpos = 0;
2037
2038      dst = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), slen * 3);
2039      while (spos < slen)
2040	{
2041	  unsigned char	c = src[spos++];
2042	  unsigned int	hi;
2043	  unsigned int	lo;
2044
2045	  if (c <= 32 || c > 126 || c == 34 || c == 35 || c == 37
2046	    || c == 60 || c == 62 || c == 91 || c == 92 || c == 93
2047	    || c == 94 || c == 96 || c == 123 || c == 124 || c == 125)
2048	    {
2049	      dst[dpos++] = '%';
2050	      hi = (c & 0xf0) >> 4;
2051	      dst[dpos++] = (hi > 9) ? 'A' + hi - 10 : '0' + hi;
2052	      lo = (c & 0x0f);
2053	      dst[dpos++] = (lo > 9) ? 'A' + lo - 10 : '0' + lo;
2054	    }
2055	  else
2056	    {
2057	      dst[dpos++] = c;
2058	    }
2059	}
2060      s = [[NSString alloc] initWithBytes: dst
2061				   length: dpos
2062				 encoding: NSASCIIStringEncoding];
2063      NSZoneFree(NSDefaultMallocZone(), dst);
2064      IF_NO_GC([s autorelease];)
2065    }
2066  return s;
2067}
2068
2069/**
2070 * Constructs a new string consisting of this instance followed by the string
2071 * specified by format.
2072 */
2073- (NSString*) stringByAppendingFormat: (NSString*)format,...
2074{
2075  va_list	ap;
2076  id		ret;
2077
2078  va_start(ap, format);
2079  ret = [self stringByAppendingString:
2080    [NSString stringWithFormat: format arguments: ap]];
2081  va_end(ap);
2082  return ret;
2083}
2084
2085/**
2086 * Constructs a new string consisting of this instance followed by the aString.
2087 */
2088- (NSString*) stringByAppendingString: (NSString*)aString
2089{
2090  unsigned	len = [self length];
2091  unsigned	otherLength = [aString length];
2092  NSZone	*z = [self zone];
2093  unichar	*s = NSZoneMalloc(z, (len+otherLength)*sizeof(unichar));
2094  NSString	*tmp;
2095
2096  [self getCharacters: s range: ((NSRange){0, len})];
2097  [aString getCharacters: s + len range: ((NSRange){0, otherLength})];
2098  tmp = [[NSStringClass allocWithZone: z] initWithCharactersNoCopy: s
2099    length: len + otherLength freeWhenDone: YES];
2100  return AUTORELEASE(tmp);
2101}
2102
2103// Dividing Strings into Substrings
2104
2105/**
2106 * <p>Returns an array of [NSString]s representing substrings of this string
2107 * that are separated by characters in the set (which must not be nil).
2108 * If there are no occurrences of separator, the whole string is
2109 * returned.  If string begins or ends with separator, empty strings will
2110 * be returned for those positions.</p>
2111 */
2112- (NSArray *) componentsSeparatedByCharactersInSet: (NSCharacterSet *)separator
2113{
2114  NSRange	search;
2115  NSRange	complete;
2116  NSRange	found;
2117  NSMutableArray *array;
2118  IF_NO_GC(NSAutoreleasePool *pool; NSUInteger count;)
2119
2120  if (separator == nil)
2121    [NSException raise: NSInvalidArgumentException format: @"separator is nil"];
2122
2123  array = [NSMutableArray array];
2124  IF_NO_GC(pool = [NSAutoreleasePool new]; count = 0;)
2125  search = NSMakeRange (0, [self length]);
2126  complete = search;
2127  found = [self rangeOfCharacterFromSet: separator];
2128  while (found.length != 0)
2129    {
2130      NSRange current;
2131
2132      current = NSMakeRange (search.location,
2133	found.location - search.location);
2134      [array addObject: [self substringWithRange: current]];
2135
2136      search = NSMakeRange (found.location + found.length,
2137	complete.length - found.location - found.length);
2138      found = [self rangeOfCharacterFromSet: separator
2139                                    options: 0
2140                                      range: search];
2141      IF_NO_GC(if (0 == count % 200) [pool emptyPool];)
2142    }
2143  // Add the last search string range
2144  [array addObject: [self substringWithRange: search]];
2145  IF_NO_GC([pool release];)
2146  // FIXME: Need to make mutable array into non-mutable array?
2147  return array;
2148}
2149
2150/**
2151 * <p>Returns an array of [NSString]s representing substrings of this string
2152 * that are separated by separator (which itself is never returned in the
2153 * array).  If there are no occurrences of separator, the whole string is
2154 * returned.  If string begins or ends with separator, empty strings will
2155 * be returned for those positions.</p>
2156 * <p>Note, use an [NSScanner] if you need more sophisticated parsing.</p>
2157 */
2158- (NSArray*) componentsSeparatedByString: (NSString*)separator
2159{
2160  NSRange	search;
2161  NSRange	complete;
2162  NSRange	found;
2163  NSMutableArray *array = [NSMutableArray array];
2164
2165  search = NSMakeRange (0, [self length]);
2166  complete = search;
2167  found = [self rangeOfString: separator
2168                      options: 0
2169                        range: search
2170                       locale: nil];
2171  while (found.length != 0)
2172    {
2173      NSRange current;
2174
2175      current = NSMakeRange (search.location,
2176	found.location - search.location);
2177      [array addObject: [self substringWithRange: current]];
2178
2179      search = NSMakeRange (found.location + found.length,
2180	complete.length - found.location - found.length);
2181      found = [self rangeOfString: separator
2182			  options: 0
2183			    range: search
2184                           locale: nil];
2185    }
2186  // Add the last search string range
2187  [array addObject: [self substringWithRange: search]];
2188
2189  // FIXME: Need to make mutable array into non-mutable array?
2190  return array;
2191}
2192
/**
 * Returns a new string made by taking a mutable copy of the receiver and
 * sending it -replaceOccurrencesOfString:withString:options:range: with
 * the supplied arguments.
 */
- (NSString*) stringByReplacingOccurrencesOfString: (NSString*)replace
                                        withString: (NSString*)by
                                           options: (NSStringCompareOptions)opts
                                             range: (NSRange)searchRange
{
  id	work = [GSMutableStringClass allocWithZone: NSDefaultMallocZone()];

  /* Autorelease before doing the replacement so that the working copy
   * is not leaked should the replacement raise an exception.
   */
  work = [[work initWithString: self] autorelease];
  [work replaceOccurrencesOfString: replace
                        withString: by
                           options: opts
                             range: searchRange];
  return GS_IMMUTABLE(work);
}
2208
/**
 * Convenience form of
 * -stringByReplacingOccurrencesOfString:withString:options:range:
 * using no options and a range covering the whole of the receiver.
 */
- (NSString*) stringByReplacingOccurrencesOfString: (NSString*)replace
                                        withString: (NSString*)by
{
  NSRange	whole = NSMakeRange(0, [self length]);

  return [self stringByReplacingOccurrencesOfString: replace
                                         withString: by
                                            options: 0
                                              range: whole];
}
2218
2219/**
2220 * Returns a new string where the substring in the given range is replaced by
2221 * the passed string.
2222 */
2223- (NSString*) stringByReplacingCharactersInRange: (NSRange)aRange
2224                                      withString: (NSString*)by
2225{
2226  id	copy;
2227
2228  copy = [[[GSMutableStringClass allocWithZone: NSDefaultMallocZone()]
2229    initWithString: self] autorelease];
2230  [copy replaceCharactersInRange: aRange withString: by];
2231  return GS_IMMUTABLE(copy);
2232}
2233
2234/**
2235 * Returns a substring of the receiver from character at the specified
2236 * index to the end of the string.<br />
2237 * So, supplying an index of 3 would return a substring consisting of
2238 * the entire string apart from the first three character (those would
2239 * be at index 0, 1, and 2).<br />
2240 * If the supplied index is greater than or equal to the length of the
2241 * receiver an exception is raised.
2242 */
2243- (NSString*) substringFromIndex: (NSUInteger)index
2244{
2245  return [self substringWithRange: ((NSRange){index, [self length]-index})];
2246}
2247
2248/**
2249 * Returns a substring of the receiver from the start of the
2250 * string to (but not including) the specified index position.<br />
2251 * So, supplying an index of 3 would return a substring consisting of
2252 * the first three characters of the receiver.<br />
2253 * If the supplied index is greater than the length of the receiver
2254 * an exception is raised.
2255 */
2256- (NSString*) substringToIndex: (NSUInteger)index
2257{
2258  return [self substringWithRange: ((NSRange){0,index})];
2259}
2260
2261/**
2262 * An obsolete name for -substringWithRange: ... deprecated.
2263 */
2264- (NSString*) substringFromRange: (NSRange)aRange
2265{
2266  return [self substringWithRange: aRange];
2267}
2268
2269/**
2270 * Returns a substring of the receiver containing the characters
2271 * in aRange.<br />
2272 * If aRange specifies any character position not
2273 * present in the receiver, an exception is raised.<br />
2274 * If aRange has a length of zero, an empty string is returned.
2275 */
2276- (NSString*) substringWithRange: (NSRange)aRange
2277{
2278  unichar	*buf;
2279  id		ret;
2280  unsigned	len = [self length];
2281
2282  GS_RANGE_CHECK(aRange, len);
2283
2284  if (aRange.length == 0)
2285    return @"";
2286  buf = NSZoneMalloc([self zone], sizeof(unichar)*aRange.length);
2287  [self getCharacters: buf range: aRange];
2288  ret = [[NSStringClass allocWithZone: NSDefaultMallocZone()]
2289    initWithCharactersNoCopy: buf length: aRange.length freeWhenDone: YES];
2290  return AUTORELEASE(ret);
2291}
2292
2293// Finding Ranges of Characters and Substrings
2294
2295/**
2296 * Returns position of first character in this string that is in aSet.
2297 * Positions start at 0.  If the character is a composed character sequence,
2298 * the range returned will contain the whole sequence, else just the character
2299 * itself.
2300 */
2301- (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet
2302{
2303  NSRange all = NSMakeRange(0, [self length]);
2304
2305  return [self rangeOfCharacterFromSet: aSet
2306			       options: 0
2307				 range: all];
2308}
2309
2310/**
2311 * Returns position of first character in this string that is in aSet.
2312 * Positions start at 0.  If the character is a composed character sequence,
2313 * the range returned will contain the whole sequence, else just the character
2314 * itself.  mask may contain <code>NSCaseInsensitiveSearch</code>,
2315 * <code>NSLiteralSearch</code> (don't consider alternate forms of composed
2316 * characters equal), or <code>NSBackwardsSearch</code> (search from end of
2317 * string).
2318 */
2319- (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet
2320			    options: (NSUInteger)mask
2321{
2322  NSRange all = NSMakeRange(0, [self length]);
2323
2324  return [self rangeOfCharacterFromSet: aSet
2325			       options: mask
2326				 range: all];
2327}
2328
2329/**
2330 * Returns position of first character in this string that is in aSet.
2331 * Positions start at 0.  If the character is a composed character sequence,
2332 * the range returned will contain the whole sequence, else just the character
2333 * itself.  mask may contain <code>NSCaseInsensitiveSearch</code>,
2334 * <code>NSLiteralSearch</code> (don't consider alternate forms of composed
2335 * characters equal), or <code>NSBackwardsSearch</code> (search from end of
2336 * string).  Search only carried out within aRange.
2337 */
2338- (NSRange) rangeOfCharacterFromSet: (NSCharacterSet*)aSet
2339			    options: (NSUInteger)mask
2340			      range: (NSRange)aRange
2341{
2342  unsigned int	i;
2343  unsigned int	start;
2344  unsigned int	stop;
2345  int		step;
2346  NSRange	range;
2347  unichar	(*cImp)(id, SEL, NSUInteger);
2348  BOOL		(*mImp)(id, SEL, unichar);
2349
2350  i = [self length];
2351  GS_RANGE_CHECK(aRange, i);
2352
2353  if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
2354    {
2355      start = NSMaxRange(aRange)-1; stop = aRange.location-1; step = -1;
2356    }
2357  else
2358    {
2359      start = aRange.location; stop = NSMaxRange(aRange); step = 1;
2360    }
2361  range.location = NSNotFound;
2362  range.length = 0;
2363
2364  cImp = (unichar(*)(id,SEL,NSUInteger))
2365    [self methodForSelector: caiSel];
2366  mImp = (BOOL(*)(id,SEL,unichar))
2367    [aSet methodForSelector: cMemberSel];
2368
2369  for (i = start; i != stop; i += step)
2370    {
2371      unichar letter = (unichar)(*cImp)(self, caiSel, i);
2372
2373      if ((*mImp)(aSet, cMemberSel, letter))
2374	{
2375	  range = NSMakeRange(i, 1);
2376	  break;
2377	}
2378    }
2379
2380  return range;
2381}
2382
2383/**
2384 * Invokes -rangeOfString:options: with no options.
2385 */
2386- (NSRange) rangeOfString: (NSString*)string
2387{
2388  NSRange	all = NSMakeRange(0, [self length]);
2389
2390  return [self rangeOfString: string
2391		     options: 0
2392		       range: all
2393                      locale: nil];
2394}
2395
2396/**
2397 * Invokes -rangeOfString:options:range: with the range set
2398 * set to the range of the whole of the receiver.
2399 */
2400- (NSRange) rangeOfString: (NSString*)string
2401		  options: (NSUInteger)mask
2402{
2403  NSRange	all = NSMakeRange(0, [self length]);
2404
2405  return [self rangeOfString: string
2406		     options: mask
2407		       range: all
2408                      locale: nil];
2409}
2410
2411/**
2412 * Returns the range giving the location and length of the first
2413 * occurrence of aString within aRange.
2414 * <br/>
2415 * If aString does not exist in the receiver (an empty
2416 * string is never considered to exist in the receiver),
2417 * the length of the returned range is zero.
2418 * <br/>
2419 * If aString is nil, an exception is raised.
2420 * <br/>
2421 * If any part of aRange lies outside the range of the
2422 * receiver, an exception is raised.
2423 * <br/>
2424 * The options mask may contain the following options -
2425 * <list>
2426 *   <item><code>NSCaseInsensitiveSearch</code></item>
2427 *   <item><code>NSLiteralSearch</code></item>
2428 *   <item><code>NSBackwardsSearch</code></item>
2429 *   <item><code>NSAnchoredSearch</code></item>
2430 * </list>
2431 * The <code>NSAnchoredSearch</code> option means aString must occur at the
2432 * beginning (or end, if <code>NSBackwardsSearch</code> is also given) of the
2433 * string.  Options should be OR'd together using <code>'|'</code>.
2434 */
2435- (NSRange) rangeOfString: (NSString *)aString
2436		  options: (NSUInteger)mask
2437		    range: (NSRange)aRange
2438{
2439  return [self rangeOfString: aString
2440                     options: mask
2441                       range: aRange
2442		      locale: nil];
2443}
2444
/**
 * Returns the range of the first (or, if <code>NSBackwardsSearch</code>
 * is given, the last) occurrence of aString within searchRange, or a
 * range with zero length if there is no such occurrence.<br />
 * Raises an exception if searchRange does not lie within the receiver or
 * if aString is nil.<br />
 * The search is performed by the first of the following which applies:
 * <list>
 *   <item>a regular expression match
 *     if <code>NSRegularExpressionSearch</code> is set in mask</item>
 *   <item>an immediate zero length result if aString is empty</item>
 *   <item>a cheap single character comparison loop</item>
 *   <item>a literal (code unit by code unit) comparison
 *     if <code>NSLiteralSearch</code> is set in mask</item>
 *   <item>an ICU collator based search (only when built with ICU and
 *     a collator could be opened for mask and locale)</item>
 *   <item>the strRangeNsNs() function</item>
 * </list>
 * With <code>NSAnchoredSearch</code> a match is only reported when it
 * lies at the start of searchRange (or at its end if
 * <code>NSBackwardsSearch</code> is also given).
 */
- (NSRange) rangeOfString: (NSString *)aString
                  options: (NSStringCompareOptions)mask
                    range: (NSRange)searchRange
                   locale: (NSLocale *)locale
{
  NSUInteger    length = [self length];
  NSUInteger    countOther;

  GS_RANGE_CHECK(searchRange, length);
  if (aString == nil)
    [NSException raise: NSInvalidArgumentException format: @"range of nil"];

  if ((mask & NSRegularExpressionSearch) == NSRegularExpressionSearch)
    {
      NSRange			r = {NSNotFound, 0};
      NSError			*e = nil;
      NSUInteger		options = 0;
      NSRegularExpression	*regex = [NSRegularExpression alloc];

      if ((mask & NSCaseInsensitiveSearch) == NSCaseInsensitiveSearch)
	{
	  options |= NSRegularExpressionCaseInsensitive;
	}
      regex = [regex initWithPattern: aString options: options error: &e];
      /* Success is indicated by the initialiser returning an object;
       * the error pointer is only meaningful when it returned nil.
       */
      if (nil != regex)
	{
	  options = ((mask & NSAnchoredSearch) == NSAnchoredSearch)
	    ? NSMatchingAnchored : 0;
	  r = [regex rangeOfFirstMatchInString: self
				       options: options
					 range: searchRange];
	}
      [regex release];
      return r;
    }

  countOther = [aString length];

  /* A zero length string is always found at the start of the given range
   * (or at its end when searching backwards).
   */
  if (0 == countOther)
    {
      if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
        {
          searchRange.location += searchRange.length;
        }
      searchRange.length = 0;
      return searchRange;
    }

  /* If the string to search for is a single codepoint which is not
   * decomposable to a sequence, then it can only match the identical
   * codepoint, so we can perform the much cheaper literal search.
   * NOTE(review): uni_is_decomp() is defined elsewhere; confirm that the
   * sense of its return value matches the intent described above.
   */
  if (1 == countOther)
    {
      unichar   u = [aString characterAtIndex: 0];

      if ((mask & NSLiteralSearch) == NSLiteralSearch || uni_is_decomp(u))
        {
          NSRange   result;

          if (searchRange.length < countOther)
            {
              /* Range to search is smaller than string to look for.
               */
              result = NSMakeRange(NSNotFound, 0);
            }
          else if ((mask & NSAnchoredSearch) == NSAnchoredSearch
            || searchRange.length == 1)
            {
              /* Only a single character needs to be examined: the first
               * one in the range, or the last when searching backwards.
               */
              if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
                {
                  searchRange.location = NSMaxRange(searchRange) - 1;
                }
              /* An anchored search may have been given a longer range,
               * but the match (and therefore the result) is one character.
               */
              searchRange.length = 1;
              if ((mask & NSCaseInsensitiveSearch) == NSCaseInsensitiveSearch)
                {
                  u = uni_toupper(u);
                  if (uni_toupper([self characterAtIndex: searchRange.location])
                     == u)
                    {
                      result = searchRange;
                    }
                  else
                    {
                      result = NSMakeRange(NSNotFound, 0);
                    }
                }
              else
                {
                  if ([self characterAtIndex: searchRange.location] == u)
                    {
                      result = searchRange;
                    }
                  else
                    {
                      result = NSMakeRange(NSNotFound, 0);
                    }
                }
            }
          else
            {
              NSUInteger    pos;
              NSUInteger    end;

              /* Range to search is bigger than string to look for.
               */
              GS_BEGINITEMBUF2(charsSelf, (searchRange.length*sizeof(unichar)),
                unichar)
              [self getCharacters: charsSelf range: searchRange];
              end = searchRange.length;
              if ((mask & NSCaseInsensitiveSearch) == NSCaseInsensitiveSearch)
                {
                  u = uni_toupper(u);
                  if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
                    {
                      /* If nothing matches, pos wraps round to a value
                       * larger than end, which the test after the loops
                       * treats as 'not found'.
                       */
                      pos = end;
                      while (pos-- > 0)
                        {
                          if (uni_toupper(charsSelf[pos]) == u)
                            {
                              break;
                            }
                        }
                    }
                  else
                    {
                      pos = 0;
                      while (pos < end)
                        {
                          if (uni_toupper(charsSelf[pos]) == u)
                            {
                              break;
                            }
                          pos++;
                        }
                    }
                }
              else
                {
                  if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
                    {
                      pos = end;
                      while (pos-- > 0)
                        {
                          if (charsSelf[pos] == u)
                            {
                              break;
                            }
                        }
                    }
                  else
                    {
                      pos = 0;
                      while (pos < end)
                        {
                          if (charsSelf[pos] == u)
                            {
                              break;
                            }
                          pos++;
                        }
                    }
                }
              GS_ENDITEMBUF2()

              if (pos >= end)
                {
                  result = NSMakeRange(NSNotFound, 0);
                }
              else
                {
                  result = NSMakeRange(searchRange.location + pos, countOther);
                }
            }
          return result;
        }
    }

  if ((mask & NSLiteralSearch) == NSLiteralSearch)
    {
      NSRange   result;
      BOOL      insensitive;

      if ((mask & NSCaseInsensitiveSearch) == NSCaseInsensitiveSearch)
        {
          insensitive = YES;
        }
      else
        {
          insensitive = NO;
        }

      if (searchRange.length < countOther)
        {
          /* Range to search is smaller than string to look for.
           */
          result = NSMakeRange(NSNotFound, 0);
        }
      else
        {
          GS_BEGINITEMBUF(charsOther, (countOther*sizeof(unichar)), unichar)

          [aString getCharacters: charsOther range: NSMakeRange(0, countOther)];
          if (YES == insensitive)
            {
              NSUInteger        index;

              /* Make the substring we are searching for be uppercase.
               */
              for (index = 0; index < countOther; index++)
                {
                  charsOther[index] = uni_toupper(charsOther[index]);
                }
            }
          if ((mask & NSAnchoredSearch) == NSAnchoredSearch
            || searchRange.length == countOther)
            {
              /* Only one position can match: compare the countOther
               * characters at the start (or, backwards, the end) of
               * the range.
               */
              GS_BEGINITEMBUF2(charsSelf, (countOther*sizeof(unichar)), unichar)
              if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
                {
                  searchRange.location = NSMaxRange(searchRange) - countOther;
                  searchRange.length = countOther;
                }
              else
                {
                  searchRange.length = countOther;
                }
              [self getCharacters: charsSelf range: searchRange];
              if (YES == insensitive)
                {
                  NSUInteger    index;

                  for (index = 0; index < countOther; index++)
                    {
                      if (uni_toupper(charsSelf[index]) != charsOther[index])
                        {
                          break;
                        }
                    }
                  if (index < countOther)
                    {
                      result = NSMakeRange(NSNotFound, 0);
                    }
                  else
                    {
                      result = searchRange;
                    }
                }
              else
                {
                  if (memcmp(&charsSelf[0], &charsOther[0],
                    countOther * sizeof(unichar)) == 0)
                    {
                      result = searchRange;
                    }
                  else
                    {
                      result = NSMakeRange(NSNotFound, 0);
                    }
                }
              GS_ENDITEMBUF2()
            }
          else
            {
              NSUInteger    pos;
              NSUInteger    end;

              /* end is the number of candidate start positions.
               */
              end = searchRange.length - countOther + 1;
              if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
                {
                  pos = end;
                }
              else
                {
                  pos = 0;
                }
              /* Range to search is bigger than string to look for.
               */
              GS_BEGINITEMBUF2(charsSelf, (searchRange.length*sizeof(unichar)),
                unichar)
              [self getCharacters: charsSelf range: searchRange];

              if (YES == insensitive)
                {
                  NSUInteger        count;
                  NSUInteger        index;

                  /* Make things uppercase in the string being searched
                   * Start with all but one of the characters in a substring
                   * and we'll uppercase one more character each time we do
                   * a comparison.
                   */
                  index = pos;
                  for (count = 1; count < countOther; count++)
                    {
                      charsSelf[index] = uni_toupper(charsSelf[index]);
                      index++;
                    }
                }

              if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
                {
                  if (YES == insensitive)
                    {
                      while (pos-- > 0)
                        {
                          charsSelf[pos] = uni_toupper(charsSelf[pos]);
                          if (memcmp(&charsSelf[pos], charsOther,
                            countOther * sizeof(unichar)) == 0)
                            {
                              break;
                            }
                        }
                    }
                  else
                    {
                      while (pos-- > 0)
                        {
                          if (memcmp(&charsSelf[pos], charsOther,
                            countOther * sizeof(unichar)) == 0)
                            {
                              break;
                            }
                        }
                    }
                }
              else
                {
                  if (YES == insensitive)
                    {
                      while (pos < end)
                        {
                          charsSelf[pos + countOther - 1]
                            = uni_toupper(charsSelf[pos + countOther - 1]);
                          if (memcmp(&charsSelf[pos], charsOther,
                            countOther * sizeof(unichar)) == 0)
                            {
                              break;
                            }
                          pos++;
                        }
                    }
                  else
                    {
                      while (pos < end)
                        {
                          if (memcmp(&charsSelf[pos], charsOther,
                            countOther * sizeof(unichar)) == 0)
                            {
                              break;
                            }
                          pos++;
                        }
                    }
                }

              /* A failed backwards search leaves pos wrapped round to a
               * huge value, so this one test covers both directions.
               */
              if (pos >= end)
                {
                  result = NSMakeRange(NSNotFound, 0);
                }
              else
                {
                  result = NSMakeRange(searchRange.location + pos, countOther);
                }
              GS_ENDITEMBUF2()
            }
          GS_ENDITEMBUF()
        }
      return result;
    }

#if GS_USE_ICU == 1
    {
      UCollator *coll = GSICUCollatorOpen(mask, locale);

      if (NULL != coll)
	{
	  NSRange       result = NSMakeRange(NSNotFound, 0);
	  UErrorCode    status = U_ZERO_ERROR;
	  NSUInteger    countSelf = searchRange.length;
	  UStringSearch *search = NULL;
          GS_BEGINITEMBUF(charsSelf, (countSelf * sizeof(unichar)), unichar)
          GS_BEGINITEMBUF2(charsOther, (countOther * sizeof(unichar)), unichar)

	  // Copy to buffer

	  [self getCharacters: charsSelf range: searchRange];
	  [aString getCharacters: charsOther range: NSMakeRange(0, countOther)];

	  search = usearch_openFromCollator(charsOther, countOther,
					    charsSelf, countSelf,
					    coll, NULL, &status);
	  if (search != NULL && U_SUCCESS(status))
	    {
	      /* Offsets reported by the search are relative to the start
	       * of charsSelf, i.e. to searchRange.location.
	       */
	      int32_t matchLocation;
	      int32_t matchLength;

	      if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
		{
		  matchLocation = usearch_last(search, &status);
		}
	      else
		{
		  matchLocation = usearch_first(search, &status);
		}
	      matchLength = usearch_getMatchedLength(search);

	      if (matchLocation != USEARCH_DONE && matchLength != 0)
		{
		  if ((mask & NSAnchoredSearch) == NSAnchoredSearch)
		    {
		      if ((mask & NSBackwardsSearch) == NSBackwardsSearch)
			{
			  /* The match must finish at the end of the text
			   * searched.  The comparison is made in buffer
			   * coordinates (against countSelf) since the
			   * match offset is relative to the buffer and
			   * not to the whole of the receiver.
			   */
			  if ((NSUInteger)(matchLocation + matchLength)
                            == countSelf)
                            {
                              result = NSMakeRange(searchRange.location
                                + matchLocation, matchLength);
                            }
			}
		      else
			{
			  if (matchLocation == 0)
                            {
                              result = NSMakeRange(searchRange.location
                                + matchLocation, matchLength);
                            }
			}
		    }
		  else
		    {
		      result = NSMakeRange(searchRange.location
                        + matchLocation, matchLength);
		    }
		}
	    }
          GS_ENDITEMBUF2()
          GS_ENDITEMBUF()
	  usearch_close(search);
	  ucol_close(coll);
	  return result;
	}
    }
#endif

  return strRangeNsNs(self, aString, mask, searchRange);
}
2897
/**
 * Returns the index at which substring is first found in the receiver
 * (searching the whole receiver with no options and a nil locale), or
 * NSNotFound if the search produces a range of zero length.
 */
- (NSUInteger) indexOfString: (NSString *)substring
{
  NSRange	whole = NSMakeRange(0, [self length]);
  NSRange	found;

  found = [self rangeOfString: substring
		      options: 0
			range: whole
		       locale: nil];
  if (0 == found.length)
    {
      return NSNotFound;
    }
  return found.location;
}
2905
/**
 * Returns the index of the first occurrence of substring in the receiver
 * at or after index, or NSNotFound if the search produces an empty range.<br />
 * The search covers everything from index to the end of the receiver, with
 * no options and a nil locale.
 */
- (NSUInteger) indexOfString: (NSString*)substring
                   fromIndex: (NSUInteger)index
{
  NSRange	found;

  found = [self rangeOfString: substring
                      options: 0
                        range: ((NSRange){index, [self length] - index})
                       locale: nil];
  if (0 == found.length)
    {
      return NSNotFound;
    }
  return found.location;
}
2914
2915// Determining Composed Character Sequences
2916
2917/**
2918 * Unicode utility method.  If character at anIndex is part of a composed
2919 * character sequence anIndex (note indices start from 0), returns the full
2920 * range of this sequence.
2921 */
2922- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
2923{
2924  unsigned	start;
2925  unsigned	end;
2926  unsigned	length = [self length];
2927  unichar	ch;
2928  unichar	(*caiImp)(NSString*, SEL, NSUInteger);
2929
2930  if (anIndex >= length)
2931    [NSException raise: NSRangeException format:@"Invalid location."];
2932  caiImp = (unichar (*)(NSString*,SEL,NSUInteger))
2933    [self methodForSelector: caiSel];
2934
2935  for (start = anIndex; start > 0; start--)
2936    {
2937      ch = (*caiImp)(self, caiSel, start);
2938      if (uni_isnonsp(ch) == NO)
2939        break;
2940    }
2941  for (end = start+1; end < length; end++)
2942    {
2943      ch = (*caiImp)(self, caiSel, end);
2944      if (uni_isnonsp(ch) == NO)
2945        break;
2946    }
2947
2948  return NSMakeRange(start, end-start);
2949}
2950
/**
 * Returns the range obtained by extending range so that it covers the
 * whole composed character sequence at its first index and (when that
 * sequence does not already reach the end of range) the whole composed
 * character sequence at its last index.
 */
- (NSRange) rangeOfComposedCharacterSequencesForRange: (NSRange)range
{
  NSRange	head;
  NSRange	tail;

  head = [self rangeOfComposedCharacterSequenceAtIndex: range.location];
  if (NSMaxRange(head) >= NSMaxRange(range))
    {
      /* The first sequence already covers everything asked for. */
      return head;
    }
  tail = [self rangeOfComposedCharacterSequenceAtIndex: NSMaxRange(range) - 1];
  return NSUnionRange(head, tail);
}
2966
2967// Identifying and Comparing Strings
2968
2969/**
2970 * <p>Compares this instance with aString.  Returns
2971 * <code>NSOrderedAscending</code>, <code>NSOrderedDescending</code>, or
2972 * <code>NSOrderedSame</code>, depending on whether this instance occurs
2973 * before or after string in lexical order, or is equal to it.</p>
2974 */
2975- (NSComparisonResult) compare: (NSString*)aString
2976{
2977  return [self compare: aString options: 0];
2978}
2979
2980/**
2981 * <p>Compares this instance with aString.  mask may be either
2982 * <code>NSCaseInsensitiveSearch</code> or <code>NSLiteralSearch</code>.  The
2983 * latter requests a literal byte-by-byte comparison, which is fastest but may
2984 * return inaccurate results in cases where two different composed character
2985 * sequences may be used to express the same character.</p>
2986 */
2987- (NSComparisonResult) compare: (NSString*)aString
2988		       options: (NSUInteger)mask
2989{
2990  return [self compare: aString options: mask
2991		 range: ((NSRange){0, [self length]})];
2992}
2993
2994/**
2995 * <p>Compares this instance with string.  mask may be either
2996 * <code>NSCaseInsensitiveSearch</code> or <code>NSLiteralSearch</code>.  The
2997 * latter requests a literal byte-by-byte comparison, which is fastest but may
2998 * return inaccurate results in cases where two different composed character
2999 * sequences may be used to express the same character.  aRange refers
3000 * to this instance, and should be set to 0..length to compare the whole
3001 * string.</p>
3002 */
3003// xxx Should implement full POSIX.2 collate
3004- (NSComparisonResult) compare: (NSString*)aString
3005		       options: (NSUInteger)mask
3006			 range: (NSRange)aRange
3007{
3008  return [self compare: aString
3009	       options: mask
3010		 range: aRange
3011		locale: nil];
3012}
3013
3014/**
3015 *  Returns whether this string starts with aString.
3016 */
3017- (BOOL) hasPrefix: (NSString*)aString
3018{
3019  NSRange	range = NSMakeRange(0, [self length]);
3020  NSUInteger    mask = NSLiteralSearch | NSAnchoredSearch;
3021
3022  range = [self rangeOfString: aString
3023                      options: mask
3024                        range: range
3025                       locale: nil];
3026  return (range.length > 0) ? YES : NO;
3027}
3028
3029/**
3030 *  Returns whether this string ends with aString.
3031 */
3032- (BOOL) hasSuffix: (NSString*)aString
3033{
3034  NSRange	range = NSMakeRange(0, [self length]);
3035  NSUInteger    mask = NSLiteralSearch | NSAnchoredSearch | NSBackwardsSearch;
3036
3037  range = [self rangeOfString: aString
3038                      options: mask
3039                        range: range
3040                       locale: nil];
3041  return (range.length > 0) ? YES : NO;
3042}
3043
3044/**
3045 *  Returns whether the receiver and an anObject are equals as strings.
3046 *  If anObject isn't an NSString, returns NO.
3047 */
3048- (BOOL) isEqual: (id)anObject
3049{
3050  if (anObject == self)
3051    {
3052      return YES;
3053    }
3054  if (anObject != nil && [anObject isKindOfClass: NSStringClass])
3055    {
3056      return [self isEqualToString: anObject];
3057    }
3058  return NO;
3059}
3060
3061/**
3062 *  Returns whether this instance is equal as a string to aString.  See also
3063 *  -compare: and related methods.
3064 */
3065- (BOOL) isEqualToString: (NSString*)aString
3066{
3067  if (aString == self)
3068    {
3069      return YES;
3070    }
3071  if ([self hash] != [aString hash])
3072    {
3073      return NO;
3074    }
3075  if (strCompNsNs(self, aString, 0, (NSRange){0, [self length]})
3076    == NSOrderedSame)
3077    {
3078      return YES;
3079    }
3080  return NO;
3081}
3082
3083/**
3084 * Return 28-bit hash value (in 32-bit integer).  The top few bits are used
3085 * for other purposes in a bitfield in the concrete string subclasses, so we
3086 * must not use the full unsigned integer.
3087 */
3088- (NSUInteger) hash
3089{
3090  uint32_t	ret = 0;
3091  int   	len = (int)[self length];
3092
3093  if (len > 0)
3094    {
3095      static const int buf_size = 64;
3096      unichar		buf[buf_size];
3097      int idx = 0;
3098      uint32_t s0 = 0;
3099      uint32_t s1 = 0;
3100
3101      while (idx < len)
3102	{
3103	  int l = MIN(len-idx, buf_size);
3104	  [self getCharacters: buf range: NSMakeRange(idx,l)];
3105	  GSPrivateIncrementalHash(&s0, &s1, buf, l * sizeof(unichar));
3106	  idx += l;
3107	}
3108
3109      ret = GSPrivateFinishHash(s0, s1, len * sizeof(unichar));
3110
3111      /*
3112       * The hash caching in our concrete string classes uses zero to denote
3113       * an empty cache value, so we MUST NOT return a hash of zero.
3114       */
3115      ret &= 0x0fffffff;
3116      if (ret == 0)
3117	{
3118	  ret = 0x0fffffff;
3119	}
3120      return ret;
3121    }
3122  else
3123    {
3124      return 0x0ffffffe;	/* Hash for an empty string.	*/
3125    }
3126}
3127
3128// Getting a Shared Prefix
3129
3130/**
3131 *  Returns the largest initial portion of this instance shared with aString.
3132 *  mask may be either <code>NSCaseInsensitiveSearch</code> or
3133 *  <code>NSLiteralSearch</code>.  The latter requests a literal byte-by-byte
3134 *  comparison, which is fastest but may return inaccurate results in cases
3135 *  where two different composed character sequences may be used to express
3136 *  the same character.
3137 */
3138- (NSString*) commonPrefixWithString: (NSString*)aString
3139			     options: (NSUInteger)mask
3140{
3141  if (mask & NSLiteralSearch)
3142    {
3143      int prefix_len = 0;
3144      unsigned	length = [self length];
3145      unsigned	aLength = [aString length];
3146      unichar *u;
3147      unichar a1[length+1];
3148      unichar *s1 = a1;
3149      unichar a2[aLength+1];
3150      unichar *s2 = a2;
3151
3152      [self getCharacters: s1 range: ((NSRange){0, length})];
3153      s1[length] = (unichar)0;
3154      [aString getCharacters: s2 range: ((NSRange){0, aLength})];
3155      s2[aLength] = (unichar)0;
3156      u = s1;
3157
3158      if (mask & NSCaseInsensitiveSearch)
3159	{
3160	  while (*s1 && *s2 && (uni_tolower(*s1) == uni_tolower(*s2)))
3161	    {
3162	      s1++;
3163	      s2++;
3164	      prefix_len++;
3165	    }
3166	}
3167      else
3168	{
3169	  while (*s1 && *s2 && (*s1 == *s2))
3170	    {
3171	      s1++;
3172	      s2++;
3173	      prefix_len++;
3174	    }
3175	}
3176      return [NSStringClass stringWithCharacters: u length: prefix_len];
3177    }
3178  else
3179    {
3180      unichar	(*scImp)(NSString*, SEL, NSUInteger);
3181      unichar	(*ocImp)(NSString*, SEL, NSUInteger);
3182      void	(*sgImp)(NSString*, SEL, unichar*, NSRange) = 0;
3183      void	(*ogImp)(NSString*, SEL, unichar*, NSRange) = 0;
3184      NSRange	(*srImp)(NSString*, SEL, NSUInteger) = 0;
3185      NSRange	(*orImp)(NSString*, SEL, NSUInteger) = 0;
3186      BOOL	gotRangeImps = NO;
3187      BOOL	gotFetchImps = NO;
3188      NSRange	sRange;
3189      NSRange	oRange;
3190      unsigned	sLength = [self length];
3191      unsigned	oLength = [aString length];
3192      unsigned	sIndex = 0;
3193      unsigned	oIndex = 0;
3194
3195      if (!sLength)
3196	return IMMUTABLE(self);
3197      if (!oLength)
3198	return IMMUTABLE(aString);
3199
3200      scImp = (unichar (*)(NSString*,SEL,NSUInteger))
3201	[self methodForSelector: caiSel];
3202      ocImp = (unichar (*)(NSString*,SEL,NSUInteger))
3203	[aString methodForSelector: caiSel];
3204
3205      while ((sIndex < sLength) && (oIndex < oLength))
3206	{
3207	  unichar	sc = (*scImp)(self, caiSel, sIndex);
3208	  unichar	oc = (*ocImp)(aString, caiSel, oIndex);
3209
3210	  if (sc == oc)
3211	    {
3212	      sIndex++;
3213	      oIndex++;
3214	    }
3215	  else if ((mask & NSCaseInsensitiveSearch)
3216	    && (uni_tolower(sc) == uni_tolower(oc)))
3217	    {
3218	      sIndex++;
3219	      oIndex++;
3220	    }
3221	  else
3222	    {
3223	      if (gotRangeImps == NO)
3224		{
3225		  gotRangeImps = YES;
3226		  srImp=(NSRange (*)())[self methodForSelector: ranSel];
3227		  orImp=(NSRange (*)())[aString methodForSelector: ranSel];
3228		}
3229	      sRange = (*srImp)(self, ranSel, sIndex);
3230	      oRange = (*orImp)(aString, ranSel, oIndex);
3231
3232	      if ((sRange.length < 2) || (oRange.length < 2))
3233		return [self substringWithRange: NSMakeRange(0, sIndex)];
3234	      else
3235		{
3236		  GSEQ_MAKE(sBuf, sSeq, sRange.length);
3237		  GSEQ_MAKE(oBuf, oSeq, oRange.length);
3238
3239		  if (gotFetchImps == NO)
3240		    {
3241		      gotFetchImps = YES;
3242		      sgImp=(void (*)())[self methodForSelector: gcrSel];
3243		      ogImp=(void (*)())[aString methodForSelector: gcrSel];
3244		    }
3245		  (*sgImp)(self, gcrSel, sBuf, sRange);
3246		  (*ogImp)(aString, gcrSel, oBuf, oRange);
3247
3248		  if (GSeq_compare(&sSeq, &oSeq) == NSOrderedSame)
3249		    {
3250		      sIndex += sRange.length;
3251		      oIndex += oRange.length;
3252		    }
3253		  else if (mask & NSCaseInsensitiveSearch)
3254		    {
3255		      GSeq_lowercase(&sSeq);
3256		      GSeq_lowercase(&oSeq);
3257		      if (GSeq_compare(&sSeq, &oSeq) == NSOrderedSame)
3258			{
3259			  sIndex += sRange.length;
3260			  oIndex += oRange.length;
3261			}
3262		      else
3263			return [self substringWithRange: NSMakeRange(0,sIndex)];
3264		    }
3265		  else
3266		    return [self substringWithRange: NSMakeRange(0,sIndex)];
3267		}
3268	    }
3269	}
3270      return [self substringWithRange: NSMakeRange(0, sIndex)];
3271    }
3272}
3273
3274/**
3275 * Determines the smallest range of lines containing aRange and returns
3276 * the information as a range.<br />
3277 * Calls -getLineStart:end:contentsEnd:forRange: to do the work.
3278 */
3279- (NSRange) lineRangeForRange: (NSRange)aRange
3280{
3281  NSUInteger startIndex;
3282  NSUInteger lineEndIndex;
3283
3284  [self getLineStart: &startIndex
3285                 end: &lineEndIndex
3286         contentsEnd: NULL
3287            forRange: aRange];
3288  return NSMakeRange(startIndex, lineEndIndex - startIndex);
3289}
3290
/**
 * Shared implementation used by -getLineStart:end:contentsEnd:forRange:
 * (flag YES) and -getParagraphStart:end:contentsEnd:forRange: (flag NO).<br />
 * U+000A, U+000D and U+2029 are always treated as terminators, while
 * U+2028 is treated as a terminator only when flag is YES.<br />
 * aRange is checked against the length of the receiver using
 * GS_RANGE_CHECK.<br />
 * Each of startIndex, lineEndIndex and contentsEndIndex may be NULL, in
 * which case the corresponding value is not computed or returned.
 */
- (void) _getStart: (NSUInteger*)startIndex
	       end: (NSUInteger*)lineEndIndex
       contentsEnd: (NSUInteger*)contentsEndIndex
	  forRange: (NSRange)aRange
	   lineSep: (BOOL)flag
{
  unichar	thischar;
  unsigned	start, end, len, termlen;
  unichar	(*caiImp)(NSString*, SEL, NSUInteger);

  len = [self length];
  GS_RANGE_CHECK(aRange, len);

  caiImp = (unichar (*)())[self methodForSelector: caiSel];
  /* Place aRange.location at the beginning of a CR-LF sequence */
  if (aRange.location > 0 && aRange.location < len
    && (*caiImp)(self, caiSel, aRange.location - 1) == (unichar)'\r'
    && (*caiImp)(self, caiSel, aRange.location) == (unichar)'\n')
    {
      aRange.location--;
    }
  start = aRange.location;

  if (startIndex)
    {
      if (start == 0)
	{
	  *startIndex = 0;
	}
      else
	{
	  /* Scan backwards, starting with the character before the range,
	   * until a terminator is found or index zero is reached.
	   */
	  start--;
	  while (start > 0)
	    {
	      BOOL	done = NO;

	      thischar = (*caiImp)(self, caiSel, start);
	      switch (thischar)
		{
		  case (unichar)0x000A:
		  case (unichar)0x000D:
		  case (unichar)0x2029:
		    done = YES;
		    break;
		  case (unichar)0x2028:
		    if (flag)
		      {
			done = YES;
			break;
		      }
		    /* Not a terminator here ... falls through to default */
		  default:
		    start--;
		    break;
		}
	      if (done)
		break;
	    }
	  if (start == 0)
	    {
	      /* The loop above never examines index zero, so check it here:
	       * if it holds a terminator the line starts just after it.
	       */
	      thischar = (*caiImp)(self, caiSel, start);
	      switch (thischar)
		{
		  case (unichar)0x000A:
		  case (unichar)0x000D:
		  case (unichar)0x2029:
		    start++;
		    break;
		  case (unichar)0x2028:
		    if (flag)
		      {
			start++;
			break;
		      }
		    /* Not a terminator here ... falls through to default */
		  default:
		    break;
		}
	    }
	  else
	    {
	      /* Stopped on a terminator; the line starts just after it. */
	      start++;
	    }
	  *startIndex = start;
	}
    }

  if (lineEndIndex || contentsEndIndex)
    {
      BOOL found = NO;
      /* Begin the forward scan at the last character of the range (or at
       * its location if the range is empty).
       */
      end = aRange.location;
      if (aRange.length)
        {
          end += (aRange.length - 1);
        }
      while (end < len)
	{
	   thischar = (*caiImp)(self, caiSel, end);
	   switch (thischar)
	     {
	       case (unichar)0x000A:
	       case (unichar)0x000D:
	       case (unichar)0x2029:
		 found = YES;
		 break;
	       case (unichar)0x2028:
		 if (flag)
		   {
		     found = YES;
		     break;
		   }
		 /* Not a terminator here ... falls through to default */
	       default:
		 break;
	     }
	   /* After this, end is the index following the character examined,
	    * so on exit it is either just past a terminator or equal to len.
	    */
	   end++;
	   if (found)
	     break;
	}
      termlen = 1;
      if (lineEndIndex)
	{
	  /* A CR immediately followed by LF is a single two character
	   * terminator, so the line end is moved past the LF.
	   */
	  if (end < len
	    && ((*caiImp)(self, caiSel, end-1) == (unichar)0x000D)
	    && ((*caiImp)(self, caiSel, end) == (unichar)0x000A))
	    {
	      *lineEndIndex = ++end;
	      termlen = 2;
	    }
	  else
	    {
	      *lineEndIndex = end;
	    }
	}
      if (contentsEndIndex)
	{
	  if (found)
	    {
	      /* The contents end where the terminator begins.  NB. end and
	       * termlen were only adjusted for CR-LF above if lineEndIndex
	       * was supplied; either way this is the index of the CR.
	       */
	      *contentsEndIndex = end-termlen;
	    }
	  else
	    {
	      /* xxx OPENSTEP documentation does not say what to do if last
		 line is not terminated. Assume this */
	      *contentsEndIndex = end;
	    }
	}
    }
}
3437
3438/**
3439 * Determines the smallest range of lines containing aRange and returns
3440 * the locations in that range.<br />
3441 * Lines are delimited by any of these character sequences, the longest
3442 * (CRLF) sequence preferred.
3443 * <list>
3444 *   <item>U+000A (linefeed)</item>
3445 *   <item>U+000D (carriage return)</item>
3446 *   <item>U+2028 (Unicode line separator)</item>
3447 *   <item>U+2029 (Unicode paragraph separator)</item>
3448 *   <item>U+000D U+000A (CRLF)</item>
3449 * </list>
3450 * The index of the first character of the line at or before aRange is
3451 * returned in startIndex.<br />
3452 * The index of the first character of the next line after the line terminator
3453 * is returned in endIndex.<br />
3454 * The index of the last character before the line terminator is returned
3455 * contentsEndIndex.<br />
3456 * Raises an NSRangeException if the range is invalid, but permits the index
3457 * arguments to be null pointers (in which case no value is returned in that
3458 * argument).
3459 */
3460- (void) getLineStart: (NSUInteger *)startIndex
3461                  end: (NSUInteger *)lineEndIndex
3462          contentsEnd: (NSUInteger *)contentsEndIndex
3463	     forRange: (NSRange)aRange
3464{
3465  [self _getStart: startIndex
3466	      end: lineEndIndex
3467      contentsEnd: contentsEndIndex
3468	 forRange: aRange
3469	  lineSep: YES];
3470}
3471
/**
 * Determines the smallest range of paragraphs containing range and returns
 * the locations in that range.<br />
 * This behaves like -getLineStart:end:contentsEnd:forRange: except that
 * U+2028 (Unicode line separator) is not treated as a terminator.<br />
 * Any of the index arguments may be a null pointer, in which case no value
 * is returned in that argument.
 */
- (void) getParagraphStart: (NSUInteger *)startIndex
                       end: (NSUInteger *)parEndIndex
               contentsEnd: (NSUInteger *)contentsEndIndex
                  forRange: (NSRange)range
{
  /* lineSep: NO stops U+2028 from counting as a terminator. */
  [self _getStart: startIndex
	      end: parEndIndex
      contentsEnd: contentsEndIndex
         forRange: range
	  lineSep: NO];
}
3483
3484// Changing Case
3485
3486/**
3487 * Returns version of string in which each whitespace-delimited <em>word</em>
3488 * is capitalized (not every letter).  Conversion to capitals is done in a
3489 * unicode-compliant manner but there may be exceptional cases where behavior
3490 * is not what is desired.
3491 */
3492// xxx There is more than this in word capitalization in Unicode,
3493// but this will work in most cases
3494- (NSString*) capitalizedString
3495{
3496  unichar	*s;
3497  unsigned	count = 0;
3498  BOOL		found = YES;
3499  unsigned	len = [self length];
3500
3501  if (len == 0)
3502    return IMMUTABLE(self);
3503  if (whitespaceBitmapRep == NULL)
3504    setupWhitespace();
3505
3506  s = NSZoneMalloc([self zone], sizeof(unichar)*len);
3507  [self getCharacters: s range: ((NSRange){0, len})];
3508  while (count < len)
3509    {
3510      if (GS_IS_WHITESPACE(s[count]))
3511	{
3512	  count++;
3513	  found = YES;
3514	  while (count < len
3515	    && GS_IS_WHITESPACE(s[count]))
3516	    {
3517	      count++;
3518	    }
3519	}
3520      if (count < len)
3521	{
3522	  if (found)
3523	    {
3524	      s[count] = uni_toupper(s[count]);
3525	      count++;
3526	    }
3527	  else
3528	    {
3529	      while (count < len
3530		&& !GS_IS_WHITESPACE(s[count]))
3531		{
3532		  s[count] = uni_tolower(s[count]);
3533		  count++;
3534		}
3535	    }
3536	}
3537      found = NO;
3538    }
3539  return AUTORELEASE([[NSString allocWithZone: NSDefaultMallocZone()]
3540    initWithCharactersNoCopy: s length: len freeWhenDone: YES]);
3541}
3542
3543/**
3544 * Returns a copy of the receiver with all characters converted
3545 * to lowercase.
3546 */
3547- (NSString*) lowercaseString
3548{
3549  static NSCharacterSet	*uc = nil;
3550  unichar	*s;
3551  unsigned	count;
3552  NSRange	start;
3553  unsigned	len = [self length];
3554
3555  if (len == 0)
3556    {
3557      return IMMUTABLE(self);
3558    }
3559  if (uc == nil)
3560    {
3561      uc = RETAIN([NSCharacterSet uppercaseLetterCharacterSet]);
3562    }
3563  start = [self rangeOfCharacterFromSet: uc
3564				options: NSLiteralSearch
3565				  range: ((NSRange){0, len})];
3566  if (start.length == 0)
3567    {
3568      return IMMUTABLE(self);
3569    }
3570  s = NSZoneMalloc([self zone], sizeof(unichar)*len);
3571  [self getCharacters: s range: ((NSRange){0, len})];
3572  for (count = start.location; count < len; count++)
3573    {
3574      s[count] = uni_tolower(s[count]);
3575    }
3576  return AUTORELEASE([[NSStringClass allocWithZone: NSDefaultMallocZone()]
3577    initWithCharactersNoCopy: s length: len freeWhenDone: YES]);
3578}
3579
3580/**
3581 * Returns a copy of the receiver with all characters converted
3582 * to uppercase.
3583 */
3584- (NSString*) uppercaseString
3585{
3586  static NSCharacterSet	*lc = nil;
3587  unichar	*s;
3588  unsigned	count;
3589  NSRange	start;
3590  unsigned	len = [self length];
3591
3592  if (len == 0)
3593    {
3594      return IMMUTABLE(self);
3595    }
3596  if (lc == nil)
3597    {
3598      lc = RETAIN([NSCharacterSet lowercaseLetterCharacterSet]);
3599    }
3600  start = [self rangeOfCharacterFromSet: lc
3601				options: NSLiteralSearch
3602				  range: ((NSRange){0, len})];
3603  if (start.length == 0)
3604    {
3605      return IMMUTABLE(self);
3606    }
3607  s = NSZoneMalloc([self zone], sizeof(unichar)*len);
3608  [self getCharacters: s range: ((NSRange){0, len})];
3609  for (count = start.location; count < len; count++)
3610    {
3611      s[count] = uni_toupper(s[count]);
3612    }
3613  return AUTORELEASE([[NSStringClass allocWithZone: NSDefaultMallocZone()]
3614    initWithCharactersNoCopy: s length: len freeWhenDone: YES]);
3615}
3616
3617// Storing the String
3618
/**
 * Returns <code>self</code> ... the description of a string is the
 * string itself (no copy is made).
 */
- (NSString*) description
{
  return self;
}
3624
3625
3626// Getting C Strings
3627
3628/**
3629 * Returns a pointer to a null terminated string of 16-bit unichar
3630 * The memory pointed to is not owned by the caller, so the
3631 * caller must copy its contents to keep it.
3632 */
3633- (const unichar*) unicharString
3634{
3635  NSMutableData	*data;
3636  unichar	*uniStr;
3637
3638  GSOnceMLog(@"deprecated ... use cStringUsingEncoding:");
3639
3640  data = [NSMutableData dataWithLength: ([self length] + 1) * sizeof(unichar)];
3641  uniStr = (unichar*)[data mutableBytes];
3642  if (uniStr != 0)
3643    {
3644      [self getCharacters: uniStr];
3645    }
3646  return uniStr;
3647}
3648
3649/**
3650 * Returns a pointer to a null terminated string of 8-bit characters in the
3651 * default encoding.  The memory pointed to is not owned by the caller, so the
3652 * caller must copy its contents to keep it.  Raises an
3653 * <code>NSCharacterConversionException</code> if loss of information would
3654 * occur during conversion.  (See -canBeConvertedToEncoding: .)
3655 */
3656- (const char*) cString
3657{
3658  NSData	*d;
3659  NSMutableData	*m;
3660
3661  d = [self dataUsingEncoding: _DefaultStringEncoding
3662	 allowLossyConversion: NO];
3663  if (d == nil)
3664    {
3665      [NSException raise: NSCharacterConversionException
3666		  format: @"unable to convert to cString"];
3667    }
3668  m = [d mutableCopy];
3669  [m appendBytes: "" length: 1];
3670  IF_NO_GC([m autorelease];)
3671  return (const char*)[m bytes];
3672}
3673
3674/**
3675 * Returns a pointer to a null terminated string of characters in the
3676 * specified encoding.<br />
3677 * NB. under GNUstep you can used this to obtain a nul terminated utf-16
3678 * string (sixteen bit characters) as well as eight bit strings.<br />
3679 * The memory pointed to is not owned by the caller, so the
3680 * caller must copy its contents to keep it.<br />
3681 * Raises an <code>NSCharacterConversionException</code> if loss of
3682 * information would occur during conversion.
3683 */
3684- (const char*) cStringUsingEncoding: (NSStringEncoding)encoding
3685{
3686  NSMutableData	*m;
3687
3688  if (NSUnicodeStringEncoding == encoding)
3689    {
3690      unichar	*u;
3691      unsigned	l;
3692
3693      l = [self length];
3694      m = [NSMutableData dataWithLength: (l + 1) * sizeof(unichar)];
3695      u = (unichar*)[m mutableBytes];
3696      [self getCharacters: u];
3697      u[l] = 0;
3698    }
3699  else
3700    {
3701      NSData	*d;
3702
3703      d = [self dataUsingEncoding: encoding allowLossyConversion: NO];
3704      if (d == nil)
3705	{
3706	  [NSException raise: NSCharacterConversionException
3707		      format: @"unable to convert to cString"];
3708	}
3709      m = [[d mutableCopy] autorelease];
3710      [m appendBytes: "" length: 1];
3711    }
3712  return (const char*)[m bytes];
3713}
3714
3715/**
3716 * Returns the number of bytes needed to encode the receiver in the
3717 * specified encoding (without adding a nul character terminator).<br />
3718 * Returns 0 if the conversion is not possible.
3719 */
3720- (NSUInteger) lengthOfBytesUsingEncoding: (NSStringEncoding)encoding
3721{
3722  NSData	*d;
3723
3724  d = [self dataUsingEncoding: encoding allowLossyConversion: NO];
3725  return [d length];
3726}
3727
3728/**
3729 * Returns a size guaranteed to be large enough to encode the receiver in the
3730 * specified encoding (without adding a nul character terminator).  This may
3731 * be larger than the actual number of bytes needed.
3732 */
3733- (NSUInteger) maximumLengthOfBytesUsingEncoding: (NSStringEncoding)encoding
3734{
3735  if (encoding == NSUnicodeStringEncoding)
3736    return [self length] * 2;
3737  if (encoding == NSUTF8StringEncoding)
3738    return [self length] * 6;
3739  if (encoding == NSUTF7StringEncoding)
3740    return [self length] * 8;
3741  return [self length];				// Assume single byte/char
3742}
3743
3744/**
3745 * Returns a C string converted using the default C string encoding, which may
3746 * result in information loss.  The memory pointed to is not owned by the
3747 * caller, so the caller must copy its contents to keep it.
3748 */
3749- (const char*) lossyCString
3750{
3751  NSData	*d;
3752  NSMutableData	*m;
3753
3754  d = [self dataUsingEncoding: _DefaultStringEncoding
3755         allowLossyConversion: YES];
3756  m = [d mutableCopy];
3757  [m appendBytes: "" length: 1];
3758  IF_NO_GC([m autorelease];)
3759  return (const char*)[m bytes];
3760}
3761
3762/**
3763 * Returns null-terminated UTF-8 version of this unicode string.  The char[]
3764 * memory comes from an autoreleased object, so it will eventually go out of
3765 * scope.
3766 */
3767- (const char *) UTF8String
3768{
3769  NSData	*d;
3770  NSMutableData	*m;
3771
3772  d = [self dataUsingEncoding: NSUTF8StringEncoding
3773         allowLossyConversion: NO];
3774  m = [d mutableCopy];
3775  [m appendBytes: "" length: 1];
3776  IF_NO_GC([m autorelease];)
3777  return (const char*)[m bytes];
3778}
3779
3780/**
3781 *  Returns length of a version of this unicode string converted to bytes
3782 *  using the default C string encoding.  If the conversion would result in
3783 *  information loss, the results are unpredictable.  Check
3784 *  -canBeConvertedToEncoding: first.
3785 */
3786- (NSUInteger) cStringLength
3787{
3788  NSData	*d;
3789
3790  d = [self dataUsingEncoding: _DefaultStringEncoding
3791         allowLossyConversion: NO];
3792  return [d length];
3793}
3794
3795/**
3796 * Deprecated ... do not use.<br />.
3797 * Use -getCString:maxLength:encoding: instead.
3798 */
3799- (void) getCString: (char*)buffer
3800{
3801  [self getCString: buffer maxLength: NSMaximumStringLength
3802	     range: ((NSRange){0, [self length]})
3803    remainingRange: NULL];
3804}
3805
3806/**
3807 * Deprecated ... do not use.<br />.
3808 * Use -getCString:maxLength:encoding: instead.
3809 */
3810- (void) getCString: (char*)buffer
3811	  maxLength: (NSUInteger)maxLength
3812{
3813  [self getCString: buffer maxLength: maxLength
3814	     range: ((NSRange){0, [self length]})
3815    remainingRange: NULL];
3816}
3817
3818/**
3819 * Retrieve up to maxLength bytes from the receiver into the buffer.<br />
3820 * In GNUstep, this method implements the actual behavior of the MacOS-X
3821 * method rather than it's documented behavior ...<br />
3822 * The maxLength argument must be the size (in bytes) of the area of
3823 * memory pointed to by the buffer argument.<br />
3824 * Returns YES on success.<br />
3825 * Returns NO if maxLength is too small to hold the entire string
3826 * including a terminating nul character.<br />
3827 * If it returns NO, the terminating nul will <em>not</em> have been
3828 * written to the buffer.<br />
3829 * Raises an exception if the string can not be converted to the
3830 * specified encoding without loss of information.<br />
3831 * eg. If the receiver is @"hello" then the provided buffer must be
3832 * at least six bytes long and the value of maxLength must be at least
3833 * six if NSASCIIStringEncoding is requested, but they must be at least
3834 * twelve if NSUnicodeStringEncoding is requested.
3835 */
3836- (BOOL) getCString: (char*)buffer
3837	  maxLength: (NSUInteger)maxLength
3838	   encoding: (NSStringEncoding)encoding
3839{
3840  if (0 == maxLength || 0 == buffer) return NO;
3841  if (encoding == NSUnicodeStringEncoding)
3842    {
3843      unsigned	length = [self length];
3844
3845      if (maxLength > length * sizeof(unichar))
3846	{
3847	  unichar	*ptr = (unichar*)(void*)buffer;
3848
3849	  maxLength = (maxLength - 1) / sizeof(unichar);
3850	  [self getCharacters: ptr
3851			range: NSMakeRange(0, maxLength)];
3852	  ptr[maxLength] = 0;
3853	  return YES;
3854	}
3855      return NO;
3856    }
3857  else
3858    {
3859      NSData	*d = [self dataUsingEncoding: encoding];
3860      unsigned	length = [d length];
3861      BOOL	result = (length < maxLength) ? YES : NO;
3862
3863      if (d == nil)
3864        {
3865	  [NSException raise: NSCharacterConversionException
3866		      format: @"Can't convert to C string."];
3867	}
3868      if (length >= maxLength)
3869        {
3870          length = maxLength-1;
3871	}
3872      memcpy(buffer, [d bytes], length);
3873      buffer[length] = '\0';
3874      return result;
3875    }
3876}
3877
3878/**
3879 * Deprecated ... do not use.<br />.
3880 * Use -getCString:maxLength:encoding: instead.
3881 */
3882- (void) getCString: (char*)buffer
3883	  maxLength: (NSUInteger)maxLength
3884	      range: (NSRange)aRange
3885     remainingRange: (NSRange*)leftoverRange
3886{
3887  NSString	*s;
3888
3889  /* As this is a deprecated method, keep things simple (but inefficient)
3890   * by copying the receiver to a new instance of a base library built-in
3891   * class, and use the implementation provided by that class.
3892   * We need an autorelease to avoid a memory leak if there is an exception.
3893   */
3894  s = AUTORELEASE([(NSString*)defaultPlaceholderString initWithString: self]);
3895  [s getCString: buffer
3896      maxLength: maxLength
3897	  range: aRange
3898 remainingRange: leftoverRange];
3899}
3900
3901
3902// Getting Numeric Values
3903
/**
 * Returns the string's content as a boolean.<br />
 * Leading whitespace, '0', '-' and '+' characters are skipped.  The result
 * is YES if the first character after those is one of the digits '1' to
 * '9' or one of 'y', 'Y', 't', 'T', and NO in all other cases (including
 * an empty string).
 */
- (BOOL) boolValue
{
  NSUInteger	length = [self length];

  if (length > 0)
    {
      NSUInteger	index;
      SEL	sel = @selector(characterAtIndex:);
      unichar	(*imp)(NSString*, SEL, NSUInteger)
	= (unichar (*)(NSString*, SEL, NSUInteger))
	[self methodForSelector: sel];

      for (index = 0; index < length; index++)
	{
	  unichar	c = (*imp)(self, sel, index);

	  /* A nul character must be rejected explicitly because strchr()
	   * treats the terminator of its string as a match for zero.
	   * Rejecting anything above 'y' also keeps the argument of
	   * isspace() within the range it is defined for.
	   */
	  if (0 == c || c > 'y')
	    {
	      break;
	    }
          if (strchr("123456789yYtT", c) != 0)
	    {
	      return YES;
	    }
	  if (!isspace(c) && c != '0' && c != '-' && c != '+')
	    {
	      break;
	    }
	}
    }
  return NO;
}
3934
3935/**
3936 * Returns the string's content as a decimal.<br />
3937 * Undocumented feature of Aplle Foundation.
3938 */
3939- (NSDecimal) decimalValue
3940{
3941  NSDecimal     result;
3942
3943  NSDecimalFromString(&result, self, nil);
3944  return result;
3945}
3946
3947/**
3948 * Returns the string's content as a double.  Skips leading whitespace.<br />
3949 * Conversion is not localised (i.e. uses '.' as the decimal separator).<br />
3950 * Returns 0.0 on underflow or if the string does not contain a number.
3951 */
3952- (double) doubleValue
3953{
3954  unichar	buf[32];
3955  double	d = 0.0;
3956  NSRange	r;
3957
3958  setupNonspace();
3959  r = [self rangeOfCharacterFromSet: nonspace];
3960  if (NSNotFound == r.location) return 0.0;
3961  r.length = [self length] - r.location;
3962  if (r.length > 32) r.length = 32;
3963  [self getCharacters: buf range: r];
3964  GSScanDouble(buf, r.length, &d);
3965  return d;
3966}
3967
3968/**
3969 * Returns the string's content as a float.  Skips leading whitespace.<br />
3970 * Conversion is not localised (i.e. uses '.' as the decimal separator).<br />
3971 * Returns 0.0 on underflow or if the string does not contain a number.
3972 */
3973- (float) floatValue
3974{
3975  unichar	buf[32];
3976  double	d = 0.0;
3977  NSRange	r;
3978
3979  setupNonspace();
3980  r = [self rangeOfCharacterFromSet: nonspace];
3981  if (NSNotFound == r.location) return 0.0;
3982  r.length = [self length] - r.location;
3983  if (r.length > 32) r.length = 32;
3984  [self getCharacters: buf range: r];
3985  GSScanDouble(buf, r.length, &d);
3986  return (float)d;
3987}
3988
/**
 * <p>Returns the string's content as an int.<br/>
 * Leading whitespace is skipped.  Text starting with '-' is converted
 * with atoi(), anything else with strtoul() (base 10) and the result is
 * truncated to int.<br/>
 * Current implementation uses C runtime library functions, which do not
 * detect conversion errors -- use with care!</p>
 */
- (int) intValue
{
  const char    *ptr = [self UTF8String];

  if (ptr == 0)
    {
      return 0;         // No UTF-8 representation available.
    }
  /* The cast matters:  UTF-8 bytes above 0x7F are negative where char is
   * signed, and passing a negative value to isspace() is undefined.
   */
  while (isspace((unsigned char)*ptr))
    {
      ptr++;
    }
  if ('-' == *ptr)
    {
      return (int)atoi(ptr);
    }
  else
    {
      uint64_t  v;

      v = strtoul(ptr, 0, 10);
      return (int)v;
    }
}
4014
/**
 * Returns the string's content as an NSInteger.<br />
 * Leading whitespace is skipped.  Text starting with '-' is converted
 * with atoll(), anything else with strtoull() (base 10) and the result
 * is converted to NSInteger.  Conversion errors are not detected.
 */
- (NSInteger) integerValue
{
  const char    *ptr = [self UTF8String];

  if (ptr == 0)
    {
      return 0;         // No UTF-8 representation available.
    }
  /* The cast matters:  UTF-8 bytes above 0x7F are negative where char is
   * signed, and passing a negative value to isspace() is undefined.
   */
  while (isspace((unsigned char)*ptr))
    {
      ptr++;
    }
  if ('-' == *ptr)
    {
      return (NSInteger)atoll(ptr);
    }
  else
    {
      uint64_t  v;

      v = (uint64_t)strtoull(ptr, 0, 10);
      return (NSInteger)v;
    }
}
4035
/**
 * Returns the string's content as a long long.<br />
 * Leading whitespace is skipped.  Text starting with '-' is converted
 * with atoll(), anything else with strtoull() (base 10) and the result
 * is converted to long long.  Conversion errors are not detected.
 */
- (long long) longLongValue
{
  const char    *ptr = [self UTF8String];

  if (ptr == 0)
    {
      return 0;         // No UTF-8 representation available.
    }
  /* The cast matters:  UTF-8 bytes above 0x7F are negative where char is
   * signed, and passing a negative value to isspace() is undefined.
   */
  while (isspace((unsigned char)*ptr))
    {
      ptr++;
    }
  if ('-' == *ptr)
    {
      return atoll(ptr);
    }
  else
    {
      unsigned long long        l;

      l = strtoull(ptr, 0, 10);
      return (long long)l;
    }
}
4056
4057// Working With Encodings
4058
/**
 * <p>
 *   Returns the encoding used by every method which accepts a C string.
 *   The value is worked out automatically from the environment of the
 *   program and there is no programmatic way to change it.
 * </p>
 * <p>
 *   Do <em>NOT</em> override this method hoping to change the encoding
 *   in use ... that will not work.
 * </p>
 * <p>
 *   In GNUstep the encoding comes from the initial value of the
 *   <code>GNUSTEP_STRING_ENCODING</code> environment variable.
 *   When that variable is not defined,
 *   <code>NSISOLatin1StringEncoding</code> is assumed.
 * </p>
 */
+ (NSStringEncoding) defaultCStringEncoding
{
  NSStringEncoding      enc = _DefaultStringEncoding;

  return enc;
}
4080
/**
 * Returns a pointer to the list of all string encodings which are
 * available; the list ends with a null value.
 */
+ (NSStringEncoding*) availableStringEncodings
{
  NSStringEncoding      *encodings = GSPrivateAvailableEncodings();

  return encodings;
}
4089
/**
 * Returns the localized name of the specified encoding.<br />
 * The name supplied by GSPrivateEncodingName() is looked up in the
 * gnustep-base library bundle and is also used as the fallback value.
 */
+ (NSString*) localizedNameOfStringEncoding: (NSStringEncoding)encoding
{
  /* This ought to be the path to a localizable.strings file.
   * Until such a file exists, we simply ensure that the bundle
   * has been initialized.
   */
  NSBundle      *bundle = [NSBundle bundleForLibrary: @"gnustep-base"];
  NSString      *name = GSPrivateEncodingName(encoding);

  return [bundle localizedStringForKey: name value: name table: nil];
}
4110
/**
 *  Reports whether the receiver can be converted to the given encoding
 *  without loss of information, by attempting a strict (non-lossy)
 *  conversion and checking that it produced data.
 */
- (BOOL) canBeConvertedToEncoding: (NSStringEncoding)encoding
{
  if ([self dataUsingEncoding: encoding allowLossyConversion: NO] == nil)
    {
      return NO;
    }
  return YES;
}
4121
/**
 *  Converts the receiver to bytes in the given encoding without
 *  permitting lossy conversion; the result is nil where the conversion
 *  would lose information.
 */
- (NSData*) dataUsingEncoding: (NSStringEncoding)encoding
{
  NSData        *strict;

  strict = [self dataUsingEncoding: encoding allowLossyConversion: NO];
  return strict;
}
4130
/**
 *  Converts string to a byte array in the given encoding.  If flag is NO,
 *  nil would be returned if this would result in information loss.<br />
 *  An empty receiver produces empty data.<br />
 *  For NSUnicodeStringEncoding the data starts with a byte order mark;
 *  when flag is YES and the text is not valid, the data holds only the
 *  characters found valid before the first problem.<br />
 *  Other encodings are converted using GSFromUnicode(), strictly unless
 *  flag is YES.
 */
- (NSData*) dataUsingEncoding: (NSStringEncoding)encoding
	 allowLossyConversion: (BOOL)flag
{
  unsigned	len = [self length];
  NSData	*d;

  if (len == 0)
    {
      d = [NSDataClass data];
    }
  else if (encoding == NSUnicodeStringEncoding)
    {
      unichar	*u;
      unsigned	l;

      /* One extra slot at the front holds the byte order mark.
       */
      u = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
	(len + 1) * sizeof(unichar));
      *u = byteOrderMark;
      [self getCharacters: u + 1];

      /* Validate the characters of the string itself (not the BOM) so
       * that all 'len' of them are checked and 'l' counts only string
       * characters.  Checking from 'u' would leave the final character
       * unverified and would make a lossy result include the first
       * invalid character.
       * NB. this assumes GSUnicode() returns the number of leading
       * characters found valid.
       */
      l = GSUnicode(u + 1, len, 0, 0);
      if (l == len || flag == YES)
	{
	  /* The data takes over ownership of the buffer:  BOM plus the
	   * 'l' valid characters.
	   */
	  d = [NSDataClass dataWithBytesNoCopy: u
					length: (l + 1) * sizeof(unichar)];
	}
      else
	{
	  d = nil;
	  NSZoneFree(NSDefaultMallocZone(), u);
	}
    }
  else
    {
      unichar		buf[8192];
      unichar		*u = buf;
      unsigned int	options;
      unsigned char	*b = 0;
      unsigned int	l = 0;

      /* Copy the unicode characters of the receiver into a temporary
       * buffer (on the stack for short strings, otherwise on the heap)
       * and let GSFromUnicode() do the work of converting them to the
       * desired encoding.
       */
      if (len >= 4096)
	{
	  u = NSZoneMalloc(NSDefaultMallocZone(), len * sizeof(unichar));
	}
      [self getCharacters: u];
      if (flag == NO)
        {
	  options = GSUniStrict;
	}
      else
        {
	  options = 0;
	}
      if (GSFromUnicode(&b, &l, u, len, encoding, NSDefaultMallocZone(),
	options) == YES)
	{
	  d = [NSDataClass dataWithBytesNoCopy: b length: l];
	}
      else
        {
	  d = nil;
	}
      if (u != buf)
	{
	  NSZoneFree(NSDefaultMallocZone(), u);
	}
    }
  return d;
}
4208
/**
 * Returns the encoding to which the receiver can be converted without
 * loss of information and which gives the most efficient character
 * access.  This implementation always answers NSUnicodeStringEncoding.
 */
- (NSStringEncoding) fastestEncoding
{
  NSStringEncoding      fastest = NSUnicodeStringEncoding;

  return fastest;
}
4217
/**
 * Returns the smallest encoding to which the receiver can be converted
 * without loss of information.  This implementation always answers
 * NSUnicodeStringEncoding.
 */
- (NSStringEncoding) smallestEncoding
{
  NSStringEncoding      smallest = NSUnicodeStringEncoding;

  return smallest;
}
4226
/**
 * Treats the receiver as a partial path and looks for completions of
 * its last path component among the entries produced by enumerating
 * the directory formed by the rest of the path ("." if that is empty).
 * <br />
 * An entry matches if it begins with the last path component (compared
 * after uppercasing both unless flag is YES) and, where filterTypes is
 * supplied, if its path extension is contained in filterTypes.<br />
 * If outputName is not NULL it is set to the longest matching entry
 * (nil when nothing matched).  If outputArray is not NULL it is set to
 * an array of all matching entries.<br />
 * Returns the number of matches.
 */
- (NSUInteger) completePathIntoString: (NSString**)outputName
                        caseSensitive: (BOOL)flag
                     matchesIntoArray: (NSArray**)outputArray
                          filterTypes: (NSArray*)filterTypes
{
  NSString              *directory = [self stringByDeletingLastPathComponent];
  NSString              *partial = [self lastPathComponent];
  NSMutableArray        *found = nil;
  NSDirectoryEnumerator *entries;
  NSString              *candidate;
  unsigned              total = 0;

  if (outputArray != 0)
    {
      found = (NSMutableArray*)[NSMutableArray array];
    }
  if (outputName != NULL)
    {
      *outputName = nil;
    }
  if ([directory length] == 0)
    {
      directory = @".";
    }

  entries = [[NSFileManager defaultManager] enumeratorAtPath: directory];
  while ((candidate = [entries nextObject]) != nil)
    {
      BOOL      prefixed;

      /* The entry must start with the partial name. */
      if (flag == YES)
        {
          prefixed = [candidate hasPrefix: partial];
        }
      else
        {
          prefixed = [[candidate uppercaseString]
            hasPrefix: [partial uppercaseString]];
        }
      if (prefixed == NO)
        {
          continue;
        }

      /* The entry must have one of the wanted extensions (if any). */
      if (filterTypes != nil
        && [filterTypes containsObject: [candidate pathExtension]] == NO)
        {
          continue;
        }

      /* This entry is a completion ... record it. */
      total++;
      if (outputArray != NULL)
        {
          [found addObject: candidate];
        }
      if (outputName != NULL)
        {
          if (*outputName == nil
            || [*outputName length] < [candidate length])
            {
              *outputName = candidate;
            }
        }
    }

  if (outputArray != NULL)
    {
      *outputArray = AUTORELEASE([found copy]);
    }
  return total;
}
4297
/* Retained reference to the default file manager, set up on first use by
 * the -fileSystemRepresentation methods below.
 */
static NSFileManager *fm = nil;
4299
4300#if	defined(_WIN32)
/**
 * Returns the file system representation of the receiver as produced by
 * the default file manager (which is retained and cached on first use).
 */
- (const GSNativeChar*) fileSystemRepresentation
{
  if (nil == fm)
    {
      NSFileManager     *manager = [NSFileManager defaultManager];

      fm = RETAIN(manager);
    }
  return [fm fileSystemRepresentationWithPath: self];
}
4309
/**
 * Copies the file system representation of the receiver, including its
 * terminating nul, into buffer which has room for size characters.<br />
 * Returns NO if size is zero, if no representation is available, or if
 * the representation and its terminator do not fit in the buffer (in
 * that case the buffer contents are not terminated and must not be
 * used).  Returns YES on success.<br />
 * Raises NSInvalidArgumentException if buffer is a null pointer.
 */
- (BOOL) getFileSystemRepresentation: (GSNativeChar*)buffer
			   maxLength: (NSUInteger)size
{
  const unichar	*ptr;
  NSUInteger	i;

  if (size == 0)
    {
      return NO;
    }
  if (buffer == 0)
    {
      [NSException raise: NSInvalidArgumentException
		  format: @"%@ given null pointer",
	NSStringFromSelector(_cmd)];
    }
  ptr = [self fileSystemRepresentation];
  if (ptr == 0)
    {
      return NO;	// Nothing to copy.
    }
  for (i = 0; i < size; i++)
    {
      buffer[i] = ptr[i];
      if (ptr[i] == 0)
	{
	  return YES;	// Terminator copied ... complete.
	}
    }
  /* The buffer filled up before the terminator was reached, so the
   * copy is incomplete and unterminated.
   */
  return NO;
}
4341#else
/**
 * Returns the file system representation of the receiver as produced by
 * the default file manager (which is retained and cached on first use).
 */
- (const GSNativeChar*) fileSystemRepresentation
{
  if (nil == fm)
    {
      NSFileManager     *manager = [NSFileManager defaultManager];

      fm = RETAIN(manager);
    }
  return [fm fileSystemRepresentationWithPath: self];
}
4350
/**
 * Copies the file system representation of the receiver, including its
 * terminating nul, into buffer which has room for size bytes.<br />
 * Returns NO (leaving the buffer untouched) if size is zero, if no
 * representation is available, or if the representation and its
 * terminator do not fit in the buffer.  Returns YES on success.<br />
 * Raises NSInvalidArgumentException if buffer is a null pointer.
 */
- (BOOL) getFileSystemRepresentation: (GSNativeChar*)buffer
			   maxLength: (NSUInteger)size
{
  const char* ptr;

  if (size == 0)
    {
      return NO;
    }
  if (buffer == 0)
    {
      [NSException raise: NSInvalidArgumentException
		  format: @"%@ given null pointer",
	NSStringFromSelector(_cmd)];
    }
  ptr = [self fileSystemRepresentation];
  if (ptr == 0)
    {
      return NO;	// Nothing to copy.
    }
  /* The terminating nul needs a byte too, so a representation of
   * exactly 'size' bytes does not fit:  strncpy() would fill the
   * buffer without terminating it.
   */
  if (strlen(ptr) >= size)
    {
      return NO;
    }
  strncpy(buffer, ptr, size);
  return YES;
}
4374#endif
4375
/**
 * Returns the last component of the receiver treated as a path.<br />
 * Trailing path separators are ignored (but never those belonging to
 * the root).  An empty receiver gives an empty string, and a receiver
 * consisting only of a root gives that root (for a tilde root, without
 * its trailing separator).
 */
- (NSString*) lastPathComponent
{
  unsigned int  end = [self length];
  unsigned int  start;
  NSRange       sep;

  if (0 == end)
    {
      return @"";               // Nothing in the receiver.
    }

  /* Ignore trailing separators, stopping at the end of the root. */
  start = rootOf(self, end);
  while (end > start && pathSepMember([self characterAtIndex: end - 1]) == YES)
    {
      end--;
    }

  if (start == end)
    {
      unsigned int      keep = start;

      /* Only the root remains.  A tilde escape is not a true root, so
       * any separator ending it is not part of the component.
       */
      if ([self characterAtIndex: 0] == '~'
        && pathSepMember([self characterAtIndex: start - 1]) == YES)
        {
          keep = start - 1;
        }
      return [self substringToIndex: keep];
    }

  /* There is something after the root:  the component begins just
   * after the last separator found there (or at the end of the root).
   */
  sep = [self rangeOfCharacterFromSet: pathSeps()
                              options: NSBackwardsSearch
                                range: NSMakeRange(start, end - start)];
  if (sep.length > 0)
    {
      start = NSMaxRange(sep);
    }
  return [self substringWithRange: NSMakeRange(start, end - start)];
}
4420
/**
 * Returns the range running from the paragraph start to the paragraph
 * end reported by -getParagraphStart:end:contentsEnd:forRange: for
 * the supplied range.
 */
- (NSRange) paragraphRangeForRange: (NSRange)range
{
  NSUInteger    first;
  NSUInteger    limit;

  [self getParagraphStart: &first
                      end: &limit
              contentsEnd: NULL
                 forRange: range];
  return NSMakeRange(first, limit - first);
}
4432
/**
 * Returns the extension of the receiver treated as a path:  the text
 * following the last '.' in the last path component, ignoring trailing
 * path separators.<br />
 * Returns an empty string if the receiver is empty, has no dot after
 * the root, or if the last dot directly follows the root or a path
 * separator, or lies before the last path component.
 */
- (NSString*) pathExtension
{
  unsigned int  end = [self length];
  unsigned int  root;
  NSRange       dot;
  NSRange       ext;
  NSRange       sep;

  if (0 == end)
    {
      return @"";
    }
  root = rootOf(self, end);

  /* Ignore trailing path separators. */
  while (end > root && pathSepMember([self characterAtIndex: end - 1]) == YES)
    {
      end--;
    }

  /* Find the last dot after the root.  No dot, or a dot which comes
   * straight after the root or a path separator, means no extension.
   */
  dot = [self rangeOfString: @"."
                    options: NSBackwardsSearch
                      range: NSMakeRange(root, end - root)
                     locale: nil];
  if (dot.length == 0 || dot.location <= root)
    {
      return @"";
    }
  if (pathSepMember([self characterAtIndex: dot.location - 1]) != NO)
    {
      return @"";
    }

  /* The candidate extension is everything after the dot.  If it holds
   * a path separator then the dot belongs to an earlier component and
   * the last component has no extension.
   */
  ext.location = dot.location + 1;
  ext.length = end - ext.location;
  sep = [self rangeOfCharacterFromSet: pathSeps()
                              options: NSBackwardsSearch
                                range: ext];
  if (sep.length != 0)
    {
      return @"";
    }
  return [self substringFromRange: ext];
}
4487
/**
 * Returns the receiver normalized by ICU using the "nfkc" normalizer in
 * compose mode.  Reports 'not implemented' when built without ICU
 * normalization support.
 */
- (NSString *) precomposedStringWithCompatibilityMapping
{
#if !((GS_USE_ICU == 1) && defined(HAVE_UNICODE_UNORM2_H))
  return [self notImplemented: _cmd];
#else
  return [self _normalizedICUStringOfType: "nfkc" mode: UNORM2_COMPOSE];
#endif
}
4496
/**
 * Returns the receiver normalized by ICU using the "nfc" normalizer in
 * compose mode.  Reports 'not implemented' when built without ICU
 * normalization support.
 */
- (NSString *) precomposedStringWithCanonicalMapping
{
#if !((GS_USE_ICU == 1) && defined(HAVE_UNICODE_UNORM2_H))
  return [self notImplemented: _cmd];
#else
  return [self _normalizedICUStringOfType: "nfc" mode: UNORM2_COMPOSE];
#endif
}
4505
/**
 * Returns a new string made by appending aString to the receiver as a
 * path component.<br />
 * If the receiver is empty the result is built from aString alone.
 * Otherwise any leading root of aString is discarded (a leading '~' is
 * kept), the trailing path separators of the receiver are replaced by a
 * single separator (except when appending a rootless component to a two
 * character drive specification while not in unix path handling mode),
 * and aString is added.<br />
 * Finally trailing separators are trimmed and runs of separators
 * outside the root are collapsed to a single pathSepChar().
 */
- (NSString*) stringByAppendingPathComponent: (NSString*)aString
{
  unsigned	originalLength = [self length];
  unsigned	length = originalLength;
  unsigned	aLength = [aString length];
  unsigned	root;
  /* Work buffer on the stack:  room for both strings plus one separator.
   * NOTE(review): the size grows with the input, so very long strings
   * presumably risk exhausting the stack - confirm.
   */
  unichar	buf[length+aLength+1];

  root = rootOf(aString, aLength);

  if (length == 0)
    {
      /* Empty receiver ... the result is just the (tidied) component,
       * so from here on 'root' describes the root of aString.
       */
      [aString getCharacters: buf range: ((NSRange){0, aLength})];
      length = aLength;
      root = rootOf(aString, aLength);
    }
  else
    {
      /* If the 'component' has a leading path separator (or drive spec
       * in windows) then we need to find its length so we can strip it.
       */
      if (root > 0)
	{
	  unichar c = [aString characterAtIndex: 0];

	  if (c == '~')
	    {
	      root = 0;
	    }
	  else if (root > 1 && pathSepMember(c))
	    {
	      int	i;

	      /* Only the leading run of separators within the root is
	       * stripped.
	       */
	      for (i = 1; i < root; i++)
		{
		  c = [aString characterAtIndex: i];
		  if (!pathSepMember(c))
		    {
		      break;
		    }
		}
	      root = i;
	    }
	}

      [self getCharacters: buf range: ((NSRange){0, length})];

      /* We strip back trailing path separators, and replace them with
       * a single one ... except in the case where we have a windows
       * drive specification, and the string being appended does not
       * have a path separator as a root. In that case we just want to
       * append to the drive specification directly, leaving a relative
       * path like c:foo
       */
      if (length != 2 || buf[1] != ':' || GSPathHandlingUnix() == YES
	|| buf[0] < 'A' || buf[0] > 'z' || (buf[0] > 'Z' && buf[0] < 'a')
	|| (root > 0 && pathSepMember([aString characterAtIndex: root-1])))
	{
	  while (length > 0 && pathSepMember(buf[length-1]) == YES)
	    {
	      length--;
	    }
	  buf[length++] = pathSepChar();
	}

      if ((aLength - root) > 0)
	{
	  // appending .. discard root from aString
	  [aString getCharacters: &buf[length]
			   range: ((NSRange){root, aLength-root})];
	  length += aLength-root;
	}
      // Find length of root part of new path.
      root = rootOf(self, originalLength);
    }

  if (length > 0)
    {
      /* Trim trailing path separators as long as they are not part of
       * the root.
       */
      aLength = length - 1;	// Reused as index of the last character.
      while (aLength > root && pathSepMember(buf[aLength]) == YES)
	{
	  aLength--;
	  length--;
	}

      /* Trim multi separator sequences outside root (root may contain an
       * initial // pair if it is a windows UNC path).
       */
      if (length > 0)
	{
	  while (aLength > root)
	    {
	      if (pathSepMember(buf[aLength]) == YES)
		{
		  buf[aLength] = pathSepChar();
		  if (pathSepMember(buf[aLength-1]) == YES)
		    {
		      unsigned	pos;

		      /* Two adjacent separators ... close the gap by
		       * moving the rest of the buffer down one place.
		       */
		      buf[aLength-1] = pathSepChar();
		      for (pos = aLength+1; pos < length; pos++)
			{
			  buf[pos-1] = buf[pos];
			}
		      length--;
		    }
		}
	      aLength--;
	    }
	}
    }
  return [NSStringClass stringWithCharacters: buf length: length];
}
4622
/**
 * Returns a new string made by appending a '.' and aString to the
 * receiver, after removing any trailing path separators from the
 * receiver.<br />
 * Logs a message and returns an empty string if the receiver is empty.
 * Logs a message and returns the receiver unchanged if the receiver
 * consists only of a root, or if aString begins with a root other
 * than a tilde.
 */
- (NSString*) stringByAppendingPathExtension: (NSString*)aString
{
  unsigned      fullLength = [self length];
  unsigned      trimmed = fullLength;
  unsigned      root;
  NSString      *base = self;

  if (0 == fullLength)
    {
      NSLog(@"[%@-%@] cannot append extension '%@' to empty string",
        NSStringFromClass([self class]), NSStringFromSelector(_cmd), aString);
      return @"";               // Need a file name to take an extension.
    }

  /* Ignore trailing path separators (but not those of the root). */
  root = rootOf(self, fullLength);
  while (trimmed > root
    && pathSepMember([self characterAtIndex: trimmed - 1]) == YES)
    {
      trimmed--;
    }
  if (trimmed == root)
    {
      NSLog(@"[%@-%@] cannot append extension '%@' to path '%@'",
        NSStringFromClass([self class]), NSStringFromSelector(_cmd),
        aString, self);
      return IMMUTABLE(self);   // Need a file name to take an extension.
    }

  /* MacOS-X does not allow an extension to begin with a path separator.
   * We go a little further and refuse any extension which starts with
   * a root, unless that root begins with '~'.
   */
  root = rootOf(aString, [aString length]);
  if (root > 0 && [aString characterAtIndex: 0] != '~')
    {
      NSLog(@"[%@-%@] cannot append extension '%@' to path '%@'",
        NSStringFromClass([self class]), NSStringFromSelector(_cmd),
        aString, self);
      return IMMUTABLE(self);   // Need a file name to take an extension.
    }

  if (trimmed != fullLength)
    {
      base = [self substringFromRange: NSMakeRange(0, trimmed)];
    }
  return [base stringByAppendingFormat: @".%@", aString];
}
4673
/**
 * Returns a new string made by removing the last path component from
 * the receiver.<br />
 * An empty receiver, or a receiver which is just a root without a
 * trailing path separator, gives an empty string.  A receiver holding
 * only a root gives that root, unless it begins with a tilde, in which
 * case the result is an empty string.<br />
 * Otherwise runs of path separators after the root are collapsed to a
 * single separator while copying, and the result ends just before the
 * separator which precedes the last component (or is just the root if
 * there is only one component).
 */
- (NSString*) stringByDeletingLastPathComponent
{
  unsigned int	length;
  unsigned int	root;
  unsigned int	end;
  unsigned int	i;

  end = length = [self length];
  if (length == 0)
    {
      return @"";
    }
  i = root = rootOf(self, length);

  /*
   * Any root without a trailing path separator can be deleted
   * as it's either a relative path or a tilde expression.
   */
  if (i == length && pathSepMember([self characterAtIndex: i-1]) == NO)
    {
      return @"";	// Delete relative root
    }

  /*
   * Step past trailing path separators.
   */
  while (end > i && pathSepMember([self characterAtIndex: end-1]) == YES)
    {
      end--;
    }

  /*
   * If all we have left is the root, return that root, except for the
   * special case of a tilde expression ... which may be deleted even
   * when it is followed by a separator.
   */
  if (end == i)
    {
      if ([self characterAtIndex: 0] == '~')
	{
	  return @"";				// Tilde roots may be deleted.
	}
      return [self substringToIndex: i];	// Return root component.
    }
  else
    {
      NSString	*result;
      unichar	*to;
      unsigned	o;
      /* Index (in the output) just after the most recently copied
       * separator, i.e. where the current component starts.
       */
      unsigned	lastComponent = root;
      /* Buffer of twice the size needed:  the first half ('from') holds
       * the original characters, the second half ('to') receives the
       * tidied copy.
       */
      GS_BEGINITEMBUF(from, (end * 2 * sizeof(unichar)), unichar)

      to = from + end;
      [self getCharacters: from range: NSMakeRange(0, end)];
      // The root is copied unchanged.
      for (o = 0; o < root; o++)
	{
	  to[o] = from[o];
	}
      for (i = root; i < end; i++)
	{
	  if (pathSepMember(from[i]))
	    {
	      /* Copy a separator only if something was written since
	       * the last one ... this drops repeated separators.
	       */
	      if (o > lastComponent)
		{
		  to[o++] = from[i];
		  lastComponent = o;
		}
	    }
	  else
	    {
	      to[o++] = from[i];
	    }
	}
      /* Cut the output just before the separator which precedes the
       * last component, or at the root if there was no such separator.
       */
      if (lastComponent > root)
	{
	  o = lastComponent - 1;
	}
      else
	{
	  o = root;
	}
      result = [NSString stringWithCharacters: to length: o];
      GS_ENDITEMBUF();
      return result;
    }
}
4760
/**
 * Returns a new string made by removing the path extension (and the
 * '.' which introduces it) from the receiver, along with any trailing
 * path separators.<br />
 * A receiver which consists only of a root is returned unchanged.
 * If there is no dot after the root, or the last dot precedes the last
 * path separator, only trailing separators are removed.
 */
- (NSString*) stringByDeletingPathExtension
{
  unsigned      end = [self length];
  unsigned      root = rootOf(self, end);
  NSRange       search;
  NSRange       dot;
  NSRange       sep;

  if (root == end)
    {
      return IMMUTABLE(self);
    }

  /* Ignore trailing path separators ... but never enter the root. */
  while (end > root && pathSepMember([self characterAtIndex: end - 1]) == YES)
    {
      end--;
    }
  search = NSMakeRange(root, end - root);

  /* Find the last dot and the last path separator after the root. */
  dot = [self rangeOfString: @"."
                    options: NSBackwardsSearch
                      range: search
                     locale: nil];
  sep = [self rangeOfCharacterFromSet: pathSeps()
                              options: NSBackwardsSearch
                                range: search];

  /* The dot marks an extension only if it lies beyond the root and
   * after the last separator (i.e. inside the last path component).
   */
  if (dot.length > 0 && dot.location > root)
    {
      if (sep.length == 0 || sep.location < dot.location)
        {
          end = dot.location;
        }
    }
  return [self substringToIndex: end];
}
4808
/**
 * Returns a string in which a leading '~' or '~username' in the
 * receiver is replaced by the corresponding home directory.<br />
 * The receiver is returned unchanged if it is empty, does not begin
 * with '~', begins with '~@', consists of '~' and a single ASCII letter
 * before the first path separator (treated as a windows drive
 * specification), or if no home directory can be found.
 */
- (NSString*) stringByExpandingTildeInPath
{
  NSString	*homedir;
  NSRange	firstSlashRange;
  unsigned	length;

  if ((length = [self length]) == 0)
    {
      return IMMUTABLE(self);
    }
  if ([self characterAtIndex: 0] != 0x007E)
    {
      return IMMUTABLE(self);
    }

  /* FIXME ... should remove in future
   * Anything beginning '~@' is assumed to be a windows path specification
   * which can't be expanded.
   */
  if (length > 1 && [self characterAtIndex: 1] == 0x0040)
    {
      return IMMUTABLE(self);
    }

  /* Locate the end of the tilde expression:  the first path separator,
   * or the end of the string if there is none.
   */
  firstSlashRange = [self rangeOfCharacterFromSet: pathSeps()
                                          options: NSLiteralSearch
                                            range: ((NSRange){0, length})];
  if (firstSlashRange.length == 0)
    {
      firstSlashRange.location = length;
    }

  /* FIXME ... should remove in future
   * Anything beginning '~' followed by a single letter is assumed
   * to be a windows drive specification.
   */
  if (firstSlashRange.location == 2)
    {
      unichar	c = [self characterAtIndex: 1];

      /* Drive letters are ASCII.  Test the range explicitly rather than
       * handing a unichar (which may exceed the range of unsigned char)
       * to isalpha(), whose behaviour would then be undefined.
       */
      if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
	{
	  return IMMUTABLE(self);
	}
    }

  if (firstSlashRange.location != 1)
    {
      /* It is of the form `~username/blah/...' or '~username'
       * The user name lies between the tilde and the end of the
       * tilde expression located above.
       */
      NSString	*uname;

      uname = [self substringWithRange:
	((NSRange){1, firstSlashRange.location - 1})];
      homedir = NSHomeDirectoryForUser(uname);
    }
  else
    {
      /* It is of the form `~/blah/...' or is '~' */
      homedir = NSHomeDirectory();
    }

  if (homedir != nil)
    {
      if (firstSlashRange.location < length)
	{
	  return [homedir stringByAppendingPathComponent:
	    [self substringFromIndex: firstSlashRange.location]];
	}
      else
	{
	  return IMMUTABLE(homedir);
	}
    }
  else
    {
      return IMMUTABLE(self);
    }
}
4892
/**
 * Returns a string in which the current user's home directory at the
 * start of the receiver is replaced by '~'.<br />
 * A receiver already beginning with '~' is returned unchanged.  If the
 * receiver does not start with the home directory, the standardized
 * form of the path is tried and, if that cannot be abbreviated either,
 * the standardized path is returned.<br />
 * The home directory only matches as a whole path component, so a path
 * which merely shares a textual prefix with it (eg. '/home/bob2' when
 * the home directory is '/home/bob') is not abbreviated.
 */
- (NSString*) stringByAbbreviatingWithTildeInPath
{
  NSString	*homedir;
  NSString	*path = self;
  NSUInteger	homeLength;
  unsigned	attempt;

  if (YES == [self hasPrefix: @"~"])
    {
      return IMMUTABLE(self);
    }
  homedir = NSHomeDirectory();
  homeLength = [homedir length];
  if (homeLength == 0)
    {
      return IMMUTABLE(self);	// No home directory to abbreviate.
    }

  for (attempt = 0; ; attempt++)
    {
      /* The prefix must end on a component boundary:  either the path is
       * exactly the home directory, or the home directory ends with a
       * path separator, or a path separator follows it in the path.
       */
      if (YES == [path hasPrefix: homedir]
	&& ([path length] == homeLength
	  || pathSepMember([homedir characterAtIndex: homeLength - 1])
	  || pathSepMember([path characterAtIndex: homeLength])))
	{
	  break;
	}
      if (attempt > 0)
	{
	  return IMMUTABLE(path);	// Can't be abbreviated.
	}
      /* OSX compatibility ... we clean up the path to try to get a
       * home directory we can abbreviate.
       */
      path = [path stringByStandardizingPath];
    }

  if ([path length] == homeLength)
    {
      return @"~";
    }
  return [@"~" stringByAppendingPathComponent:
    [path substringFromIndex: homeLength]];
}
4920
/**
 * Returns a string made by truncating or extending the receiver so
 * that it is newLength characters long.  When the result is longer
 * than the receiver, the extra characters are taken from padString,
 * beginning with the character at padIndex and repeating padString
 * from its start as often as is needed.<br />
 * Raises NSInvalidArgumentException if padString is nil or is not a
 * string, and NSRangeException if padIndex does not lie inside
 * padString.
 */
- (NSString*) stringByPaddingToLength: (NSUInteger)newLength
			   withString: (NSString*)padString
		      startingAtIndex: (NSUInteger)padIndex
{
  unsigned              current = [self length];
  unsigned              padLength;
  unsigned              needed;
  NSMutableString       *m;

  if (padString == nil || [padString isKindOfClass: [NSString class]] == NO)
    {
      [NSException raise: NSInvalidArgumentException
	format: @"%@ - Illegal pad string", NSStringFromSelector(_cmd)];
    }
  padLength = [padString length];
  if (padIndex >= padLength)
    {
      [NSException raise: NSRangeException
	format: @"%@ - pad index larger too big", NSStringFromSelector(_cmd)];
    }

  /* The simple cases:  nothing to do, or truncation. */
  if (newLength == current)
    {
      return IMMUTABLE(self);
    }
  if (newLength < current)
    {
      return [self substringToIndex: newLength];
    }

  needed = newLength - current;         // Number of characters to add.

  /* A single piece of the pad string may be enough. */
  if (needed <= (padLength - padIndex))
    {
      NSRange   piece = NSMakeRange(padIndex, needed);

      return [self stringByAppendingString:
        [padString substringWithRange: piece]];
    }

  m = [self mutableCopy];

  /* Start with the tail of the pad string when not starting at zero. */
  if (padIndex > 0)
    {
      NSRange   tail = NSMakeRange(padIndex, padLength - padIndex);

      [m appendString: [padString substringWithRange: tail]];
      needed -= tail.length;
    }

  /* Add as many whole copies of the pad string as will fit.  This may
   * be a small string appended many times, so the implementation of
   * the append method is looked up once only.
   */
  if (needed >= padLength)
    {
      SEL       appSel = @selector(appendString:);
      void      (*appImp)(NSMutableString*, SEL, NSString*);

      appImp = (void (*)(NSMutableString*, SEL, NSString*))
        [m methodForSelector: appSel];
      do
        {
          (*appImp)(m, appSel, padString);
          needed -= padLength;
        }
      while (needed >= padLength);
    }

  /* Finish with the start of the pad string if anything is left. */
  if (needed > 0)
    {
      [m appendString:
        [padString substringWithRange: NSMakeRange(0, needed)]];
    }
  return AUTORELEASE(m);
}
5004
/**
 * Returns a string created by replacing percent escape sequences in the
 * receiver assuming that the resulting data represents characters in
 * the specified encoding.<br />
 * Returns nil if the receiver cannot be represented as ASCII data, if it
 * contains a '%' which is not followed by two hexadecimal digits, or if
 * the result is not a string in the specified encoding.
 */
- (NSString*) stringByReplacingPercentEscapesUsingEncoding: (NSStringEncoding)e
{
  NSMutableData	*d;
  NSString	*s = nil;

  d = [[self dataUsingEncoding: NSASCIIStringEncoding] mutableCopy];
  if (d != nil)
    {
      unsigned char	*p = (unsigned char*)[d mutableBytes];
      unsigned		l = [d length];
      unsigned		i = 0;		// Read position.
      unsigned		j = 0;		// Write position (never beyond i).
      BOOL		valid = YES;

      /* Decode in place:  the output is never longer than the input.
       */
      while (i < l)
	{
	  unsigned char	t = p[i++];

	  if (t == '%')
	    {
	      unsigned char	c = 0;
	      unsigned		n;

	      /* An escape needs exactly two hexadecimal digits.
	       */
	      if (l - i < 2 || !isxdigit(p[i]) || !isxdigit(p[i + 1]))
		{
		  valid = NO;
		  break;
		}
	      for (n = 0; n < 2; n++)
		{
		  t = p[i++];
		  c <<= 4;
		  if (t <= '9')
		    {
		      c |= t - '0';
		    }
		  else if (t <= 'F')
		    {
		      c |= t - 'A' + 10;
		    }
		  else
		    {
		      c |= t - 'a' + 10;
		    }
		}
	      t = c;
	    }
	  p[j++] = t;
	}

      /* Only build a string from well formed input.  Creating one after
       * a malformed escape would not reliably produce the nil result
       * which callers are promised.
       */
      if (valid == YES)
	{
	  [d setLength: j];
	  s = AUTORELEASE([[NSString alloc] initWithData: d encoding: e]);
	}
      RELEASE(d);
    }
  return s;
}
5100
/**
 * Returns a path derived from the receiver by expanding a leading '~'
 * and then resolving symbolic links.<br />
 * An empty receiver yields an empty string.  On windows only the tilde
 * expansion is performed.<br />
 * Elsewhere the path is resolved either with realpath() or, if that is
 * not usable, by walking the path one component at a time with lstat()
 * and readlink().  If resolution fails at any point (component missing,
 * result too long, too many consecutive links) the tilde-expanded path is
 * returned unresolved.<br />
 * Finally, a leading '/private' is dropped from the resolved path if the
 * remainder of the path can still be found with lstat().
 */
- (NSString*) stringByResolvingSymlinksInPath
{
  NSString	*s = self;

  if (0 == [s length])
    {
      return @"";
    }
  if ('~' == [s characterAtIndex: 0])
    {
      s = [s stringByExpandingTildeInPath];
    }
#if defined(_WIN32)
  return IMMUTABLE(s);
#else

{
  #if defined(__GLIBC__) || defined(__FreeBSD__)
  #define GS_MAXSYMLINKS sysconf(_SC_SYMLOOP_MAX)
  #else
  #define GS_MAXSYMLINKS MAXSYMLINKS
  #endif

  #ifndef PATH_MAX
  #define PATH_MAX 1024
  /* Don't use realpath unless we know we have the correct path size limit */
  #ifdef        HAVE_REALPATH
  #undef        HAVE_REALPATH
  #endif
  #endif
  char		newBuf[PATH_MAX];
#ifdef HAVE_REALPATH

  if (realpath([s fileSystemRepresentation], newBuf) == 0)
    return IMMUTABLE(s);
#else
  char		extra[PATH_MAX];
  char		*dest;
  const char	*name = [s fileSystemRepresentation];
  const char	*start;
  const	char	*end;
  unsigned	num_links = 0;
  /* Fetch the link limit once.  sysconf() may report -1 when the limit
   * is indeterminate; comparing that against the unsigned link counter
   * gives the wrong answer, so fall back to a fixed sane limit then.
   */
  long		maxLinks = GS_MAXSYMLINKS;

  if (maxLinks <= 0)
    {
      maxLinks = 40;
    }

  if (name[0] != '/')
    {
      /* Relative path ... resolve it against the current directory.	*/
      if (!getcwd(newBuf, PATH_MAX))
	{
	  return IMMUTABLE(s);	/* Couldn't get directory.	*/
	}
      dest = strchr(newBuf, '\0');
    }
  else
    {
      newBuf[0] = '/';
      dest = &newBuf[1];
    }

  for (start = end = name; *start; start = end)
    {
      struct stat	st;
      int		n;
      int		len;

      /* Elide repeated path separators	*/
      while (*start == '/')
	{
	  start++;
	}
      /* Locate end of path component	*/
      end = start;
      while (*end && *end != '/')
	{
	  end++;
	}
      len = end - start;
      if (len == 0)
	{
	  break;	/* End of path.	*/
	}
      else if (len == 1 && *start == '.')
	{
          /* Elide '/./' sequence by ignoring it.	*/
	}
      else if (len == 2 && strncmp(start, "..", len) == 0)
	{
	  /*
	   * Backup - if we are not at the root, remove the last component.
	   */
	  if (dest > &newBuf[1])
	    {
	      do
		{
		  dest--;
		}
	      while (dest[-1] != '/');
	    }
	}
      else
        {
          if (dest[-1] != '/')
	    {
	      *dest++ = '/';
	    }
          if (&dest[len] >= &newBuf[PATH_MAX])
	    {
	      return IMMUTABLE(s);	/* Resolved name too long.	*/
	    }
          memmove(dest, start, len);
          dest += len;
          *dest = '\0';

          if (lstat(newBuf, &st) < 0)
	    {
	      return IMMUTABLE(s);	/* Unable to stat file.		*/
	    }
          if (S_ISLNK(st.st_mode))
            {
              char	buf[PATH_MAX];
	      int	l;

              if ((long)++num_links > maxLinks)
		{
		  return IMMUTABLE(s);	/* Too many links.	*/
		}
	      /* Leave room for the terminator: readlink() does not add
	       * one and may fill the whole of the buffer it is given.
	       */
              n = readlink(newBuf, buf, PATH_MAX - 1);
              if (n < 0)
		{
		  return IMMUTABLE(s);	/* Couldn't resolve.	*/
		}
              buf[n] = '\0';

	      l = strlen(end);
              if ((n + l) >= PATH_MAX)
		{
		  return IMMUTABLE(s);	/* Path too long.	*/
		}
	      /*
	       * Concatenate the resolved name with the string still to
	       * be processed, and start using the result as input.
	       * The text is assembled in buf first because end may
	       * already point into extra.
	       */
              memcpy(buf + n, end, l);
	      n += l;
	      buf[n] = '\0';
              memcpy(extra, buf, n);
	      extra[n] = '\0';
              name = end = extra;

              if (buf[0] == '/')
		{
		  /*
		   * For an absolute link, we start at root again.
		   */
		  dest = newBuf + 1;
		}
              else
		{
		  /*
		   * Backup - remove the last component.
		   */
		  if (dest > newBuf + 1)
		    {
		      do
			{
			  dest--;
			}
		      while (dest[-1] != '/');
		    }
		}
            }
          else
	    {
	      /* Only consecutive links count towards the limit.	*/
	      num_links = 0;
	    }
        }
    }
  /* Drop a trailing separator unless the result is just the root.	*/
  if (dest > newBuf + 1 && dest[-1] == '/')
    {
      --dest;
    }
  *dest = '\0';
#endif
  if (strncmp(newBuf, "/private/", 9) == 0)
    {
      struct stat	st;

      /* Strip the leading '/private' only if what remains can be found. */
      if (lstat(&newBuf[8], &st) == 0)
	{
	  /* Bytes after '/private', including the terminating nul.	*/
	  int	l = strlen(newBuf) - 7;

	  memmove(newBuf, &newBuf[8], l);
	}
    }
  return [[NSFileManager defaultManager]
   stringWithFileSystemRepresentation: newBuf length: strlen(newBuf)];
}
#endif
}
5298
/**
 * Returns an immutable, standardised copy of the receiver treated as
 * a path.<br />
 * The steps performed are: expand a leading '~', convert all separators
 * to the one for the current path handling mode, remove separators
 * directly after the root, condense runs of separators, remove a trailing
 * '.' component (unless directly after the root), remove '/./' sequences
 * and strip a trailing separator.<br />
 * For absolute paths only, a leading '/private' component is removed and
 * '..' components are removed along with their parent directories.<br />
 * An empty receiver yields an empty string.
 */
- (NSString*) stringByStandardizingPath
{
  NSMutableString	*s;
  NSRange		r;
  unichar		(*caiImp)(NSString*, SEL, NSUInteger);
  unsigned int		l = [self length];
  unichar		c;
  unsigned		root;

  if (l == 0)
    {
      return @"";
    }
  c = [self characterAtIndex: 0];
  if (c == '~')
    {
      s = AUTORELEASE([[self stringByExpandingTildeInPath] mutableCopy]);
    }
  else
    {
      s = AUTORELEASE([self mutableCopy]);
    }

  /* We must always use the standard path separator unless specifically set
   * to use the mswindows one.  That ensures that standardised paths and
   * anything built by adding path components to them use a consistent
   * separator character and can be compared readily using standard string
   * comparisons.
   */
  if (GSPathHandlingWindows() == YES)
    {
      [s replaceString: @"/" withString: @"\\"];
    }
  else
    {
      [s replaceString: @"\\" withString: @"/"];
    }

  l = [s length];
  root = rootOf(s, l);

  caiImp = (unichar (*)())[s methodForSelector: caiSel];

  /* Remove any separators ('/') immediately after the trailing
   * separator in the root (if any).
   */
  if (root > 0 && YES == pathSepMember((*caiImp)(s, caiSel, root-1)))
    {
      unsigned	i;

      for (i = root; i < l; i++)
	{
	  if (NO == pathSepMember((*caiImp)(s, caiSel, i)))
	    {
	      break;
	    }
	}
      if (i > root)
	{
	  r = (NSRange){root, i-root};
	  [s deleteCharactersInRange: r];
	  l -= r.length;
	}
    }

  /* Condense multiple separator ('/') sequences.
   */
  r = (NSRange){root, l-root};
  while ((r = [s rangeOfCharacterFromSet: pathSeps()
				 options: 0
				   range: r]).length == 1)
    {
      /* Extend the range over the whole run of separators ...	*/
      while (NSMaxRange(r) < l
	&& pathSepMember((*caiImp)(s, caiSel, NSMaxRange(r))) == YES)
	{
	  r.length++;
	}
      /* ... then keep the first separator and delete the rest.	*/
      r.location++;
      r.length--;
      if (r.length > 0)
	{
	  [s deleteCharactersInRange: r];
	  l -= r.length;
	}
      r.length = l - r.location;
    }

  /* Remove trailing ('.') as long as it's preceded by a path separator.
   * As a special case for OSX compatibility, we only remove the trailing
   * dot if it's not immediately after the root.
   */
  if (l > root + 1 && (*caiImp)(s, caiSel, l-1) == '.'
    && pathSepMember((*caiImp)(s, caiSel, l-2)) == YES)
    {
      l--;
      [s deleteCharactersInRange: NSMakeRange(l, 1)];
    }

  // Condense ('/./') sequences.
  r = (NSRange){root, l-root};
  while ((r = [s rangeOfString: @"." options: 0 range: r locale: nil]).length
    == 1)
    {
      if (r.location > 0 && r.location < l - 1
	&& pathSepMember((*caiImp)(s, caiSel, r.location-1)) == YES
	&& pathSepMember((*caiImp)(s, caiSel, r.location+1)) == YES)
	{
	  r.length++;		// Delete the '.' and the separator after it.
	  [s deleteCharactersInRange: r];
	  l -= r.length;
	}
      else
	{
	  r.location++;
	}
      r.length = l - r.location;
    }

  // Strip trailing '/' if present.
  if (l > root && pathSepMember([s characterAtIndex: l - 1]) == YES)
    {
      r.length = 1;
      r.location = l - r.length;
      [s deleteCharactersInRange: r];
      l -= r.length;
    }

  if ([s isAbsolutePath] == NO)
    {
      /* Relative paths get no further treatment, but the caller must
       * still not be handed our mutable working copy.
       */
      return IMMUTABLE(s);
    }

  /* Remove a leading `/private' if present ... but only where it is a
   * whole path component followed by more of the path, so that a path
   * like `/privateer/x' (or `/private' on its own) is left intact.
   */
  if ([s hasPrefix: @"/private/"])
    {
      [s deleteCharactersInRange: ((NSRange){0,8})];
      l -= 8;
    }

  /*
   *	For absolute paths, we must
   *	remove '/../' sequences and their matching parent directories.
   */
  r = (NSRange){root, l-root};
  while ((r = [s rangeOfString: @".." options: 0 range: r locale: nil]).length
    == 2)
    {
      /* Only act on a '..' which is a complete path component.	*/
      if (r.location > 0
	&& pathSepMember((*caiImp)(s, caiSel, r.location-1)) == YES
        && (NSMaxRange(r) == l
	  || pathSepMember((*caiImp)(s, caiSel, NSMaxRange(r))) == YES))
	{
	  BOOL	atEnd = (NSMaxRange(r) == l) ? YES : NO;

	  if (r.location > root)
	    {
	      NSRange r2;

	      /* Find the start of the parent component by searching
	       * backwards from the separator preceding the '..'
	       */
	      r.location--;
	      r.length++;
	      r2 = NSMakeRange(root, r.location-root);
	      r = [s rangeOfCharacterFromSet: pathSeps()
				     options: NSBackwardsSearch
				       range: r2];
	      if (r.length == 0)
		{
		  r = r2;	// Location just after root
		  r.length++;
		}
	      else
		{
		  r.length = NSMaxRange(r2) - r.location;
	          r.location++;		// Location Just after last separator
		}
	      r.length += 2;		// Add the `..'
	    }
	  if (NO == atEnd)
	    {
	      r.length++;		// Add the '/' after the '..'
	    }
	  [s deleteCharactersInRange: r];
	  l -= r.length;
	}
      else
	{
	  r.location++;
	}
      r.length = l - r.location;
    }

  return IMMUTABLE(s);
}
5491
/**
 * Returns the receiver with leading and trailing characters removed
 * wherever those characters are members of aSet.<br />
 * Trimming stops at the first character (from either end) which is not
 * in aSet, so characters in the middle of the string are untouched.<br />
 * A receiver made up solely of characters in aSet produces an empty
 * string.<br />
 * Raises NSInvalidArgumentException if aSet is nil.<br />
 */
- (NSString*) stringByTrimmingCharactersInSet: (NSCharacterSet*)aSet
{
  unsigned	total = [self length];
  unsigned	head = 0;
  unsigned	tail = total;

  if (nil == aSet)
    {
      [NSException raise: NSInvalidArgumentException
	format: @"%@ - nil character set argument", NSStringFromSelector(_cmd)];
    }
  if (total > 0)
    {
      /* Cache the method implementations to avoid repeated dispatch. */
      unichar	(*charAt)(NSString*, SEL, NSUInteger);
      BOOL	(*isMember)(id, SEL, unichar);

      charAt = (unichar (*)())[self methodForSelector: caiSel];
      isMember = (BOOL(*)(id,SEL,unichar)) [aSet methodForSelector: cMemberSel];

      /* Work inwards from the end of the string first ... */
      for (; tail > 0; tail--)
	{
	  unichar	ch = (*charAt)(self, caiSel, tail - 1);

	  if (NO == (*isMember)(aSet, cMemberSel, ch))
	    {
	      break;
	    }
	}
      /* ... and then from the start, never passing the trimmed end. */
      for (; head < tail; head++)
	{
	  unichar	ch = (*charAt)(self, caiSel, head);

	  if (NO == (*isMember)(aSet, cMemberSel, ch))
	    {
	      break;
	    }
	}
    }

  if (head == tail)
    {
      /* Everything trimmed ... unless there was nothing to trim at all. */
      if (0 == head && total == tail)
	{
	  return IMMUTABLE(self);
	}
      return @"";
    }
  if (0 == head && total == tail)
    {
      return IMMUTABLE(self);	// Nothing was removed.
    }
  return [self substringFromRange: NSMakeRange(head, tail - head)];
}
5548
5549// private methods for Unicode level 3 implementation
/* Counts the characters in the receiver for which uni_isnonsp()
 * returns false.
 */
- (int) _baseLength
{
  unsigned	total = [self length];
  int		bases = 0;

  if (total > 0)
    {
      unichar	(*charAt)(NSString*, SEL, NSUInteger);
      unsigned	idx;

      charAt = (unichar (*)())[self methodForSelector: caiSel];
      for (idx = 0; idx < total; idx++)
	{
	  if (uni_isnonsp((*charAt)(self, caiSel, idx)))
	    {
	      continue;		// Not counted.
	    }
	  bases++;
	}
    }
  return bases;
}
5571
/**
 * Builds a path by starting with the first object in components and
 * appending each of the remaining objects in turn as a path component.<br />
 * An empty array gives an empty string, and an empty first component
 * is replaced by the path separator.
 */
+ (NSString*) pathWithComponents: (NSArray*)components
{
  unsigned	total = [components count];
  unsigned	idx = 0;
  NSString	*path;

  if (0 == total)
    {
      return @"";
    }
  path = [components objectAtIndex: 0];
  if (0 == [path length])
    {
      path = pathSepString();
    }
  while (++idx < total)
    {
      NSString	*next = [components objectAtIndex: idx];

      path = [path stringByAppendingPathComponent: next];
    }
  return path;
}
5594
/**
 * Returns YES if the receiver is considered to be an absolute path and
 * NO otherwise.<br />
 * An empty string is relative, while a string starting with a tilde is
 * treated as absolute.
 */
- (BOOL) isAbsolutePath
{
  unsigned	length = [self length];
  unichar	first;

  if (0 == length)
    {
      return NO;		// Nothing there ... relative
    }
  first = [self characterAtIndex: 0];
  if ((unichar)'~' == first)
    {
      return YES;		// Leading tilde ... absolute
    }

  /*
   * A leading path separator means an absolute path ... except when
   * in windows mode, or on windows and not in unix mode.
   */
  if (pathSepChar() == first)
    {
#if defined(_WIN32)
      if (GSPathHandlingUnix() == YES)
	{
	  return YES;
	}
#else
      if (GSPathHandlingWindows() == NO)
	{
	  return YES;
	}
#endif
     }

  /*
   * A root longer than two characters has to be either a drive
   * specification with a slash or a UNC path ... both absolute.
   * Anything else ('C:' or '\' or no root at all, or a '/' in windows
   * mode since a single slash in unix mode was dealt with above) is
   * a relative path.
   */
  return (rootOf(self, length) > 2) ? YES : NO;
}
5648
/**
 * Splits the receiver into an array of path components.<br />
 * The root (if any) is the first component, empty components between
 * adjacent separators are skipped, and if the path ends with a separator
 * which was not part of the root then a separator string is added as
 * the final component.<br />
 * An empty receiver gives an empty array.
 */
- (NSArray*) pathComponents
{
  NSString		*path = self;
  unsigned int		length = [path length];
  unsigned int		root;
  unsigned int		pos;
  NSMutableArray	*parts;
  NSArray		*result;

  if (0 == length)
    {
      return [NSArray array];
    }
  root = rootOf(path, length);
  parts = [[NSMutableArray alloc] initWithCapacity: 8];
  if (root > 0)
    {
      [parts addObject: [path substringToIndex: root]];
    }

  /* Walk the remainder of the path one separator at a time. */
  for (pos = root; pos < length;)
    {
      NSRange	sep;

      sep = [path rangeOfCharacterFromSet: pathSeps()
				  options: NSLiteralSearch
				    range: NSMakeRange(pos, length - pos)];
      if (0 == sep.length)
	{
	  /* No more separators ... the rest is the last component. */
	  [parts addObject: [path substringFromIndex: pos]];
	  break;
	}
      if (sep.location > pos)
	{
	  NSRange	piece = NSMakeRange(pos, sep.location - pos);

	  [parts addObject: [path substringWithRange: piece]];
	}
      pos = NSMaxRange(sep);
    }

  /*
   * A trailing separator which is not part of the root is reported
   * as a final component of its own.
   */
  if (length > root && pathSepMember([path characterAtIndex: length - 1]))
    {
      [parts addObject: pathSepString()];
    }

  result = [parts copy];
  RELEASE(parts);
  return AUTORELEASE(result);
}
5705
/**
 * Returns an array in which each element is the result of appending
 * the corresponding element of paths to the receiver as a path component.
 */
- (NSArray*) stringsByAppendingPaths: (NSArray*)paths
{
  unsigned		total = [paths count];
  unsigned		idx = 0;
  NSMutableArray	*joined;
  NSArray		*result;

  joined = [[NSMutableArray allocWithZone: NSDefaultMallocZone()]
    initWithCapacity: total];
  while (idx < total)
    {
      NSString	*tail = [paths objectAtIndex: idx++];

      [joined addObject: [self stringByAppendingPathComponent: tail]];
    }
  result = [joined copy];
  RELEASE(joined);
  return AUTORELEASE(result);
}
5725
/**
 * Returns an autoreleased string built from format and the arguments
 * following it, using the default locale.<br />
 * Returns nil if format is nil.
 */
+ (NSString*) localizedStringWithFormat: (NSString*) format, ...
{
  id		result = nil;
  va_list	args;

  va_start(args, format);
  if (format != nil)
    {
      id	made = [self allocWithZone: NSDefaultMallocZone()];

      made = [made initWithFormat: format
			   locale: GSPrivateDefaultLocale()
			arguments: args];
      result = AUTORELEASE(made);
    }
  va_end(args);
  return result;
}
5747
/**
 * Compares the whole of the receiver with aString, ignoring differences
 * in case.  This is a convenience for -compare:options:range: using the
 * <code>NSCaseInsensitiveSearch</code> option.<br />
 * Comparing a string with itself returns NSOrderedSame immediately.
 */
- (NSComparisonResult) caseInsensitiveCompare: (NSString*)aString
{
  NSRange	whole;

  if (self == aString)
    {
      return NSOrderedSame;
    }
  whole = NSMakeRange(0, [self length]);
  return [self compare: aString
	       options: NSCaseInsensitiveSearch
		 range: whole];
}
5760
5761/**
5762 * <p>Compares this instance with string. If locale is an NSLocale
5763 * instance and ICU is available, performs a comparison using the
5764 * ICU collator for that locale. If locale is an instance of a class
5765 * other than NSLocale, perform a comparison using +[NSLocale currentLocale].
5766 * If locale is nil, or ICU is not available, use a POSIX-style
5767 * collation (for example, latin capital letters A-Z are ordered before
 * all of the lowercase letters, a-z.)
5769 * </p>
5770 * <p>mask may be <code>NSLiteralSearch</code>, which requests a literal
5771 * byte-by-byte
5772 * comparison, which is fastest but may return inaccurate results in cases
5773 * where two different composed character sequences may be used to express
5774 * the same character; <code>NSCaseInsensitiveSearch</code>, which ignores case
5775 * differences; <code>NSDiacriticInsensitiveSearch</code>
5776 * which ignores accent differences;
5777 * <code>NSNumericSearch</code>, which sorts groups of digits as numbers,
5778 * so "abc2" sorts before "abc100".
5779 * </p>
5780 * <p>compareRange refers to this instance, and should be set to 0..length
5781 * to compare the whole string.
5782 * </p>
5783 * <p>Returns <code>NSOrderedAscending</code>, <code>NSOrderedDescending</code>,
5784 * or <code>NSOrderedSame</code>, depending on whether this instance occurs
5785 * before or after string in lexical order, or is equal to it.
5786 * </p>
5787 */
- (NSComparisonResult) compare: (NSString *)string
		       options: (NSUInteger)mask
			 range: (NSRange)compareRange
			locale: (id)locale
{
  GS_RANGE_CHECK(compareRange, [self length]);
  if (nil == string)
    {
      [NSException raise: NSInvalidArgumentException
		  format: @"compare with nil"];
    }

#if GS_USE_ICU == 1
    {
      UCollator	*collator = GSICUCollatorOpen(mask, locale);

      /* If no collator could be opened we drop through to the
       * non-ICU comparison below.
       */
      if (NULL != collator)
	{
	  NSZone		*zone = NSDefaultMallocZone();
	  NSUInteger		lenSelf = compareRange.length;
	  NSUInteger		lenOther = [string length];
	  unichar		*bufSelf;
	  unichar		*bufOther;
	  UCollationResult	order;

	  /* The collator works on raw UTF-16 so copy both strings out. */
	  bufSelf = NSZoneMalloc(zone, lenSelf * sizeof(unichar));
	  bufOther = NSZoneMalloc(zone, lenOther * sizeof(unichar));
	  [self getCharacters: bufSelf range: compareRange];
	  [string getCharacters: bufOther range: NSMakeRange(0, lenOther)];

	  order = ucol_strcoll(collator,
	    bufSelf, lenSelf, bufOther, lenOther);

	  NSZoneFree(zone, bufSelf);
	  NSZoneFree(zone, bufOther);
	  ucol_close(collator);

	  if (UCOL_EQUAL == order)
	    {
	      return NSOrderedSame;
	    }
	  if (UCOL_GREATER == order)
	    {
	      return NSOrderedDescending;
	    }
	  if (UCOL_LESS == order)
	    {
	      return NSOrderedAscending;
	    }
	}
    }
#endif

  return strCompNsNs(self, string, mask, compareRange);
}
5837
/**
 * Compares the whole of the receiver with string, with no options set,
 * passing +[NSLocale currentLocale] as the locale.
 */
- (NSComparisonResult) localizedCompare: (NSString *)string
{
  NSRange	whole = NSMakeRange(0, [self length]);
  NSLocale	*current = [NSLocale currentLocale];

  return [self compare: string options: 0 range: whole locale: current];
}
5848
/**
 * Compares the whole of the receiver with string, ignoring case
 * and passing +[NSLocale currentLocale] as the locale.
 */
- (NSComparisonResult) localizedCaseInsensitiveCompare: (NSString *)string
{
  NSRange	whole = NSMakeRange(0, [self length]);
  NSLocale	*current = [NSLocale currentLocale];

  return [self compare: string
               options: NSCaseInsensitiveSearch
                 range: whole
                locale: current];
}
5860
/**
 * Writes the receiver to the file at filename.  The text is converted
 * using the default C string encoding, or (if that conversion produces
 * no data) using unicode encoding instead.<br />
 * The useAuxiliaryFile flag is passed on as the '<code>atomically</code>'
 * argument of the data write.
 */
- (BOOL) writeToFile: (NSString*)filename
	  atomically: (BOOL)useAuxiliaryFile
{
  id	encoded = [self dataUsingEncoding: _DefaultStringEncoding];

  if (nil == encoded)
    {
      /* Default encoding could not be used ... fall back to unicode. */
      encoded = [self dataUsingEncoding: NSUnicodeStringEncoding];
    }
  return [encoded writeToFile: filename atomically: useAuxiliaryFile];
}
5879
/**
 * Writes the receiver to the file at path after converting it to data
 * using the encoding enc.<br />
 * If atomically is set, the data is written with the
 * <code>NSDataWritingAtomic</code> option.<br />
 * If the receiver cannot be converted using enc, the method returns NO
 * and (where error is not NULL) sets *error to an
 * NSFileWriteInapplicableStringEncodingError in NSCocoaErrorDomain.<br />
 * Otherwise the result, and any error, come from the data write.
 */
- (BOOL) writeToFile: (NSString*)path
	  atomically: (BOOL)atomically
	    encoding: (NSStringEncoding)enc
	       error: (NSError**)error
{
  id	encoded = [self dataUsingEncoding: enc];

  if (encoded != nil)
    {
      return [encoded writeToFile: path
			  options: atomically ? NSDataWritingAtomic : 0
			    error: error];
    }

  /* Conversion failed ... report it if the caller wants to know why. */
  if (error != 0)
    {
      *error = [NSError errorWithDomain: NSCocoaErrorDomain
	code: NSFileWriteInapplicableStringEncodingError
	userInfo: nil];
    }
  return NO;
}
5910
/**
 * Writes the receiver to url after converting it to data using the
 * encoding enc.<br />
 * If atomically is set, the data is written with the
 * <code>NSDataWritingAtomic</code> option.<br />
 * If the receiver cannot be converted using enc, nothing is written;
 * the method returns NO and (where error is not NULL) sets *error to an
 * NSFileWriteInapplicableStringEncodingError in NSCocoaErrorDomain.
 * This matches -writeToFile:atomically:encoding:error: and means that
 * data is never written in an encoding other than the one requested.<br />
 * Otherwise the result, and any error, come from the data write.
 */
- (BOOL) writeToURL: (NSURL*)url
	 atomically: (BOOL)atomically
	    encoding: (NSStringEncoding)enc
	       error: (NSError**)error
{
  id	d = [self dataUsingEncoding: enc];

  if (d == nil)
    {
      /* The caller asked for a specific encoding, so we must not
       * silently substitute another one.
       */
      if (error != 0)
        {
          *error = [NSError errorWithDomain: NSCocoaErrorDomain
	    code: NSFileWriteInapplicableStringEncodingError
	    userInfo: nil];
        }
      return NO;
    }
  return [d writeToURL: url
	       options: atomically ? NSDataWritingAtomic : 0
		 error: error];
}
/**
 * Writes the receiver to url.  The text is converted using the default
 * C string encoding, or (if that conversion produces no data) using
 * unicode encoding instead.<br />
 * The atomically flag is passed on unchanged to the data write.
 */
- (BOOL) writeToURL: (NSURL*)url atomically: (BOOL)atomically
{
  id	encoded = [self dataUsingEncoding: _DefaultStringEncoding];

  if (nil == encoded)
    {
      /* Default encoding could not be used ... fall back to unicode. */
      encoded = [self dataUsingEncoding: NSUnicodeStringEncoding];
    }
  return [encoded writeToURL: url atomically: atomically];
}
5963
5964/* NSCopying Protocol */
5965
/**
 * Returns a new string allocated in zone and initialised with the
 * contents of the receiver.
 */
- (id) copyWithZone: (NSZone*)zone
{
  id	duplicate;

  /*
   * We can't just retain the receiver here: a string created with
   * freeWhenDone==NO doesn't own its characters, and whoever does own
   * them may release that memory once finished with the original,
   * which would leave a retained 'copy' pointing at invalid data.
   * So a full copy is always made.
   */
  duplicate = [NSStringClass allocWithZone: zone];
  return [duplicate initWithString: self];
}
5977
/**
 * Returns a new mutable string allocated in zone and initialised with
 * the contents of the receiver.
 */
- (id) mutableCopyWithZone: (NSZone*)zone
{
  id	duplicate = [GSMutableStringClass allocWithZone: zone];

  return [duplicate initWithString: self];
}
5982
5983/* NSCoding Protocol */
5984
/**
 * Archives the receiver.<br />
 * With a keyed coder the string is stored as a property list under the
 * key "NS.string".<br />
 * Otherwise the length is written as an unsigned value and, for a
 * non-empty string, is followed by the encoding (always
 * NSUnicodeStringEncoding, written as an int) and the array of unicode
 * characters.
 */
- (void) encodeWithCoder: (NSCoder*)aCoder
{
  if ([aCoder allowsKeyedCoding])
    {
      [(NSKeyedArchiver*)aCoder _encodePropertyList: self forKey: @"NS.string"];
    }
  else
    {
      unsigned	count = [self length];

      [aCoder encodeValueOfObjCType: @encode(unsigned) at: &count];
      if (count > 0)
	{
	  /* For backwards-compatibility, we always encode/decode the
	     string encoding as an 'int'.  Due to a bug, GCC up to 4.5
	     always encode all enums as 'i' (int) regardless of the
	     actual integer type required to store them; we need to be
	     able to read/write archives compatible with GCC <= 4.5 so
	     we explictly use 'int' to read/write these variables.
	     The value is held in a real 'int' variable so that the coder
	     reads exactly the bytes of the value, whatever the size of
	     NSStringEncoding is on this platform.  */
	  int		enc = (int)NSUnicodeStringEncoding;
	  unichar	*chars;

	  [aCoder encodeValueOfObjCType: @encode(int) at: &enc];

	  chars = NSZoneMalloc(NSDefaultMallocZone(), count*sizeof(unichar));
	  [self getCharacters: chars range: ((NSRange){0, count})];
	  [aCoder encodeArrayOfObjCType: @encode(unichar)
				  count: count
				     at: chars];
	  NSZoneFree(NSDefaultMallocZone(), chars);
	}
    }
}
6019
/**
 * Initialises the receiver from an archive.<br />
 * With a keyed coder the value is taken from the key "NS.string" (a
 * property list) or, failing that, from "NS.bytes" (either a string or
 * UTF8 data); if neither key is present an empty string results.<br />
 * Otherwise the format written by -encodeWithCoder: is read: a length,
 * then (for a non-empty string) the encoding stored as an int, then the
 * characters or bytes of the string in that encoding.
 */
- (id) initWithCoder: (NSCoder*)aCoder
{
  if ([aCoder allowsKeyedCoding])
    {
      if ([aCoder containsValueForKey: @"NS.string"])
        {
          NSString *string = nil;

          string = (NSString*)[(NSKeyedUnarchiver*)aCoder
                                  _decodePropertyListForKey: @"NS.string"];
          self = [self initWithString: string];
        }
      else if ([aCoder containsValueForKey: @"NS.bytes"])
        {
          id bytes = [(NSKeyedUnarchiver*)aCoder
                         decodeObjectForKey: @"NS.bytes"];

          if ([bytes isKindOfClass: NSStringClass])
            {
              self = [self initWithString: (NSString*)bytes];
            }
          else
            {
              self = [self initWithData: (NSData*)bytes
                               encoding: NSUTF8StringEncoding];
            }
        }
      else
        {
          // empty string
          self = [self initWithString: @""];
        }
    }
  else
    {
      unsigned	count;

      [aCoder decodeValueOfObjCType: @encode(unsigned) at: &count];

      if (count > 0)
        {
	  /* The encoding is archived as an 'int', so it must be decoded
	   * into a real 'int' variable.  Decoding directly into an
	   * NSStringEncoding variable would fill only part of it wherever
	   * that type is larger than an int, leaving the rest undefined.
	   */
	  int			rawEnc = 0;
	  NSStringEncoding	enc;
	  NSZone		*zone;

	  [aCoder decodeValueOfObjCType: @encode(int) at: &rawEnc];
	  enc = (NSStringEncoding)rawEnc;
	  zone = [self zone];

	  if (enc == NSUnicodeStringEncoding)
	    {
	      unichar	*chars;

	      /* The new string takes ownership of the buffer.	*/
	      chars = NSZoneMalloc(zone, count*sizeof(unichar));
	      [aCoder decodeArrayOfObjCType: @encode(unichar)
		                      count: count
		                         at: chars];
	      self = [self initWithCharactersNoCopy: chars
					     length: count
				       freeWhenDone: YES];
	    }
	  else
	    {
	      unsigned char	*chars;

	      /* The new string takes ownership of the buffer.	*/
	      chars = NSZoneMalloc(zone, count+1);
	      [aCoder decodeArrayOfObjCType: @encode(unsigned char)
		                      count: count
				         at: chars];
	      self = [self initWithBytesNoCopy: chars
					length: count
				      encoding: enc
				  freeWhenDone: YES];
	    }
	}
      else
        {
	  self = [self initWithBytesNoCopy: (char *)""
				    length: 0
			          encoding: NSASCIIStringEncoding
			      freeWhenDone: NO];
	}
    }
  return self;
}
6103
/**
 * Returns the class recorded when the receiver is archived, which is
 * always NSStringClass.
 */
- (Class) classForCoder
{
  Class	archiveClass = NSStringClass;

  return archiveClass;
}
6108
/**
 * Returns the receiver itself unless aCoder reports that it is encoding
 * by reference, in which case the superclass implementation decides
 * what is sent.
 */
- (id) replacementObjectForPortCoder: (NSPortCoder*)aCoder
{
  if ([aCoder isByref] != NO)
    {
      return [super replacementObjectForPortCoder: aCoder];
    }
  return self;
}
6115
6116/**
6117 * <p>Attempts to interpret the receiver as a <em>property list</em>
6118 * and returns the result.  If the receiver does not contain a
6119 * string representation of a <em>property list</em> then the method
6120 * returns nil.
6121 * </p>
6122 * <p>Containers (arrays and dictionaries) are decoded as <em>mutable</em>
6123 * objects.
6124 * </p>
6125 * <p>There are three readable <em>property list</em> storage formats -
6126 * The binary format used by [NSSerializer] does not concern us here,
6127 * but there are two 'human readable' formats, the <em>traditional</em>
6128 * OpenStep format (which is extended in GNUstep) and the <em>XML</em> format.
6129 * </p>
6130 * <p>The [NSArray-descriptionWithLocale:indent:] and
6131 * [NSDictionary-descriptionWithLocale:indent:] methods
6132 * both generate strings containing traditional style <em>property lists</em>,
6133 * but [NSArray-writeToFile:atomically:] and
6134 * [NSDictionary-writeToFile:atomically:] generate either traditional or
6135 * XML style <em>property lists</em> depending on the value of the
6136 * GSMacOSXCompatible and NSWriteOldStylePropertyLists user defaults.<br />
6137 * If GSMacOSXCompatible is YES then XML <em>property lists</em> are
6138 * written unless NSWriteOldStylePropertyLists is also YES.<br />
6139 * By default GNUstep writes old style data and always supports reading of
6140 * either style.
6141 * </p>
6142 * <p>The traditional format is more compact and more easily readable by
6143 * people, but (without the GNUstep extensions) cannot represent date and
6144 * number objects (except as strings).  The XML format is more verbose and
6145 * less readable, but can be fed into modern XML tools and thus used to
6146 * pass data to non-OpenStep applications more readily.
6147 * </p>
6148 * <p>The traditional format is strictly ascii encoded, with any unicode
6149 * characters represented by escape sequences.  The XML format is encoded
6150 * as UTF8 data.
6151 * </p>
6152 * <p>Both the traditional format and the XML format permit comments to be
6153 * placed in <em>property list</em> documents.  In traditional format the
6154 * comment notations used in Objective-C programming are supported, while
6155 * in XML format, the standard SGML comment sequences are used.
6156 * </p>
6157 * <p>See the documentation for [NSPropertyListSerialization] for more
6158 * information on what a property list is.
6159 * </p>
6160 * <p>If the string cannot be parsed as a normal property list format,
6161 * this method also tries to parse it as 'strings file' format (see the
6162 * -propertyListFromStringsFileFormat method).
6163 * </p>
6164 */
- (id) propertyList
{
  extern id		GSPropertyListFromStringsFormat(NSString *string);
  NSPropertyListFormat	format;
  NSString		*error = nil;
  NSData		*data;
  id			result = nil;

  if (0 == [self length])
    {
      return nil;
    }
  data = [self dataUsingEncoding: NSUTF8StringEncoding];
  NSAssert(data, @"Couldn't get utf8 data from string.");

  /* First attempt ... parse as a normal property list. */
  result = [NSPropertyListSerialization
    propertyListFromData: data
    mutabilityOption: NSPropertyListMutableContainers
    format: &format
    errorDescription: &error];
  if (result != nil)
    {
      return result;
    }

  /* Second attempt ... parse as strings file format, combining the
   * reasons for both failures if this does not work either.
   */
  NS_DURING
    {
      result = GSPropertyListFromStringsFormat(self);
    }
  NS_HANDLER
    {
      error = [NSString stringWithFormat:
        @"as property list {%@}, and as strings file {%@}",
        error, [localException reason]];
      result = nil;
    }
  NS_ENDHANDLER

  if (nil == result)
    {
      [NSException raise: NSGenericException
                  format: @"Parse failed - %@", error];
    }
  return result;
}
6209
6210/**
6211 * <p>Reads a <em>property list</em> (see -propertyList) from a simplified
6212 * file format.  This format is a traditional style property list file
6213 * containing a single dictionary, but with the leading '{' and trailing
6214 * '}' characters omitted.
6215 * </p>
6216 * <p>That is to say, the file contains only semicolon separated key/value
6217 * pairs (and optionally comments).  As a convenience, it is possible to
6218 * omit the equals sign and the value, so an entry consists of a key string
6219 * followed by a semicolon.  In this case, the value for that key is
6220 * assumed to be an empty string.
6221 * </p>
6222 * <example>
6223 *   // Strings file entries follow -
6224 *   key1 = " a string value";
6225 *   key2;	// This key has an empty string as a value.
 *   "Another key" = "a longer string value for the third key";
6227 * </example>
6228 */
- (NSDictionary*) propertyListFromStringsFileFormat
{
  /* The strings-format parser has external linkage and is declared
   * locally here; the receiver is handed to it unchanged and whatever
   * it produces is returned as is.
   */
  extern id	GSPropertyListFromStringsFormat(NSString *string);
  id		parsed;

  parsed = GSPropertyListFromStringsFormat(self);
  return parsed;
}
6235
6236/**
6237  * Returns YES if the receiver contains string, otherwise, NO.
6238  */
- (BOOL) containsString: (NSString *)string
{
  /* Delegates the search to -rangeOfString: and reports whether the
   * returned location is something other than NSNotFound.
   */
  NSRange	found = [self rangeOfString: string];

  if (found.location == NSNotFound)
    {
      return NO;
    }
  return YES;
}
6243
6244@end
6245
6246/**
6247 * This is the mutable form of the [NSString] class.
6248 */
6249@implementation NSMutableString
6250
+ (id) allocWithZone: (NSZone*)z
{
  /* When the receiver is NSMutableStringClass itself, an instance of
   * GSMutableStringClass is allocated instead; any other receiving
   * class gets an instance of its own class.
   */
  Class	toAllocate;

  toAllocate = (self == NSMutableStringClass) ? GSMutableStringClass : self;
  return NSAllocateObject(toAllocate, 0, z);
}
6262
6263// Creating Temporary Strings
6264
6265/**
6266 * Constructs an empty string.
6267 */
+ (id) string
{
  /* Allocate a GSMutableStringClass instance in the default zone,
   * initialise it with a capacity of zero and autorelease it.
   */
  id	str;

  str = [GSMutableStringClass allocWithZone: NSDefaultMallocZone()];
  str = [str initWithCapacity: 0];
  return AUTORELEASE(str);
}
6273
6274/**
6275 * Constructs an empty string with initial buffer size of capacity.
6276 */
+ (NSMutableString*) stringWithCapacity: (NSUInteger)capacity
{
  /* Same as +string, except that the caller supplied capacity is
   * passed on to -initWithCapacity:
   */
  NSZone	*zone = NSDefaultMallocZone();
  id		str;

  str = [[GSMutableStringClass allocWithZone: zone]
    initWithCapacity: capacity];
  return AUTORELEASE(str);
}
6282
6283/**
6284 * Create a string of unicode characters.
6285 */
6286// Inefficient implementation.
+ (id) stringWithCharacters: (const unichar*)characters
                     length: (NSUInteger)length
{
  /* Allocate a GSMutableStringClass instance in the default zone and
   * initialise it from the supplied characters; the result is
   * autoreleased.
   */
  id	str;

  str = [GSMutableStringClass allocWithZone: NSDefaultMallocZone()];
  str = [str initWithCharacters: characters length: length];
  return AUTORELEASE(str);
}
6293
6294/**
6295 * Load contents of file at path into a new string.  Will interpret file as
6296 * containing direct unicode if it begins with the unicode byte order mark,
6297 * else converts to unicode using default C string encoding.
6298 */
+ (id) stringWithContentsOfFile: (NSString *)path
{
  /* The actual loading is left to -initWithContentsOfFile: of the
   * freshly allocated GSMutableStringClass instance.
   */
  NSZone	*zone = NSDefaultMallocZone();
  id		str = [GSMutableStringClass allocWithZone: zone];

  str = [str initWithContentsOfFile: path];
  return AUTORELEASE(str);
}
6304
6305/**
6306 * Create a string based on the given C (char[]) string, which should be
6307 * null-terminated and encoded in the default C string encoding.  (Characters
6308 * will be converted to unicode representation internally.)
6309 */
+ (id) stringWithCString: (const char*)byteString
{
  /* Allocate a GSMutableStringClass instance in the default zone and
   * let -initWithCString: fill it from byteString; the result is
   * autoreleased.
   */
  id	str;

  str = [GSMutableStringClass allocWithZone: NSDefaultMallocZone()];
  str = [str initWithCString: byteString];
  return AUTORELEASE(str);
}
6315
6316/**
6317 * Create a string based on the given C (char[]) string, which may contain
6318 * null bytes and should be encoded in the default C string encoding.
6319 * (Characters will be converted to unicode representation internally.)
6320 */
+ (id) stringWithCString: (const char*)byteString
                  length: (NSUInteger)length
{
  /* As +stringWithCString: but with an explicit byte count handed to
   * -initWithCString:length:
   */
  NSZone	*zone = NSDefaultMallocZone();
  id		str = [GSMutableStringClass allocWithZone: zone];

  str = [str initWithCString: byteString length: length];
  return AUTORELEASE(str);
}
6327
6328/**
6329 * Creates a new string using C printf-style formatting.  First argument should
6330 * be a constant format string, like '<code>@"float val = %f"</code>', remaining
6331 * arguments should be the variables to print the values of, comma-separated.
6332 */
+ (id) stringWithFormat: (NSString*)format, ...
{
  /* Collects the variable arguments and forwards them, together with
   * the format, to the superclass +stringWithFormat:arguments: method.
   */
  va_list	args;
  id		formatted;

  va_start(args, format);
  formatted = [super stringWithFormat: format arguments: args];
  va_end(args);
  return formatted;
}
6341
6342/** <init/> <override-subclass />
6343 * Constructs an empty string with initial buffer size of capacity.<br />
6344 * Calls -init (which does nothing but maintain MacOS-X compatibility),
6345 * and needs to be re-implemented in subclasses in order to have all
6346 * other initialisers work.
6347 */
- (id) initWithCapacity: (NSUInteger)capacity
{
  /* The capacity value is not used by this implementation;
   * the receiver is simply initialised using -init.
   */
  return [self init];
}
6353
- (id) initWithCharactersNoCopy: (unichar*)chars
                         length: (NSUInteger)length
                   freeWhenDone: (BOOL)flag
{
  NSString	*wrapper;

  self = [self initWithCapacity: length];
  if (self == nil || length == 0)
    {
      /* Nothing to insert (or initialisation failed).
       * NOTE(review): on this path chars is not passed on to anything,
       * so with flag == YES the buffer appears never to be freed here;
       * confirm whether callers rely on that.
       */
      return self;
    }

  /* Wrap the buffer in a temporary NSString (which is given the flag
   * and hence the decision about freeing the buffer), then insert that
   * string's contents at the start of the receiver.
   */
  wrapper = [[NSString allocWithZone: NSDefaultMallocZone()]
    initWithCharactersNoCopy: chars
                      length: length
                freeWhenDone: flag];
  [self replaceCharactersInRange: NSMakeRange(0, 0) withString: wrapper];
  RELEASE(wrapper);
  return self;
}
6371
- (id) initWithCStringNoCopy: (char*)chars
                      length: (NSUInteger)length
                freeWhenDone: (BOOL)flag
{
  NSString	*wrapper;

  self = [self initWithCapacity: length];
  if (self == nil || length == 0)
    {
      /* Nothing to insert (or initialisation failed).
       * NOTE(review): on this path chars is not passed on to anything,
       * so with flag == YES the buffer appears never to be freed here;
       * confirm whether callers rely on that.
       */
      return self;
    }

  /* Wrap the C string buffer in a temporary NSString (which is given
   * the flag and hence the decision about freeing the buffer), then
   * insert that string's contents at the start of the receiver.
   */
  wrapper = [[NSString allocWithZone: NSDefaultMallocZone()]
    initWithCStringNoCopy: chars
                   length: length
             freeWhenDone: flag];
  [self replaceCharactersInRange: NSMakeRange(0, 0) withString: wrapper];
  RELEASE(wrapper);
  return self;
}
6389
6390// Modify A String
6391
6392/**
6393 *  Modifies this string by appending aString.
6394 */
- (void) appendString: (NSString*)aString
{
  /* Appending is expressed as replacement of the zero length range
   * located at the current end of the receiver.
   */
  NSUInteger	end = [self length];

  [self replaceCharactersInRange: NSMakeRange(end, 0)
                      withString: aString];
}
6403
6404/**
6405 *  Modifies this string by appending string described by given format.
6406 */
6407// Inefficient implementation.
- (void) appendFormat: (NSString*)format, ...
{
  va_list	args;
  NSString	*formatted;

  /* Build a temporary string from the format and arguments ...
   */
  va_start(args, format);
  formatted = [NSStringClass allocWithZone: NSDefaultMallocZone()];
  formatted = [formatted initWithFormat: format arguments: args];
  va_end(args);

  /* ... append it to the receiver and dispose of the temporary.
   */
  [self appendString: formatted];
  RELEASE(formatted);
}
6420
- (Class) classForCoder
{
  /* The class reported is always NSMutableStringClass; the actual
   * class of the receiver is not consulted.
   */
  Class	coderClass = NSMutableStringClass;

  return coderClass;
}
6425
6426/**
6427 * Modifies this instance by deleting specified range of characters.
6428 */
- (void) deleteCharactersInRange: (NSRange)range
{
  /* Deletion is expressed as replacement of the range with a nil string.
   */
  NSString	*nothing = nil;

  [self replaceCharactersInRange: range withString: nothing];
}
6433
6434/**
6435 * Modifies this instance by inserting aString at loc.
6436 */
- (void) insertString: (NSString*)aString atIndex: (NSUInteger)loc
{
  /* Insertion is expressed as replacement of the zero length range
   * starting at loc.
   */
  [self replaceCharactersInRange: NSMakeRange(loc, 0)
                      withString: aString];
}
6442
6443/**
6444 * Modifies this instance by deleting characters in range and then inserting
6445 * aString at its beginning.
6446 */
- (void) replaceCharactersInRange: (NSRange)range
                       withString: (NSString*)aString
{
  /* No replacement is performed at this level: the selector of this
   * method is handed to -subclassResponsibility: and neither range
   * nor aString is examined.
   */
  SEL	thisMethod = _cmd;

  [self subclassResponsibility: thisMethod];
}
6452
6453/**
6454 * Replaces all occurrences of the replace string with the by string,
6455 * for those cases where the entire replace string lies within the
6456 * specified searchRange value.<br />
 * The value of opts determines the direction of the search and
6458 * whether only leading/trailing occurrences (anchored search) of
6459 * replace are substituted.<br />
 * Raises NSInvalidArgumentException if either string argument is nil
 * (or is not a string).<br />
 * Raises NSInvalidArgumentException if part of searchRange is beyond
 * the end of the receiver.
6463 */
- (NSUInteger) replaceOccurrencesOfString: (NSString*)replace
                               withString: (NSString*)by
                                  options: (NSUInteger)opts
                                    range: (NSRange)searchRange
{
  /* Replaces each occurrence of replace found within searchRange by the
   * string by, and returns the number of replacements performed.
   * The opts value is passed unchanged to the search function; only
   * NSBackwardsSearch is examined here, to decide how the remaining
   * search area is adjusted after each replacement.
   * Raises NSInvalidArgumentException if replace or by is not a string
   * (which includes nil) or if searchRange extends beyond the end of
   * the receiver.
   * The counter and all lengths are NSUInteger values, so that nothing
   * is truncated where NSUInteger is wider than unsigned int.
   */
  NSRange	range;
  NSUInteger	count = 0;
  GSRSFunc	func;

  if ([replace isKindOfClass: NSStringClass] == NO)
    {
      [NSException raise: NSInvalidArgumentException
		  format: @"%@ bad search string", NSStringFromSelector(_cmd)];
    }
  if ([by isKindOfClass: NSStringClass] == NO)
    {
      [NSException raise: NSInvalidArgumentException
		  format: @"%@ bad replace string", NSStringFromSelector(_cmd)];
    }
  if (NSMaxRange(searchRange) > [self length])
    {
      [NSException raise: NSInvalidArgumentException
		  format: @"%@ bad search range", NSStringFromSelector(_cmd)];
    }
  func = GSPrivateRangeOfString(self, replace);
  range = (*func)(self, replace, opts, searchRange);

  if (range.length > 0)
    {
      NSUInteger	byLen = [by length];
      SEL		sel;
      void		(*imp)(id, SEL, NSRange, NSString*);

      /* Look up the replacement method once, rather than sending a
       * message for every occurrence found.
       */
      sel = @selector(replaceCharactersInRange:withString:);
      imp = (void(*)(id, SEL, NSRange, NSString*))[self methodForSelector: sel];
      do
	{
	  count++;
	  (*imp)(self, sel, range, by);
	  if ((opts & NSBackwardsSearch) == NSBackwardsSearch)
	    {
	      /* Searching backwards: only the text in front of the
	       * occurrence just replaced remains to be searched.
	       */
	      searchRange.length = range.location - searchRange.location;
	    }
	  else
	    {
	      NSUInteger	newEnd;

	      /* Searching forwards: the end of the search area moves by
	       * the difference in length between the replacement and the
	       * replaced text, and searching resumes immediately after
	       * the text just inserted.
	       */
	      newEnd = NSMaxRange(searchRange) + byLen - range.length;
	      searchRange.location = range.location + byLen;
	      searchRange.length = newEnd - searchRange.location;
	    }
	  /* We replaced something and now need to scan again.
	   * As we modified the receiver, we must refresh the
	   * method implementation for searching.
	   */
	  func = GSPrivateRangeOfString(self, replace);
	  range = (*func)(self, replace, opts, searchRange);
	}
      while (range.length > 0);
    }
  return count;
}
6526
6527/**
6528 * Modifies this instance by replacing contents with those of aString.
6529 */
- (void) setString: (NSString*)aString
{
  /* The entire current contents (location zero, current length) are
   * replaced by aString.
   */
  NSUInteger	oldLength = [self length];

  [self replaceCharactersInRange: NSMakeRange(0, oldLength)
                      withString: aString];
}
6535
6536@end
6537
6538