/* Copyright (C) 2011-2011 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

#include "udm_config.h"

/*
  NOTE(review): the names of the two system headers below were missing
  from the copy of the file under review (only bare "#include" tokens
  remained). <stdlib.h> and <string.h> are assumed - confirm against the
  upstream file.
*/
#include <stdlib.h>
#include <string.h>

#include "udm_uniconv.h"
#include "udm_unidata.h"
#include "udm_sgml.h"


/*
  Measure the well-formed prefix of a byte string.

  Decodes src character by character with the charset's mb_wc() handler
  and stops at the first position where the handler returns a
  non-positive length, or when srclen bytes have been consumed.

  @param cs      Character set whose mb_wc() handler is used.
  @param src     Start of the input bytes.
  @param srclen  Number of input bytes.
  @param flags   Passed through unchanged to mb_wc().
  @return        Number of bytes from src that were decoded successfully.
*/
size_t
UdmWellFormedLengthGeneric(UDM_CHARSET *cs,
                           const char *src, size_t srclen, int flags)
{
  const char *src0= src, *srcend= src + srclen;
  while (src < srcend)
  {
    udm_wc_t wc;
    int length= cs->cset->mb_wc(NULL, cs, &wc,
                                (const unsigned char *) src,
                                (const unsigned char *) srcend, flags);
    if (length <= 0)
      break;
    src+= length;
  }
  return src - src0;
}


/*
  Classify the single character that starts at str.

  Three cases are distinguished:
  - '&' while UDM_RECODE_HTML_IN is set in flags: scanned by UdmSGMLScan();
  - a non-zero 7-bit byte: classified via the udm_charset_usascii table;
  - anything else (NUL or a byte >= 0x80): decoded by the charset's
    mb_wc() handler. If the handler returns a non-positive length, the
    byte is reported as a one-byte separator so that the caller always
    makes progress.

  UDM_UNI_DIGIT and UDM_UNI_CJK are folded into UDM_UNI_LETTER.

  @param unidata  Unicode data used by UdmUniCType().
  @param cs       Character set of the input.
  @param str      Current position; the caller guarantees str < strend.
  @param strend   End of the input.
  @param ctype    Out: character class of the scanned character.
  @param flags    UDM_RECODE_xxx flags; also passed to mb_wc().
  @return         Length in bytes of the scanned character.
*/
static inline int
scan_one_char(UDM_UNIDATA *unidata, UDM_CHARSET *cs,
              const char *str, const char *strend,
              int *ctype, int flags)
{
  /*
    Examine the byte as unsigned char: whether plain char is signed is
    implementation-defined, so a test like "*str > 0" would accept bytes
    >= 0x80 as ASCII on platforms where char is unsigned.
  */
  const unsigned char ch= (unsigned char) *str;
  int wclen;

  if (ch == '&' && (flags & UDM_RECODE_HTML_IN))
  {
    udm_wc_t wc;
    /*
      NOTE(review): if UdmSGMLScan() can return <= 0 without storing
      into wc, then wc is read uninitialized below - confirm its contract.
    */
    wclen= UdmSGMLScan(&wc, (const unsigned char*) str,
                            (const unsigned char*) strend);
    *ctype= UdmUniCType(unidata, wc);
  }
  else if (ch > 0 && ch < 0x80)
  {
    wclen= 1;
    *ctype= udm_charset_usascii.ctype[ch];
  }
  else
  {
    udm_wc_t wc;
    wclen= cs->cset->mb_wc(NULL, cs, &wc,
                           (const unsigned char *) str,
                           (const unsigned char *) strend, flags);
    if (wclen <= 0)
    {
      /* Undecodable input: skip one byte and treat it as a separator */
      *ctype= UDM_UNI_SEPAR;
      return 1;
    }
    *ctype= UdmUniCType(unidata, wc);
  }

  if (*ctype == UDM_UNI_DIGIT || *ctype == UDM_UNI_CJK)
    *ctype= UDM_UNI_LETTER;
  return wclen;
}


/*
  Get the next token from a multi-byte string.

  A token is a maximal run of consecutive characters that scan_one_char()
  assigns the same character class. The input is not modified and the
  token is not terminated: it spans from the returned pointer up to
  (not including) *last.

  @param unidata  Unicode data used for character classification.
  @param cs       Character set of the input.
  @param str      Position to start from, or NULL to continue from *last.
  @param strend   End of the input.
  @param last     In: continuation position when str is NULL.
                  Out: end of the returned token. Not written when
                  NULL is returned.
  @param ctype0   Out: character class of the token.
  @param flags    Passed through to scan_one_char().
  @return         Pointer to the beginning of the token, or NULL if
                  both str and *last are NULL, if no input remains,
                  or if the first character scans with a non-positive
                  length.
*/
const char *
UdmStrGetSepTokenMB(UDM_UNIDATA *unidata, UDM_CHARSET *cs,
                    const char *str, const char *strend,
                    const char **last, int *ctype0, int flags)
{
  const char *beg;
  int ctype, wclen;

  if (str == NULL && (str= *last) == NULL)
    return NULL;

  if ((beg= str) >= strend)
    return NULL;

  /* The first character determines the class of the whole token */
  wclen= scan_one_char(unidata, cs, str, strend, ctype0, flags);
  if (wclen <= 0)
    return NULL;
  str+= wclen;

  /* Extend the token while the following characters have the same class */
  while (str < strend)
  {
    wclen= scan_one_char(unidata, cs, str, strend, &ctype, flags);
    if (wclen <= 0 || *ctype0 != ctype)
      break;
    str+= wclen;
  }

  *last= str;
  return beg;
}