diff --git a/include/cdio/utf8.h b/include/cdio/utf8.h
new file mode 100644
index 00000000..38bc8609
--- /dev/null
+++ b/include/cdio/utf8.h
@@ -0,0 +1,98 @@
+/*
+  Copyright (C) 2006 Burkhard Plaum
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+  02110-1301 USA.
+*/
+/* UTF-8 support */
+
+#ifndef CDIO_UTF8_H_
+#define CDIO_UTF8_H_
+
+#include <cdio/types.h>
+
+/** \brief Opaque characterset converter
+ */
+
+typedef struct cdio_charset_coverter_s cdio_charset_coverter_t;
+
+/** \brief Create a charset converter
+ * \param src_charset Source charset
+ * \param dst_charset Destination charset
+ * \returns A newly allocated charset converter, or NULL if the
+ *          allocation failed or the conversion is not supported
+ *
+ * Release the converter with cdio_charset_converter_destroy().
+ */
+
+cdio_charset_coverter_t *
+cdio_charset_converter_create(const char * src_charset,
+                              const char * dst_charset);
+
+/** \brief Destroy a characterset converter
+ * \param cnv A characterset converter (NULL is ignored)
+ */
+
+void cdio_charset_converter_destroy(cdio_charset_coverter_t*cnv);
+
+/** \brief Convert a string from one character set to another
+ * \param cnv A charset converter
+ * \param src Source string
+ * \param src_len Length of source string
+ * \param dst Returns destination string
+ * \param dst_len If non NULL, returns the length of the destination string
+ * \returns true if conversion was successful, false else.
+ *
+ * The destination string must be freed by the caller with free().
+ * If you pass -1 for src_len, strlen() will be used.
+ * On failure, dst and dst_len are left untouched.
+ */
+
+bool cdio_charset_convert(cdio_charset_coverter_t*cnv,
+                          char * src, int src_len,
+                          char ** dst, int * dst_len);
+
+/** \brief Convert a string from UTF-8 to another charset
+ * \param src Source string (0 terminated)
+ * \param dst Returns destination string
+ * \param dst_len If non NULL, returns the length of the destination string
+ * \param dst_charset The characterset to convert to
+ * \returns true if conversion was successful, false else.
+ *
+ * This is a convenience function, which creates a charset converter,
+ * converts one string and destroys the charset converter.
+ * The destination string must be freed by the caller with free().
+ */
+
+bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst,
+                            int * dst_len, const char * dst_charset);
+
+/** \brief Convert a string from another charset to UTF-8
+ * \param src Source string
+ * \param src_len Length of the source string
+ * \param dst Returns destination string (0 terminated)
+ * \param src_charset The characterset to convert from
+ * \returns true if conversion was successful, false else.
+ *
+ * This is a convenience function, which creates a charset converter,
+ * converts one string and destroys the charset converter. If you pass -1
+ * for src_len, strlen() will be used.
+ * The destination string must be freed by the caller with free().
+ */
+
+bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst,
+                          const char * src_charset);
+
+#endif /* CDIO_UTF8_H_ */
diff --git a/lib/driver/utf8.c b/lib/driver/utf8.c
new file mode 100644
index 00000000..c35d9643
--- /dev/null
+++ b/lib/driver/utf8.c
@@ -0,0 +1,230 @@
+/*
+  Copyright (C) 2006 Burkhard Plaum
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+  02110-1301 USA.
+*/
+/* UTF-8 support */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#endif
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef HAVE_ICONV
+# include <iconv.h>
+#endif
+
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+#include <cdio/utf8.h>
+
+#include <stdio.h>
+
+/* Converter state: wraps the iconv conversion descriptor */
+struct cdio_charset_coverter_s
+  {
+  iconv_t ic;
+  };
+
+/* Create a converter from src_charset to dst_charset.
+   Returns NULL if the allocation fails or iconv_open() rejects the
+   charset pair, so a non NULL converter always holds a valid descriptor. */
+cdio_charset_coverter_t *
+cdio_charset_converter_create(const char * src_charset,
+                              const char * dst_charset)
+  {
+  cdio_charset_coverter_t * ret;
+  ret = calloc(1, sizeof(*ret));
+  if(!ret)
+    return NULL;
+  ret->ic = iconv_open(dst_charset, src_charset);
+  if(ret->ic == (iconv_t)-1)
+    {
+    free(ret);
+    return NULL;
+    }
+  return ret;
+  }
+
+/* Destroy a converter made by cdio_charset_converter_create().
+   A NULL converter is ignored. */
+void cdio_charset_converter_destroy(cdio_charset_coverter_t*cnv)
+  {
+  if(!cnv)
+    return;
+  iconv_close(cnv->ic);
+  free(cnv);
+  }
+
+#define BYTES_INCREMENT 16
+
+/* Enlarge the output buffer by BYTES_INCREMENT bytes, keeping the write
+   position. Returns false and leaves the buffer valid if realloc() fails. */
+static bool
+grow_output(char ** buf, size_t * alloc_size,
+            char ** outbuf, size_t * outbytesleft)
+  {
+  size_t output_pos = (size_t)(*outbuf - *buf);
+  char * tmp = realloc(*buf, *alloc_size + BYTES_INCREMENT);
+
+  if(!tmp)
+    return false;
+
+  *buf = tmp;
+  *outbuf = tmp + output_pos;
+  *alloc_size += BYTES_INCREMENT;
+  *outbytesleft += BYTES_INCREMENT;
+  return true;
+  }
+
+/* Convert src_len bytes of src with the descriptor cd into a newly
+   allocated, zero terminated buffer.
+
+   src_len == (size_t)-1 means "use strlen(src)". On success the buffer is
+   stored in *dst (the caller frees it with free()), its length without
+   the terminator is stored in *dst_len if dst_len is non NULL, and true
+   is returned. On failure nothing is stored and false is returned. */
+static bool
+do_convert(iconv_t cd, char * src, size_t src_len,
+           char ** dst, int *dst_len)
+  {
+  char * ret;
+  char * inbuf;
+  char * outbuf;
+  size_t alloc_size;
+  size_t inbytesleft;
+  size_t outbytesleft;
+  size_t result;
+  bool flushing = false;
+
+  if(!src || !dst)
+    return false;
+
+  if(src_len == (size_t)-1)
+    src_len = strlen(src);
+
+  alloc_size = src_len + BYTES_INCREMENT;
+
+  inbytesleft = src_len;
+
+  /* We reserve space here to add a final '\0' */
+  outbytesleft = alloc_size-1;
+
+  ret = malloc(alloc_size);
+  if(!ret)
+    return false;
+
+  inbuf = src;
+  outbuf = ret;
+
+  /* A converter can be reused, also after a failed conversion:
+     start from the initial shift state */
+  iconv(cd, NULL, NULL, NULL, NULL);
+
+  while(1)
+    {
+    /* Once the input is consumed, a call without input writes the
+       sequence that brings a stateful encoding back to its initial state */
+    if(flushing)
+      result = iconv(cd, NULL, NULL, &outbuf, &outbytesleft);
+    else
+      result = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+
+    if(result == (size_t)-1)
+      {
+      if(errno != E2BIG)
+        {
+        fprintf(stderr, "Iconv failed: %s\n", strerror(errno));
+        free(ret);
+        return false;
+        }
+      /* Output buffer full: enlarge it and retry */
+      if(!grow_output(&ret, &alloc_size, &outbuf, &outbytesleft))
+        {
+        free(ret);
+        return false;
+        }
+      continue;
+      }
+
+    if(flushing)
+      break;
+    if(!inbytesleft)
+      flushing = true;
+    }
+
+  /* Zero terminate */
+  *outbuf = '\0';
+
+  /* Set return values */
+  *dst = ret;
+  if(dst_len)
+    *dst_len = (int)(outbuf - ret);
+  return true;
+  }
+
+/* Convert with an existing converter; a negative src_len selects strlen().
+   Fails if cnv is NULL. */
+bool cdio_charset_convert(cdio_charset_coverter_t*cnv,
+                          char * src, int src_len,
+                          char ** dst, int * dst_len)
+  {
+  if(!cnv)
+    return false;
+  return do_convert(cnv->ic, src,
+                    (src_len < 0) ? (size_t)-1 : (size_t)src_len,
+                    dst, dst_len);
+  }
+
+/* One-shot conversion of a zero terminated UTF-8 string to dst_charset.
+   Fails if the charset pair is not supported by iconv_open(). */
+bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst,
+                            int * dst_len, const char * dst_charset)
+  {
+  iconv_t ic;
+  bool result;
+  ic = iconv_open(dst_charset, "UTF-8");
+  if(ic == (iconv_t)-1)
+    return false;
+  result = do_convert(ic, src, (size_t)-1, dst, dst_len);
+  iconv_close(ic);
+  return result;
+  }
+
+/* One-shot conversion of src_len bytes in src_charset to UTF-8;
+   src_len == (size_t)-1 selects strlen().
+   Fails if the charset pair is not supported by iconv_open(). */
+bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst,
+                          const char * src_charset)
+  {
+  iconv_t ic;
+  bool result;
+  ic = iconv_open("UTF-8", src_charset);
+  if(ic == (iconv_t)-1)
+    return false;
+  result = do_convert(ic, src, src_len, dst, NULL);
+  iconv_close(ic);
+  return result;
+  }