initial import

This commit is contained in:
Josh Coalson
2002-05-17 06:33:39 +00:00
parent 9fb3a4c34d
commit fda98fb7e4
23 changed files with 3631 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
# libFLAC++ - Free Lossless Audio Codec library
# Copyright (C) 2002 Josh Coalson
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
# Public libFLAC++ headers; they are installed into the FLAC++
# subdirectory of the include directory so that users can write
# #include "FLAC++/all.h".
includedir = ${prefix}/include/FLAC++
include_HEADERS = \
all.h \
metadata.h

25
include/FLAC++/all.h Normal file
View File

@@ -0,0 +1,25 @@
/* libFLAC++ - Free Lossless Audio Codec library
* Copyright (C) 2002 Josh Coalson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/* Convenience umbrella header: includes every public libFLAC++ header
 * (at present that is only metadata.h). */
#ifndef FLACPP__ALL_H
#define FLACPP__ALL_H
#include "metadata.h"
#endif

112
include/FLAC++/metadata.h Normal file
View File

@@ -0,0 +1,112 @@
/* libFLAC++ - Free Lossless Audio Codec library
* Copyright (C) 2002 Josh Coalson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#ifndef FLACPP__METADATA_H
#define FLACPP__METADATA_H
#include "FLAC/metadata.h"
namespace FLAC {
namespace Metadata {
// NOTE: When the get_*() methods return you a const pointer,
// absolutely DO NOT write into it. Always use the set_*()
// methods.
// base class for all metadata blocks
class Prototype {
protected:
Prototype(::FLAC__StreamMetaData *object, bool copy);
virtual ~Prototype();
::FLAC__StreamMetaData *object_;
public:
inline bool is_valid() const { return 0 != object_; }
inline operator bool() const { return is_valid(); }
bool get_is_last() const;
FLAC__MetaDataType get_type() const;
unsigned get_length() const; // NOTE: does not include the header, per spec
};
// Wrapper for a STREAMINFO metadata block.
class StreamInfo : public Prototype {
public:
// Wraps a newly created FLAC__METADATA_TYPE_STREAMINFO object.
StreamInfo();
// Wraps 'object', or a copy of it when 'copy' is true.
StreamInfo(::FLAC__StreamMetaData *object, bool copy = false);
~StreamInfo();
// Getters: each returns the like-named field of the wrapped block's
// stream_info data.  get_md5sum() returns a pointer into the wrapped
// block; do not write through it (use set_md5sum()).
unsigned get_min_blocksize() const;
unsigned get_max_blocksize() const;
unsigned get_min_framesize() const;
unsigned get_max_framesize() const;
unsigned get_sample_rate() const;
unsigned get_channels() const;
unsigned get_bits_per_sample() const;
FLAC__uint64 get_total_samples() const;
const FLAC__byte *get_md5sum() const;
// Setters: each stores 'value' into the like-named field.  Range checks
// are done with FLAC__ASSERT only.
void set_min_blocksize(unsigned value);
void set_max_blocksize(unsigned value);
void set_min_framesize(unsigned value);
void set_max_framesize(unsigned value);
void set_sample_rate(unsigned value);
void set_channels(unsigned value);
void set_bits_per_sample(unsigned value);
void set_total_samples(FLAC__uint64 value);
// Copies the 16 bytes at 'value' into the block.
void set_md5sum(const FLAC__byte value[16]);
};
// Wrapper for a PADDING metadata block.  It adds no accessors beyond
// those of Prototype.
class Padding : public Prototype {
public:
// Wraps a newly created FLAC__METADATA_TYPE_PADDING object.
Padding();
// Wraps 'object', or a copy of it when 'copy' is true.
Padding(::FLAC__StreamMetaData *object, bool copy = false);
~Padding();
};
// Wrapper for an APPLICATION metadata block.
class Application : public Prototype {
public:
// Wraps a newly created FLAC__METADATA_TYPE_APPLICATION object.
Application();
// Wraps 'object', or a copy of it when 'copy' is true.
Application(::FLAC__StreamMetaData *object, bool copy = false);
~Application();
// Both getters return pointers into the wrapped block; do not write
// through them.
const FLAC__byte *get_id() const;
const FLAC__byte *get_data() const;
// Copies the 4 bytes at 'value' into the block's id.
// NOTE(review): 'value' is only read, so it could be const-qualified.
void set_id(FLAC__byte value[4]);
// Forwards to FLAC__metadata_object_application_set_data() and returns
// its result.
bool set_data(FLAC__byte *data, unsigned length, bool copy = false);
};
// Wrapper for a SEEKTABLE metadata block.  It adds no accessors beyond
// those of Prototype.
class SeekTable : public Prototype {
public:
// Wraps a newly created FLAC__METADATA_TYPE_SEEKTABLE object.
SeekTable();
// Wraps 'object', or a copy of it when 'copy' is true.
SeekTable(::FLAC__StreamMetaData *object, bool copy = false);
~SeekTable();
};
// Wrapper for a VORBIS_COMMENT metadata block.  It adds no accessors
// beyond those of Prototype.
class VorbisComment : public Prototype {
public:
// Wraps a newly created FLAC__METADATA_TYPE_VORBIS_COMMENT object.
VorbisComment();
// Wraps 'object', or a copy of it when 'copy' is true.
VorbisComment(::FLAC__StreamMetaData *object, bool copy = false);
~VorbisComment();
};
};
};
#endif

View File

@@ -0,0 +1,5 @@
## Process this file with automake to produce Makefile.in
AUTOMAKE_OPTIONS = foreign
# These headers are listed in EXTRA_DIST only, so they are shipped in the
# distribution but not installed.
EXTRA_DIST = utf8.h getopt.h

169
include/share/getopt.h Normal file
View File

@@ -0,0 +1,169 @@
/* Declarations for getopt.
Copyright (C) 1989,90,91,92,93,94,96,97,98 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#ifndef _GETOPT_H
#ifndef __need_getopt
# define _GETOPT_H 1
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
the argument value is returned here.
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
extern char *optarg;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
and for communication between successive calls to `getopt'.
On entry to `getopt', zero means this is the first call; initialize.
When `getopt' returns -1, this is the index of the first of the
non-option elements that the caller should itself scan.
Otherwise, `optind' communicates from one call to the next
how much of ARGV has been scanned so far. */
extern int optind;
/* Callers store zero here to inhibit the error message `getopt' prints
for unrecognized options. */
extern int opterr;
/* Set to an option character which was unrecognized. */
extern int optopt;
#ifndef __need_getopt
/* Describe the long-named options requested by the application.
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
of `struct option' terminated by an element containing a name which is
zero.
The field `has_arg' is:
no_argument (or 0) if the option does not take an argument,
required_argument (or 1) if the option requires an argument,
optional_argument (or 2) if the option takes an optional argument.
If the field `flag' is not NULL, it points to a variable that is set
to the value given in the field `val' when the option is found, but
left unchanged if the option is not found.
To have a long-named option do something other than set an `int' to
a compiled-in constant, such as set a value from `optarg', set the
option's `flag' field to zero and its `val' field to a nonzero
value (the equivalent single-letter option character, if there is
one). For long options that have a zero `flag' field, `getopt'
returns the contents of the `val' field. */
/* One entry of the LONG_OPTIONS vector passed to getopt_long() and
   getopt_long_only(); the vector ends with an entry whose name is zero. */
struct option
{
# if defined __STDC__ && __STDC__
const char *name; /* long option name */
# else
char *name; /* long option name (compilers without const) */
# endif
/* has_arg can't be an enum because some compilers complain about
type mismatches in all the code that assumes it is an int. */
int has_arg; /* no_argument, required_argument or optional_argument */
int *flag; /* if not NULL, *flag is set to val when the option is found */
int val; /* value stored in *flag, or returned when flag is zero */
};
/* Names for the values of the `has_arg' field of `struct option'. */
# define no_argument 0
# define required_argument 1
# define optional_argument 2
#endif /* need getopt */
/* Get definitions and prototypes for functions to process the
arguments in ARGV (ARGC of them, minus the program name) for
options given in OPTS.
Return the option character from OPTS just read. Return -1 when
there are no more options. For unrecognized options, or options
missing arguments, `optopt' is set to the option letter, and '?' is
returned.
The OPTS string is a list of characters which are recognized option
letters, optionally followed by colons, specifying that that letter
takes an argument, to be placed in `optarg'.
If a letter in OPTS is followed by two colons, its argument is
optional. This behavior is specific to the GNU `getopt'.
The argument `--' causes premature termination of argument
scanning, explicitly telling `getopt' that there are no more
options.
If OPTS begins with `--', then non-option arguments are treated as
arguments to the option '\0'. This behavior is specific to the GNU
`getopt'. */
#if defined __STDC__ && __STDC__
# ifdef __GNU_LIBRARY__
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation
errors, only prototype getopt for the GNU C library. */
extern int getopt (int __argc, char *const *__argv, const char *__shortopts);
# else /* not __GNU_LIBRARY__ */
extern int getopt ();
# endif /* __GNU_LIBRARY__ */
# ifndef __need_getopt
extern int getopt_long (int __argc, char *const *__argv, const char *__shortopts,
const struct option *__longopts, int *__longind);
extern int getopt_long_only (int __argc, char *const *__argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
/* Internal only. Users should not call this directly. */
extern int _getopt_internal (int __argc, char *const *__argv,
const char *__shortopts,
const struct option *__longopts, int *__longind,
int __long_only);
# endif
#else /* not __STDC__ */
extern int getopt ();
# ifndef __need_getopt
extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal ();
# endif
#endif /* __STDC__ */
#ifdef __cplusplus
}
#endif
/* Make sure we later can get all the definitions and declarations. */
#undef __need_getopt
#endif /* getopt.h */

23
include/share/utf8.h Normal file
View File

@@ -0,0 +1,23 @@
/*
* Convert a string between UTF-8 and the locale's charset.
* Invalid bytes are replaced by '#', and characters that are
* not available in the target encoding are replaced by '?'.
*
* If the locale's charset is not set explicitly then it is
* obtained using nl_langinfo(CODESET), where available, the
* environment variable CHARSET, or assumed to be US-ASCII.
*
* Return value of conversion functions:
*
* -1 : memory allocation failed
* 0 : data was converted exactly
* 1 : valid data was converted approximately (using '?')
* 2 : input was invalid (but still converted, using '#')
* 3 : unknown encoding (but still converted, using '?')
*/
/* Set the locale's charset explicitly instead of relying on the
 * detection described above. */
void convert_set_charset(const char *charset);
/* NOTE(review): presumably utf8_encode() converts 'from' out of the
 * locale's charset into UTF-8 and utf8_decode() does the reverse, each
 * storing a newly allocated string in *to -- confirm against utf8.c.
 * Return values are listed in the table above. */
int utf8_encode(const char *from, char **to);
int utf8_decode(const char *from, char **to);

25
src/libFLAC++/Makefile.am Normal file
View File

@@ -0,0 +1,25 @@
# libFLAC++ - Free Lossless Audio Codec library
# Copyright (C) 2002 Josh Coalson
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
lib_LTLIBRARIES = libFLAC++.la
CFLAGS = @CFLAGS@
libFLAC++_la_LDFLAGS = -version-info 2:1:1
# The library's only source file is C++ and is named metadata.cc
# (see Makefile.vc and the file itself); listing metadata.c here would
# leave automake looking for a file that does not exist.
libFLAC++_la_SOURCES = \
	metadata.cc

View File

@@ -0,0 +1,31 @@
# libFLAC++ - Free Lossless Audio Codec library
# Copyright (C) 2002 Josh Coalson
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#
# GNU makefile
#
# Inputs for the shared library build rules in build/lib.mk: the name of
# the library, the include path and the object files that make it up.
LIB_NAME = libFLAC++
INCLUDES = -I../../include
OBJS = \
metadata.o
include ../../build/lib.mk
# DO NOT DELETE THIS LINE -- make depend depends on it.

41
src/libFLAC++/Makefile.vc Normal file
View File

@@ -0,0 +1,41 @@
# libFLAC++ - Free Lossless Audio Codec library
# Copyright (C) 2002 Josh Coalson
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
# nmake makefile for building libFLAC++.lib with Visual C++.
!include <win32.mak>
# Inference rule for compiling .cc files; the DEBUG macro selects the
# unoptimized debug flags, otherwise the optimized release flags are used.
!IFDEF DEBUG
.cc.obj:
$(ccc) /D "_LIB" /GX $(cdebug) $(cflags) /I "..\..\include" -DSTRICT -YX /Od /D "_DEBUG" $<
!else
.cc.obj:
$(ccc) /D "_LIB" /O2 $(crelease) $(cflags) /I "..\..\include" -DSTRICT -YX -DNODEBUG $<
!endif
# Source files of the library; OBJS is derived from them by suffix
# substitution.
CC_FILES= \
metadata.cc
OBJS= $(CC_FILES:.cc=.obj)
all: libFLAC++.lib
# Archive the objects into ../../obj/lib.
libFLAC++.lib: $(OBJS)
link.exe -lib /nodefaultlib -out:../../obj/lib/$*.lib $(OBJS)
# NOTE(review): the ia32\*.obj pattern looks inherited from another
# makefile; no ia32 directory is referenced elsewhere in this file.
clean:
-del *.obj ia32\*.obj *.pch
-del ..\..\obj\lib\libFLAC++.lib ..\..\obj\lib\libFLAC++.pdb

283
src/libFLAC++/metadata.cc Normal file
View File

@@ -0,0 +1,283 @@
/* libFLAC++ - Free Lossless Audio Codec library
* Copyright (C) 2002 Josh Coalson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "FLAC++/metadata.h"
#include "FLAC/assert.h"
#include <string.h> // for memcpy()
namespace FLAC {
namespace Metadata {
//
// Prototype
//
// Wraps a metadata object.  With 'copy' set, the wrapper holds the result
// of FLAC__metadata_object_copy(object); otherwise it holds 'object'
// itself.  Whatever pointer is held is deleted by the destructor.
Prototype::Prototype(::FLAC__StreamMetaData *object, bool copy)
{
	FLAC__ASSERT(0 != object);
	if(copy)
		object_ = ::FLAC__metadata_object_copy(object);
	else
		object_ = object;
}
// Releases the wrapped metadata object, if one is held.
Prototype::~Prototype()
{
	if(is_valid())
		::FLAC__metadata_object_delete(object_);
}
// Returns the is_last flag of the wrapped block.
bool Prototype::get_is_last() const
{
	FLAC__ASSERT(is_valid());
	const bool is_last = object_->is_last;
	return is_last;
}
// Returns the type tag of the wrapped block.
FLAC__MetaDataType Prototype::get_type() const
{
	FLAC__ASSERT(is_valid());
	const FLAC__MetaDataType block_type = object_->type;
	return block_type;
}
// Returns the length field of the wrapped block.
unsigned Prototype::get_length() const
{
	FLAC__ASSERT(is_valid());
	const unsigned block_length = object_->length;
	return block_length;
}
//
// StreamInfo
//
// Default constructor: wraps a newly created STREAMINFO object (no copy).
StreamInfo::StreamInfo()
	: Prototype(::FLAC__metadata_object_new(FLAC__METADATA_TYPE_STREAMINFO), false /* copy */)
{
}
// Wraps an existing object, or a copy of it when 'copy' is true.
StreamInfo::StreamInfo(::FLAC__StreamMetaData *object, bool copy)
	: Prototype(object, copy)
{
}
// Nothing to do here; the Prototype destructor releases the wrapped object.
StreamInfo::~StreamInfo()
{
}
// Returns the stream_info.min_blocksize field.
unsigned StreamInfo::get_min_blocksize() const
{
	FLAC__ASSERT(is_valid());
	const unsigned min_blocksize = object_->data.stream_info.min_blocksize;
	return min_blocksize;
}
// Returns the stream_info.max_blocksize field.
unsigned StreamInfo::get_max_blocksize() const
{
	FLAC__ASSERT(is_valid());
	const unsigned max_blocksize = object_->data.stream_info.max_blocksize;
	return max_blocksize;
}
// Returns the stream_info.min_framesize field.
unsigned StreamInfo::get_min_framesize() const
{
	FLAC__ASSERT(is_valid());
	const unsigned min_framesize = object_->data.stream_info.min_framesize;
	return min_framesize;
}
// Returns the stream_info.max_framesize field.
unsigned StreamInfo::get_max_framesize() const
{
	FLAC__ASSERT(is_valid());
	const unsigned max_framesize = object_->data.stream_info.max_framesize;
	return max_framesize;
}
// Returns the stream_info.sample_rate field.
unsigned StreamInfo::get_sample_rate() const
{
	FLAC__ASSERT(is_valid());
	const unsigned sample_rate = object_->data.stream_info.sample_rate;
	return sample_rate;
}
// Returns the stream_info.channels field.
unsigned StreamInfo::get_channels() const
{
	FLAC__ASSERT(is_valid());
	const unsigned channels = object_->data.stream_info.channels;
	return channels;
}
// Returns the stream_info.bits_per_sample field.
unsigned StreamInfo::get_bits_per_sample() const
{
	FLAC__ASSERT(is_valid());
	const unsigned bits_per_sample = object_->data.stream_info.bits_per_sample;
	return bits_per_sample;
}
// Returns the stream_info.total_samples field.
FLAC__uint64 StreamInfo::get_total_samples() const
{
	FLAC__ASSERT(is_valid());
	const FLAC__uint64 total_samples = object_->data.stream_info.total_samples;
	return total_samples;
}
// Returns a read-only pointer to the md5sum bytes stored inside the
// wrapped block; the pointer refers to the block's own storage.
const FLAC__byte *StreamInfo::get_md5sum() const
{
	FLAC__ASSERT(is_valid());
	const FLAC__byte *digest = object_->data.stream_info.md5sum;
	return digest;
}
void StreamInfo::set_min_blocksize(unsigned value)
{
FLAC__ASSERT(is_valid());
FLAC__ASSERT(value >= FLAC__MIN_BLOCK_SIZE);
FLAC__ASSERT(value <= FLAC__MAX_BLOCK_SIZE);
object_->data.stream_info.min_blocksize = value;
}
void StreamInfo::set_max_blocksize(unsigned value)
{
FLAC__ASSERT(is_valid());
FLAC__ASSERT(value >= FLAC__MIN_BLOCK_SIZE);
FLAC__ASSERT(value <= FLAC__MAX_BLOCK_SIZE);
object_->data.stream_info.max_blocksize = value;
}
// Stores 'value' as the minimum frame size.
//
// The value must fit in the bit field the stream format reserves for it,
// i.e. be below 2^FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN.
// The bound has to be built with a shift ('<<'); a comparison ('<') there
// evaluates to 0 or 1 and makes the assertion reject every non-zero value.
void StreamInfo::set_min_framesize(unsigned value)
{
	FLAC__ASSERT(is_valid());
	FLAC__ASSERT(value < (1u << FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN));
	object_->data.stream_info.min_framesize = value;
}
// Stores 'value' as the maximum frame size.
//
// The value must fit in the bit field the stream format reserves for it,
// i.e. be below 2^FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN.
// The bound has to be built with a shift ('<<'); a comparison ('<') there
// evaluates to 0 or 1 and makes the assertion reject every non-zero value.
void StreamInfo::set_max_framesize(unsigned value)
{
	FLAC__ASSERT(is_valid());
	FLAC__ASSERT(value < (1u << FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN));
	object_->data.stream_info.max_framesize = value;
}
void StreamInfo::set_sample_rate(unsigned value)
{
FLAC__ASSERT(is_valid());
FLAC__ASSERT(FLAC__format_is_valid_sample_rate(value));
object_->data.stream_info.sample_rate = value;
}
void StreamInfo::set_channels(unsigned value)
{
FLAC__ASSERT(is_valid());
FLAC__ASSERT(value > 0);
FLAC__ASSERT(value <= FLAC__MAX_CHANNELS);
object_->data.stream_info.channels = value;
}
void StreamInfo::set_bits_per_sample(unsigned value)
{
FLAC__ASSERT(is_valid());
FLAC__ASSERT(value >= FLAC__MIN_BITS_PER_SAMPLE);
FLAC__ASSERT(value <= FLAC__MAX_BITS_PER_SAMPLE);
object_->data.stream_info.bits_per_sample = value;
}
// Stores 'value' as the total number of samples.
//
// The value must be below
// 2^FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN.  The bound is
// computed in FLAC__uint64 because that field is wider than a 32-bit
// 'unsigned' (36 bits in the FLAC format), so shifting 1u by its width
// would overflow.
void StreamInfo::set_total_samples(FLAC__uint64 value)
{
	FLAC__ASSERT(is_valid());
	FLAC__ASSERT(value < (static_cast<FLAC__uint64>(1) << FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN));
	object_->data.stream_info.total_samples = value;
}
void StreamInfo::set_md5sum(const FLAC__byte value[16])
{
FLAC__ASSERT(is_valid());
FLAC__ASSERT(0 != value);
memcpy(object_->data.stream_info.md5sum, value, 16);
}
//
// Padding
//
// Default constructor: wraps a newly created PADDING object (no copy).
Padding::Padding()
	: Prototype(::FLAC__metadata_object_new(FLAC__METADATA_TYPE_PADDING), false /* copy */)
{
}
// Wraps an existing object, or a copy of it when 'copy' is true.
Padding::Padding(::FLAC__StreamMetaData *object, bool copy)
	: Prototype(object, copy)
{
}
// Nothing to do here; the Prototype destructor releases the wrapped object.
Padding::~Padding()
{
}
//
// Application
//
// Default constructor: wraps a newly created APPLICATION object (no copy).
Application::Application()
	: Prototype(::FLAC__metadata_object_new(FLAC__METADATA_TYPE_APPLICATION), false /* copy */)
{
}
// Wraps an existing object, or a copy of it when 'copy' is true.
Application::Application(::FLAC__StreamMetaData *object, bool copy)
	: Prototype(object, copy)
{
}
// Nothing to do here; the Prototype destructor releases the wrapped object.
Application::~Application()
{
}
// Returns a read-only pointer to the application id stored inside the
// wrapped block.
const FLAC__byte *Application::get_id() const
{
	FLAC__ASSERT(is_valid());
	const FLAC__byte *id = object_->data.application.id;
	return id;
}
// Returns the block's application data pointer as a read-only pointer.
const FLAC__byte *Application::get_data() const
{
	FLAC__ASSERT(is_valid());
	const FLAC__byte *payload = object_->data.application.data;
	return payload;
}
// Copies the 4 bytes at 'value' into the block's application id.
// 'value' is asserted to be non-null.
void Application::set_id(FLAC__byte value[4])
{
	FLAC__ASSERT(is_valid());
	FLAC__ASSERT(0 != value);

	FLAC__byte *destination = object_->data.application.id;
	memcpy(destination, value, 4);
}
// Hands the data buffer to FLAC__metadata_object_application_set_data()
// for the wrapped block and reports that function's result.
bool Application::set_data(FLAC__byte *data, unsigned length, bool copy)
{
	FLAC__ASSERT(is_valid());
	const bool ok = FLAC__metadata_object_application_set_data(object_, data, length, copy);
	return ok;
}
//
// SeekTable
//
// Default constructor: wraps a newly created SEEKTABLE object (no copy).
SeekTable::SeekTable()
	: Prototype(::FLAC__metadata_object_new(FLAC__METADATA_TYPE_SEEKTABLE), false /* copy */)
{
}
// Wraps an existing object, or a copy of it when 'copy' is true.
SeekTable::SeekTable(::FLAC__StreamMetaData *object, bool copy)
	: Prototype(object, copy)
{
}
// Nothing to do here; the Prototype destructor releases the wrapped object.
SeekTable::~SeekTable()
{
}
//
// VorbisComment
//
// Default constructor: wraps a newly created VORBIS_COMMENT object (no copy).
VorbisComment::VorbisComment()
	: Prototype(::FLAC__metadata_object_new(FLAC__METADATA_TYPE_VORBIS_COMMENT), false /* copy */)
{
}
// Wraps an existing object, or a copy of it when 'copy' is true.
VorbisComment::VorbisComment(::FLAC__StreamMetaData *object, bool copy)
	: Prototype(object, copy)
{
}
// Nothing to do here; the Prototype destructor releases the wrapped object.
VorbisComment::~VorbisComment()
{
}
};
};

19
src/share/Makefile.am Normal file
View File

@@ -0,0 +1,19 @@
## Process this file with automake to produce Makefile.in
AUTOMAKE_OPTIONS = foreign
INCLUDES = -I$(top_srcdir)/include/share
# Two static convenience libraries, built but not installed.
noinst_LIBRARIES = libutf8.a libgetopt.a
libutf8_a_SOURCES = charset.c charset.h iconvert.c utf8.c
libgetopt_a_SOURCES = getopt.c getopt1.c
# Shipped in the distribution but not compiled into either library.
EXTRA_DIST = charmaps.h makemap.c charset_test.c charsetmap.h
# Convenience targets: rebuild everything with the configured debug or
# profiling compiler flags.
debug:
$(MAKE) all CFLAGS="@DEBUG@"
profile:
$(MAKE) all CFLAGS="@PROFILE@"

14
src/share/Makefile.lite Normal file
View File

@@ -0,0 +1,14 @@
#
# GNU makefile
#
# Inputs for the shared library build rules in build/lib.mk.  Only the
# getopt objects are listed here; the UTF-8 sources are not built by this
# makefile.
LIB_NAME = libgetopt
INCLUDES = -I../../include/share
OBJS = \
getopt.o \
getopt1.o
include ../../build/lib.mk
# DO NOT DELETE THIS LINE -- make depend depends on it.

4
src/share/README Normal file
View File

@@ -0,0 +1,4 @@
This directory is shamelessly copied from vorbis-tools. It contains two
convenience libraries: one for converting strings between UTF-8 and the
locale's charset (GPL), and one implementing getopt (LGPL). libFLAC does
not link to either; the only FLAC tools that do are GPL'ed.

57
src/share/charmaps.h Normal file
View File

@@ -0,0 +1,57 @@
/*
* If you need to generate more maps, use makemap.c on a system
* with a decent iconv.
*/
/*
 * Conversion table for ISO-8859-2: the array index is the byte value in
 * that charset and the element is the 16-bit code it maps to.  The 8-bit
 * converter in charset.c treats an element of 0xffff as "no mapping";
 * this table contains no such entries.
 */
static const unsigned short mapping_iso_8859_2[256] = {
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9
};
/*
 * The table-driven 8-bit charsets known to charset_find().  The list is
 * terminated by an entry whose name is zero.
 */
static struct {
const char *name; /* charset name, matched case-insensitively */
const unsigned short *map; /* 256-entry byte-to-code table */
struct charset *charset; /* created by charset_find() on first use, then reused */
} maps[] = {
{ "ISO-8859-2", mapping_iso_8859_2, 0 },
{ 0, 0, 0 }
};
/*
 * Charset name aliases: charset_find() replaces a name matching 'bad'
 * with the corresponding 'good' name before looking the charset up.
 * The list is terminated by an entry whose 'bad' name is zero.
 */
static const struct {
const char *bad;
const char *good;
} names[] = {
{ "ANSI_X3.4-1968", "us-ascii" },
{ 0, 0 }
};

521
src/share/charset.c Normal file
View File

@@ -0,0 +1,521 @@
/*
* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* See the corresponding header file for a description of the functions
* that this file provides.
*
* This was first written for Ogg Vorbis but could be of general use.
*
* The only deliberate assumption about data sizes is that a short has
* at least 16 bits, but this code has only been tested on systems with
* 8-bit char, 16-bit short and 32-bit int.
*/
#ifndef HAVE_ICONV /* should be ifdef USE_CHARSET_CONVERT */
#include <stdlib.h>
#include "charset.h"
#include "charmaps.h"
/*
* This is like the standard strcasecmp, but it does not depend
* on the locale. Locale-dependent functions can be dangerous:
* we once had a bug involving strcasecmp("iso", "ISO") in a
* Turkish locale!
*
* (I'm not really sure what the official standard says
* about the sign of strcasecmp("Z", "["), but usually
* we're only interested in whether it's zero.)
*/
/*
 * Compare two strings, ignoring the case of the ASCII letters a-z only.
 * Returns zero when the strings are equal under that rule; otherwise the
 * difference of the first differing bytes (taken as unsigned char, without
 * case folding).
 */
static int ascii_strcasecmp(const char *s1, const char *s2)
{
  char c1, c2;

  for (;; s1++, s2++) {
    /* Stop as soon as either string ends. */
    if (!*s1 || !*s2)
      break;
    if (*s1 == *s2)
      continue;
    /* Fold a-z to A-Z by hand so the result never depends on the locale. */
    c1 = *s1;
    if ('a' <= c1 && c1 <= 'z')
      c1 += 'A' - 'a';
    c2 = *s2;
    if ('a' <= c2 && c2 <= 'z')
      c2 += 'A' - 'a';
    if (c1 != c2)
      break;
  }
  return (unsigned char)*s1 - (unsigned char)*s2;
}
/*
* UTF-8 equivalents of the C library's wctomb() and mbtowc().
*/
/*
 * Decode one UTF-8 sequence from the first n bytes of s.
 *
 * Returns the number of bytes consumed (1 to 6), 0 when s is null, n is
 * zero or the first byte is NUL, and -1 when the sequence is invalid,
 * truncated or not in its shortest form.  On success the decoded value is
 * stored in *pwc if pwc is not null.
 */
int utf8_mbtowc(int *pwc, const char *s, size_t n)
{
  unsigned char lead;
  int length, pos, value;

  if (!s || !n)
    return 0;

  lead = (unsigned char)*s;

  /* Single byte: plain ASCII. */
  if (lead < 0x80) {
    if (pwc)
      *pwc = lead;
    return lead ? 1 : 0;
  }

  /* 0x80..0xc1 cannot start a sequence: continuation bytes, and lead
     bytes that could only produce overlong two-byte forms. */
  if (lead < 0xc2)
    return -1;

  /* Two-byte sequences are handled directly. */
  if (lead < 0xe0) {
    if (n < 2 || (s[1] & 0xc0) != 0x80)
      return -1;
    if (pwc)
      *pwc = ((lead & 0x1f) << 6) | (s[1] & 0x3f);
    return 2;
  }

  /* Longer sequences: the lead byte determines the total length. */
  if (lead < 0xf0)
    length = 3;
  else if (lead < 0xf8)
    length = 4;
  else if (lead < 0xfc)
    length = 5;
  else if (lead < 0xfe)
    length = 6;
  else
    return -1;

  if (n < (size_t)length)
    return -1;

  /* Payload bits of the lead byte, then six bits per continuation byte. */
  value = lead & ((1 << (7 - length)) - 1);
  for (pos = 1; pos < length; pos++) {
    if ((s[pos] & 0xc0) != 0x80)
      return -1;
    value = (value << 6) | (s[pos] & 0x3f);
  }

  /* Reject values that would have fitted in a shorter sequence. */
  if (value < (1 << (5 * length - 4)))
    return -1;

  if (pwc)
    *pwc = value;
  return length;
}
/*
 * Encode wc1 as UTF-8 into the buffer at s, which must have room for up
 * to 6 bytes.
 *
 * Returns the number of bytes written (1 to 6), 0 when s is null, and -1
 * when wc1 is negative (values of 2^31 and above cannot be encoded).
 *
 * The range limits are written with unsigned constants: with a 32-bit
 * int, "1 << 31" overflows the signed type, which is undefined behaviour
 * in C.
 */
int utf8_wctomb(char *s, int wc1)
{
  unsigned int wc = wc1;

  if (!s)
    return 0;
  if (wc < (1u << 7)) {
    *s++ = wc;
    return 1;
  }
  else if (wc < (1u << 11)) {
    *s++ = 0xc0 | (wc >> 6);
    *s++ = 0x80 | (wc & 0x3f);
    return 2;
  }
  else if (wc < (1u << 16)) {
    *s++ = 0xe0 | (wc >> 12);
    *s++ = 0x80 | ((wc >> 6) & 0x3f);
    *s++ = 0x80 | (wc & 0x3f);
    return 3;
  }
  else if (wc < (1u << 21)) {
    *s++ = 0xf0 | (wc >> 18);
    *s++ = 0x80 | ((wc >> 12) & 0x3f);
    *s++ = 0x80 | ((wc >> 6) & 0x3f);
    *s++ = 0x80 | (wc & 0x3f);
    return 4;
  }
  else if (wc < (1u << 26)) {
    *s++ = 0xf8 | (wc >> 24);
    *s++ = 0x80 | ((wc >> 18) & 0x3f);
    *s++ = 0x80 | ((wc >> 12) & 0x3f);
    *s++ = 0x80 | ((wc >> 6) & 0x3f);
    *s++ = 0x80 | (wc & 0x3f);
    return 5;
  }
  else if (wc < (1u << 31)) {
    *s++ = 0xfc | (wc >> 30);
    *s++ = 0x80 | ((wc >> 24) & 0x3f);
    *s++ = 0x80 | ((wc >> 18) & 0x3f);
    *s++ = 0x80 | ((wc >> 12) & 0x3f);
    *s++ = 0x80 | ((wc >> 6) & 0x3f);
    *s++ = 0x80 | (wc & 0x3f);
    return 6;
  }
  else
    return -1;
}
/*
* The charset "object" and methods.
*/
/*
 * A charset is described by its two conversion functions plus an opaque
 * table pointer that is passed back to them as their first argument.
 */
struct charset {
int max; /* value reported by charset_max() */
int (*mbtowc)(void *table, int *pwc, const char *s, size_t n); /* decode one character from s */
int (*wctomb)(void *table, char *s, int wc); /* encode wc into s */
void *map; /* passed as 'table' to both functions; may be zero */
};
/* Decode one character using the given charset's own mbtowc function. */
int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n)
{
  int (*decode)(void *, int *, const char *, size_t) = charset->mbtowc;

  return decode(charset->map, pwc, s, n);
}
/* Encode one character using the given charset's own wctomb function. */
int charset_wctomb(struct charset *charset, char *s, int wc)
{
  int (*encode)(void *, char *, int) = charset->wctomb;

  return encode(charset->map, s, wc);
}
/* Return the 'max' field of the given charset. */
int charset_max(struct charset *charset)
{
  const int limit = charset->max;

  return limit;
}
/*
* Implementation of UTF-8.
*/
/* struct charset adapter around utf8_mbtowc(); the table argument is unused. */
static int mbtowc_utf8(void *map, int *pwc, const char *s, size_t n)
{
  (void)map;
  return utf8_mbtowc(pwc, s, n);
}
/* struct charset adapter around utf8_wctomb(); the table argument is unused. */
static int wctomb_utf8(void *map, char *s, int wc)
{
  (void)map;
  return utf8_wctomb(s, wc);
}
/*
* Implementation of US-ASCII.
* Probably on most architectures this compiles to less than 256 bytes
* of code, so we can save space by not having a table for this one.
*/
/*
 * Decode one US-ASCII byte.  Returns 1 for a non-NUL character, 0 when s
 * is null, n is zero or the byte is NUL, and -1 for a byte outside the
 * 7-bit range.  The table argument is unused.
 */
static int mbtowc_ascii(void *map, int *pwc, const char *s, size_t n)
{
  int code;

  (void)map;
  if (!s || !n)
    return 0;

  code = (unsigned char)*s;
  if ((code & ~0x7f) != 0)
    return -1;

  if (pwc)
    *pwc = code;
  if (code)
    return 1;
  return 0;
}
/*
 * Encode wc as one US-ASCII byte.  Returns 1 on success, 0 when s is
 * null, and -1 when wc has any bit set outside the low seven.  The table
 * argument is unused.
 */
static int wctomb_ascii(void *map, char *s, int wc)
{
  (void)map;
  if (!s)
    return 0;
  if ((wc & ~0x7f) != 0)
    return -1;
  s[0] = wc;
  return 1;
}
/*
* Implementation of ISO-8859-1.
* Probably on most architectures this compiles to less than 256 bytes
* of code, so we can save space by not having a table for this one.
*/
/*
 * Decode one ISO-8859-1 byte; the byte value is the decoded value.
 * Returns 1 for a non-NUL character, 0 when s is null, n is zero or the
 * byte is NUL, and -1 if the value has bits set above the low eight.
 * The table argument is unused.
 */
static int mbtowc_iso1(void *map, int *pwc, const char *s, size_t n)
{
  int code;

  (void)map;
  if (!s || !n)
    return 0;

  code = (unsigned char)*s;
  if ((code & ~0xff) != 0)
    return -1;

  if (pwc)
    *pwc = code;
  if (code)
    return 1;
  return 0;
}
/*
 * Encode wc as one ISO-8859-1 byte.  Returns 1 on success, 0 when s is
 * null, and -1 when wc has any bit set outside the low eight.  The table
 * argument is unused.
 */
static int wctomb_iso1(void *map, char *s, int wc)
{
  (void)map;
  if (!s)
    return 0;
  if ((wc & ~0xff) != 0)
    return -1;
  s[0] = wc;
  return 1;
}
/*
* Implementation of any 8-bit charset.
*/
/* Conversion tables of one 8-bit charset. */
struct map {
const unsigned short *from; /* 256 entries: byte value -> code, 0xffff = no mapping */
struct inverse_map *to; /* reverse lookup, built by wctomb_8bit() on first use; may be zero */
};
/*
 * Decode one byte of a table-driven 8-bit charset; map1 points to the
 * charset's struct map.  Returns 1 for a character that maps to a
 * non-zero code, 0 when s is null, n is zero or the code is zero, and -1
 * when the table marks the byte as unmapped (0xffff).
 */
static int mbtowc_8bit(void *map1, int *pwc, const char *s, size_t n)
{
  struct map *table = map1;
  unsigned short code;

  if (!s || !n)
    return 0;

  code = table->from[(unsigned char)*s];
  if (code == 0xffff)
    return -1;

  if (pwc)
    *pwc = (int)code;
  if (code)
    return 1;
  return 0;
}
/*
* For the inverse map we use a hash table, which has the advantages
* of small constant memory requirement and simple memory allocation,
* but the disadvantage of slow conversion in the worst case.
* If you need real-time performance while letting a potentially
* malicious user define their own map, then the method used in
* linux/drivers/char/consolemap.c would be more appropriate.
*/
/*
 * Hash table for code -> byte lookups, stored as chains of byte values.
 * A 'next' value of 0 ends a chain.
 */
struct inverse_map {
unsigned char first[256]; /* first byte value of the chain for each hash bucket */
unsigned char next[256]; /* for each byte value, the next one in its chain */
};
/*
* The simple hash is good enough for this application.
* Use the alternative trivial hashes for testing.
*/
#define HASH(i) ((i) & 0xff)
/* #define HASH(i) 0 */
/* #define HASH(i) 99 */
/*
 * Build the hash table used for code -> byte lookups from the 256-entry
 * forward table 'from'.  Returns a malloc'ed table, or zero when the
 * allocation fails.
 */
static struct inverse_map *make_inverse_map(const unsigned short *from)
{
struct inverse_map *to;
char used[256];
int i, j, k;
to = (struct inverse_map *)malloc(sizeof(struct inverse_map));
if (!to)
return 0;
for (i = 0; i < 256; i++)
to->first[i] = to->next[i] = used[i] = 0;
/*
 * Insert every mapped byte value at the head of its bucket's chain.
 * Going from 255 down to 0 means byte value 0, if mapped, is inserted
 * last and so can only ever be the head of a chain; this matters because
 * a 'next' value of 0 also serves as the end-of-chain marker.
 */
for (i = 255; i >= 0; i--)
if (from[i] != 0xffff) {
k = HASH(from[i]);
to->next[i] = to->first[k];
to->first[k] = i;
used[k] = 1;
}
/* Point the empty buckets at an empty list. */
/*
 * That is: find a byte value whose 'next' is 0 and make every unused
 * bucket start there, so a lookup in such a bucket examines one entry
 * and then stops.
 */
for (i = 0; i < 256; i++)
if (!to->next[i])
break;
if (i < 256)
for (j = 0; j < 256; j++)
if (!used[j])
to->first[j] = i;
return to;
}
/*
 * Encode WC1 as one byte of a table-driven 8-bit charset.
 * MAP1 points at the charset's struct map.  Returns 0 if S is null,
 * -1 if WC1 has no byte in this charset, and otherwise stores the byte
 * in S[0] and returns 1.  The inverse map is built on first use; if it
 * cannot be allocated the forward table is searched linearly instead.
 */
int wctomb_8bit(void *map1, char *s, int wc1)
{
  struct map *map = (struct map *)map1;
  unsigned short wc = wc1;
  int i;

  if (!s)
    return 0;

  /*
   * 0xffff is the marker for undefined bytes in the forward table, so
   * it must never be looked up: it would match an unmapped byte and be
   * reported as a successful encoding.
   */
  if ((wc1 & ~0xffff) || wc1 == 0xffff)
    return -1;

  if (1) /* Change 1 to 0 to test the case where malloc fails. */
    if (!map->to)
      map->to = make_inverse_map(map->from);

  if (map->to) {
    /* Use the inverse map. */
    i = map->to->first[HASH(wc)];
    for (;;) {
      if (map->from[i] == wc) {
        *s = i;
        return 1;
      }
      if (!(i = map->to->next[i]))
        break;
    }
  }
  else {
    /* We don't have an inverse map, so do a linear search. */
    for (i = 0; i < 256; i++)
      if (map->from[i] == wc) {
        *s = i;
        return 1;
      }
  }

  return -1;
}
/*
* The "constructor" charset_find().
*/
/*
 * Statically initialised charsets for the encodings handled without a
 * table.  Each initialiser lists a number (6 for UTF-8, 1 for the
 * single-byte sets), the decode and encode callbacks, and a final 0.
 * NOTE(review): struct charset is defined outside this view; the order
 * presumably matches the max / mbtowc / wctomb / map members assigned
 * in charset_find() - confirm against the definition.
 */
struct charset charset_utf8 = {
6,
&mbtowc_utf8,
&wctomb_utf8,
0
};
/* ISO-8859-1: the byte value is the code point. */
struct charset charset_iso1 = {
1,
&mbtowc_iso1,
&wctomb_iso1,
0
};
/* US-ASCII. */
struct charset charset_ascii = {
1,
&mbtowc_ascii,
&wctomb_ascii,
0
};
struct charset *charset_find(const char *code)
{
int i;
/* Find good (MIME) name. */
for (i = 0; names[i].bad; i++)
if (!ascii_strcasecmp(code, names[i].bad)) {
code = names[i].good;
break;
}
/* Recognise some charsets for which we avoid using a table. */
if (!ascii_strcasecmp(code, "UTF-8"))
return &charset_utf8;
if (!ascii_strcasecmp(code, "US-ASCII"))
return &charset_ascii;
if (!ascii_strcasecmp(code, "ISO-8859-1"))
return &charset_iso1;
/* Look for a mapping for a simple 8-bit encoding. */
for (i = 0; maps[i].name; i++)
if (!ascii_strcasecmp(code, maps[i].name)) {
if (!maps[i].charset) {
maps[i].charset = (struct charset *)malloc(sizeof(struct charset));
if (maps[i].charset) {
struct map *map = (struct map *)malloc(sizeof(struct map));
if (!map) {
free(maps[i].charset);
maps[i].charset = 0;
}
else {
maps[i].charset->max = 1;
maps[i].charset->mbtowc = &mbtowc_8bit;
maps[i].charset->wctomb = &wctomb_8bit;
maps[i].charset->map = map;
map->from = maps[i].map;
map->to = 0; /* inverse mapping is created when required */
}
}
}
return maps[i].charset;
}
return 0;
}
/*
 * Function to convert a buffer from one encoding to another.
 * Invalid bytes are replaced by '#', and characters that are
 * not available in the target encoding are replaced by '?'.
 * Each of TO and TOLEN may be zero, if the result is not needed.
 * The output buffer is null-terminated, so it is all right to
 * use charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
 *
 * Returns -1 if either encoding is unknown to charset_find(), -2 if
 * the output buffer cannot be allocated, 0 for an exact conversion,
 * 1 if '?' had to be substituted and 2 if the input was invalid.
 * When TO is non-null, *TO receives a malloc()ed buffer that the
 * caller has to free().
 */
int charset_convert(const char *fromcode, const char *tocode,
                    const char *from, size_t fromlen,
                    char **to, size_t *tolen)
{
  int ret = 0;
  struct charset *charset1, *charset2;
  char *tobuf, *p, *newbuf;
  int i, j, wc;
  size_t maxlen;

  charset1 = charset_find(fromcode);
  charset2 = charset_find(tocode);
  if (!charset1 || !charset2 )
    return -1;

  /*
   * Every input character consumes at least one byte and produces at
   * most charset2->max bytes.  Refuse sizes whose product would wrap
   * around, since that would leave the buffer too small for the loop.
   */
  maxlen = (size_t)charset2->max;
  if (maxlen && fromlen > ((size_t)-1 - 1) / maxlen)
    return -2;

  tobuf = (char *)malloc(fromlen * maxlen + 1);
  if (!tobuf)
    return -2;

  for (p = tobuf; fromlen; from += i, fromlen -= i, p += j) {
    i = charset_mbtowc(charset1, &wc, from, fromlen);
    if (!i)
      i = 1; /* a null character still occupies one input byte */
    else if (i == -1) {
      i = 1;
      wc = '#';
      ret = 2;
    }
    j = charset_wctomb(charset2, p, wc);
    if (j == -1) {
      if (!ret)
        ret = 1;
      j = charset_wctomb(charset2, p, '?');
      if (j == -1)
        j = 0; /* not even '?' is available: drop the character */
    }
  }

  if (tolen)
    *tolen = p - tobuf;
  *p++ = '\0';
  if (to) {
    /* Shrink to fit; keep the original buffer if realloc() fails. */
    newbuf = (char *)realloc(tobuf, p - tobuf);
    *to = newbuf ? newbuf : tobuf;
  }
  else
    free(tobuf);

  return ret;
}
#endif /* USE_CHARSET_ICONV */

72
src/share/charset.h Normal file
View File

@@ -0,0 +1,72 @@
/*
* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdlib.h>
/*
* These functions are like the C library's mbtowc() and wctomb(),
* but instead of depending on the locale they always work in UTF-8,
* and they use int instead of wchar_t.
*/
/* UTF-8 counterpart of mbtowc(): reads at most n bytes from s, stores the character in *pwc. */
int utf8_mbtowc(int *pwc, const char *s, size_t n);
/* UTF-8 counterpart of wctomb(): writes the encoding of wc to s. */
int utf8_wctomb(char *s, int wc);
/*
* This is an object-oriented version of mbtowc() and wctomb().
* The caller first uses charset_find() to get a pointer to struct
* charset, then uses the mbtowc() and wctomb() methods on it.
* The function charset_max() gives the maximum length of a
* multibyte character in that encoding.
* This API is only appropriate for stateless encodings like UTF-8
* or ISO-8859-3, but I have no intention of implementing anything
* other than UTF-8 and 8-bit encodings.
*
* MINOR BUG: If there is no memory charset_find() may return 0 and
* there is no way to distinguish this case from an unknown encoding.
*/
/* Only forward-declared here; callers handle it through pointers. */
struct charset;
/* Look up an encoding by name; returns 0 if it is unknown or memory ran out. */
struct charset *charset_find(const char *code);
/*
 * Decode one character from at most n bytes at s into *pwc (pwc may be 0).
 * Returns the number of bytes used, 0 if s is null, n is zero or the
 * character is the null character, and -1 for invalid input.
 */
int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n);
/*
 * Encode wc into s.  Returns the number of bytes written, 0 if s is
 * null, and -1 if wc cannot be represented in the charset.
 */
int charset_wctomb(struct charset *charset, char *s, int wc);
/* Maximum length in bytes of a multibyte character in the charset. */
int charset_max(struct charset *charset);
/*
* Function to convert a buffer from one encoding to another.
* Invalid bytes are replaced by '#', and characters that are
* not available in the target encoding are replaced by '?'.
* Each of TO and TOLEN may be zero if the result is not wanted.
* The input or output may contain null bytes, but the output
* buffer is also null-terminated, so it is all right to
* use charset_convert(fromcode, tocode, s, strlen(s), &t, 0).
*
* Return value:
*
* -2 : memory allocation failed
* -1 : unknown encoding
* 0 : data was converted exactly
* 1 : valid data was converted approximately (using '?')
* 2 : input was invalid (but still converted, using '#')
*/
/*
 * NOTE(review): the table-driven implementation hands back a buffer
 * obtained from malloc()/realloc() in *to, so the caller presumably has
 * to free() it - confirm that this holds for every implementation.
 */
int charset_convert(const char *fromcode, const char *tocode,
const char *from, size_t fromlen,
char **to, size_t *tolen);

259
src/share/charset_test.c Normal file
View File

@@ -0,0 +1,259 @@
/*
* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <assert.h>
#include <string.h>
#include "charset.h"
/*
 * Checks that every charset is expected to pass: handling of null
 * pointers, zero lengths and the null character in the decoder, and
 * encoding of the null character and of plain ASCII in the encoder.
 */
void test_any(struct charset *charset)
{
int wc;
char s[2];
assert(charset);
/* Decoder */
/* Nothing may be read when s is null or n is 0, even for a bogus pointer. */
assert(charset_mbtowc(charset, 0, 0, 0) == 0);
assert(charset_mbtowc(charset, 0, 0, 1) == 0);
assert(charset_mbtowc(charset, 0, (char *)(-1), 0) == 0);
assert(charset_mbtowc(charset, 0, "a", 0) == 0);
assert(charset_mbtowc(charset, 0, "", 1) == 0);
assert(charset_mbtowc(charset, 0, "b", 1) == 1);
assert(charset_mbtowc(charset, 0, "", 2) == 0);
assert(charset_mbtowc(charset, 0, "c", 2) == 1);
/* Same cases again, now checking what is stored through pwc. */
wc = 'x';
assert(charset_mbtowc(charset, &wc, "a", 0) == 0 && wc == 'x');
assert(charset_mbtowc(charset, &wc, "", 1) == 0 && wc == 0);
assert(charset_mbtowc(charset, &wc, "b", 1) == 1 && wc == 'b');
assert(charset_mbtowc(charset, &wc, "", 2) == 0 && wc == 0);
assert(charset_mbtowc(charset, &wc, "c", 2) == 1 && wc == 'c');
/* Encoder */
assert(charset_wctomb(charset, 0, 0) == 0);
/* s[1] acts as a guard: the encoder must write exactly one byte. */
s[0] = s[1] = '.';
assert(charset_wctomb(charset, s, 0) == 1 &&
s[0] == '\0' && s[1] == '.');
assert(charset_wctomb(charset, s, 'x') == 1 &&
s[0] == 'x' && s[1] == '.');
}
/*
 * UTF-8 specific checks: shortest-form and boundary values for each
 * sequence length, rejection of malformed continuation bytes, and the
 * encoder output for the smallest and largest value of each length.
 */
void test_utf8()
{
  struct charset *charset;
  int wc;
  char s[8];

  charset = charset_find("UTF-8");
  test_any(charset);

  /* Decoder */
  wc = 0;
  /* Boundaries: over-long forms must be rejected, minimal forms accepted. */
  assert(charset_mbtowc(charset, &wc, "\177", 1) == 1 && wc == 127);
  assert(charset_mbtowc(charset, &wc, "\200", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\301\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\200", 1) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\200", 2) == 2 && wc == 128);
  assert(charset_mbtowc(charset, &wc, "\302\200", 3) == 2 && wc == 128);
  assert(charset_mbtowc(charset, &wc, "\340\237\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\200", 9) == 3 &&
         wc == 1 << 11);
  assert(charset_mbtowc(charset, &wc, "\360\217\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\200", 9) == 4 &&
         wc == 1 << 16);
  assert(charset_mbtowc(charset, &wc, "\370\207\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\370\210\200\200\200", 9) == 5 &&
         wc == 1 << 21);
  assert(charset_mbtowc(charset, &wc, "\374\203\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\374\204\200\200\200\200", 9) == 6 &&
         wc == 1 << 26);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\277", 9) == 6 &&
         wc == 0x7fffffff);

  /* Each continuation byte in turn replaced by a non-continuation byte. */
  assert(charset_mbtowc(charset, &wc, "\302\000", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\302\300", 2) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\040\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\340\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\000", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\340\240\300", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\020\200\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\320\200\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\000\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\300\200", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\000", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\360\220\200\300", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\077\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\377\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\077\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\377\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\077\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\377\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\077", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\377", 9) == -1);

  /* 0xfe and 0xff never start a sequence. */
  assert(charset_mbtowc(charset, &wc, "\376\277\277\277\277\277", 9) == -1);
  assert(charset_mbtowc(charset, &wc, "\377\277\277\277\277\277", 9) == -1);

  /* Encoder */
  /*
   * The checks below run in order on the same buffer: each one relies
   * on the dots left over from the previous, shorter-or-equal output.
   */
  strcpy(s, ".......");
  /*
   * A value with the top bit set must be refused.  It is spelled
   * without "1 << 31", which shifts into the sign bit of a signed int
   * and is undefined behaviour; this is the same value for 32-bit int.
   */
  assert(charset_wctomb(charset, s, -0x7fffffff - 1) == -1 &&
         !strcmp(s, "......."));
  assert(charset_wctomb(charset, s, 127) == 1 &&
         !strcmp(s, "\177......"));
  assert(charset_wctomb(charset, s, 128) == 2 &&
         !strcmp(s, "\302\200....."));
  assert(charset_wctomb(charset, s, 0x7ff) == 2 &&
         !strcmp(s, "\337\277....."));
  assert(charset_wctomb(charset, s, 0x800) == 3 &&
         !strcmp(s, "\340\240\200...."));
  assert(charset_wctomb(charset, s, 0xffff) == 3 &&
         !strcmp(s, "\357\277\277...."));
  assert(charset_wctomb(charset, s, 0x10000) == 4 &&
         !strcmp(s, "\360\220\200\200..."));
  assert(charset_wctomb(charset, s, 0x1fffff) == 4 &&
         !strcmp(s, "\367\277\277\277..."));
  assert(charset_wctomb(charset, s, 0x200000) == 5 &&
         !strcmp(s, "\370\210\200\200\200.."));
  assert(charset_wctomb(charset, s, 0x3ffffff) == 5 &&
         !strcmp(s, "\373\277\277\277\277.."));
  assert(charset_wctomb(charset, s, 0x4000000) == 6 &&
         !strcmp(s, "\374\204\200\200\200\200."));
  assert(charset_wctomb(charset, s, 0x7fffffff) == 6 &&
         !strcmp(s, "\375\277\277\277\277\277."));
}
/* US-ASCII specific checks: 127 is the last valid value in both directions. */
void test_ascii()
{
struct charset *charset;
int wc;
char s[3];
/* Lower-case name: the lookup is expected to ignore case. */
charset = charset_find("us-ascii");
test_any(charset);
/* Decoder */
wc = 0;
assert(charset_mbtowc(charset, &wc, "\177", 2) == 1 && wc == 127);
assert(charset_mbtowc(charset, &wc, "\200", 2) == -1);
/* Encoder */
strcpy(s, "..");
/* A rejected value must leave the buffer untouched. */
assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, ".."));
assert(charset_wctomb(charset, s, 255) == -1);
assert(charset_wctomb(charset, s, 128) == -1);
assert(charset_wctomb(charset, s, 127) == 1 && !strcmp(s, "\177."));
}
/* ISO-8859-1 specific checks: every byte maps to the code point of the same value. */
void test_iso1()
{
struct charset *charset;
int wc;
char s[3];
charset = charset_find("iso-8859-1");
test_any(charset);
/* Decoder */
wc = 0;
/* Only the first byte is consumed, however many bytes are available. */
assert(charset_mbtowc(charset, &wc, "\302\200", 9) == 1 && wc == 0xc2);
/* Encoder */
strcpy(s, "..");
assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, ".."));
assert(charset_wctomb(charset, s, 255) == 1 && !strcmp(s, "\377."));
assert(charset_wctomb(charset, s, 128) == 1 && !strcmp(s, "\200."));
}
/* ISO-8859-2 specific checks, exercising the table-driven 8-bit code path. */
void test_iso2()
{
struct charset *charset;
int wc;
char s[3];
charset = charset_find("iso-8859-2");
test_any(charset);
/* Decoder */
wc = 0;
assert(charset_mbtowc(charset, &wc, "\302\200", 9) == 1 && wc == 0xc2);
/* Byte 0xff is 0x02D9 in the ISO-8859-2 table. */
assert(charset_mbtowc(charset, &wc, "\377", 2) == 1 && wc == 0x2d9);
/* Encoder */
strcpy(s, "..");
assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, ".."));
/* 0x00FF does not occur in the ISO-8859-2 table, so it must be refused. */
assert(charset_wctomb(charset, s, 255) == -1 && !strcmp(s, ".."));
/* 258 (0x0102) sits at table position 0xC3, i.e. byte \303. */
assert(charset_wctomb(charset, s, 258) == 1 && !strcmp(s, "\303."));
assert(charset_wctomb(charset, s, 128) == 1 && !strcmp(s, "\200."));
}
/*
 * Checks for charset_convert(): return codes 0/1/2, the optional TO
 * and TOLEN arguments, embedded null bytes, and a full round trip of
 * all 256 byte values through ISO-8859-2 and UTF-8.
 * The buffers returned in q and r are never freed; the program exits
 * right after the tests.
 */
void test_convert()
{
const char *p;
char *q, *r;
char s[256];
size_t n, n2;
int i;
p = "\000x\302\200\375\277\277\277\277\277";
/*
 * NOTE(review): p starts with a null byte, so strcmp(p, q) only
 * compares two empty strings here; the length check carries the test.
 */
assert(charset_convert("UTF-8", "UTF-8", p, 10, &q, &n) == 0 &&
n == 10 && !strcmp(p, q));
/* Invalid input: each offending byte becomes '#', return code 2. */
assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, &n) == 2 &&
n == 4 && !strcmp(q, "x##y"));
/* TO omitted, then TOLEN omitted. */
assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, 0, &n) == 2 &&
n == 4);
assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, 0) == 2 &&
!strcmp(q, "x##y"));
/* Unrepresentable character: replaced by '?', return code 1. */
assert(charset_convert("UTF-8", "iso-8859-1",
"\302\200\304\200x", 5, &q, &n) == 1 &&
n == 3 && !strcmp(q, "\200?x"));
/* Embedded null bytes are converted like any other character. */
assert(charset_convert("iso-8859-1", "UTF-8",
"\000\200\377", 3, &q, &n) == 0 &&
n == 5 && !memcmp(q, "\000\302\200\303\277", 5));
assert(charset_convert("iso-8859-1", "iso-8859-1",
"\000\200\377", 3, &q, &n) == 0 &&
n == 3 && !memcmp(q, "\000\200\377", 3));
assert(charset_convert("iso-8859-2", "utf-8", "\300", 1, &q, &n) == 0 &&
n == 2 && !strcmp(q, "\305\224"));
assert(charset_convert("utf-8", "iso-8859-2", "\305\224", 2, &q, &n) == 0 &&
n == 1 && !strcmp(q, "\300"));
/* Round trip of every byte value. */
for (i = 0; i < 256; i++)
s[i] = i;
assert(charset_convert("iso-8859-2", "utf-8", s, 256, &q, &n) == 0);
assert(charset_convert("utf-8", "iso-8859-2", q, n, &r, &n2) == 0);
assert(n2 == 256 && !memcmp(r, s, n2));
}
int main()
{
test_utf8();
test_ascii();
test_iso1();
test_iso2();
test_convert();
return 0;
}

79
src/share/charsetmap.h Normal file
View File

@@ -0,0 +1,79 @@
/* This file was automatically generated by make_code_map.pl
please don't edit directly
Daniel Resare <noa@metamatrix.se>
*/
charset_map maps[] = {
{"ISO-8859-1",
{
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,
0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,
0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF,
0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,
0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,
0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF
}
},
{"ISO-8859-2",
{
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
}
},
{NULL}
};

1047
src/share/getopt.c Normal file

File diff suppressed because it is too large Load Diff

188
src/share/getopt1.c Normal file
View File

@@ -0,0 +1,188 @@
/* getopt_long and getopt_long_only entry points for GNU getopt.
Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "getopt.h"
#if !defined __STDC__ || !__STDC__
/* This is a separate conditional since some stdc systems
reject `defined (const)'. */
#ifndef const
#define const
#endif
#endif
#include <stdio.h>
/* Comment out all this code if we are using the GNU C Library, and are not
actually compiling the library itself. This code is part of the GNU C
Library, but also included in many other GNU distributions. Compiling
and linking in this code is a waste when using the GNU C library
(especially if it is a shared library). Rather than having every GNU
program understand `configure --with-gnu-libc' and omit the object files,
it is simpler to just do this in the source for each such file. */
#define GETOPT_INTERFACE_VERSION 2
#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
#include <gnu-versions.h>
#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
#define ELIDE_CODE
#endif
#endif
#ifndef ELIDE_CODE
/* This needs to come after some library #include
to get __GNU_LIBRARY__ defined. */
#ifdef __GNU_LIBRARY__
#include <stdlib.h>
#endif
#ifndef NULL
#define NULL 0
#endif
/* Entry point for long-option parsing: forwards all arguments to
_getopt_internal with a final argument of 0.  The old-style (K&R)
definition is kept because this file also supports pre-ANSI compilers. */
int
getopt_long (argc, argv, options, long_options, opt_index)
int argc;
char *const *argv;
const char *options;
const struct option *long_options;
int *opt_index;
{
return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
}
/* Like getopt_long, but '-' as well as '--' can indicate a long option.
If an option that starts with '-' (not '--') doesn't match a long option,
but does match a short option, it is parsed as a short option
instead. */
/* Same forwarding as getopt_long, but the final argument passed to
_getopt_internal is 1 instead of 0.  */
int
getopt_long_only (argc, argv, options, long_options, opt_index)
int argc;
char *const *argv;
const char *options;
const struct option *long_options;
int *opt_index;
{
return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
}
#endif /* Not ELIDE_CODE. */
#ifdef TEST
#include <stdio.h>
/* Test driver (built only when TEST is defined): parses the command
   line with getopt_long and prints every option it recognises,
   followed by the remaining non-option arguments.  */
int
main (argc, argv)
     int argc;
     char **argv;
{
  int c;
  int digit_optind = 0;

  while (1)
    {
      int this_option_optind = optind ? optind : 1;
      int option_index = 0;
      static struct option long_options[] =
      {
        {"add", 1, 0, 0},
        {"append", 0, 0, 0},
        {"delete", 1, 0, 0},
        {"verbose", 0, 0, 0},
        {"create", 0, 0, 0},
        {"file", 1, 0, 0},
        {0, 0, 0, 0}
      };

      c = getopt_long (argc, argv, "abc:d:0123456789",
                       long_options, &option_index);
      if (c == -1)
        break;

      switch (c)
        {
        case 0:
          /* A long option; every table entry above reports 0.  */
          printf ("option %s", long_options[option_index].name);
          if (optarg)
            printf (" with arg %s", optarg);
          printf ("\n");
          break;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          if (digit_optind != 0 && digit_optind != this_option_optind)
            printf ("digits occur in two different argv-elements.\n");
          digit_optind = this_option_optind;
          printf ("option %c\n", c);
          break;

        case 'a':
          printf ("option a\n");
          break;

        case 'b':
          printf ("option b\n");
          break;

        case 'c':
          printf ("option c with value `%s'\n", optarg);
          break;

        case 'd':
          printf ("option d with value `%s'\n", optarg);
          break;

        case '?':
          break;

        default:
          printf ("?? getopt returned character code 0%o ??\n", c);
        }
    }

  if (optind < argc)
    {
      printf ("non-option ARGV-elements: ");
      while (optind < argc)
        printf ("%s ", argv[optind++]);
      printf ("\n");
    }

  /* Return instead of calling exit (): <stdlib.h> is only included
     when __GNU_LIBRARY__ is defined, so exit may be undeclared here.  */
  return 0;
}
#endif /* TEST */

243
src/share/iconvert.c Normal file
View File

@@ -0,0 +1,243 @@
/*
* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef HAVE_ICONV
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <stdlib.h>
#include <string.h>
/*
 * Convert data from one encoding to another. Return:
 *
 * -2 : memory allocation failed
 * -1 : unknown encoding
 * 0 : data was converted exactly
 * 1 : data was converted inexactly
 * 2 : data was invalid (but still converted)
 *
 * Each of TO and TOLEN may be zero if that result is not wanted.
 * When TO is non-null, *TO receives a malloc()ed, null-terminated
 * buffer that the caller has to free(); *TOLEN excludes the terminator.
 *
 * We convert in two steps, via UTF-8, as this is the only
 * reliable way of distinguishing between invalid input
 * and valid input which iconv refuses to transliterate.
 * We convert from UTF-8 twice, because we have no way of
 * knowing whether the conversion was exact if iconv returns
 * E2BIG (due to a bug in the specification of iconv).
 * An alternative approach is to assume that the output of
 * iconv is never more than 4 times as long as the input,
 * but I prefer to avoid that assumption if possible.
 */
int iconvert(const char *fromcode, const char *tocode,
             const char *from, size_t fromlen,
             char **to, size_t *tolen)
{
  int ret = 0;
  iconv_t cd1, cd2;
  char *ib;
  char *ob;
  /* utfbuf starts out null so that the fail path may always free() it. */
  char *utfbuf = 0, *outbuf, *newbuf;
  size_t utflen, outlen, ibl, obl, k;
  char tbuf[2048];

  cd1 = iconv_open("UTF-8", fromcode);
  if (cd1 == (iconv_t)(-1))
    return -1;

  cd2 = (iconv_t)(-1);
  /*
   * Is the target something other than UTF-8?
   * Don't use strcasecmp() as it's locale-dependent.  Plain character
   * comparisons are used rather than strchr(), which also matches the
   * terminating null and would let us read past the end of a short name.
   */
  if ((tocode[0] != 'U' && tocode[0] != 'u') ||
      (tocode[1] != 'T' && tocode[1] != 't') ||
      (tocode[2] != 'F' && tocode[2] != 'f') ||
      tocode[3] != '-' ||
      tocode[4] != '8' ||
      tocode[5] != '\0') {
    char *tocode1;

    /*
     * Try using this non-standard feature of glibc and libiconv.
     * This is deliberately not a config option as people often
     * change their iconv library without rebuilding applications.
     */
    tocode1 = (char *)malloc(strlen(tocode) + 11);
    if (!tocode1)
      goto fail;

    strcpy(tocode1, tocode);
    strcat(tocode1, "//TRANSLIT");
    cd2 = iconv_open(tocode1, "UTF-8");
    free(tocode1);

    /* The second step always reads UTF-8, with or without //TRANSLIT. */
    if (cd2 == (iconv_t)(-1))
      cd2 = iconv_open(tocode, "UTF-8");

    if (cd2 == (iconv_t)(-1)) {
      iconv_close(cd1);
      return -1;
    }
  }

  utflen = 1; /*fromlen * 2 + 1; XXX */
  utfbuf = (char *)malloc(utflen);
  if (!utfbuf)
    goto fail;

  /* Convert to UTF-8 */
  ib = (char *)from;
  ibl = fromlen;
  ob = utfbuf;
  obl = utflen;
  for (;;) {
    k = iconv(cd1, &ib, &ibl, &ob, &obl);
    assert((!k && !ibl) ||
           (k == (size_t)(-1) && errno == E2BIG && ibl && obl < 6) ||
           (k == (size_t)(-1) &&
            (errno == EILSEQ || errno == EINVAL) && ibl));
    if (!ibl)
      break;
    if (obl < 6) {
      /* Enlarge the buffer */
      utflen *= 2;
      newbuf = (char *)realloc(utfbuf, utflen);
      if (!newbuf)
        goto fail;
      ob = (ob - utfbuf) + newbuf;
      obl = utflen - (ob - newbuf);
      utfbuf = newbuf;
    }
    else {
      /* Invalid input */
      ib++, ibl--;
      *ob++ = '#', obl--;
      ret = 2;
      iconv(cd1, 0, 0, 0, 0);
    }
  }

  if (cd2 == (iconv_t)(-1)) {
    /* The target encoding was UTF-8 */
    if (tolen)
      *tolen = ob - utfbuf;
    if (!to) {
      free(utfbuf);
      iconv_close(cd1);
      return ret;
    }
    newbuf = (char *)realloc(utfbuf, (ob - utfbuf) + 1);
    if (!newbuf)
      goto fail;
    ob = (ob - utfbuf) + newbuf;
    *ob = '\0';
    *to = newbuf;
    iconv_close(cd1);
    return ret;
  }

  /*
   * Truncate the buffer to be tidy.  Skip this for empty output:
   * realloc(p, 0) may free p and return null, which would look like
   * an allocation failure and make the fail path free p again.
   */
  utflen = ob - utfbuf;
  if (utflen) {
    newbuf = (char *)realloc(utfbuf, utflen);
    if (!newbuf)
      goto fail;
    utfbuf = newbuf;
  }

  /* Convert from UTF-8 to discover how long the output is */
  outlen = 0;
  ib = utfbuf;
  ibl = utflen;
  while (ibl) {
    ob = tbuf;
    obl = sizeof(tbuf);
    k = iconv(cd2, &ib, &ibl, &ob, &obl);
    assert((k != (size_t)(-1) && !ibl) ||
           (k == (size_t)(-1) && errno == E2BIG && ibl) ||
           (k == (size_t)(-1) && errno == EILSEQ && ibl));
    if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
      /* Replace one character */
      char qmark[] = "?";
      char *tb = qmark;
      size_t tbl = 1;

      outlen += ob - tbuf;
      ob = tbuf;
      obl = sizeof(tbuf);
      k = iconv(cd2, &tb, &tbl, &ob, &obl);
      assert((!k && !tbl) ||
             (k == (size_t)(-1) && errno == EILSEQ && tbl));
      /* Skip the lead byte and any bytes with the top bit set after it. */
      for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
        ;
    }
    outlen += ob - tbuf;
  }
  ob = tbuf;
  obl = sizeof(tbuf);
  k = iconv(cd2, 0, 0, &ob, &obl);
  assert(!k);
  outlen += ob - tbuf;

  /* Convert from UTF-8 for real */
  outbuf = (char *)malloc(outlen + 1);
  if (!outbuf)
    goto fail;
  ib = utfbuf;
  ibl = utflen;
  ob = outbuf;
  obl = outlen;
  while (ibl) {
    k = iconv(cd2, &ib, &ibl, &ob, &obl);
    assert((k != (size_t)(-1) && !ibl) ||
           (k == (size_t)(-1) && errno == EILSEQ && ibl));
    if (k && !ret)
      ret = 1;
    if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) {
      /* Replace one character */
      char qmark[] = "?";
      char *tb = qmark;
      size_t tbl = 1;

      k = iconv(cd2, &tb, &tbl, &ob, &obl);
      assert((!k && !tbl) ||
             (k == (size_t)(-1) && errno == EILSEQ && tbl));
      for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--)
        ;
    }
  }
  k = iconv(cd2, 0, 0, &ob, &obl);
  assert(!k);
  assert(!obl);
  *ob = '\0';

  free(utfbuf);
  iconv_close(cd1);
  iconv_close(cd2);
  if (tolen)
    *tolen = outlen;
  if (!to) {
    free(outbuf);
    return ret;
  }
  *to = outbuf;
  return ret;

 fail:
  free(utfbuf);
  iconv_close(cd1);
  if (cd2 != (iconv_t)(-1))
    iconv_close(cd2);
  return -2;
}
#endif /* HAVE_ICONV */

77
src/share/makemap.c Normal file
View File

@@ -0,0 +1,77 @@
/*
* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <errno.h>
#include <iconv.h>
#include <stdio.h>
/*
 * Print the 256-entry forward table of an 8-bit encoding as C
 * initialiser text: one 16-bit value per byte, eight per line, with
 * 0xffff for bytes that iconv reports as invalid (EILSEQ).
 * The four output bytes are combined most significant first, i.e. the
 * "UCS-4" converter is assumed to produce big-endian output.
 * Returns 0 on success and 1 on a usage error or unexpected iconv result.
 */
int main(int argc, char *argv[])
{
  iconv_t cd;
  char *ib;
  char *ob;
  size_t ibl, obl, k;
  unsigned char c, buf[4];
  unsigned long ucs;
  int i, wc;
  int status = 0;

  if (argc != 2) {
    printf("Usage: %s ENCODING\n", argv[0]);
    printf("Output a charset map for the 8-bit ENCODING.\n");
    return 1;
  }

  cd = iconv_open("UCS-4", argv[1]);
  if (cd == (iconv_t)(-1)) {
    perror("iconv_open");
    return 1;
  }

  for (i = 0; i < 256; i++) {
    /* iconv() takes char pointers; the buffers here are unsigned char. */
    c = i;
    ib = (char *)&c;
    ibl = 1;
    ob = (char *)buf;
    obl = 4;
    k = iconv(cd, &ib, &ibl, &ob, &obl);
    if (!k && !ibl && !obl) {
      /* Assemble in an unsigned type so a high first byte cannot overflow. */
      ucs = ((unsigned long)buf[0] << 24) |
            ((unsigned long)buf[1] << 16) |
            ((unsigned long)buf[2] << 8) |
            (unsigned long)buf[3];
      /* 0xffff is reserved as the "undefined" marker; larger values don't fit. */
      if (ucs >= 0xffff) {
        printf("Dodgy value.\n");
        status = 1;
        break;
      }
      wc = (int)ucs;
    }
    else if (k == (size_t)(-1) && errno == EILSEQ)
      wc = 0xffff;
    else {
      printf("Non-standard iconv.\n");
      status = 1;
      break;
    }

    if (i % 8 == 0)
      printf(" ");
    printf("0x%04x", wc);
    if (i == 255)
      printf("\n");
    else if (i % 8 == 7)
      printf(",\n");
    else
      printf(", ");
  }

  iconv_close(cd);
  return status;
}

314
src/share/utf8.c Normal file
View File

@@ -0,0 +1,314 @@
/*
* Copyright (C) 2001 Peter Harris <peter.harris@hummingbird.com>
* Copyright (C) 2001 Edmund Grimley Evans <edmundo@rano.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Convert a string between UTF-8 and the locale's charset.
*/
#include <stdlib.h>
#include <string.h>
#include "utf8.h"
#ifdef _WIN32
/* Thanks to Peter Harris <peter.harris@hummingbird.com> for this win32
* code.
*/
#include <stdio.h>
#include <windows.h>
/*
 * Encode a null-terminated wide string as UTF-8.
 * Each element is reduced to 16 bits and written as a one-, two- or
 * three-byte sequence.  Returns a malloc()ed, null-terminated buffer,
 * or NULL if the allocation fails.
 */
static unsigned char *make_utf8_string(const wchar_t *unicode)
{
	const wchar_t *src;
	unsigned char *out, *dst;
	unsigned short c;
	int size = 0;

	/* first calculate the size of the target string */
	for (src = unicode; (c = *src) != 0; src++)
		size += (c < 0x0080) ? 1 : (c < 0x0800) ? 2 : 3;

	out = (unsigned char *)malloc(size + 1);
	if (out == NULL)
		return NULL;

	/* second pass: emit the bytes */
	dst = out;
	for (src = unicode; (c = *src) != 0; src++) {
		if (c < 0x080) {
			*dst++ = (unsigned char)c;
		} else if (c < 0x800) {
			*dst++ = 0xc0 | (c >> 6);
			*dst++ = 0x80 | (c & 0x3f);
		} else {
			*dst++ = 0xe0 | (c >> 12);
			*dst++ = 0x80 | ((c >> 6) & 0x3f);
			*dst++ = 0x80 | (c & 0x3f);
		}
	}
	*dst = 0x00;

	return out;
}
/*
 * Decode one UTF-8 sequence starting at `s` (s[0] must not be the NUL
 * terminator) and store the resulting code point in *code_point.
 *
 * Returns the number of bytes consumed, always at least 1.  The scan
 * stops at the first byte that is not a continuation byte -- which
 * includes the terminating NUL -- so it never reads past the end of the
 * string.  A malformed or truncated sequence yields U+FFFD and consumes
 * only the bytes that were examined and accepted.
 *
 * Overlong forms and encoded surrogates are decoded leniently rather
 * than rejected.
 */
static size_t decode_utf8_sequence(const unsigned char *s, unsigned long *code_point)
{
	unsigned long cp;
	size_t need, i;

	if(s[0] < 0x80) {
		*code_point = s[0];
		return 1;
	}
	else if((s[0] & 0xe0) == 0xc0) {
		cp = s[0] & 0x1f;
		need = 1;
	}
	else if((s[0] & 0xf0) == 0xe0) {
		cp = s[0] & 0x0f;
		need = 2;
	}
	else if((s[0] & 0xf8) == 0xf0) {
		cp = s[0] & 0x07;
		need = 3;
	}
	else {
		/* stray continuation byte or invalid lead byte */
		*code_point = 0xfffd;
		return 1;
	}

	for(i = 1; i <= need; i++) {
		if((s[i] & 0xc0) != 0x80) {
			/* sequence cut short; leave s[i] for the caller to examine */
			*code_point = 0xfffd;
			return i;
		}
		cp = (cp << 6) | (s[i] & 0x3f);
	}

	/* a 4-byte form can express values beyond the Unicode range */
	if(cp > 0x10ffff)
		cp = 0xfffd;
	*code_point = cp;
	return need + 1;
}

/*
 * Convert the NUL-terminated UTF-8 string `utf8` to UTF-16 code units
 * stored in wchar_t.  Code points above U+FFFF are written as a
 * surrogate pair.
 *
 * Returns a malloc()ed, zero-terminated buffer that the caller must
 * free(), or NULL if the allocation fails.
 */
static wchar_t *make_unicode_string(const unsigned char *utf8)
{
	size_t size = 0, index = 0, out_index = 0;
	unsigned long cp;
	wchar_t *out;

	/* first calculate the size of the target string */
	while(utf8[index]) {
		index += decode_utf8_sequence(utf8 + index, &cp);
		size += (cp > 0xffff) ? 2 : 1;
	}

	out = (wchar_t *)malloc((size + 1) * sizeof(wchar_t));
	if(out == NULL)
		return NULL;

	/* second pass walks the input exactly like the first, so it writes
	 * exactly `size` units */
	index = 0;
	while(utf8[index]) {
		index += decode_utf8_sequence(utf8 + index, &cp);
		if(cp > 0xffff) {
			cp -= 0x10000;
			out[out_index++] = (wchar_t)(0xd800 | (cp >> 10));
			out[out_index++] = (wchar_t)(0xdc00 | (cp & 0x3ff));
		}
		else {
			out[out_index++] = (wchar_t)cp;
		}
	}
	out[out_index] = 0;
	return out;
}
int utf8_encode(const char *from, char **to)
{
wchar_t *unicode;
int wchars, err;
wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
strlen(from), NULL, 0);
if(wchars == 0)
{
fprintf(stderr, "Unicode translation error %d\n", GetLastError());
return -1;
}
unicode = calloc(wchars + 1, sizeof(unsigned short));
if(unicode == NULL)
{
fprintf(stderr, "Out of memory processing string to UTF8\n");
return -1;
}
err = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
strlen(from), unicode, wchars);
if(err != wchars)
{
free(unicode);
fprintf(stderr, "Unicode translation error %d\n", GetLastError());
return -1;
}
/* On NT-based windows systems, we could use WideCharToMultiByte(), but
* MS doesn't actually have a consistent API across win32.
*/
*to = make_utf8_string(unicode);
free(unicode);
return 0;
}
/*
 * Convert the NUL-terminated UTF-8 string `from` to the code page
 * reported by GetConsoleCP().
 *
 * On success stores a calloc()ed, NUL-terminated string in *to and
 * returns 0.  On failure prints a diagnostic to stderr and returns -1.
 */
int utf8_decode(const char *from, char **to)
{
	int result = -1;
	int needed, written;
	wchar_t *wide;

	/* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but
	 * MS doesn't actually have a consistent API across win32.
	 */
	wide = make_unicode_string((const unsigned char *)from);
	if(wide == NULL)
	{
		fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");
		return -1;
	}

	/* first call only measures: source length -1 means NUL-terminated */
	needed = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, wide,
		-1, NULL, 0, NULL, NULL);
	if(needed == 0)
	{
		fprintf(stderr, "Unicode translation error %d\n", GetLastError());
	}
	else if((*to = (char *)calloc(needed + 1, sizeof(unsigned char))) == NULL)
	{
		fprintf(stderr, "Out of memory processing string to local charset\n");
	}
	else
	{
		written = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, wide,
			-1, *to, needed, NULL, NULL);
		if(written == needed)
		{
			result = 0;
		}
		else
		{
			fprintf(stderr, "Unicode translation error %d\n", GetLastError());
			free(*to);
			*to = NULL;
		}
	}

	/* the intermediate wide string is released on every path */
	free(wide);
	return result;
}
#else /* End win32. Rest is for real operating systems */
#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif
/*
 * Charset conversion routine defined outside this file; convert_buffer()
 * calls it when HAVE_ICONV is defined, forwarding its arguments unchanged.
 * NOTE(review): the meaning of the return codes is not visible here --
 * convert_string() treats -1 as "fall back to plain ASCII substitution"
 * and -2 as a hard failure; confirm against the implementation.
 */
int iconvert(const char *fromcode, const char *tocode,
const char *from, size_t fromlen,
char **to, size_t *tolen);
static char *current_charset = 0; /* means "US-ASCII" */
void convert_set_charset(const char *charset)
{
#ifdef HAVE_LANGINFO_CODESET
if (!charset)
charset = nl_langinfo(CODESET);
#endif
if (!charset)
charset = getenv("CHARSET");
free(current_charset);
current_charset = 0;
if (charset && *charset)
current_charset = strdup(charset);
}
/*
 * Convert `fromlen` bytes at `from` from charset `fromcode` to charset
 * `tocode` using whichever backend was selected at compile time:
 * iconvert() when HAVE_ICONV is defined, charset_convert() otherwise.
 * All arguments are forwarded unchanged and the backend's return value
 * is returned as-is.
 */
static int convert_buffer(const char *fromcode, const char *tocode,
	const char *from, size_t fromlen,
	char **to, size_t *tolen)
{
#ifdef HAVE_ICONV
	return iconvert(fromcode, tocode, from, fromlen, to, tolen);
#else /* should be ifdef USE_CHARSET_CONVERT */
	return charset_convert(fromcode, tocode, from, fromlen, to, tolen);
#endif
}
/*
 * Convert the NUL-terminated string `from` from charset `fromcode` to
 * charset `tocode`, storing the result in *to.
 *
 * The result of convert_buffer() decides what happens:
 *   -2            -> give up and return -1;
 *   -1            -> fall back: *to receives a malloc()ed copy of `from`
 *                    in which every byte with the high bit set has been
 *                    overwritten with `replace`, and 3 is returned
 *                    (-1 if the copy cannot be allocated);
 *   anything else -> returned unchanged.
 */
static int convert_string(const char *fromcode, const char *tocode,
	const char *from, char **to, char replace)
{
	const size_t fromlen = strlen(from);
	const int ret = convert_buffer(fromcode, tocode, from, fromlen, to, 0);
	char *copy;
	size_t i;

	switch (ret) {
	case -2:
		return -1;
	case -1:
		break;
	default:
		return ret;
	}

	/* conversion unavailable: degrade to a 7-bit copy of the input */
	copy = (char *)malloc(fromlen + 1);
	if (copy == NULL)
		return -1;
	memcpy(copy, from, fromlen + 1);

	for (i = 0; i < fromlen; i++) {
		if ((unsigned char)copy[i] > 0x7f)
			copy[i] = replace;
	}

	*to = copy;
	return 3;
}
int utf8_encode(const char *from, char **to)
{
char *charset;
if (!current_charset)
convert_set_charset(0);
charset = current_charset ? current_charset : "US-ASCII";
return convert_string(charset, "UTF-8", from, to, '#');
}
int utf8_decode(const char *from, char **to)
{
char *charset;
if (!current_charset)
convert_set_charset(0);
charset = current_charset ? current_charset : "US-ASCII";
return convert_string("UTF-8", charset, from, to, '?');
}
#endif