diff --git a/include/FLAC++/Makefile.am b/include/FLAC++/Makefile.am new file mode 100644 index 00000000..3e927393 --- /dev/null +++ b/include/FLAC++/Makefile.am @@ -0,0 +1,23 @@ +# libFLAC++ - Free Lossless Audio Codec library +# Copyright (C) 2002 Josh Coalson +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +includedir = ${prefix}/include/FLAC++ + +include_HEADERS = \ + all.h \ + metadata.h diff --git a/include/FLAC++/all.h b/include/FLAC++/all.h new file mode 100644 index 00000000..d74bc65f --- /dev/null +++ b/include/FLAC++/all.h @@ -0,0 +1,25 @@ +/* libFLAC++ - Free Lossless Audio Codec library + * Copyright (C) 2002 Josh Coalson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifndef FLACPP__ALL_H +#define FLACPP__ALL_H + +#include "metadata.h" + +#endif diff --git a/include/FLAC++/metadata.h b/include/FLAC++/metadata.h new file mode 100644 index 00000000..933cf943 --- /dev/null +++ b/include/FLAC++/metadata.h @@ -0,0 +1,112 @@ +/* libFLAC++ - Free Lossless Audio Codec library + * Copyright (C) 2002 Josh Coalson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#ifndef FLACPP__METADATA_H +#define FLACPP__METADATA_H + +#include "FLAC/metadata.h" + +namespace FLAC { + namespace Metadata { + + // NOTE: When the get_*() methods return you a const pointer, + // absolutely DO NOT write into it. Always use the set_*() + // methods. + + // base class for all metadata blocks + class Prototype { + protected: + Prototype(::FLAC__StreamMetaData *object, bool copy); + virtual ~Prototype(); + + ::FLAC__StreamMetaData *object_; + public: + inline bool is_valid() const { return 0 != object_; } + inline operator bool() const { return is_valid(); } + + bool get_is_last() const; + FLAC__MetaDataType get_type() const; + unsigned get_length() const; // NOTE: does not include the header, per spec + }; + + class StreamInfo : public Prototype { + public: + StreamInfo(); + StreamInfo(::FLAC__StreamMetaData *object, bool copy = false); + ~StreamInfo(); + + unsigned get_min_blocksize() const; + unsigned get_max_blocksize() const; + unsigned get_min_framesize() const; + unsigned get_max_framesize() const; + unsigned get_sample_rate() const; + unsigned get_channels() const; + unsigned get_bits_per_sample() const; + FLAC__uint64 get_total_samples() const; + const FLAC__byte *get_md5sum() const; + + void set_min_blocksize(unsigned value); + void set_max_blocksize(unsigned value); + void set_min_framesize(unsigned value); + void set_max_framesize(unsigned value); + void set_sample_rate(unsigned value); + void set_channels(unsigned value); + void set_bits_per_sample(unsigned value); + void set_total_samples(FLAC__uint64 value); + void set_md5sum(const FLAC__byte value[16]); + }; + + class Padding : public Prototype { + public: + Padding(); + Padding(::FLAC__StreamMetaData *object, bool copy = false); + ~Padding(); + }; + + class Application : public Prototype { + public: + Application(); + Application(::FLAC__StreamMetaData *object, bool copy = false); + ~Application(); + + const FLAC__byte *get_id() const; + const FLAC__byte *get_data() const; + + void set_id(FLAC__byte value[4]); + bool set_data(FLAC__byte *data, unsigned length, bool copy = false); + }; + + class SeekTable : public Prototype { + public: + SeekTable(); + SeekTable(::FLAC__StreamMetaData *object, bool copy = false); + ~SeekTable(); + }; + + class VorbisComment : public Prototype { + public: + VorbisComment(); + VorbisComment(::FLAC__StreamMetaData *object, bool copy = false); + ~VorbisComment(); + }; + + }; +}; + +#endif diff --git a/include/share/Makefile.am b/include/share/Makefile.am new file mode 100644 index 00000000..d2e21770 --- /dev/null +++ b/include/share/Makefile.am @@ -0,0 +1,5 @@ +## Process this file with automake to produce Makefile.in + +AUTOMAKE_OPTIONS = foreign + +EXTRA_DIST = utf8.h getopt.h diff --git a/include/share/getopt.h b/include/share/getopt.h new file mode 100644 index 00000000..b0147e9d --- /dev/null +++ b/include/share/getopt.h @@ -0,0 +1,169 @@ +/* Declarations for getopt. + Copyright (C) 1989,90,91,92,93,94,96,97,98 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifndef _GETOPT_H + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +#ifndef __need_getopt +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. */ + +struct option +{ +# if defined __STDC__ && __STDC__ + const char *name; +# else + char *name; +# endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. + + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `--', then non-option arguments are treated as + arguments to the option '\0'. This behavior is specific to the GNU + `getopt'. */ + +#if defined __STDC__ && __STDC__ +# ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int __argc, char *const *__argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ +extern int getopt (); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int __argc, char *const *__argv, const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int __argc, char *const *__argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int __argc, char *const *__argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not __STDC__ */ +extern int getopt (); +# ifndef __need_getopt +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +# endif +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +/* Make sure we later can get all the definitions and declarations. */ +#undef __need_getopt + +#endif /* getopt.h */ diff --git a/include/share/utf8.h b/include/share/utf8.h new file mode 100644 index 00000000..465fd82b --- /dev/null +++ b/include/share/utf8.h @@ -0,0 +1,23 @@ + +/* + * Convert a string between UTF-8 and the locale's charset. + * Invalid bytes are replaced by '#', and characters that are + * not available in the target encoding are replaced by '?'. + * + * If the locale's charset is not set explicitly then it is + * obtained using nl_langinfo(CODESET), where available, the + * environment variable CHARSET, or assumed to be US-ASCII. + * + * Return value of conversion functions: + * + * -1 : memory allocation failed + * 0 : data was converted exactly + * 1 : valid data was converted approximately (using '?') + * 2 : input was invalid (but still converted, using '#') + * 3 : unknown encoding (but still converted, using '?') + */ + +void convert_set_charset(const char *charset); + +int utf8_encode(const char *from, char **to); +int utf8_decode(const char *from, char **to); diff --git a/src/libFLAC++/Makefile.am b/src/libFLAC++/Makefile.am new file mode 100644 index 00000000..da6a2fd2 --- /dev/null +++ b/src/libFLAC++/Makefile.am @@ -0,0 +1,25 @@ +# libFLAC++ - Free Lossless Audio Codec library +# Copyright (C) 2002 Josh Coalson +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +lib_LTLIBRARIES = libFLAC++.la +CFLAGS = @CFLAGS@ + +libFLAC++_la_LDFLAGS = -version-info 2:1:1 + +libFLAC++_la_SOURCES = \ + metadata.c diff --git a/src/libFLAC++/Makefile.lite b/src/libFLAC++/Makefile.lite new file mode 100644 index 00000000..4148b6a5 --- /dev/null +++ b/src/libFLAC++/Makefile.lite @@ -0,0 +1,31 @@ +# libFLAC++ - Free Lossless Audio Codec library +# Copyright (C) 2002 Josh Coalson +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# +# GNU makefile +# + +LIB_NAME = libFLAC++ +INCLUDES = -I../../include + +OBJS = \ + metadata.o + +include ../../build/lib.mk + +# DO NOT DELETE THIS LINE -- make depend depends on it. diff --git a/src/libFLAC++/Makefile.vc b/src/libFLAC++/Makefile.vc new file mode 100644 index 00000000..b09b5c7a --- /dev/null +++ b/src/libFLAC++/Makefile.vc @@ -0,0 +1,41 @@ +# libFLAC++ - Free Lossless Audio Codec library +# Copyright (C) 2002 Josh Coalson +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +!include + +!IFDEF DEBUG +.cc.obj: + $(ccc) /D "_LIB" /GX $(cdebug) $(cflags) /I "..\..\include" -DSTRICT -YX /Od /D "_DEBUG" $< +!else +.cc.obj: + $(ccc) /D "_LIB" /O2 $(crelease) $(cflags) /I "..\..\include" -DSTRICT -YX -DNODEBUG $< +!endif + +CC_FILES= \ + metadata.cc + +OBJS= $(CC_FILES:.cc=.obj) + +all: libFLAC++.lib + +libFLAC++.lib: $(OBJS) + link.exe -lib /nodefaultlib -out:../../obj/lib/$*.lib $(OBJS) + +clean: + -del *.obj ia32\*.obj *.pch + -del ..\..\obj\lib\libFLAC++.lib ..\..\obj\lib\libFLAC++.pdb diff --git a/src/libFLAC++/metadata.cc b/src/libFLAC++/metadata.cc new file mode 100644 index 00000000..40868944 --- /dev/null +++ b/src/libFLAC++/metadata.cc @@ -0,0 +1,283 @@ +/* libFLAC++ - Free Lossless Audio Codec library + * Copyright (C) 2002 Josh Coalson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include "FLAC++/metadata.h" +#include "FLAC/assert.h" +#include // for memcpy() + +namespace FLAC { + namespace Metadata { + + // + // Prototype + // + + Prototype::Prototype(::FLAC__StreamMetaData *object, bool copy) + { + FLAC__ASSERT(0 != object); + object_ = copy? ::FLAC__metadata_object_copy(object) : object; + } + + Prototype::~Prototype() + { + if(0 != object_) + FLAC__metadata_object_delete(object_); + } + + bool Prototype::get_is_last() const + { + FLAC__ASSERT(is_valid()); + return object_->is_last; + } + + FLAC__MetaDataType Prototype::get_type() const + { + FLAC__ASSERT(is_valid()); + return object_->type; + } + + unsigned Prototype::get_length() const + { + FLAC__ASSERT(is_valid()); + return object_->length; + } + + // + // StreamInfo + // + + StreamInfo::StreamInfo(): + Prototype(FLAC__metadata_object_new(FLAC__METADATA_TYPE_STREAMINFO), /*copy=*/false) + { } + + StreamInfo::StreamInfo(::FLAC__StreamMetaData *object, bool copy): + Prototype(object, copy) + { } + + StreamInfo::~StreamInfo() + { } + + unsigned StreamInfo::get_min_blocksize() const + { + FLAC__ASSERT(is_valid()); + return object_->data.stream_info.min_blocksize; + } + + unsigned StreamInfo::get_max_blocksize() const + { + FLAC__ASSERT(is_valid()); + return object_->data.stream_info.max_blocksize; + } + + unsigned StreamInfo::get_min_framesize() const + { + FLAC__ASSERT(is_valid()); + return object_->data.stream_info.min_framesize; + } + + unsigned StreamInfo::get_max_framesize() const + { + FLAC__ASSERT(is_valid()); + return object_->data.stream_info.max_framesize; + } + + unsigned StreamInfo::get_sample_rate() const + { + FLAC__ASSERT(is_valid()); + return object_->data.stream_info.sample_rate; + } + + unsigned StreamInfo::get_channels() const + { + FLAC__ASSERT(is_valid()); + return object_->data.stream_info.channels; + } + + unsigned StreamInfo::get_bits_per_sample() const + { + FLAC__ASSERT(is_valid()); + return object_->data.stream_info.bits_per_sample; + } + + FLAC__uint64 StreamInfo::get_total_samples() const + { + FLAC__ASSERT(is_valid()); + return object_->data.stream_info.total_samples; + } + + const FLAC__byte *StreamInfo::get_md5sum() const + { + FLAC__ASSERT(is_valid()); + return object_->data.stream_info.md5sum; + } + + void StreamInfo::set_min_blocksize(unsigned value) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(value >= FLAC__MIN_BLOCK_SIZE); + FLAC__ASSERT(value <= FLAC__MAX_BLOCK_SIZE); + object_->data.stream_info.min_blocksize = value; + } + + void StreamInfo::set_max_blocksize(unsigned value) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(value >= FLAC__MIN_BLOCK_SIZE); + FLAC__ASSERT(value <= FLAC__MAX_BLOCK_SIZE); + object_->data.stream_info.max_blocksize = value; + } + + void StreamInfo::set_min_framesize(unsigned value) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(value < (1u < FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN)); + object_->data.stream_info.min_framesize = value; + } + + void StreamInfo::set_max_framesize(unsigned value) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(value < (1u < FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN)); + object_->data.stream_info.max_framesize = value; + } + + void StreamInfo::set_sample_rate(unsigned value) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(FLAC__format_is_valid_sample_rate(value)); + object_->data.stream_info.sample_rate = value; + } + + void StreamInfo::set_channels(unsigned value) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(value > 0); + FLAC__ASSERT(value <= FLAC__MAX_CHANNELS); + object_->data.stream_info.channels = value; + } + + void StreamInfo::set_bits_per_sample(unsigned value) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(value >= FLAC__MIN_BITS_PER_SAMPLE); + FLAC__ASSERT(value <= FLAC__MAX_BITS_PER_SAMPLE); + object_->data.stream_info.bits_per_sample = value; + } + + void StreamInfo::set_total_samples(FLAC__uint64 value) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(value < (1u << FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN)); + object_->data.stream_info.total_samples = value; + } + + void StreamInfo::set_md5sum(const FLAC__byte value[16]) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(0 != value); + memcpy(object_->data.stream_info.md5sum, value, 16); + } + + // + // Padding + // + + Padding::Padding(): + Prototype(FLAC__metadata_object_new(FLAC__METADATA_TYPE_PADDING), /*copy=*/false) + { } + + Padding::Padding(::FLAC__StreamMetaData *object, bool copy): + Prototype(object, copy) + { } + + Padding::~Padding() + { } + + // + // Application + // + + Application::Application(): + Prototype(FLAC__metadata_object_new(FLAC__METADATA_TYPE_APPLICATION), /*copy=*/false) + { } + + Application::Application(::FLAC__StreamMetaData *object, bool copy): + Prototype(object, copy) + { } + + Application::~Application() + { } + + const FLAC__byte *Application::get_id() const + { + FLAC__ASSERT(is_valid()); + return object_->data.application.id; + } + + const FLAC__byte *Application::get_data() const + { + FLAC__ASSERT(is_valid()); + return object_->data.application.data; + } + + void Application::set_id(FLAC__byte value[4]) + { + FLAC__ASSERT(is_valid()); + FLAC__ASSERT(0 != value); + memcpy(object_->data.application.id, value, 4); + } + + bool Application::set_data(FLAC__byte *data, unsigned length, bool copy) + { + FLAC__ASSERT(is_valid()); + return FLAC__metadata_object_application_set_data(object_, data, length, copy); + } + + // + // SeekTable + // + + SeekTable::SeekTable(): + Prototype(FLAC__metadata_object_new(FLAC__METADATA_TYPE_SEEKTABLE), /*copy=*/false) + { } + + SeekTable::SeekTable(::FLAC__StreamMetaData *object, bool copy): + Prototype(object, copy) + { } + + SeekTable::~SeekTable() + { } + + // + // VorbisComment + // + + VorbisComment::VorbisComment(): + Prototype(FLAC__metadata_object_new(FLAC__METADATA_TYPE_VORBIS_COMMENT), /*copy=*/false) + { } + + VorbisComment::VorbisComment(::FLAC__StreamMetaData *object, bool copy): + Prototype(object, copy) + { } + + VorbisComment::~VorbisComment() + { } + + }; +}; diff --git a/src/share/Makefile.am b/src/share/Makefile.am new file mode 100644 index 00000000..a8d8c7c6 --- /dev/null +++ b/src/share/Makefile.am @@ -0,0 +1,19 @@ +## Process this file with automake to produce Makefile.in + +AUTOMAKE_OPTIONS = foreign + +INCLUDES = -I$(top_srcdir)/include/share + +noinst_LIBRARIES = libutf8.a libgetopt.a + +libutf8_a_SOURCES = charset.c charset.h iconvert.c utf8.c + +libgetopt_a_SOURCES = getopt.c getopt1.c + +EXTRA_DIST = charmaps.h makemap.c charset_test.c charsetmap.h + +debug: + $(MAKE) all CFLAGS="@DEBUG@" + +profile: + $(MAKE) all CFLAGS="@PROFILE@" diff --git a/src/share/Makefile.lite b/src/share/Makefile.lite new file mode 100644 index 00000000..efde797d --- /dev/null +++ b/src/share/Makefile.lite @@ -0,0 +1,14 @@ +# +# GNU makefile +# + +LIB_NAME = libgetopt +INCLUDES = -I../../include/share + +OBJS = \ + getopt.o \ + getopt1.o + +include ../../build/lib.mk + +# DO NOT DELETE THIS LINE -- make depend depends on it. diff --git a/src/share/README b/src/share/README new file mode 100644 index 00000000..09690807 --- /dev/null +++ b/src/share/README @@ -0,0 +1,4 @@ +This directory is shamelessly copied from vorbistools. It contains two +convenience libraries, one for manipulating UTF-8 strings (GPL) and one +for implementing getopt (LGPL). libFLAC does not link to either; the +only FLAC tools that do are GPL'ed. diff --git a/src/share/charmaps.h b/src/share/charmaps.h new file mode 100644 index 00000000..690d8906 --- /dev/null +++ b/src/share/charmaps.h @@ -0,0 +1,57 @@ + +/* + * If you need to generate more maps, use makemap.c on a system + * with a decent iconv. + */ + +static const unsigned short mapping_iso_8859_2[256] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, + 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, + 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, + 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, + 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, + 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, + 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, + 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, + 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, + 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, + 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, + 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9 +}; + +static struct { + const char *name; + const unsigned short *map; + struct charset *charset; +} maps[] = { + { "ISO-8859-2", mapping_iso_8859_2, 0 }, + { 0, 0, 0 } +}; + +static const struct { + const char *bad; + const char *good; +} names[] = { + { "ANSI_X3.4-1968", "us-ascii" }, + { 0, 0 } +}; diff --git a/src/share/charset.c b/src/share/charset.c new file mode 100644 index 00000000..29b214a0 --- /dev/null +++ b/src/share/charset.c @@ -0,0 +1,521 @@ +/* + * Copyright (C) 2001 Edmund Grimley Evans + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * See the corresponding header file for a description of the functions + * that this file provides. + * + * This was first written for Ogg Vorbis but could be of general use. + * + * The only deliberate assumption about data sizes is that a short has + * at least 16 bits, but this code has only been tested on systems with + * 8-bit char, 16-bit short and 32-bit int. + */ + +#ifndef HAVE_ICONV /* should be ifdef USE_CHARSET_CONVERT */ + +#include + +#include "charset.h" + +#include "charmaps.h" + +/* + * This is like the standard strcasecmp, but it does not depend + * on the locale. Locale-dependent functions can be dangerous: + * we once had a bug involving strcasecmp("iso", "ISO") in a + * Turkish locale! + * + * (I'm not really sure what the official standard says + * about the sign of strcasecmp("Z", "["), but usually + * we're only interested in whether it's zero.) + */ + +static int ascii_strcasecmp(const char *s1, const char *s2) +{ + char c1, c2; + + for (;; s1++, s2++) { + if (!*s1 || !*s1) + break; + if (*s1 == *s2) + continue; + c1 = *s1; + if ('a' <= c1 && c1 <= 'z') + c1 += 'A' - 'a'; + c2 = *s2; + if ('a' <= c2 && c2 <= 'z') + c2 += 'A' - 'a'; + if (c1 != c2) + break; + } + return (unsigned char)*s1 - (unsigned char)*s2; +} + +/* + * UTF-8 equivalents of the C library's wctomb() and mbtowc(). + */ + +int utf8_mbtowc(int *pwc, const char *s, size_t n) +{ + unsigned char c; + int wc, i, k; + + if (!n || !s) + return 0; + + c = *s; + if (c < 0x80) { + if (pwc) + *pwc = c; + return c ? 1 : 0; + } + else if (c < 0xc2) + return -1; + else if (c < 0xe0) { + if (n >= 2 && (s[1] & 0xc0) == 0x80) { + if (pwc) + *pwc = ((c & 0x1f) << 6) | (s[1] & 0x3f); + return 2; + } + else + return -1; + } + else if (c < 0xf0) + k = 3; + else if (c < 0xf8) + k = 4; + else if (c < 0xfc) + k = 5; + else if (c < 0xfe) + k = 6; + else + return -1; + + if (n < k) + return -1; + wc = *s++ & ((1 << (7 - k)) - 1); + for (i = 1; i < k; i++) { + if ((*s & 0xc0) != 0x80) + return -1; + wc = (wc << 6) | (*s++ & 0x3f); + } + if (wc < (1 << (5 * k - 4))) + return -1; + if (pwc) + *pwc = wc; + return k; +} + +int utf8_wctomb(char *s, int wc1) +{ + unsigned int wc = wc1; + + if (!s) + return 0; + if (wc < (1 << 7)) { + *s++ = wc; + return 1; + } + else if (wc < (1 << 11)) { + *s++ = 0xc0 | (wc >> 6); + *s++ = 0x80 | (wc & 0x3f); + return 2; + } + else if (wc < (1 << 16)) { + *s++ = 0xe0 | (wc >> 12); + *s++ = 0x80 | ((wc >> 6) & 0x3f); + *s++ = 0x80 | (wc & 0x3f); + return 3; + } + else if (wc < (1 << 21)) { + *s++ = 0xf0 | (wc >> 18); + *s++ = 0x80 | ((wc >> 12) & 0x3f); + *s++ = 0x80 | ((wc >> 6) & 0x3f); + *s++ = 0x80 | (wc & 0x3f); + return 4; + } + else if (wc < (1 << 26)) { + *s++ = 0xf8 | (wc >> 24); + *s++ = 0x80 | ((wc >> 18) & 0x3f); + *s++ = 0x80 | ((wc >> 12) & 0x3f); + *s++ = 0x80 | ((wc >> 6) & 0x3f); + *s++ = 0x80 | (wc & 0x3f); + return 5; + } + else if (wc < (1 << 31)) { + *s++ = 0xfc | (wc >> 30); + *s++ = 0x80 | ((wc >> 24) & 0x3f); + *s++ = 0x80 | ((wc >> 18) & 0x3f); + *s++ = 0x80 | ((wc >> 12) & 0x3f); + *s++ = 0x80 | ((wc >> 6) & 0x3f); + *s++ = 0x80 | (wc & 0x3f); + return 6; + } + else + return -1; +} + +/* + * The charset "object" and methods. + */ + +struct charset { + int max; + int (*mbtowc)(void *table, int *pwc, const char *s, size_t n); + int (*wctomb)(void *table, char *s, int wc); + void *map; +}; + +int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n) +{ + return (*charset->mbtowc)(charset->map, pwc, s, n); +} + +int charset_wctomb(struct charset *charset, char *s, int wc) +{ + return (*charset->wctomb)(charset->map, s, wc); +} + +int charset_max(struct charset *charset) +{ + return charset->max; +} + +/* + * Implementation of UTF-8. + */ + +static int mbtowc_utf8(void *map, int *pwc, const char *s, size_t n) +{ + return utf8_mbtowc(pwc, s, n); +} + +static int wctomb_utf8(void *map, char *s, int wc) +{ + return utf8_wctomb(s, wc); +} + +/* + * Implementation of US-ASCII. + * Probably on most architectures this compiles to less than 256 bytes + * of code, so we can save space by not having a table for this one. + */ + +static int mbtowc_ascii(void *map, int *pwc, const char *s, size_t n) +{ + int wc; + + if (!n || !s) + return 0; + wc = (unsigned char)*s; + if (wc & ~0x7f) + return -1; + if (pwc) + *pwc = wc; + return wc ? 1 : 0; +} + +static int wctomb_ascii(void *map, char *s, int wc) +{ + if (!s) + return 0; + if (wc & ~0x7f) + return -1; + *s = wc; + return 1; +} + +/* + * Implementation of ISO-8859-1. + * Probably on most architectures this compiles to less than 256 bytes + * of code, so we can save space by not having a table for this one. + */ + +static int mbtowc_iso1(void *map, int *pwc, const char *s, size_t n) +{ + int wc; + + if (!n || !s) + return 0; + wc = (unsigned char)*s; + if (wc & ~0xff) + return -1; + if (pwc) + *pwc = wc; + return wc ? 1 : 0; +} + +static int wctomb_iso1(void *map, char *s, int wc) +{ + if (!s) + return 0; + if (wc & ~0xff) + return -1; + *s = wc; + return 1; +} + +/* + * Implementation of any 8-bit charset. + */ + +struct map { + const unsigned short *from; + struct inverse_map *to; +}; + +static int mbtowc_8bit(void *map1, int *pwc, const char *s, size_t n) +{ + struct map *map = map1; + unsigned short wc; + + if (!n || !s) + return 0; + wc = map->from[(unsigned char)*s]; + if (wc == 0xffff) + return -1; + if (pwc) + *pwc = (int)wc; + return wc ? 1 : 0; +} + +/* + * For the inverse map we use a hash table, which has the advantages + * of small constant memory requirement and simple memory allocation, + * but the disadvantage of slow conversion in the worst case. + * If you need real-time performance while letting a potentially + * malicious user define their own map, then the method used in + * linux/drivers/char/consolemap.c would be more appropriate. + */ + +struct inverse_map { + unsigned char first[256]; + unsigned char next[256]; +}; + +/* + * The simple hash is good enough for this application. + * Use the alternative trivial hashes for testing. + */ +#define HASH(i) ((i) & 0xff) +/* #define HASH(i) 0 */ +/* #define HASH(i) 99 */ + +static struct inverse_map *make_inverse_map(const unsigned short *from) +{ + struct inverse_map *to; + char used[256]; + int i, j, k; + + to = (struct inverse_map *)malloc(sizeof(struct inverse_map)); + if (!to) + return 0; + for (i = 0; i < 256; i++) + to->first[i] = to->next[i] = used[i] = 0; + for (i = 255; i >= 0; i--) + if (from[i] != 0xffff) { + k = HASH(from[i]); + to->next[i] = to->first[k]; + to->first[k] = i; + used[k] = 1; + } + + /* Point the empty buckets at an empty list. */ + for (i = 0; i < 256; i++) + if (!to->next[i]) + break; + if (i < 256) + for (j = 0; j < 256; j++) + if (!used[j]) + to->first[j] = i; + + return to; +} + +int wctomb_8bit(void *map1, char *s, int wc1) +{ + struct map *map = map1; + unsigned short wc = wc1; + int i; + + if (!s) + return 0; + + if (wc1 & ~0xffff) + return -1; + + if (1) /* Change 1 to 0 to test the case where malloc fails. */ + if (!map->to) + map->to = make_inverse_map(map->from); + + if (map->to) { + /* Use the inverse map. */ + i = map->to->first[HASH(wc)]; + for (;;) { + if (map->from[i] == wc) { + *s = i; + return 1; + } + if (!(i = map->to->next[i])) + break; + } + } + else { + /* We don't have an inverse map, so do a linear search. */ + for (i = 0; i < 256; i++) + if (map->from[i] == wc) { + *s = i; + return 1; + } + } + + return -1; +} + +/* + * The "constructor" charset_find(). + */ + +struct charset charset_utf8 = { + 6, + &mbtowc_utf8, + &wctomb_utf8, + 0 +}; + +struct charset charset_iso1 = { + 1, + &mbtowc_iso1, + &wctomb_iso1, + 0 +}; + +struct charset charset_ascii = { + 1, + &mbtowc_ascii, + &wctomb_ascii, + 0 +}; + +struct charset *charset_find(const char *code) +{ + int i; + + /* Find good (MIME) name. */ + for (i = 0; names[i].bad; i++) + if (!ascii_strcasecmp(code, names[i].bad)) { + code = names[i].good; + break; + } + + /* Recognise some charsets for which we avoid using a table. */ + if (!ascii_strcasecmp(code, "UTF-8")) + return &charset_utf8; + if (!ascii_strcasecmp(code, "US-ASCII")) + return &charset_ascii; + if (!ascii_strcasecmp(code, "ISO-8859-1")) + return &charset_iso1; + + /* Look for a mapping for a simple 8-bit encoding. */ + for (i = 0; maps[i].name; i++) + if (!ascii_strcasecmp(code, maps[i].name)) { + if (!maps[i].charset) { + maps[i].charset = (struct charset *)malloc(sizeof(struct charset)); + if (maps[i].charset) { + struct map *map = (struct map *)malloc(sizeof(struct map)); + if (!map) { + free(maps[i].charset); + maps[i].charset = 0; + } + else { + maps[i].charset->max = 1; + maps[i].charset->mbtowc = &mbtowc_8bit; + maps[i].charset->wctomb = &wctomb_8bit; + maps[i].charset->map = map; + map->from = maps[i].map; + map->to = 0; /* inverse mapping is created when required */ + } + } + } + return maps[i].charset; + } + + return 0; +} + +/* + * Function to convert a buffer from one encoding to another. + * Invalid bytes are replaced by '#', and characters that are + * not available in the target encoding are replaced by '?'. + * Each of TO and TOLEN may be zero, if the result is not needed. + * The output buffer is null-terminated, so it is all right to + * use charset_convert(fromcode, tocode, s, strlen(s), &t, 0). + */ + +int charset_convert(const char *fromcode, const char *tocode, + const char *from, size_t fromlen, + char **to, size_t *tolen) +{ + int ret = 0; + struct charset *charset1, *charset2; + char *tobuf, *p, *newbuf; + int i, j, wc; + + charset1 = charset_find(fromcode); + charset2 = charset_find(tocode); + if (!charset1 || !charset2 ) + return -1; + + tobuf = (char *)malloc(fromlen * charset2->max + 1); + if (!tobuf) + return -2; + + for (p = tobuf; fromlen; from += i, fromlen -= i, p += j) { + i = charset_mbtowc(charset1, &wc, from, fromlen); + if (!i) + i = 1; + else if (i == -1) { + i = 1; + wc = '#'; + ret = 2; + } + j = charset_wctomb(charset2, p, wc); + if (j == -1) { + if (!ret) + ret = 1; + j = charset_wctomb(charset2, p, '?'); + if (j == -1) + j = 0; + } + } + + if (tolen) + *tolen = p - tobuf; + *p++ = '\0'; + if (to) { + newbuf = realloc(tobuf, p - tobuf); + *to = newbuf ? newbuf : tobuf; + } + else + free(tobuf); + + return ret; +} + +#endif /* USE_CHARSET_ICONV */ diff --git a/src/share/charset.h b/src/share/charset.h new file mode 100644 index 00000000..b5e2fb73 --- /dev/null +++ b/src/share/charset.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2001 Edmund Grimley Evans + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +/* + * These functions are like the C library's mbtowc() and wctomb(), + * but instead of depending on the locale they always work in UTF-8, + * and they use int instead of wchar_t. + */ + +int utf8_mbtowc(int *pwc, const char *s, size_t n); +int utf8_wctomb(char *s, int wc); + +/* + * This is an object-oriented version of mbtowc() and wctomb(). + * The caller first uses charset_find() to get a pointer to struct + * charset, then uses the mbtowc() and wctomb() methods on it. + * The function charset_max() gives the maximum length of a + * multibyte character in that encoding. + * This API is only appropriate for stateless encodings like UTF-8 + * or ISO-8859-3, but I have no intention of implementing anything + * other than UTF-8 and 8-bit encodings. + * + * MINOR BUG: If there is no memory charset_find() may return 0 and + * there is no way to distinguish this case from an unknown encoding. + */ + +struct charset; + +struct charset *charset_find(const char *code); + +int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n); +int charset_wctomb(struct charset *charset, char *s, int wc); +int charset_max(struct charset *charset); + +/* + * Function to convert a buffer from one encoding to another. + * Invalid bytes are replaced by '#', and characters that are + * not available in the target encoding are replaced by '?'. + * Each of TO and TOLEN may be zero if the result is not wanted. + * The input or output may contain null bytes, but the output + * buffer is also null-terminated, so it is all right to + * use charset_convert(fromcode, tocode, s, strlen(s), &t, 0). + * + * Return value: + * + * -2 : memory allocation failed + * -1 : unknown encoding + * 0 : data was converted exactly + * 1 : valid data was converted approximately (using '?') + * 2 : input was invalid (but still converted, using '#') + */ + +int charset_convert(const char *fromcode, const char *tocode, + const char *from, size_t fromlen, + char **to, size_t *tolen); diff --git a/src/share/charset_test.c b/src/share/charset_test.c new file mode 100644 index 00000000..87426d0a --- /dev/null +++ b/src/share/charset_test.c @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2001 Edmund Grimley Evans + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +#include "charset.h" + +void test_any(struct charset *charset) +{ + int wc; + char s[2]; + + assert(charset); + + /* Decoder */ + + assert(charset_mbtowc(charset, 0, 0, 0) == 0); + assert(charset_mbtowc(charset, 0, 0, 1) == 0); + assert(charset_mbtowc(charset, 0, (char *)(-1), 0) == 0); + + assert(charset_mbtowc(charset, 0, "a", 0) == 0); + assert(charset_mbtowc(charset, 0, "", 1) == 0); + assert(charset_mbtowc(charset, 0, "b", 1) == 1); + assert(charset_mbtowc(charset, 0, "", 2) == 0); + assert(charset_mbtowc(charset, 0, "c", 2) == 1); + + wc = 'x'; + assert(charset_mbtowc(charset, &wc, "a", 0) == 0 && wc == 'x'); + assert(charset_mbtowc(charset, &wc, "", 1) == 0 && wc == 0); + assert(charset_mbtowc(charset, &wc, "b", 1) == 1 && wc == 'b'); + assert(charset_mbtowc(charset, &wc, "", 2) == 0 && wc == 0); + assert(charset_mbtowc(charset, &wc, "c", 2) == 1 && wc == 'c'); + + /* Encoder */ + + assert(charset_wctomb(charset, 0, 0) == 0); + + s[0] = s[1] = '.'; + assert(charset_wctomb(charset, s, 0) == 1 && + s[0] == '\0' && s[1] == '.'); + assert(charset_wctomb(charset, s, 'x') == 1 && + s[0] == 'x' && s[1] == '.'); +} + +void test_utf8() +{ + struct charset *charset; + int wc; + char s[8]; + + charset = charset_find("UTF-8"); + test_any(charset); + + /* Decoder */ + wc = 0; + assert(charset_mbtowc(charset, &wc, "\177", 1) == 1 && wc == 127); + assert(charset_mbtowc(charset, &wc, "\200", 2) == -1); + assert(charset_mbtowc(charset, &wc, "\301\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\302\200", 1) == -1); + assert(charset_mbtowc(charset, &wc, "\302\200", 2) == 2 && wc == 128); + assert(charset_mbtowc(charset, &wc, "\302\200", 3) == 2 && wc == 128); + assert(charset_mbtowc(charset, &wc, "\340\237\200", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\340\240\200", 9) == 3 && + wc == 1 << 11); + assert(charset_mbtowc(charset, &wc, "\360\217\277\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\360\220\200\200", 9) == 4 && + wc == 1 << 16); + assert(charset_mbtowc(charset, &wc, "\370\207\277\277\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\370\210\200\200\200", 9) == 5 && + wc == 1 << 21); + assert(charset_mbtowc(charset, &wc, "\374\203\277\277\277\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\374\204\200\200\200\200", 9) == 6 && + wc == 1 << 26); + assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\277", 9) == 6 && + wc == 0x7fffffff); + + assert(charset_mbtowc(charset, &wc, "\302\000", 2) == -1); + assert(charset_mbtowc(charset, &wc, "\302\300", 2) == -1); + assert(charset_mbtowc(charset, &wc, "\340\040\200", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\340\340\200", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\340\240\000", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\340\240\300", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\360\020\200\200", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\360\320\200\200", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\360\220\000\200", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\360\220\300\200", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\360\220\200\000", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\360\220\200\300", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\375\077\277\277\277\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\375\377\277\277\277\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\375\277\077\277\277\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\375\277\377\277\277\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\375\277\277\277\077\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\375\277\277\277\377\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\077", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\375\277\277\277\277\377", 9) == -1); + + assert(charset_mbtowc(charset, &wc, "\376\277\277\277\277\277", 9) == -1); + assert(charset_mbtowc(charset, &wc, "\377\277\277\277\277\277", 9) == -1); + + /* Encoder */ + strcpy(s, "......."); + assert(charset_wctomb(charset, s, 1 << 31) == -1 && + !strcmp(s, ".......")); + assert(charset_wctomb(charset, s, 127) == 1 && + !strcmp(s, "\177......")); + assert(charset_wctomb(charset, s, 128) == 2 && + !strcmp(s, "\302\200.....")); + assert(charset_wctomb(charset, s, 0x7ff) == 2 && + !strcmp(s, "\337\277.....")); + assert(charset_wctomb(charset, s, 0x800) == 3 && + !strcmp(s, "\340\240\200....")); + assert(charset_wctomb(charset, s, 0xffff) == 3 && + !strcmp(s, "\357\277\277....")); + assert(charset_wctomb(charset, s, 0x10000) == 4 && + !strcmp(s, "\360\220\200\200...")); + assert(charset_wctomb(charset, s, 0x1fffff) == 4 && + !strcmp(s, "\367\277\277\277...")); + assert(charset_wctomb(charset, s, 0x200000) == 5 && + !strcmp(s, "\370\210\200\200\200..")); + assert(charset_wctomb(charset, s, 0x3ffffff) == 5 && + !strcmp(s, "\373\277\277\277\277..")); + assert(charset_wctomb(charset, s, 0x4000000) == 6 && + !strcmp(s, "\374\204\200\200\200\200.")); + assert(charset_wctomb(charset, s, 0x7fffffff) == 6 && + !strcmp(s, "\375\277\277\277\277\277.")); +} + +void test_ascii() +{ + struct charset *charset; + int wc; + char s[3]; + + charset = charset_find("us-ascii"); + test_any(charset); + + /* Decoder */ + wc = 0; + assert(charset_mbtowc(charset, &wc, "\177", 2) == 1 && wc == 127); + assert(charset_mbtowc(charset, &wc, "\200", 2) == -1); + + /* Encoder */ + strcpy(s, ".."); + assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, "..")); + assert(charset_wctomb(charset, s, 255) == -1); + assert(charset_wctomb(charset, s, 128) == -1); + assert(charset_wctomb(charset, s, 127) == 1 && !strcmp(s, "\177.")); +} + +void test_iso1() +{ + struct charset *charset; + int wc; + char s[3]; + + charset = charset_find("iso-8859-1"); + test_any(charset); + + /* Decoder */ + wc = 0; + assert(charset_mbtowc(charset, &wc, "\302\200", 9) == 1 && wc == 0xc2); + + /* Encoder */ + strcpy(s, ".."); + assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, "..")); + assert(charset_wctomb(charset, s, 255) == 1 && !strcmp(s, "\377.")); + assert(charset_wctomb(charset, s, 128) == 1 && !strcmp(s, "\200.")); +} + +void test_iso2() +{ + struct charset *charset; + int wc; + char s[3]; + + charset = charset_find("iso-8859-2"); + test_any(charset); + + /* Decoder */ + wc = 0; + assert(charset_mbtowc(charset, &wc, "\302\200", 9) == 1 && wc == 0xc2); + assert(charset_mbtowc(charset, &wc, "\377", 2) == 1 && wc == 0x2d9); + + /* Encoder */ + strcpy(s, ".."); + assert(charset_wctomb(charset, s, 256) == -1 && !strcmp(s, "..")); + assert(charset_wctomb(charset, s, 255) == -1 && !strcmp(s, "..")); + assert(charset_wctomb(charset, s, 258) == 1 && !strcmp(s, "\303.")); + assert(charset_wctomb(charset, s, 128) == 1 && !strcmp(s, "\200.")); +} + +void test_convert() +{ + const char *p; + char *q, *r; + char s[256]; + size_t n, n2; + int i; + + p = "\000x\302\200\375\277\277\277\277\277"; + assert(charset_convert("UTF-8", "UTF-8", p, 10, &q, &n) == 0 && + n == 10 && !strcmp(p, q)); + assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, &n) == 2 && + n == 4 && !strcmp(q, "x##y")); + assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, 0, &n) == 2 && + n == 4); + assert(charset_convert("UTF-8", "UTF-8", "x\301\277y", 4, &q, 0) == 2 && + !strcmp(q, "x##y")); + assert(charset_convert("UTF-8", "iso-8859-1", + "\302\200\304\200x", 5, &q, &n) == 1 && + n == 3 && !strcmp(q, "\200?x")); + assert(charset_convert("iso-8859-1", "UTF-8", + "\000\200\377", 3, &q, &n) == 0 && + n == 5 && !memcmp(q, "\000\302\200\303\277", 5)); + assert(charset_convert("iso-8859-1", "iso-8859-1", + "\000\200\377", 3, &q, &n) == 0 && + n == 3 && !memcmp(q, "\000\200\377", 3)); + + assert(charset_convert("iso-8859-2", "utf-8", "\300", 1, &q, &n) == 0 && + n == 2 && !strcmp(q, "\305\224")); + assert(charset_convert("utf-8", "iso-8859-2", "\305\224", 2, &q, &n) == 0 && + n == 1 && !strcmp(q, "\300")); + + for (i = 0; i < 256; i++) + s[i] = i; + + assert(charset_convert("iso-8859-2", "utf-8", s, 256, &q, &n) == 0); + assert(charset_convert("utf-8", "iso-8859-2", q, n, &r, &n2) == 0); + assert(n2 == 256 && !memcmp(r, s, n2)); +} + +int main() +{ + test_utf8(); + test_ascii(); + test_iso1(); + test_iso2(); + + test_convert(); + + return 0; +} diff --git a/src/share/charsetmap.h b/src/share/charsetmap.h new file mode 100644 index 00000000..b8125905 --- /dev/null +++ b/src/share/charsetmap.h @@ -0,0 +1,79 @@ +/* This file was automatically generated by make_code_map.pl + please don't edit directly + Daniel Resare +*/ +charset_map maps[] = { + {"ISO-8859-1", + { + 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007, + 0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F, + 0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017, + 0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F, + 0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027, + 0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F, + 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037, + 0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F, + 0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047, + 0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F, + 0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057, + 0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F, + 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067, + 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F, + 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077, + 0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F, + 0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087, + 0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F, + 0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097, + 0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F, + 0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7, + 0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF, + 0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7, + 0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF, + 0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7, + 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF, + 0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7, + 0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF, + 0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7, + 0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF, + 0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7, + 0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF + } + }, + {"ISO-8859-2", + { + 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007, + 0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F, + 0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017, + 0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F, + 0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027, + 0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F, + 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037, + 0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F, + 0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047, + 0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F, + 0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057, + 0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F, + 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067, + 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F, + 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077, + 0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F, + 0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087, + 0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F, + 0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097, + 0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F, + 0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7, + 0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B, + 0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7, + 0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C, + 0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7, + 0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E, + 0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7, + 0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF, + 0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7, + 0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F, + 0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7, + 0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9 + } + }, + {NULL} +}; diff --git a/src/share/getopt.c b/src/share/getopt.c new file mode 100644 index 00000000..a11cf22b --- /dev/null +++ b/src/share/getopt.c @@ -0,0 +1,1047 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to drepper@gnu.org + before changing it! + + Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 + Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . */ +#ifndef _NO_PROTO +# define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +# include +#endif + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +# include +# include +#endif /* GNU C library. */ + +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. + When compiling libc, the _ macro is predefined. */ +# ifdef HAVE_LIBINTL_H +# include +# define _(msgid) gettext (msgid) +# else +# define _(msgid) (msgid) +# endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. */ +static char *posixly_correct; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +# include +# define my_index strchr +#else + +#include + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +#ifndef getenv +extern char *getenv (); +#endif + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +#ifdef _LIBC +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. */ + +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; + +static int original_argc; +static char *const *original_argv; + +/* Make sure the environment variable bash 2.0 puts in the environment + is valid for the getopt call we must make sure that the ARGV passed + to getopt is that one passed to the process. */ +static void +__attribute__ ((unused)) +store_args_and_env (int argc, char *const *argv) +{ + /* XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ + original_argc = argc; + original_argv = argv; +} +# ifdef text_set_element +text_set_element (__libc_subinit, store_args_and_env); +# endif /* text_set_element */ + +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#ifdef _LIBC + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#ifdef _LIBC + if (posixly_correct == NULL + && argc == original_argc && argv == original_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + optarg = NULL; + + if (optind == 0 || !__getopt_initialized) + { + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ +#ifdef _LIBC +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (optind < argc && NONOPTION_P) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (ordering == REQUIRE_ORDER) + return -1; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + { + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + _("%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + _("%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); + } + + nextchar += strlen (nextchar); + + optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { + if (posixly_correct) + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, _("%s: illegal option -- %c\n"), + argv[0], c); + else + fprintf (stderr, _("%s: invalid option -- %c\n"), + argv[0], c); + } + optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* Not ELIDE_CODE. */ + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == -1) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/src/share/getopt1.c b/src/share/getopt1.c new file mode 100644 index 00000000..3d264f2d --- /dev/null +++ b/src/share/getopt1.c @@ -0,0 +1,188 @@ +/* getopt_long and getopt_long_only entry points for GNU getopt. + Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "getopt.h" + +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +#ifndef const +#define const +#endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +#include +#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +#define ELIDE_CODE +#endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +#include +#endif + +#ifndef NULL +#define NULL 0 +#endif + +int +getopt_long (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. */ + +int +getopt_long_only (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 1); +} + + +#endif /* Not ELIDE_CODE. */ + +#ifdef TEST + +#include + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == -1) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/src/share/iconvert.c b/src/share/iconvert.c new file mode 100644 index 00000000..8ed95718 --- /dev/null +++ b/src/share/iconvert.c @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2001 Edmund Grimley Evans + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifdef HAVE_ICONV + +#include +#include +#include +#include +#include + +/* + * Convert data from one encoding to another. Return: + * + * -2 : memory allocation failed + * -1 : unknown encoding + * 0 : data was converted exactly + * 1 : data was converted inexactly + * 2 : data was invalid (but still converted) + * + * We convert in two steps, via UTF-8, as this is the only + * reliable way of distinguishing between invalid input + * and valid input which iconv refuses to transliterate. + * We convert from UTF-8 twice, because we have no way of + * knowing whether the conversion was exact if iconv returns + * E2BIG (due to a bug in the specification of iconv). + * An alternative approach is to assume that the output of + * iconv is never more than 4 times as long as the input, + * but I prefer to avoid that assumption if possible. + */ + +int iconvert(const char *fromcode, const char *tocode, + const char *from, size_t fromlen, + char **to, size_t *tolen) +{ + int ret = 0; + iconv_t cd1, cd2; + char *ib; + char *ob; + char *utfbuf, *outbuf, *newbuf; + size_t utflen, outlen, ibl, obl, k; + char tbuf[2048]; + + cd1 = iconv_open("UTF-8", fromcode); + if (cd1 == (iconv_t)(-1)) + return -1; + + cd2 = (iconv_t)(-1); + /* Don't use strcasecmp() as it's locale-dependent. */ + if (!strchr("Uu", tocode[0]) || + !strchr("Tt", tocode[1]) || + !strchr("Ff", tocode[2]) || + tocode[3] != '-' || + tocode[4] != '8' || + tocode[5] != '\0') { + char *tocode1; + + /* + * Try using this non-standard feature of glibc and libiconv. + * This is deliberately not a config option as people often + * change their iconv library without rebuilding applications. + */ + tocode1 = (char *)malloc(strlen(tocode) + 11); + if (!tocode1) + goto fail; + + strcpy(tocode1, tocode); + strcat(tocode1, "//TRANSLIT"); + cd2 = iconv_open(tocode1, "UTF-8"); + free(tocode1); + + if (cd2 == (iconv_t)(-1)) + cd2 = iconv_open(tocode, fromcode); + + if (cd2 == (iconv_t)(-1)) { + iconv_close(cd1); + return -1; + } + } + + utflen = 1; /*fromlen * 2 + 1; XXX */ + utfbuf = (char *)malloc(utflen); + if (!utfbuf) + goto fail; + + /* Convert to UTF-8 */ + ib = (char *)from; + ibl = fromlen; + ob = utfbuf; + obl = utflen; + for (;;) { + k = iconv(cd1, &ib, &ibl, &ob, &obl); + assert((!k && !ibl) || + (k == (size_t)(-1) && errno == E2BIG && ibl && obl < 6) || + (k == (size_t)(-1) && + (errno == EILSEQ || errno == EINVAL) && ibl)); + if (!ibl) + break; + if (obl < 6) { + /* Enlarge the buffer */ + utflen *= 2; + newbuf = (char *)realloc(utfbuf, utflen); + if (!newbuf) + goto fail; + ob = (ob - utfbuf) + newbuf; + obl = utflen - (ob - newbuf); + utfbuf = newbuf; + } + else { + /* Invalid input */ + ib++, ibl--; + *ob++ = '#', obl--; + ret = 2; + iconv(cd1, 0, 0, 0, 0); + } + } + + if (cd2 == (iconv_t)(-1)) { + /* The target encoding was UTF-8 */ + if (tolen) + *tolen = ob - utfbuf; + if (!to) { + free(utfbuf); + iconv_close(cd1); + return ret; + } + newbuf = (char *)realloc(utfbuf, (ob - utfbuf) + 1); + if (!newbuf) + goto fail; + ob = (ob - utfbuf) + newbuf; + *ob = '\0'; + *to = newbuf; + iconv_close(cd1); + return ret; + } + + /* Truncate the buffer to be tidy */ + utflen = ob - utfbuf; + newbuf = (char *)realloc(utfbuf, utflen); + if (!newbuf) + goto fail; + utfbuf = newbuf; + + /* Convert from UTF-8 to discover how long the output is */ + outlen = 0; + ib = utfbuf; + ibl = utflen; + while (ibl) { + ob = tbuf; + obl = sizeof(tbuf); + k = iconv(cd2, &ib, &ibl, &ob, &obl); + assert((k != (size_t)(-1) && !ibl) || + (k == (size_t)(-1) && errno == E2BIG && ibl) || + (k == (size_t)(-1) && errno == EILSEQ && ibl)); + if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) { + /* Replace one character */ + char *tb = "?"; + size_t tbl = 1; + + outlen += ob - tbuf; + ob = tbuf; + obl = sizeof(tbuf); + k = iconv(cd2, &tb, &tbl, &ob, &obl); + assert((!k && !tbl) || + (k == (size_t)(-1) && errno == EILSEQ && tbl)); + for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--) + ; + } + outlen += ob - tbuf; + } + ob = tbuf; + obl = sizeof(tbuf); + k = iconv(cd2, 0, 0, &ob, &obl); + assert(!k); + outlen += ob - tbuf; + + /* Convert from UTF-8 for real */ + outbuf = (char *)malloc(outlen + 1); + if (!outbuf) + goto fail; + ib = utfbuf; + ibl = utflen; + ob = outbuf; + obl = outlen; + while (ibl) { + k = iconv(cd2, &ib, &ibl, &ob, &obl); + assert((k != (size_t)(-1) && !ibl) || + (k == (size_t)(-1) && errno == EILSEQ && ibl)); + if (k && !ret) + ret = 1; + if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) { + /* Replace one character */ + char *tb = "?"; + size_t tbl = 1; + + k = iconv(cd2, &tb, &tbl, &ob, &obl); + assert((!k && !tbl) || + (k == (size_t)(-1) && errno == EILSEQ && tbl)); + for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--) + ; + } + } + k = iconv(cd2, 0, 0, &ob, &obl); + assert(!k); + assert(!obl); + *ob = '\0'; + + free(utfbuf); + iconv_close(cd1); + iconv_close(cd2); + if (tolen) + *tolen = outlen; + if (!to) { + free(outbuf); + return ret; + } + *to = outbuf; + return ret; + + fail: + free(utfbuf); + iconv_close(cd1); + if (cd2 != (iconv_t)(-1)) + iconv_close(cd2); + return -2; +} + +#endif /* HAVE_ICONV */ diff --git a/src/share/makemap.c b/src/share/makemap.c new file mode 100644 index 00000000..e19c1960 --- /dev/null +++ b/src/share/makemap.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2001 Edmund Grimley Evans + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +int main(int argc, char *argv[]) +{ + iconv_t cd; + const char *ib; + char *ob; + size_t ibl, obl, k; + unsigned char c, buf[4]; + int i, wc; + + if (argc != 2) { + printf("Usage: %s ENCODING\n", argv[0]); + printf("Output a charset map for the 8-bit ENCODING.\n"); + return 1; + } + + cd = iconv_open("UCS-4", argv[1]); + if (cd == (iconv_t)(-1)) { + perror("iconv_open"); + return 1; + } + + for (i = 0; i < 256; i++) { + c = i; + ib = &c; + ibl = 1; + ob = buf; + obl = 4; + k = iconv(cd, &ib, &ibl, &ob, &obl); + if (!k && !ibl && !obl) { + wc = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3]; + if (wc >= 0xffff) { + printf("Dodgy value.\n"); + return 1; + } + } + else if (k == (size_t)(-1) && errno == EILSEQ) + wc = 0xffff; + else { + printf("Non-standard iconv.\n"); + return 1; + } + + if (i % 8 == 0) + printf(" "); + printf("0x%04x", wc); + if (i == 255) + printf("\n"); + else if (i % 8 == 7) + printf(",\n"); + else + printf(", "); + } + + return 0; +} diff --git a/src/share/utf8.c b/src/share/utf8.c new file mode 100644 index 00000000..770a16bb --- /dev/null +++ b/src/share/utf8.c @@ -0,0 +1,314 @@ +/* + * Copyright (C) 2001 Peter Harris + * Copyright (C) 2001 Edmund Grimley Evans + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Convert a string between UTF-8 and the locale's charset. + */ + +#include +#include + +#include "utf8.h" + + +#ifdef _WIN32 + + /* Thanks to Peter Harris for this win32 + * code. + */ + +#include +#include + +static unsigned char *make_utf8_string(const wchar_t *unicode) +{ + int size = 0, index = 0, out_index = 0; + unsigned char *out; + unsigned short c; + + /* first calculate the size of the target string */ + c = unicode[index++]; + while(c) { + if(c < 0x0080) { + size += 1; + } else if(c < 0x0800) { + size += 2; + } else { + size += 3; + } + c = unicode[index++]; + } + + out = malloc(size + 1); + if (out == NULL) + return NULL; + index = 0; + + c = unicode[index++]; + while(c) + { + if(c < 0x080) { + out[out_index++] = (unsigned char)c; + } else if(c < 0x800) { + out[out_index++] = 0xc0 | (c >> 6); + out[out_index++] = 0x80 | (c & 0x3f); + } else { + out[out_index++] = 0xe0 | (c >> 12); + out[out_index++] = 0x80 | ((c >> 6) & 0x3f); + out[out_index++] = 0x80 | (c & 0x3f); + } + c = unicode[index++]; + } + out[out_index] = 0x00; + + return out; +} + +static wchar_t *make_unicode_string(const unsigned char *utf8) +{ + int size = 0, index = 0, out_index = 0; + wchar_t *out; + unsigned char c; + + /* first calculate the size of the target string */ + c = utf8[index++]; + while(c) { + if((c & 0x80) == 0) { + index += 0; + } else if((c & 0xe0) == 0xe0) { + index += 2; + } else { + index += 1; + } + size += 1; + c = utf8[index++]; + } + + out = malloc((size + 1) * sizeof(wchar_t)); + if (out == NULL) + return NULL; + index = 0; + + c = utf8[index++]; + while(c) + { + if((c & 0x80) == 0) { + out[out_index++] = c; + } else if((c & 0xe0) == 0xe0) { + out[out_index] = (c & 0x1F) << 12; + c = utf8[index++]; + out[out_index] |= (c & 0x3F) << 6; + c = utf8[index++]; + out[out_index++] |= (c & 0x3F); + } else { + out[out_index] = (c & 0x3F) << 6; + c = utf8[index++]; + out[out_index++] |= (c & 0x3F); + } + c = utf8[index++]; + } + out[out_index] = 0; + + return out; +} + +int utf8_encode(const char *from, char **to) +{ + wchar_t *unicode; + int wchars, err; + + wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, + strlen(from), NULL, 0); + + if(wchars == 0) + { + fprintf(stderr, "Unicode translation error %d\n", GetLastError()); + return -1; + } + + unicode = calloc(wchars + 1, sizeof(unsigned short)); + if(unicode == NULL) + { + fprintf(stderr, "Out of memory processing string to UTF8\n"); + return -1; + } + + err = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from, + strlen(from), unicode, wchars); + if(err != wchars) + { + free(unicode); + fprintf(stderr, "Unicode translation error %d\n", GetLastError()); + return -1; + } + + /* On NT-based windows systems, we could use WideCharToMultiByte(), but + * MS doesn't actually have a consistent API across win32. + */ + *to = make_utf8_string(unicode); + + free(unicode); + return 0; +} + +int utf8_decode(const char *from, char **to) +{ + wchar_t *unicode; + int chars, err; + + /* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but + * MS doesn't actually have a consistent API across win32. + */ + unicode = make_unicode_string(from); + if(unicode == NULL) + { + fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n"); + return -1; + } + + chars = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode, + -1, NULL, 0, NULL, NULL); + + if(chars == 0) + { + fprintf(stderr, "Unicode translation error %d\n", GetLastError()); + free(unicode); + return -1; + } + + *to = calloc(chars + 1, sizeof(unsigned char)); + if(*to == NULL) + { + fprintf(stderr, "Out of memory processing string to local charset\n"); + free(unicode); + return -1; + } + + err = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode, + -1, *to, chars, NULL, NULL); + if(err != chars) + { + fprintf(stderr, "Unicode translation error %d\n", GetLastError()); + free(unicode); + free(*to); + *to = NULL; + return -1; + } + + free(unicode); + return 0; +} + +#else /* End win32. Rest is for real operating systems */ + + +#ifdef HAVE_LANGINFO_CODESET +#include +#endif + +int iconvert(const char *fromcode, const char *tocode, + const char *from, size_t fromlen, + char **to, size_t *tolen); + +static char *current_charset = 0; /* means "US-ASCII" */ + +void convert_set_charset(const char *charset) +{ + +#ifdef HAVE_LANGINFO_CODESET + if (!charset) + charset = nl_langinfo(CODESET); +#endif + + if (!charset) + charset = getenv("CHARSET"); + + free(current_charset); + current_charset = 0; + if (charset && *charset) + current_charset = strdup(charset); +} + +static int convert_buffer(const char *fromcode, const char *tocode, + const char *from, size_t fromlen, + char **to, size_t *tolen) +{ + int ret = -1; + +#ifdef HAVE_ICONV + ret = iconvert(fromcode, tocode, from, fromlen, to, tolen); + if (ret != -1) + return ret; +#endif + +#ifndef HAVE_ICONV /* should be ifdef USE_CHARSET_CONVERT */ + ret = charset_convert(fromcode, tocode, from, fromlen, to, tolen); + if (ret != -1) + return ret; +#endif + + return ret; +} + +static int convert_string(const char *fromcode, const char *tocode, + const char *from, char **to, char replace) +{ + int ret; + size_t fromlen; + char *s; + + fromlen = strlen(from); + ret = convert_buffer(fromcode, tocode, from, fromlen, to, 0); + if (ret == -2) + return -1; + if (ret != -1) + return ret; + + s = malloc(fromlen + 1); + if (!s) + return -1; + strcpy(s, from); + *to = s; + for (; *s; s++) + if (*s & ~0x7f) + *s = replace; + return 3; +} + +int utf8_encode(const char *from, char **to) +{ + char *charset; + + if (!current_charset) + convert_set_charset(0); + charset = current_charset ? current_charset : "US-ASCII"; + return convert_string(charset, "UTF-8", from, to, '#'); +} + +int utf8_decode(const char *from, char **to) +{ + char *charset; + + if (!current_charset) + convert_set_charset(0); + charset = current_charset ? current_charset : "US-ASCII"; + return convert_string("UTF-8", charset, from, to, '?'); +} + +#endif