diff --git a/example/extract.c b/example/extract.c index 88b8f57b..dd6164e0 100644 --- a/example/extract.c +++ b/example/extract.c @@ -1,7 +1,7 @@ /* Copyright (C) 2012 Pete Batard Based on samples copyright (c) 2003-2011 Rocky Bernstein - + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -73,7 +73,19 @@ free(psz_str); \ psz_str = NULL; -const char *psz_extract_dir; +static const char *psz_extract_dir; +static uint8_t i_joliet_level = 0; + +static void log_handler (cdio_log_level_t level, const char *message) +{ + switch(level) { + case CDIO_LOG_DEBUG: + case CDIO_LOG_INFO: + return; + default: + printf("cdio %d message: %s\n", level, message); + } +} static int udf_extract_files(udf_t *p_udf, udf_dirent_t *p_udf_dirent, const char *psz_path) { @@ -167,8 +179,10 @@ static int iso_extract_files(iso9660_t* p_iso, const char *psz_path) psz_basename = &psz_fullpath[i_length]; p_entlist = iso9660_ifs_readdir(p_iso, psz_path); - if (!p_entlist) + if (!p_entlist) { + printf("Could not access %s\n", psz_path); return 1; + } _CDIO_LIST_FOREACH (p_entnode, p_entlist) { p_statbuf = (iso9660_stat_t*) _cdio_list_node_data(p_entnode); @@ -176,7 +190,7 @@ static int iso_extract_files(iso9660_t* p_iso, const char *psz_path) if ( (strcmp(p_statbuf->filename, ".") == 0) || (strcmp(p_statbuf->filename, "..") == 0) ) continue; - iso9660_name_translate(p_statbuf->filename, psz_basename); + iso9660_name_translate_ext(p_statbuf->filename, psz_basename, i_joliet_level); if (p_statbuf->type == _STAT_DIR) { _mkdir(psz_fullpath); if (iso_extract_files(p_iso, psz_iso_name)) @@ -221,20 +235,20 @@ out: int main(int argc, char** argv) { iso9660_t* p_iso = NULL; - udf_t* p_udf = NULL; + udf_t* p_udf = NULL; udf_dirent_t* p_udf_root; char *psz_str = NULL; char vol_id[UDF_VOLID_SIZE] = ""; char volset_id[UDF_VOLSET_ID_SIZE+1] = ""; int r = 0; - cdio_loglevel_default = CDIO_LOG_DEBUG; - + cdio_log_set_handler (log_handler); + if (argc < 3) { fprintf(stderr, "Usage: extract \n"); return 1; } - + /* Warn if LFS doesn't appear to be enabled */ if (sizeof(off_t) < 8) { fprintf(stderr, "INFO: Large File Support not detected (required for files >2GB)\n"); @@ -259,7 +273,7 @@ int main(int argc, char** argv) goto out; } vol_id[0] = 0; volset_id[0] = 0; - + /* Show basic UDF Volume info */ if (udf_get_volume_id(p_udf, vol_id, sizeof(vol_id)) > 0) fprintf(stderr, "Volume id: %s\n", vol_id); @@ -275,12 +289,13 @@ int main(int argc, char** argv) goto out; try_iso: - p_iso = iso9660_open(argv[1]); + p_iso = iso9660_open_ext(argv[1], ISO_EXTENSION_ALL); if (p_iso == NULL) { fprintf(stderr, "Unable to open image '%s'.\n", argv[1]); r = 1; goto out; } + i_joliet_level = iso9660_ifs_get_joliet_level(p_iso); /* Show basic ISO9660 info from the Primary Volume Descriptor. */ print_vd_info("Application", iso9660_ifs_get_application_id); diff --git a/lib/driver/utf8.c b/lib/driver/utf8.c index 5ae9adc3..64529b1a 100644 --- a/lib/driver/utf8.c +++ b/lib/driver/utf8.c @@ -24,8 +24,12 @@ #endif #ifdef HAVE_JOLIET +#ifdef HAVE_STDIO_H +#include +#endif + #ifdef HAVE_STRING_H -# include +#include #endif #ifdef HAVE_STDLIB_H @@ -39,11 +43,64 @@ #include #include -#ifdef HAVE_STDIO_H -#include +#if defined(_WIN32) +#include + +#define wchar_to_utf8_no_alloc(wsrc, dest, dest_size) \ + WideCharToMultiByte(CP_UTF8, 0, wsrc, -1, dest, dest_size, NULL, NULL) +#define utf8_to_wchar_no_alloc(src, wdest, wdest_size) \ + MultiByteToWideChar(CP_UTF8, 0, src, -1, wdest, wdest_size) + +/* + * Converts an UTF-16 string to UTF8 (allocate returned string) + * Returns NULL on error + */ +static inline char* cdio_wchar_to_utf8(const wchar_t* wstr) + { + int size = 0; + char* str = NULL; + + /* Find out the size we need to allocate for our converted string */ + size = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL); + if (size <= 1) /* An empty string would be size 1 */ + return NULL; + + if ((str = (char*)calloc(size, 1)) == NULL) + return NULL; + + if (wchar_to_utf8_no_alloc(wstr, str, size) != size) { + free(str); + return NULL; + } + + return str; + } + +/* + * Converts an UTF8 string to UTF-16 (allocate returned string) + * Returns NULL on error + */ +static inline wchar_t* cdio_utf8_to_wchar(const char* str) + { + int size = 0; + wchar_t* wstr = NULL; + + /* Find out the size we need to allocate for our converted string */ + size = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); + if (size <= 1) /* An empty string would be size 1 */ + return NULL; + + if ((wstr = (wchar_t*)calloc(size, sizeof(wchar_t))) == NULL) + return NULL; + + if (utf8_to_wchar_no_alloc(str, wstr, size) != size) { + free(wstr); + return NULL; + } + return wstr; + } #endif -// TODO: also remove the need for iconv on MinGW #ifdef HAVE_ICONV #include struct cdio_charset_coverter_s @@ -210,91 +267,68 @@ bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst, return result; } #elif defined(_WIN32) -#include -#define wchar_to_utf8_no_alloc(wsrc, dest, dest_size) \ - WideCharToMultiByte(CP_UTF8, 0, wsrc, -1, dest, dest_size, NULL, NULL) -#define utf8_to_wchar_no_alloc(src, wdest, wdest_size) \ - MultiByteToWideChar(CP_UTF8, 0, src, -1, wdest, wdest_size) +bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst, + int * dst_len, const char * dst_charset) + { + wchar_t* le_dst; + size_t i, len; -/* - * Converts an UTF-16 string to UTF8 (allocate returned string) - * Returns NULL on error - */ -static __inline char* wchar_to_utf8(const wchar_t* wstr) -{ - int size = 0; - char* str = NULL; + if (src == NULL || dst == NULL || dst_len == NULL || dst_charset == NULL || strcmp(dst_charset, "UTF-8") != 0) + return false; - /* Find out the size we need to allocate for our converted string */ - size = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL); - if (size <= 1) // An empty string would be size 1 - return NULL; + /* Eliminate empty strings */ + le_dst = cdio_utf8_to_wchar(src); + if ((le_dst == NULL) || (le_dst[0] == 0)) { + free(le_dst); + return false; + } - if ((str = (char*)calloc(size, 1)) == NULL) - return NULL; + /* Perform byte reversal */ + len = wcslen(le_dst); + *dst = (char*)calloc(len+1, sizeof(wchar_t)); + for (i=0; i<2*len; i++) { + (*dst)[i] = ((char*)le_dst)[i+1]; + (*dst)[i+1] = ((char*)le_dst)[i]; + } + free(le_dst); - if (wchar_to_utf8_no_alloc(wstr, str, size) != size) { - free(str); - return NULL; - } - - return str; -} - -/* - * Converts an UTF8 string to UTF-16 (allocate returned string) - * Returns NULL on error - */ -static __inline wchar_t* utf8_to_wchar(const char* str) -{ - int size = 0; - wchar_t* wstr = NULL; - - /* Find out the size we need to allocate for our converted string */ - size = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); - if (size <= 1) // An empty string would be size 1 - return NULL; - - if ((wstr = (wchar_t*)calloc(size, sizeof(wchar_t))) == NULL) - return NULL; - - if (utf8_to_wchar_no_alloc(str, wstr, size) != size) { - free(wstr); - return NULL; - } - return wstr; -} + return true; + } bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst, const char * src_charset) -{ - wchar_t* le_src; + { + wchar_t* le_src; + int i; - if (src == NULL || dst == NULL || src_charset == NULL || strcmp(src_charset, "UCS-2BE") != 0) - return false; + if (src == NULL || dst == NULL || src_charset == NULL || strcmp(src_charset, "UCS-2BE") != 0) + return false; - if (src_len == (size_t)-1) { - for (src_len = 0; ((uint16_t*)src)[src_len] !=0; src_len++); - src_len <<=2; - } + /* Compute UCS-2 src length */ + if (src_len == (size_t)-1) { + for (src_len = 0; ((uint16_t*)src)[src_len] !=0; src_len++); + } else { + src_len >>=1; + } - /* zero lenght is a headache (LCMapString doesn't support it) - => eliminate this case first */ - if (src_len == 0) { - *dst = (cdio_utf8_t*)malloc(1); - *dst[0] = 0; - return true; - } + /* Eliminate empty strings */ + if ((src_len < 1) || ((src[0] == 0) && (src[1] == 0))) { + *dst = NULL; + return false; + } - le_src = (wchar_t*)malloc(src_len+2); - /* WideCharToMultiByte only takes UCS-2LE, and we are fed UCS-2BE - => perform byte reversal */ - LCMapStringW(0, LCMAP_BYTEREV, (LPCWSTR)src, src_len, le_src, src_len); - *dst = wchar_to_utf8(le_src); - free(le_src); + /* Perform byte reversal */ + le_src = (wchar_t*)malloc(2*src_len+2); + for (i=0; i #endif #ifdef HAVE_STRING_H -# include +#include #endif #ifdef HAVE_ERRNO_H -# include +#include #endif #ifdef HAVE_LANGINFO_CODESET @@ -46,8 +50,6 @@ #include "_cdio_stdio.h" #include "cdio_private.h" -#include - static const char _rcsid[] = "$Id: iso9660_fs.c,v 1.47 2008/04/18 16:02:09 karl Exp $"; /* Implementation of iso9660_t type */ @@ -272,6 +274,72 @@ check_pvd (const iso9660_pvd_t *p_pvd, cdio_log_level_t log_level) return true; } + +/*! + Core procedure for the iso9660_ifs_get_###_id() calls. + pvd_member/svd_member is a pointer to an achar_t or dchar_t + ID string which we can superset as char. + If the Joliet converted string is the same as the achar_t/dchar_t + one, we fall back to using the latter, as it may be longer. +*/ +static inline bool +get_member_id(iso9660_t *p_iso, cdio_utf8_t **p_psz_member_id, + char* pvd_member, char* svd_member, size_t max_size) +{ + int j; + bool strip; + + if (!p_iso) { + *p_psz_member_id = NULL; + return false; + } +#ifdef HAVE_JOLIET + if (p_iso->i_joliet_level) { + /* Translate USC-2 string from Secondary Volume Descriptor */ + if (cdio_charset_to_utf8(svd_member, max_size, + p_psz_member_id, "UCS-2BE")) { + /* NB: *p_psz_member_id is never NULL on success. */ + if (strncmp(*p_psz_member_id, pvd_member, + strlen(*p_psz_member_id)) != 0) { + /* Strip trailing spaces */ + for (j = strlen(*p_psz_member_id)-1; j >= 0; j--) { + if ((*p_psz_member_id)[j] != ' ') + break; + (*p_psz_member_id)[j] = '\0'; + } + if ((*p_psz_member_id)[0] != 0) { + /* Joliet string is not empty and differs from + non Joliet one => use it */ + return true; + } + } + /* Joliet string was either empty or same */ + free(*p_psz_member_id); + } + } +#endif /*HAVE_JOLIET*/ + *p_psz_member_id = calloc(max_size+1, sizeof(cdio_utf8_t)); + if (!*p_psz_member_id) { + cdio_warn("Memory allocation error"); + return false; + } + /* Copy string while removing trailing spaces */ + (*p_psz_member_id)[max_size] = 0; + for (strip=true, j=max_size-1; j>=0; j--) { + if (strip && (pvd_member[j] == ' ')) + continue; + strip = false; + (*p_psz_member_id)[j] = pvd_member[j]; + } + if (strlen(*p_psz_member_id) == 0) { + free(*p_psz_member_id); + *p_psz_member_id = NULL; + return false; + } + return true; +} + + /*! Return the application ID. NULL is returned in psz_app_id if there is some problem in getting this. @@ -280,30 +348,14 @@ bool iso9660_ifs_get_application_id(iso9660_t *p_iso, /*out*/ cdio_utf8_t **p_psz_app_id) { - if (!p_iso) { - *p_psz_app_id = NULL; - return false; - } - -#ifdef HAVE_JOLIET - if (p_iso->i_joliet_level) { - /* TODO: check that we haven't reached the maximum size. - If we have, perhaps we've truncated and if we can get - longer results *and* have the same character using - the PVD, do that. - */ - if ( cdio_charset_to_utf8(p_iso->svd.application_id, - ISO_MAX_APPLICATION_ID, - p_psz_app_id, "UCS-2BE")) - return true; - } -#endif /*HAVE_JOLIET*/ - *p_psz_app_id = iso9660_get_application_id( &(p_iso->pvd) ); - return *p_psz_app_id != NULL && strlen(*p_psz_app_id); + return get_member_id(p_iso, p_psz_app_id, + (char*)p_iso->pvd.application_id, + (char*)p_iso->svd.application_id, + ISO_MAX_APPLICATION_ID); } /*! - Return the Joliet level recognaized for p_iso. + Return the Joliet level recognized for p_iso. */ uint8_t iso9660_ifs_get_joliet_level(iso9660_t *p_iso) { @@ -319,25 +371,10 @@ bool iso9660_ifs_get_preparer_id(iso9660_t *p_iso, /*out*/ cdio_utf8_t **p_psz_preparer_id) { - if (!p_iso) { - *p_psz_preparer_id = NULL; - return false; - } - -#ifdef HAVE_JOLIET - if (p_iso->i_joliet_level) { - /* TODO: check that we haven't reached the maximum size. - If we have, perhaps we've truncated and if we can get - longer results *and* have the same character using - the PVD, do that. - */ - if ( cdio_charset_to_utf8(p_iso->svd.preparer_id, ISO_MAX_PREPARER_ID, - p_psz_preparer_id, "UCS-2BE") ) - return true; - } -#endif /*HAVE_JOLIET*/ - *p_psz_preparer_id = iso9660_get_preparer_id( &(p_iso->pvd) ); - return *p_psz_preparer_id != NULL && strlen(*p_psz_preparer_id); + return get_member_id(p_iso, p_psz_preparer_id, + (char*)p_iso->pvd.preparer_id, + (char*)p_iso->svd.preparer_id, + ISO_MAX_PREPARER_ID); } /*! @@ -347,28 +384,12 @@ iso9660_ifs_get_preparer_id(iso9660_t *p_iso, bool iso9660_ifs_get_publisher_id(iso9660_t *p_iso, /*out*/ cdio_utf8_t **p_psz_publisher_id) { - if (!p_iso) { - *p_psz_publisher_id = NULL; - return false; - } - -#ifdef HAVE_JOLIET - if (p_iso->i_joliet_level) { - /* TODO: check that we haven't reached the maximum size. - If we have, perhaps we've truncated and if we can get - longer results *and* have the same character using - the PVD, do that. - */ - if( cdio_charset_to_utf8(p_iso->svd.publisher_id, ISO_MAX_PUBLISHER_ID, - p_psz_publisher_id, "UCS-2BE") ) - return true; - } -#endif /*HAVE_JOLIET*/ - *p_psz_publisher_id = iso9660_get_publisher_id( &(p_iso->pvd) ); - return *p_psz_publisher_id != NULL && strlen(*p_psz_publisher_id); + return get_member_id(p_iso, p_psz_publisher_id, + (char*)p_iso->pvd.publisher_id, + (char*)p_iso->svd.publisher_id, + ISO_MAX_PUBLISHER_ID); } - /*! Return a string containing the PVD's publisher id with trailing blanks removed. @@ -376,28 +397,12 @@ bool iso9660_ifs_get_publisher_id(iso9660_t *p_iso, bool iso9660_ifs_get_system_id(iso9660_t *p_iso, /*out*/ cdio_utf8_t **p_psz_system_id) { - if (!p_iso) { - *p_psz_system_id = NULL; - return false; - } - -#ifdef HAVE_JOLIET - if (p_iso->i_joliet_level) { - /* TODO: check that we haven't reached the maximum size. - If we have, perhaps we've truncated and if we can get - longer results *and* have the same character using - the PVD, do that. - */ - if ( cdio_charset_to_utf8(p_iso->svd.system_id, ISO_MAX_SYSTEM_ID, - p_psz_system_id, "UCS-2BE") ) - return true; - } -#endif /*HAVE_JOLIET*/ - *p_psz_system_id = iso9660_get_system_id( &(p_iso->pvd) ); - return *p_psz_system_id != NULL && strlen(*p_psz_system_id); + return get_member_id(p_iso, p_psz_system_id, + (char*)p_iso->pvd.system_id, + (char*)p_iso->svd.system_id, + ISO_MAX_SYSTEM_ID); } - /*! Return a string containing the PVD's publisher id with trailing blanks removed. @@ -405,28 +410,12 @@ bool iso9660_ifs_get_system_id(iso9660_t *p_iso, bool iso9660_ifs_get_volume_id(iso9660_t *p_iso, /*out*/ cdio_utf8_t **p_psz_volume_id) { - if (!p_iso) { - *p_psz_volume_id = NULL; - return false; - } - -#ifdef HAVE_JOLIET - if (p_iso->i_joliet_level) { - /* TODO: check that we haven't reached the maximum size. - If we have, perhaps we've truncated and if we can get - longer results *and* have the same character using - the PVD, do that. - */ - if ( cdio_charset_to_utf8(p_iso->svd.volume_id, ISO_MAX_VOLUME_ID, - p_psz_volume_id, "UCS-2BE") ) - return true; - } -#endif /* HAVE_JOLIET */ - *p_psz_volume_id = iso9660_get_volume_id( &(p_iso->pvd) ); - return *p_psz_volume_id != NULL && strlen(*p_psz_volume_id); + return get_member_id(p_iso, p_psz_volume_id, + (char*)p_iso->pvd.volume_id, + (char*)p_iso->svd.volume_id, + ISO_MAX_VOLUME_ID); } - /*! Return a string containing the PVD's publisher id with trailing blanks removed. @@ -434,27 +423,10 @@ bool iso9660_ifs_get_volume_id(iso9660_t *p_iso, bool iso9660_ifs_get_volumeset_id(iso9660_t *p_iso, /*out*/ cdio_utf8_t **p_psz_volumeset_id) { - if (!p_iso) { - *p_psz_volumeset_id = NULL; - return false; - } - -#ifdef HAVE_JOLIET - if (p_iso->i_joliet_level) { - /* TODO: check that we haven't reached the maximum size. - If we have, perhaps we've truncated and if we can get - longer results *and* have the same character using - the PVD, do that. - */ - if ( cdio_charset_to_utf8(p_iso->svd.volume_set_id, - ISO_MAX_VOLUMESET_ID, - p_psz_volumeset_id, - "UCS-2BE") ) - return true; - } -#endif /*HAVE_JOLIET*/ - *p_psz_volumeset_id = iso9660_get_volumeset_id( &(p_iso->pvd) ); - return *p_psz_volumeset_id != NULL && strlen(*p_psz_volumeset_id); + return get_member_id(p_iso, p_psz_volumeset_id, + (char*)p_iso->pvd.volume_set_id, + (char*)p_iso->svd.volume_set_id, + ISO_MAX_VOLUMESET_ID); } @@ -494,19 +466,25 @@ bool iso9660_ifs_read_superblock (iso9660_t *p_iso, iso_extension_mask_t iso_extension_mask) { - iso9660_svd_t *p_svd; /* Secondary volume descriptor. */ - + iso9660_svd_t p_svd; /* Secondary volume descriptor. */ + int i; + if (!p_iso || !iso9660_ifs_read_pvd(p_iso, &(p_iso->pvd))) return false; - p_svd = &(p_iso->svd); p_iso->i_joliet_level = 0; - if (0 != iso9660_iso_seek_read (p_iso, p_svd, ISO_PVD_SECTOR+1, 1)) { - if ( ISO_VD_SUPPLEMENTARY == from_711(p_svd->type) ) { - if (p_svd->escape_sequences[0] == 0x25 - && p_svd->escape_sequences[1] == 0x2f) { - switch (p_svd->escape_sequences[2]) { + /* There may be multiple Secondary Volume Descriptors (eg. El Torito + Joliet) */ + for (i=1; (0 != iso9660_iso_seek_read (p_iso, &p_svd, ISO_PVD_SECTOR+i, 1)); i++) { + if (ISO_VD_END == from_711(p_svd.type) ) /* Last SVD */ + break; + if ( ISO_VD_SUPPLEMENTARY == from_711(p_svd.type) ) { + /* We're only interested in Joliet => make sure the SVD isn't overwritten */ + if (p_iso->i_joliet_level == 0) + memcpy(&(p_iso->svd), &p_svd, sizeof(iso9660_svd_t)); + if (p_svd.escape_sequences[0] == 0x25 + && p_svd.escape_sequences[1] == 0x2f) { + switch (p_svd.escape_sequences[2]) { case 0x40: if (iso_extension_mask & ISO_EXTENSION_JOLIET_LEVEL1) p_iso->i_joliet_level = 1;