Joliet improvements

* support discs with more than one secondary volume descriptor
* add Joliet support for extract sample
* add fallback to non-Joliet if non-Joliet may be longer
This commit is contained in:
Pete Batard
2012-03-05 17:06:01 +00:00
parent e6b00d7dcf
commit 1f1c20771e
3 changed files with 248 additions and 221 deletions

View File

@@ -73,7 +73,19 @@
free(psz_str); \
psz_str = NULL;
const char *psz_extract_dir;
static const char *psz_extract_dir;
static uint8_t i_joliet_level = 0;
/*
 * Log callback installed with cdio_log_set_handler(): debug and info
 * messages are dropped, every other level is echoed to stdout.
 */
static void log_handler (cdio_log_level_t level, const char *message)
{
  /* Low-severity chatter is silently discarded */
  if (level == CDIO_LOG_DEBUG || level == CDIO_LOG_INFO)
    return;

  printf("cdio %d message: %s\n", level, message);
}
static int udf_extract_files(udf_t *p_udf, udf_dirent_t *p_udf_dirent, const char *psz_path)
{
@@ -167,8 +179,10 @@ static int iso_extract_files(iso9660_t* p_iso, const char *psz_path)
psz_basename = &psz_fullpath[i_length];
p_entlist = iso9660_ifs_readdir(p_iso, psz_path);
if (!p_entlist)
if (!p_entlist) {
printf("Could not access %s\n", psz_path);
return 1;
}
_CDIO_LIST_FOREACH (p_entnode, p_entlist) {
p_statbuf = (iso9660_stat_t*) _cdio_list_node_data(p_entnode);
@@ -176,7 +190,7 @@ static int iso_extract_files(iso9660_t* p_iso, const char *psz_path)
if ( (strcmp(p_statbuf->filename, ".") == 0)
|| (strcmp(p_statbuf->filename, "..") == 0) )
continue;
iso9660_name_translate(p_statbuf->filename, psz_basename);
iso9660_name_translate_ext(p_statbuf->filename, psz_basename, i_joliet_level);
if (p_statbuf->type == _STAT_DIR) {
_mkdir(psz_fullpath);
if (iso_extract_files(p_iso, psz_iso_name))
@@ -228,7 +242,7 @@ int main(int argc, char** argv)
char volset_id[UDF_VOLSET_ID_SIZE+1] = "";
int r = 0;
cdio_loglevel_default = CDIO_LOG_DEBUG;
cdio_log_set_handler (log_handler);
if (argc < 3) {
fprintf(stderr, "Usage: extract <iso_image> <extraction_dir>\n");
@@ -275,12 +289,13 @@ int main(int argc, char** argv)
goto out;
try_iso:
p_iso = iso9660_open(argv[1]);
p_iso = iso9660_open_ext(argv[1], ISO_EXTENSION_ALL);
if (p_iso == NULL) {
fprintf(stderr, "Unable to open image '%s'.\n", argv[1]);
r = 1;
goto out;
}
i_joliet_level = iso9660_ifs_get_joliet_level(p_iso);
/* Show basic ISO9660 info from the Primary Volume Descriptor. */
print_vd_info("Application", iso9660_ifs_get_application_id);

View File

@@ -24,8 +24,12 @@
#endif
#ifdef HAVE_JOLIET
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#include <string.h>
#endif
#ifdef HAVE_STDLIB_H
@@ -39,11 +43,64 @@
#include <cdio/utf8.h>
#include <cdio/logging.h>
#ifdef HAVE_STDIO_H
#include <stdio.h>
#if defined(_WIN32)
#include <windows.h>
#define wchar_to_utf8_no_alloc(wsrc, dest, dest_size) \
WideCharToMultiByte(CP_UTF8, 0, wsrc, -1, dest, dest_size, NULL, NULL)
#define utf8_to_wchar_no_alloc(src, wdest, wdest_size) \
MultiByteToWideChar(CP_UTF8, 0, src, -1, wdest, wdest_size)
/*
 * Convert a NUL-terminated UTF-16 string into a newly allocated UTF-8 string.
 * Returns NULL if the reported size is 1 or less (an empty string has size 1),
 * if the allocation fails, or if the conversion does not produce exactly the
 * size that was reported. A non-NULL result was obtained from calloc().
 */
static inline char* cdio_wchar_to_utf8(const wchar_t* wstr)
{
  char* utf8;
  /* First pass, without a destination buffer: ask for the required size */
  const int needed = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);

  /* An empty string would already be size 1, so anything below 2 is rejected */
  if (needed < 2)
    return NULL;

  utf8 = (char*)calloc(needed, 1);
  if (utf8 == NULL)
    return NULL;

  /* Second pass: the actual conversion must fill exactly the size we sized for */
  if (wchar_to_utf8_no_alloc(wstr, utf8, needed) == needed)
    return utf8;

  free(utf8);
  return NULL;
}
/*
 * Convert a NUL-terminated UTF-8 string into a newly allocated UTF-16 string.
 * Returns NULL if the reported size is 1 or less (an empty string has size 1),
 * if the allocation fails, or if the conversion does not produce exactly the
 * size that was reported. A non-NULL result was obtained from calloc().
 */
static inline wchar_t* cdio_utf8_to_wchar(const char* str)
{
  wchar_t* wide;
  /* First pass, without a destination buffer: ask for the required size */
  const int needed = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);

  /* An empty string would already be size 1, so anything below 2 is rejected */
  if (needed < 2)
    return NULL;

  wide = (wchar_t*)calloc(needed, sizeof(wchar_t));
  if (wide == NULL)
    return NULL;

  /* Second pass: the actual conversion must fill exactly the size we sized for */
  if (utf8_to_wchar_no_alloc(str, wide, needed) == needed)
    return wide;

  free(wide);
  return NULL;
}
#endif
// TODO: also remove the need for iconv on MinGW
#ifdef HAVE_ICONV
#include <iconv.h>
struct cdio_charset_coverter_s
@@ -210,91 +267,68 @@ bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst,
return result;
}
#elif defined(_WIN32)
#include <windows.h>
#define wchar_to_utf8_no_alloc(wsrc, dest, dest_size) \
WideCharToMultiByte(CP_UTF8, 0, wsrc, -1, dest, dest_size, NULL, NULL)
#define utf8_to_wchar_no_alloc(src, wdest, wdest_size) \
MultiByteToWideChar(CP_UTF8, 0, src, -1, wdest, wdest_size)
bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst,
int * dst_len, const char * dst_charset)
{
wchar_t* le_dst;
size_t i, len;
/*
* Converts an UTF-16 string to UTF8 (allocate returned string)
* Returns NULL on error
*/
static __inline char* wchar_to_utf8(const wchar_t* wstr)
{
int size = 0;
char* str = NULL;
if (src == NULL || dst == NULL || dst_len == NULL || dst_charset == NULL || strcmp(dst_charset, "UTF-8") != 0)
return false;
/* Find out the size we need to allocate for our converted string */
size = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
if (size <= 1) // An empty string would be size 1
return NULL;
/* Eliminate empty strings */
le_dst = cdio_utf8_to_wchar(src);
if ((le_dst == NULL) || (le_dst[0] == 0)) {
free(le_dst);
return false;
}
if ((str = (char*)calloc(size, 1)) == NULL)
return NULL;
/* Perform byte reversal */
len = wcslen(le_dst);
*dst = (char*)calloc(len+1, sizeof(wchar_t));
for (i=0; i<2*len; i++) {
(*dst)[i] = ((char*)le_dst)[i+1];
(*dst)[i+1] = ((char*)le_dst)[i];
}
free(le_dst);
if (wchar_to_utf8_no_alloc(wstr, str, size) != size) {
free(str);
return NULL;
}
return str;
}
/*
* Converts an UTF8 string to UTF-16 (allocate returned string)
* Returns NULL on error
*/
static __inline wchar_t* utf8_to_wchar(const char* str)
{
int size = 0;
wchar_t* wstr = NULL;
/* Find out the size we need to allocate for our converted string */
size = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
if (size <= 1) // An empty string would be size 1
return NULL;
if ((wstr = (wchar_t*)calloc(size, sizeof(wchar_t))) == NULL)
return NULL;
if (utf8_to_wchar_no_alloc(str, wstr, size) != size) {
free(wstr);
return NULL;
}
return wstr;
}
return true;
}
bool cdio_charset_to_utf8(char *src, size_t src_len, cdio_utf8_t **dst,
const char * src_charset)
{
wchar_t* le_src;
{
wchar_t* le_src;
int i;
if (src == NULL || dst == NULL || src_charset == NULL || strcmp(src_charset, "UCS-2BE") != 0)
return false;
if (src == NULL || dst == NULL || src_charset == NULL || strcmp(src_charset, "UCS-2BE") != 0)
return false;
if (src_len == (size_t)-1) {
for (src_len = 0; ((uint16_t*)src)[src_len] !=0; src_len++);
src_len <<=2;
}
/* Compute UCS-2 src length */
if (src_len == (size_t)-1) {
for (src_len = 0; ((uint16_t*)src)[src_len] !=0; src_len++);
} else {
src_len >>=1;
}
/* zero lenght is a headache (LCMapString doesn't support it)
=> eliminate this case first */
if (src_len == 0) {
*dst = (cdio_utf8_t*)malloc(1);
*dst[0] = 0;
return true;
}
/* Eliminate empty strings */
if ((src_len < 1) || ((src[0] == 0) && (src[1] == 0))) {
*dst = NULL;
return false;
}
le_src = (wchar_t*)malloc(src_len+2);
/* WideCharToMultiByte only takes UCS-2LE, and we are fed UCS-2BE
=> perform byte reversal */
LCMapStringW(0, LCMAP_BYTEREV, (LPCWSTR)src, src_len, le_src, src_len);
*dst = wchar_to_utf8(le_src);
free(le_src);
/* Perform byte reversal */
le_src = (wchar_t*)malloc(2*src_len+2);
for (i=0; i<src_len; i++) {
((char*)le_src)[2*i] = src[2*i+1];
((char*)le_src)[2*i+1] = src[2*i];
}
le_src[src_len] = 0;
*dst = cdio_wchar_to_utf8(le_src);
free(le_src);
return (*dst != NULL);
return (*dst != NULL);
}
#endif /* HAVE_ICONV */

View File

@@ -19,16 +19,20 @@
/* iso9660 filesystem-based routines */
#if defined(HAVE_CONFIG_H) && !defined(__CDIO_CONFIG_H__)
# include "config.h"
# define __CDIO_CONFIG_H__ 1
#include "config.h"
#define __CDIO_CONFIG_H__ 1
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#include <string.h>
#endif
#ifdef HAVE_ERRNO_H
# include <errno.h>
#include <errno.h>
#endif
#ifdef HAVE_LANGINFO_CODESET
@@ -46,8 +50,6 @@
#include "_cdio_stdio.h"
#include "cdio_private.h"
#include <stdio.h>
static const char _rcsid[] = "$Id: iso9660_fs.c,v 1.47 2008/04/18 16:02:09 karl Exp $";
/* Implementation of iso9660_t type */
@@ -272,6 +274,72 @@ check_pvd (const iso9660_pvd_t *p_pvd, cdio_log_level_t log_level)
return true;
}
/*!
  Shared implementation behind the iso9660_ifs_get_###_id() calls.

  pvd_member and svd_member point at the same ID field in the Primary
  and Secondary Volume Descriptor respectively (achar_t or dchar_t
  data, accessed here as plain char), and max_size is the size of
  that field.

  When a Joliet level is set, the SVD field is converted from UCS-2BE
  to UTF-8 and used, unless it is empty once trailing spaces are
  removed or it matches the start of the PVD field. In those cases, as
  well as without Joliet, the PVD field is copied with its trailing
  spaces removed, since it may be the longer of the two.

  Returns true with *p_psz_member_id pointing at an allocated string.
  Returns false if p_iso is NULL, if memory could not be allocated, or
  if the resulting ID is empty; *p_psz_member_id is NULL in each of
  these cases.
*/
static inline bool
get_member_id(iso9660_t *p_iso, cdio_utf8_t **p_psz_member_id,
              char* pvd_member, char* svd_member, size_t max_size)
{
  cdio_utf8_t *psz_id;
  int i_pos;

  if (p_iso == NULL) {
    *p_psz_member_id = NULL;
    return false;
  }

#ifdef HAVE_JOLIET
  /* Translate the UCS-2 string from the Secondary Volume Descriptor */
  if (p_iso->i_joliet_level
      && cdio_charset_to_utf8(svd_member, max_size,
                              p_psz_member_id, "UCS-2BE")) {
    /* NB: the converted string is never NULL on success. */
    psz_id = *p_psz_member_id;
    if (strncmp(psz_id, pvd_member, strlen(psz_id)) != 0) {
      /* Blank out trailing spaces, walking back from the end */
      for (i_pos = strlen(psz_id)-1;
           i_pos >= 0 && psz_id[i_pos] == ' ';
           i_pos--) {
        psz_id[i_pos] = '\0';
      }
      /* Non-empty and different from the non-Joliet string => use it */
      if (psz_id[0] != '\0')
        return true;
    }
    /* The Joliet string was either empty or the same: discard it */
    free(psz_id);
  }
#endif /*HAVE_JOLIET*/

  /* Zero-filled buffer, so the copy below is always NUL-terminated */
  psz_id = calloc(max_size+1, sizeof(cdio_utf8_t));
  *p_psz_member_id = psz_id;
  if (psz_id == NULL) {
    cdio_warn("Memory allocation error");
    return false;
  }

  /* Skip over the trailing spaces of the PVD field... */
  i_pos = max_size-1;
  while (i_pos >= 0 && pvd_member[i_pos] == ' ')
    i_pos--;
  /* ...then copy everything that precedes them */
  for (; i_pos >= 0; i_pos--)
    psz_id[i_pos] = pvd_member[i_pos];

  if (psz_id[0] == '\0') {
    free(psz_id);
    *p_psz_member_id = NULL;
    return false;
  }

  return true;
}
/*!
Return the application ID. NULL is returned in psz_app_id if there
is some problem in getting this.
@@ -280,30 +348,14 @@ bool
iso9660_ifs_get_application_id(iso9660_t *p_iso,
/*out*/ cdio_utf8_t **p_psz_app_id)
{
if (!p_iso) {
*p_psz_app_id = NULL;
return false;
}
#ifdef HAVE_JOLIET
if (p_iso->i_joliet_level) {
/* TODO: check that we haven't reached the maximum size.
If we have, perhaps we've truncated and if we can get
longer results *and* have the same character using
the PVD, do that.
*/
if ( cdio_charset_to_utf8(p_iso->svd.application_id,
ISO_MAX_APPLICATION_ID,
p_psz_app_id, "UCS-2BE"))
return true;
}
#endif /*HAVE_JOLIET*/
*p_psz_app_id = iso9660_get_application_id( &(p_iso->pvd) );
return *p_psz_app_id != NULL && strlen(*p_psz_app_id);
return get_member_id(p_iso, p_psz_app_id,
(char*)p_iso->pvd.application_id,
(char*)p_iso->svd.application_id,
ISO_MAX_APPLICATION_ID);
}
/*!
Return the Joliet level recognaized for p_iso.
Return the Joliet level recognized for p_iso.
*/
uint8_t iso9660_ifs_get_joliet_level(iso9660_t *p_iso)
{
@@ -319,25 +371,10 @@ bool
iso9660_ifs_get_preparer_id(iso9660_t *p_iso,
/*out*/ cdio_utf8_t **p_psz_preparer_id)
{
if (!p_iso) {
*p_psz_preparer_id = NULL;
return false;
}
#ifdef HAVE_JOLIET
if (p_iso->i_joliet_level) {
/* TODO: check that we haven't reached the maximum size.
If we have, perhaps we've truncated and if we can get
longer results *and* have the same character using
the PVD, do that.
*/
if ( cdio_charset_to_utf8(p_iso->svd.preparer_id, ISO_MAX_PREPARER_ID,
p_psz_preparer_id, "UCS-2BE") )
return true;
}
#endif /*HAVE_JOLIET*/
*p_psz_preparer_id = iso9660_get_preparer_id( &(p_iso->pvd) );
return *p_psz_preparer_id != NULL && strlen(*p_psz_preparer_id);
return get_member_id(p_iso, p_psz_preparer_id,
(char*)p_iso->pvd.preparer_id,
(char*)p_iso->svd.preparer_id,
ISO_MAX_PREPARER_ID);
}
/*!
@@ -347,28 +384,12 @@ iso9660_ifs_get_preparer_id(iso9660_t *p_iso,
bool iso9660_ifs_get_publisher_id(iso9660_t *p_iso,
/*out*/ cdio_utf8_t **p_psz_publisher_id)
{
if (!p_iso) {
*p_psz_publisher_id = NULL;
return false;
}
#ifdef HAVE_JOLIET
if (p_iso->i_joliet_level) {
/* TODO: check that we haven't reached the maximum size.
If we have, perhaps we've truncated and if we can get
longer results *and* have the same character using
the PVD, do that.
*/
if( cdio_charset_to_utf8(p_iso->svd.publisher_id, ISO_MAX_PUBLISHER_ID,
p_psz_publisher_id, "UCS-2BE") )
return true;
}
#endif /*HAVE_JOLIET*/
*p_psz_publisher_id = iso9660_get_publisher_id( &(p_iso->pvd) );
return *p_psz_publisher_id != NULL && strlen(*p_psz_publisher_id);
return get_member_id(p_iso, p_psz_publisher_id,
(char*)p_iso->pvd.publisher_id,
(char*)p_iso->svd.publisher_id,
ISO_MAX_PUBLISHER_ID);
}
/*!
Return a string containing the PVD's system id with trailing
blanks removed.
@@ -376,28 +397,12 @@ bool iso9660_ifs_get_publisher_id(iso9660_t *p_iso,
bool iso9660_ifs_get_system_id(iso9660_t *p_iso,
/*out*/ cdio_utf8_t **p_psz_system_id)
{
if (!p_iso) {
*p_psz_system_id = NULL;
return false;
}
#ifdef HAVE_JOLIET
if (p_iso->i_joliet_level) {
/* TODO: check that we haven't reached the maximum size.
If we have, perhaps we've truncated and if we can get
longer results *and* have the same character using
the PVD, do that.
*/
if ( cdio_charset_to_utf8(p_iso->svd.system_id, ISO_MAX_SYSTEM_ID,
p_psz_system_id, "UCS-2BE") )
return true;
}
#endif /*HAVE_JOLIET*/
*p_psz_system_id = iso9660_get_system_id( &(p_iso->pvd) );
return *p_psz_system_id != NULL && strlen(*p_psz_system_id);
return get_member_id(p_iso, p_psz_system_id,
(char*)p_iso->pvd.system_id,
(char*)p_iso->svd.system_id,
ISO_MAX_SYSTEM_ID);
}
/*!
Return a string containing the PVD's volume id with trailing
blanks removed.
@@ -405,28 +410,12 @@ bool iso9660_ifs_get_system_id(iso9660_t *p_iso,
bool iso9660_ifs_get_volume_id(iso9660_t *p_iso,
/*out*/ cdio_utf8_t **p_psz_volume_id)
{
if (!p_iso) {
*p_psz_volume_id = NULL;
return false;
}
#ifdef HAVE_JOLIET
if (p_iso->i_joliet_level) {
/* TODO: check that we haven't reached the maximum size.
If we have, perhaps we've truncated and if we can get
longer results *and* have the same character using
the PVD, do that.
*/
if ( cdio_charset_to_utf8(p_iso->svd.volume_id, ISO_MAX_VOLUME_ID,
p_psz_volume_id, "UCS-2BE") )
return true;
}
#endif /* HAVE_JOLIET */
*p_psz_volume_id = iso9660_get_volume_id( &(p_iso->pvd) );
return *p_psz_volume_id != NULL && strlen(*p_psz_volume_id);
return get_member_id(p_iso, p_psz_volume_id,
(char*)p_iso->pvd.volume_id,
(char*)p_iso->svd.volume_id,
ISO_MAX_VOLUME_ID);
}
/*!
Return a string containing the PVD's volume set id with trailing
blanks removed.
@@ -434,27 +423,10 @@ bool iso9660_ifs_get_volume_id(iso9660_t *p_iso,
bool iso9660_ifs_get_volumeset_id(iso9660_t *p_iso,
/*out*/ cdio_utf8_t **p_psz_volumeset_id)
{
if (!p_iso) {
*p_psz_volumeset_id = NULL;
return false;
}
#ifdef HAVE_JOLIET
if (p_iso->i_joliet_level) {
/* TODO: check that we haven't reached the maximum size.
If we have, perhaps we've truncated and if we can get
longer results *and* have the same character using
the PVD, do that.
*/
if ( cdio_charset_to_utf8(p_iso->svd.volume_set_id,
ISO_MAX_VOLUMESET_ID,
p_psz_volumeset_id,
"UCS-2BE") )
return true;
}
#endif /*HAVE_JOLIET*/
*p_psz_volumeset_id = iso9660_get_volumeset_id( &(p_iso->pvd) );
return *p_psz_volumeset_id != NULL && strlen(*p_psz_volumeset_id);
return get_member_id(p_iso, p_psz_volumeset_id,
(char*)p_iso->pvd.volume_set_id,
(char*)p_iso->svd.volume_set_id,
ISO_MAX_VOLUMESET_ID);
}
@@ -494,19 +466,25 @@ bool
iso9660_ifs_read_superblock (iso9660_t *p_iso,
iso_extension_mask_t iso_extension_mask)
{
iso9660_svd_t *p_svd; /* Secondary volume descriptor. */
iso9660_svd_t p_svd; /* Secondary volume descriptor. */
int i;
if (!p_iso || !iso9660_ifs_read_pvd(p_iso, &(p_iso->pvd)))
return false;
p_svd = &(p_iso->svd);
p_iso->i_joliet_level = 0;
if (0 != iso9660_iso_seek_read (p_iso, p_svd, ISO_PVD_SECTOR+1, 1)) {
if ( ISO_VD_SUPPLEMENTARY == from_711(p_svd->type) ) {
if (p_svd->escape_sequences[0] == 0x25
&& p_svd->escape_sequences[1] == 0x2f) {
switch (p_svd->escape_sequences[2]) {
/* There may be multiple Secondary Volume Descriptors (eg. El Torito + Joliet) */
for (i=1; (0 != iso9660_iso_seek_read (p_iso, &p_svd, ISO_PVD_SECTOR+i, 1)); i++) {
if (ISO_VD_END == from_711(p_svd.type) ) /* Last SVD */
break;
if ( ISO_VD_SUPPLEMENTARY == from_711(p_svd.type) ) {
/* We're only interested in Joliet => make sure the SVD isn't overwritten */
if (p_iso->i_joliet_level == 0)
memcpy(&(p_iso->svd), &p_svd, sizeof(iso9660_svd_t));
if (p_svd.escape_sequences[0] == 0x25
&& p_svd.escape_sequences[1] == 0x2f) {
switch (p_svd.escape_sequences[2]) {
case 0x40:
if (iso_extension_mask & ISO_EXTENSION_JOLIET_LEVEL1)
p_iso->i_joliet_level = 1;