diff --git a/parse/.cvsignore b/parse/.cvsignore index 06880e26..e26b56be 100644 --- a/parse/.cvsignore +++ b/parse/.cvsignore @@ -2,5 +2,8 @@ cue.tab.c cue.tab.h cuelexer cueparser +toc.tab.c +toc.tab.h +tocparser lex.cuelex.c lex.cue.c diff --git a/parse/Makefile b/parse/Makefile index 531e16c7..9d324d73 100644 --- a/parse/Makefile +++ b/parse/Makefile @@ -1,5 +1,5 @@ INCLUDES = -I . -all: cueparser cuelexer +all: cueparser cuelexer tocparser lex.cue.c: cue.L cue.tab.h flex -Pcue cue.L @@ -25,6 +25,20 @@ cueparser: lex.cue.o cue.tab.o cuelexer: lex.cuelex.o gcc -g lex.cuelex.o -lfl -o cuelexer +toc.tab.h: toc.tab.c + +toc.tab.c: toc.y + bison -p toc -d toc.y + +toclexer.o: toclexer.c + gcc -g -Wall -c toclexer.c $(INCLUDES) + +toc.tab.o: toc.tab.c toc.tab.h + gcc -g -Wall -DSTANDALONE -c toc.tab.c $(INCLUDES) + +tocparser: toc.tab.o toclexer.o + gcc -g toclexer.o toc.tab.o -o tocparser + clean: rm -f lex.cue.c lex.cuelex.c lex.cue.o lex.cuelex.o cue.tab.c \ cue.tab.o cueparser cuelexer diff --git a/parse/toc.y b/parse/toc.y new file mode 100644 index 00000000..d8d4e0f4 --- /dev/null +++ b/parse/toc.y @@ -0,0 +1,295 @@ +/* + $Id: toc.y,v 1.1 2005/01/31 10:20:51 rocky Exp $ + + Copyright (C) 2005 Rocky Bernstein + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+/* Yacc grammar for cdrdao TOC file */
+%{
+#include "toclexer.h"
+#include "errno.h"
+/* Input stream the scanner reads from; toclexer.h declares it extern. */
+FILE *toc_in;
+int tocerror (char const *s);
+
+#ifdef STANDALONE
+/* getopt_long() and struct option, used by the stand-alone main(). */
+#include <getopt.h>
+#define YYDEBUG 1
+#endif
+
+%}
+
+/* BISON Declarations */
+
+/* Keyword tokens.  The scanner maps the identically spelled words of a
+   TOC file onto these (the word FILE maps onto FILE_TOKEN). */
+%token ARRANGER
+%token AUDIO
+%token AUDIOFILE
+%token CATALOG
+%token CD_DA
+%token CD_I
+%token CD_ROM
+%token CD_ROM_XA
+%token CD_TEXT
+%token COMPOSER
+%token COPY
+%token DATAFILE
+%token DISC_ID
+%token EN
+%token END
+%token FIFO
+%token FILE_TOKEN
+%token FOUR_CHANNEL_AUDIO
+%token GENRE
+%token INDEX
+%token ISRC
+%token LANGUAGE
+%token LANGUAGE_MAP
+%token MESSAGE
+%token MODE0
+%token MODE1
+%token MODE1_RAW
+%token MODE2
+%token MODE2_FORM1
+%token MODE2_FORM2
+%token MODE2_FORM_MIX
+%token MODE2_RAW
+%token NO
+%token PERFORMER
+%token PREGAP
+%token PRE_EMPHASIS
+%token RESERVED1
+%token RESERVED2
+%token RESERVED3
+%token RESERVED4
+%token RW
+%token RW_RAW
+%token SILENCE
+%token SIZE_INFO
+%token SONGWRITER
+%token START
+%token SWAP
+%token TITLE
+%token TOC_INFO1
+%token TOC_INFO2
+%token TRACK
+%token TWO_CHANNEL_AUDIO
+%token UPC_EAN
+%token ZERO
+
+/* Punctuation and value-carrying tokens. */
+%token LeftBrace   /* "{" */
+%token RightBrace  /* "}" */
+%token Colon       /* ":" */
+%token Error       /* Error token return */
+%token Id          /* Id but not one of the above keywords */
+%token Integer
+%token String
+
+/* Semantic value type.  The members mirror tocval_t in toclexer.h:
+   that header defines YYSTYPE, so tocval_t is the type the scanner
+   fills in through toclval. */
+%union {
+  long unsigned int val;  /* For returning numbers. */
+  char const *psz_str;    /* For returning the text of an identifier. */
+}
+
+/* Grammar follows */
+%%
+
+/* We optionally allow spaces at the end of the TOC file.
+ */
+/* Start symbol: optional catalog/type header, optional global CD-TEXT,
+   then one or more tracks. */
+toc: catalog_or_tocType cdTextGlobal tracks ;
+
+/* Any number of CATALOG strings and disc-type keywords, in any order. */
+catalog_or_tocType: catalog_or_tocType CATALOG String
+        | catalog_or_tocType tocType
+        | /* empty */ ;
+
+tracks: tracks track | track ;
+
+track: TRACK trackMode opt_subChannelMode opt_track_flags
+        cdTextTrack opt_pregap_msf subTracks_or_starts_or_ends opt_index_msfs
+        ;
+
+opt_track_flags: opt_track_flags track_flag
+        | /* empty */;
+
+track_flag: ISRC String
+        | opt_no COPY
+        | opt_no PRE_EMPHASIS
+        | TWO_CHANNEL_AUDIO
+        | FOUR_CHANNEL_AUDIO ;
+
+opt_no: NO
+        | /* empty */;
+
+opt_pregap_msf: PREGAP msf
+        | /* empty */;
+
+opt_index_msfs: opt_index_msfs INDEX msf
+        | /* empty */ ;
+
+subTrack_or_start_or_end: subTrack
+        | START
+        | END;
+
+subTracks_or_starts_or_ends: subTracks_or_starts_or_ends
+          subTrack_or_start_or_end
+        | subTrack_or_start_or_end ;
+
+subTrack:
+          AudioFile String opt_swap opt_start_offset samples
+        | DATAFILE String opt_start_length
+        | FIFO String dataLength
+        | SILENCE samples
+        | ZERO opt_dataMode opt_subChannelMode dataLength
+        ;
+
+/* AUDIOFILE and FILE are accepted interchangeably. */
+AudioFile: AUDIOFILE | FILE_TOKEN ;
+
+opt_swap: SWAP
+        | /* empty */;
+
+/* '#' must be a character literal: the scanner returns punctuation it
+   has no named token for as the plain character code.  A string
+   literal "#" is a distinct Bison symbol that the scanner never
+   produces, so a rule written with it could never match. */
+opt_start_offset: '#' sLong
+        | /* empty */;
+
+opt_start_length: '#' sLong
+        | '#' sLong dataLength
+        | /* empty */;
+
+opt_dataMode: dataMode
+        | /* empty */ ;
+
+opt_string: String
+        | /* empty */ ;
+
+uLong: Integer ;
+
+sLong: Integer ;
+
+/* Three integers separated by colons. */
+msf: Integer Colon Integer Colon Integer ;
+
+samples: msf | uLong ;
+
+dataLength: msf | uLong ;
+
+/* Same as trackMode, plus MODE0. */
+dataMode: AUDIO | MODE0 | MODE1 | MODE1_RAW | MODE2
+        | MODE2_RAW | MODE2_FORM1 | MODE2_FORM2 | MODE2_FORM_MIX
+        ;
+
+
+trackMode: AUDIO | MODE1 | MODE1_RAW | MODE2
+        | MODE2_RAW | MODE2_FORM1 | MODE2_FORM2 | MODE2_FORM_MIX
+        ;
+
+
+opt_subChannelMode: RW | RW_RAW
+        | /* empty */;
+
+tocType: CD_DA | CD_ROM | CD_ROM_XA | CD_I ;
+
+/* Keywords that may introduce an item inside a CD_TEXT language block. */
+packType: TITLE | PERFORMER | SONGWRITER | COMPOSER | ARRANGER
+        | MESSAGE | DISC_ID | GENRE | TOC_INFO1 | TOC_INFO2
+        | RESERVED1 | RESERVED2 | RESERVED3 | RESERVED4 | UPC_EAN
+        | ISRC | SIZE_INFO ;
+
+
+binaryData: LeftBrace Integers RightBrace ;
+
+/* ',' is written as a character literal because the scanner returns
+   punctuation it has no named token for as the plain character code;
+   a string literal "," is a distinct symbol it never produces. */
+Integers: Integers ',' Integer | Integer ;
+
+
+cdTextItem: packType opt_string_or_binaryData | ;
+
+opt_string_or_binaryData: opt_string | binaryData ;
+
+cdTextBlock: LANGUAGE Integer LeftBrace cdTextItem RightBrace ;
+
+opt_cdTextBlock: cdTextBlock
+        | /* empty */;
+
+opt_cdTextBlocks: opt_cdTextBlocks cdTextBlock
+        | /* empty */ ;
+
+opt_cdTextLanguageMap : LANGUAGE_MAP LeftBrace Language_mappings RightBrace
+        | /* empty */;
+
+Language_mappings: Language_mappings Language_mapping | Language_mapping ;
+
+/* The scanner returns the named token Colon for ':', so that token is
+   used here, exactly as in the msf rule. */
+Language_mapping: Integer Colon Language_id ;
+
+Language_id: Integer | EN;
+
+cdTextTrack: CD_TEXT LeftBrace opt_cdTextBlocks RightBrace | ;
+
+cdTextGlobal: CD_TEXT LeftBrace opt_cdTextLanguageMap opt_cdTextBlock
+          RightBrace
+        | /* empty */;
+
+
+%%
+
+#ifdef STANDALONE
+/* The controlling function */
+
+/* Called by tocparse on error: prints the message S followed by a
+   newline on standard output and returns 0. */
+int
+tocerror(char const *s)
+{
+  printf("%s\n",s);
+  return(0);
+}
+
+/* Stand-alone driver.  Accepts -d / --debug to switch on parser
+   tracing (tocdebug), parses the file named by the first non-option
+   argument, or standard input when there is none, and prints whether
+   the input was accepted as a TOC file.
+   NOTE(review): the exit status is 0 whether or not the parse
+   succeeded; only a bad option or an unreadable file exits with 1. */
+int
+main( int argc, char **argv )
+{
+  int c;
+
+  tocdebug = 0;
+
+  while (1) {
+    int option_index = 0;
+    static struct option long_options[] = {
+      {"debug", 0, 0, 'd'},
+      {0, 0, 0, 0}
+    };
+
+    c = getopt_long (argc, argv, "d", long_options, &option_index);
+    if (c == -1)
+      break;
+
+    switch (c) {
+    case 'd':
+      tocdebug = 1;
+      break;
+    default:
+      printf ("?? getopt returned character code 0%o ??\n", c);
+      exit(1);
+    }
+  }
+
+  if ( optind < argc ) {
+    toc_in = fopen( argv[optind], "r" );
+    if (!toc_in) {
+      printf("unable to open %s for reading: %s\n", argv[optind],
+             strerror(errno));
+      exit(1);
+    }
+  } else
+    toc_in = stdin;
+
+
+  if (tocparse()==0) {
+    printf("Is a TOC file\n");
+  } else {
+    printf("Isn't a TOC file\n");
+  }
+  exit(0);
+}
+#endif /* STANDALONE*/
diff --git a/parse/toclexer.c b/parse/toclexer.c
new file mode 100644
index 00000000..e0670c0e
--- /dev/null
+++ b/parse/toclexer.c
@@ -0,0 +1,198 @@
+/*
+    $Id: toclexer.c,v 1.1 2005/01/31 10:20:51 rocky Exp $
+
+    Copyright (C) 2005 Rocky Bernstein
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/* Lexical scanner for cdrdao's TOC. */
+#include "toclexer.h"
+#include "toc.tab.h"
+#include <ctype.h>
+
+#define YYEOF 0
+
+/* A structure for associating a word with a token. */
+typedef struct keyword_s
+{
+  char const *psz_keyword;
+  token_t i_token;
+} keyword_t;
+
+/* These are all of the words that might appear in a TOC file and
+   the token association that the parser will use.
+*/
+/* The table must stay sorted in strcmp() order because toclex() looks
+   words up with bsearch().  The final {0, 0} entry is a terminator and
+   is left out of the search. */
+const keyword_t keywords[] =
+  {
+    {"ARRANGER",           ARRANGER},
+    {"AUDIO",              AUDIO},
+    {"AUDIOFILE",          AUDIOFILE},
+    {"CATALOG",            CATALOG},
+    {"CD_DA",              CD_DA},
+    {"CD_I",               CD_I},
+    {"CD_ROM",             CD_ROM},
+    {"CD_ROM_XA",          CD_ROM_XA},
+    {"CD_TEXT",            CD_TEXT},
+    {"COMPOSER",           COMPOSER},
+    {"COPY",               COPY},
+    {"DATAFILE",           DATAFILE},
+    {"DISC_ID",            DISC_ID},
+    {"EN",                 EN},
+    {"END",                END},
+    {"FIFO",               FIFO},
+    {"FILE",               FILE_TOKEN},
+    {"FOUR_CHANNEL_AUDIO", FOUR_CHANNEL_AUDIO},
+    {"GENRE",              GENRE},
+    {"INDEX",              INDEX},
+    {"ISRC",               ISRC},
+    {"LANGUAGE",           LANGUAGE},
+    {"LANGUAGE_MAP",       LANGUAGE_MAP},
+    {"MESSAGE",            MESSAGE},
+    {"MODE0",              MODE0},
+    {"MODE1",              MODE1},
+    {"MODE1_RAW",          MODE1_RAW},
+    {"MODE2",              MODE2},
+    {"MODE2_FORM1",        MODE2_FORM1},
+    {"MODE2_FORM2",        MODE2_FORM2},
+    {"MODE2_FORM_MIX",     MODE2_FORM_MIX},
+    {"MODE2_RAW",          MODE2_RAW},
+    {"NO",                 NO},
+    {"PERFORMER",          PERFORMER},
+    {"PREGAP",             PREGAP},
+    {"PRE_EMPHASIS",       PRE_EMPHASIS},
+    {"RESERVED1",          RESERVED1},
+    {"RESERVED2",          RESERVED2},
+    {"RESERVED3",          RESERVED3},
+    {"RESERVED4",          RESERVED4},
+    {"RW",                 RW},
+    {"RW_RAW",             RW_RAW},
+    {"SILENCE",            SILENCE},
+    {"SIZE_INFO",          SIZE_INFO},
+    {"SONGWRITER",         SONGWRITER},
+    {"START",              START},
+    {"SWAP",               SWAP},
+    {"TITLE",              TITLE},
+    {"TOC_INFO1",          TOC_INFO1},
+    {"TOC_INFO2",          TOC_INFO2},
+    {"TRACK",              TRACK},
+    {"TWO_CHANNEL_AUDIO",  TWO_CHANNEL_AUDIO},
+    {"UPC_EAN",            UPC_EAN},
+    {"ZERO",               ZERO},
+    {0, 0}
+  };
+
+/* bsearch() comparison callback: P_ID is the NUL-terminated word being
+   looked up, P_KEYWORD points at one keyword_t table entry.  Returns
+   the strcmp() ordering of the word against the entry's keyword. */
+static int
+compare_keyword(const void *p_id, const void *p_keyword) {
+  char const *psz_id = p_id;
+  keyword_t const *p_entry = p_keyword;
+  return strcmp(psz_id, p_entry->psz_keyword);
+}
+
+/* Read the next token from toc_in.
+
+   Returns Integer (value stored in toclval.val), a keyword token or Id
+   (text reachable through toclval.psz_str), String, Colon, LeftBrace,
+   RightBrace, the plain character code of any other character, Error
+   if a number cannot be converted, or YYEOF at end of input (also when
+   input ends inside a comment or a string).  "//" comments and white
+   space are skipped. */
+token_t
+toclex (void)
+{
+  int c;
+
+ start:
+  /* Skip white space. */
+  while ( isspace(c = fgetc (toc_in)) )
+    ;
+
+  /* Process a number. */
+  if (isdigit (c))
+    {
+      ungetc (c, toc_in);
+      /* Do not hand the parser a value that was never written. */
+      if (1 != fscanf (toc_in, "%lu", &(toclval.val)))
+        return Error;
+      return Integer;
+    }
+
+  /* Process a comment. */
+  if ( '/' == c ) {
+    if ('/' == (c = fgetc (toc_in)) ) {
+      while ((c = fgetc (toc_in)) != EOF && c != '\n')
+        ;
+
+      /* Return end-of-input. */
+      if (EOF == c) return YYEOF;
+      goto start;
+    }
+    /* Not a comment. So put back the character after the '/' and
+       return '/' */
+    ungetc (c, toc_in);
+    return '/';
+  }
+
+  /* Char starts an identifier => read the name.  The word runs up to
+     the next character that is not isgraph(), so punctuation glued to
+     a word is read as part of it. */
+  if (isalpha (c))
+    {
+      static char symbol[50] = "";
+      unsigned int i;
+
+      i = 0;
+      do
+        {
+          /* Store the character only while it fits, always keeping one
+             byte for the terminating NUL.  The rest of an over-long
+             word is still consumed so that scanning resumes after it;
+             the truncated text is longer than every keyword, so it is
+             reported as Id. */
+          if (i < sizeof(symbol) - 1)
+            symbol[i++] = c;
+          /* Get another character. */
+          c = fgetc (toc_in);
+        }
+      while (isgraph (c));
+
+      ungetc (c, toc_in);
+      symbol[i] = '\0';
+
+      /* symbol is static, so the pointer stays valid after returning
+         but its contents are overwritten by the next identifier. */
+      toclval.psz_str = symbol;
+
+      {
+        keyword_t const *p_keyword;
+        p_keyword = bsearch(symbol, keywords,
+                            (sizeof(keywords) / sizeof(keyword_t)) - 1,
+                            sizeof(keyword_t), compare_keyword);
+        if (!p_keyword) return Id;
+        return p_keyword->i_token;
+      }
+
+    }
+
+  /* Process a string.
+     To do: save the value of the string and process octal numbers.
+   */
+  if ( '"' == c ) {
+    /* Set when the previous character was a backslash, so that the
+       quote following it does not end the string. */
+    int b_backslash = 0;
+    while ( EOF != (c = fgetc (toc_in))
+            && (b_backslash || '"' != c ) ) {
+      b_backslash = ('\\' == c );
+    }
+
+    /* Return end-of-input. */
+    if (EOF == c) return YYEOF;
+
+    return String;
+  }
+
+  /* Return end-of-input. */
+  if (EOF == c) return YYEOF;
+
+  switch (c) {
+  case ':': return Colon;
+  case '{': return LeftBrace;
+  case '}': return RightBrace;
+  default:
+    /* Return a single char. */
+    return c;
+  }
+
+}
diff --git a/parse/toclexer.h b/parse/toclexer.h
new file mode 100644
index 00000000..da5abe14
--- /dev/null
+++ b/parse/toclexer.h
@@ -0,0 +1,44 @@
+/*
+    $Id: toclexer.h,v 1.1 2005/01/31 10:20:51 rocky Exp $
+
+    Copyright (C) 2005 Rocky Bernstein
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+/* Common header between TOC lexer and parser. */
+#ifndef TOCLEXER_H
+#define TOCLEXER_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "string.h"
+
+/* Token codes are plain ints: either a value from toc.tab.h or the
+   character code of a single character. */
+typedef int token_t;
+
+/* Stream the scanner reads from; defined in the parser (toc.y). */
+extern FILE *toc_in;
+
+/* Semantic value passed from the scanner to the parser. */
+typedef union {
+  long unsigned int val;  /* For returning numbers. */
+  char const *psz_str;    /* For strings. */
+} tocval_t;
+
+#define YYSTYPE tocval_t
+
+/* Declaration only.  A definition here would be repeated in every file
+   that includes this header.  The object itself is expected to come
+   from the bison-generated parser, whose yylval is renamed toclval by
+   the "-p toc" option used in the Makefile. */
+extern YYSTYPE toclval;
+
+/* Call to the TOC scanner */
+token_t toclex (void);
+
+#endif /* TOCLEXER_H */