First attempt at Bison TOC parser.

This commit is contained in:
rocky
2005-01-31 10:20:51 +00:00
parent 6c50829f71
commit 9664537b44
5 changed files with 555 additions and 1 deletions

View File

@@ -2,5 +2,8 @@ cue.tab.c
cue.tab.h
cuelexer
cueparser
toc.tab.c
toc.tab.h
tocparser
lex.cuelex.c
lex.cue.c

View File

@@ -1,5 +1,5 @@
INCLUDES = -I .
all: cueparser cuelexer
all: cueparser cuelexer tocparser
lex.cue.c: cue.L cue.tab.h
flex -Pcue cue.L
@@ -25,6 +25,20 @@ cueparser: lex.cue.o cue.tab.o
cuelexer: lex.cuelex.o
gcc -g lex.cuelex.o -lfl -o cuelexer
# Bison writes both toc.tab.c and toc.tab.h (-d) with the "toc" symbol
# prefix (-p toc).
toc.tab.h: toc.tab.c

toc.tab.c: toc.y
	bison -p toc -d toc.y

# toclexer.c includes toclexer.h and the generated toc.tab.h, so both are
# listed as prerequisites; otherwise a parallel or incremental build can
# compile the lexer before the header exists or against a stale copy.
toclexer.o: toclexer.c toclexer.h toc.tab.h
	gcc -g -Wall -c toclexer.c $(INCLUDES)

# The parser's prologue includes toclexer.h as well.
toc.tab.o: toc.tab.c toc.tab.h toclexer.h
	gcc -g -Wall -DSTANDALONE -c toc.tab.c $(INCLUDES)

tocparser: toc.tab.o toclexer.o
	gcc -g toclexer.o toc.tab.o -o tocparser
# Remove generated sources, objects and programs for both the CUE and the
# TOC tools.
clean:
	rm -f lex.cue.c lex.cuelex.c lex.cue.o lex.cuelex.o cue.tab.c \
	cue.tab.o cueparser cuelexer \
	toc.tab.c toc.tab.h toc.tab.o toclexer.o tocparser

295
parse/toc.y Normal file
View File

@@ -0,0 +1,295 @@
/*
$Id: toc.y,v 1.1 2005/01/31 10:20:51 rocky Exp $
Copyright (C) 2005 Rocky Bernstein <rocky@panix.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Yacc grammar for cdrdao TOC files */
%{
/* Declarations shared with the lexer: token_t, toc_in, toclval and
   toclex(). */
#include "toclexer.h"
#include "errno.h"
/* Input stream read by the lexer; toclexer.h declares it extern. */
FILE *toc_in;
/* Error hook called by the generated parser.  In this file a definition
   is only supplied when STANDALONE is defined. */
int tocerror (char const *s);
#ifdef STANDALONE
/* The standalone driver parses its command line with getopt_long(). */
#include <getopt.h>
/* Build the parser with tracing support so the driver can set tocdebug. */
#define YYDEBUG 1
#endif
%}
/* BISON Declarations */
/* Keyword tokens.  Each of these is produced by the lexer from the
   keyword table in toclexer.c, so a token added here needs a matching
   table entry there. */
%token ARRANGER
%token AUDIO
%token AUDIOFILE
%token CATALOG
%token CD_DA
%token CD_I
%token CD_ROM
%token CD_ROM_XA
%token CD_TEXT
%token COMPOSER
%token COPY
%token DATAFILE
%token DISC_ID
%token EN
%token END
%token FIFO
%token FILE_TOKEN
%token FOUR_CHANNEL_AUDIO
%token GENRE
%token INDEX
%token ISRC
%token LANGUAGE
%token LANGUAGE_MAP
%token MESSAGE
%token MODE0
%token MODE1
%token MODE1_RAW
%token MODE2
%token MODE2_FORM1
%token MODE2_FORM2
%token MODE2_FORM_MIX
%token MODE2_RAW
%token NO
%token PERFORMER
%token PREGAP
%token PRE_EMPHASIS
%token RESERVED1
%token RESERVED2
%token RESERVED3
%token RESERVED4
%token RW
%token RW_RAW
%token SILENCE
%token SIZE_INFO
%token SONGWRITER
%token START
%token SWAP
%token TITLE
%token TOC_INFO1
%token TOC_INFO2
%token TRACK
%token TWO_CHANNEL_AUDIO
%token UPC_EAN
%token ZERO
/* Punctuation and value tokens.  The lexer maps '{', '}' and ':' to the
   three named tokens below; any other punctuation character is returned
   as the character itself. */
%token LeftBrace /* "{" */
%token RightBrace /* "}" */
%token Colon /* ":" */
%token Error /* Error token return */
%token Id /* Id but not one of the above keywords */
%token Integer
%token String
/* Semantic value of a token.  toclexer.h defines YYSTYPE as tocval_t and
   the lexer stores into toclval.val and toclval.psz_str, so the members
   here mirror tocval_t exactly. */
%union {
  long unsigned int val; /* For returning numbers. */
  char const *psz_str;   /* For returning the text of a word. */
}
/* Grammar follows */
%%
/* A TOC file is: any number of CATALOG / disc-type statements, an
   optional global CD_TEXT block, and then one or more tracks.
*/
toc: catalog_or_tocType cdTextGlobal tracks ;
/* Zero or more "CATALOG <string>" statements and disc types, in any
   order. */
catalog_or_tocType: catalog_or_tocType CATALOG String
| catalog_or_tocType tocType
| /* empty */ ;
/* One or more tracks. */
tracks: tracks track | track ;
/* One track: the TRACK keyword, its mode, an optional sub-channel mode,
   any flags, optional CD-TEXT, an optional pregap, at least one
   sub-track / START / END entry, and finally any number of INDEX
   entries. */
track: TRACK trackMode opt_subChannelMode opt_track_flags
cdTextTrack opt_pregap_msf subTracks_or_starts_or_ends opt_index_msfs
;
/* Zero or more track flags. */
opt_track_flags: opt_track_flags track_flag
| /* empty */;
/* COPY and PRE_EMPHASIS may be negated with a leading NO. */
track_flag: ISRC String
| opt_no COPY
| opt_no PRE_EMPHASIS
| TWO_CHANNEL_AUDIO
| FOUR_CHANNEL_AUDIO ;
opt_no: NO
| /* empty */;
/* Optional "PREGAP <msf>". */
opt_pregap_msf: PREGAP msf
| /* empty */;
/* Zero or more "INDEX <msf>" entries. */
opt_index_msfs: opt_index_msfs INDEX msf
| /* empty */ ;
/* One element of a track body.  START and END are accepted here as bare
   keywords with no argument. */
subTrack_or_start_or_end: subTrack
| START
| END;
/* One or more track-body elements. */
subTracks_or_starts_or_ends: subTracks_or_starts_or_ends
subTrack_or_start_or_end
| subTrack_or_start_or_end ;
/* A data source for the track: an audio file, a data file, a FIFO,
   silence, or zero fill. */
subTrack:
AudioFile String opt_swap opt_start_offset samples
| DATAFILE String opt_start_length
| FIFO String dataLength
| SILENCE samples
| ZERO opt_dataMode opt_subChannelMode dataLength
;
/* AUDIOFILE and FILE are interchangeable keywords. */
AudioFile: AUDIOFILE | FILE_TOKEN ;
opt_swap: SWAP
| /* empty */;
/* Optional '#' followed by a start offset.
   The '#' is written as a character literal because the lexer returns
   punctuation it has no named token for as the character itself.  A
   Bison string literal ("#") is a distinct token that the lexer never
   produces, so a rule using it could never match. */
opt_start_offset: '#' sLong
| /* empty */;
/* Optional '#' start offset, optionally followed by a data length. */
opt_start_length: '#' sLong
| '#' sLong dataLength
| /* empty */;
/* Optional data mode; used after ZERO. */
opt_dataMode: dataMode
| /* empty */ ;
/* Optional string; lets a CD-TEXT pack type appear with no value. */
opt_string: String
| /* empty */ ;
/* Both are a plain Integer token.  The names presumably record whether
   the value is meant to be unsigned or signed; the lexer itself only
   scans unsigned numbers. */
uLong: Integer ;
sLong: Integer ;
/* Three Integers separated by Colon tokens (presumably
   minutes:seconds:frames -- confirm against the cdrdao TOC format). */
msf: Integer Colon Integer Colon Integer ;
/* A length, given either as an msf triple or as a single number. */
samples: msf | uLong ;
dataLength: msf | uLong ;
/* Modes accepted after ZERO. */
dataMode: AUDIO | MODE0 | MODE1 | MODE1_RAW | MODE2
| MODE2_RAW | MODE2_FORM1 | MODE2_FORM2 | MODE2_FORM_MIX
;
/* Modes accepted after TRACK: the same list as dataMode minus MODE0. */
trackMode: AUDIO | MODE1 | MODE1_RAW | MODE2
| MODE2_RAW | MODE2_FORM1 | MODE2_FORM2 | MODE2_FORM_MIX
;
opt_subChannelMode: RW | RW_RAW
| /* empty */;
/* Disc type keywords accepted in the file header. */
tocType: CD_DA | CD_ROM | CD_ROM_XA | CD_I ;
/* Keywords that may start a CD-TEXT item.  ISRC appears here as well as
   in track_flag. */
packType: TITLE | PERFORMER | SONGWRITER | COMPOSER | ARRANGER
| MESSAGE | DISC_ID | GENRE | TOC_INFO1 | TOC_INFO2
| RESERVED1 | RESERVED2 | RESERVED3 | RESERVED4 | UPC_EAN
| ISRC | SIZE_INFO ;
/* A brace-enclosed list of integers. */
binaryData: LeftBrace Integers RightBrace ;
/* One or more Integers separated by commas.  The comma is a character
   literal because the lexer returns ',' as the character itself; the
   string literal "," would be a separate token the lexer never emits. */
Integers: Integers ',' Integer | Integer ;
/* A CD-TEXT item: a pack type followed by a string, a binary list, or
   nothing.  The item itself may also be empty. */
cdTextItem: packType opt_string_or_binaryData | ;
opt_string_or_binaryData: opt_string | binaryData ;
/* "LANGUAGE <n> { item }".
   NOTE(review): as written a block holds at most one cdTextItem --
   confirm whether several items per block should be accepted. */
cdTextBlock: LANGUAGE Integer LeftBrace cdTextItem RightBrace ;
/* Zero or one language block (used by the global CD_TEXT section). */
opt_cdTextBlock: cdTextBlock
| /* empty */;
/* Zero or more language blocks (used by the per-track CD_TEXT section). */
opt_cdTextBlocks: opt_cdTextBlocks cdTextBlock
| /* empty */ ;
/* Optional "LANGUAGE_MAP { mapping ... }" with at least one mapping. */
opt_cdTextLanguageMap : LANGUAGE_MAP LeftBrace Language_mappings RightBrace
| /* empty */;
Language_mappings: Language_mappings Language_mapping | Language_mapping ;
/* "<n> : <language>".  The separator is the Colon token, which is what
   the lexer returns for ':' (the msf rule uses it the same way); the
   string literal ":" would be a separate token the lexer never emits. */
Language_mapping: Integer Colon Language_id ;
/* A language is given as a number or as the keyword EN. */
Language_id: Integer | EN;
/* Optional per-track CD-TEXT: any number of language blocks in braces. */
cdTextTrack: CD_TEXT LeftBrace opt_cdTextBlocks RightBrace | ;
/* Optional global CD-TEXT: an optional language map followed by at most
   one language block, in braces. */
cdTextGlobal: CD_TEXT LeftBrace opt_cdTextLanguageMap opt_cdTextBlock
RightBrace
| /* empty */;
%%
#ifdef STANDALONE
/* Error hook called by tocparse: writes the message followed by a
   newline to standard output.  Always returns 0. */
int
tocerror(char const *s)
{
  fprintf(stdout, "%s\n", s);
  return 0;
}
/* Standalone driver: parse the file named on the command line (or
   standard input when none is given) and report whether it is accepted
   as a TOC file.

   Options: -d / --debug turns on parser tracing via tocdebug.

   Exits with status 1 on an unknown option or when the file cannot be
   opened, and with status 0 otherwise, whatever the parse result. */
int
main( int argc, char **argv )
{
  static struct option long_options[] = {
    {"debug", 0, 0, 'd'},
    {0, 0, 0, 0}
  };
  int i_longindex = 0;
  int i_opt;

  tocdebug = 0;

  while (-1 != (i_opt = getopt_long (argc, argv, "d", long_options,
                                     &i_longindex))) {
    if ('d' != i_opt) {
      printf ("?? getopt returned character code 0%o ??\n", i_opt);
      exit(1);
    }
    tocdebug = 1;
  }

  if ( optind >= argc ) {
    /* No file argument: read the TOC from standard input. */
    toc_in = stdin;
  } else {
    toc_in = fopen( argv[optind], "r" );
    if (NULL == toc_in) {
      printf("unable to open %s for reading: %s\n", argv[optind],
             strerror(errno));
      exit(1);
    }
  }

  if (0 != tocparse()) {
    printf("Isn't a TOC file\n");
  } else {
    printf("Is a TOC file\n");
  }

  exit(0);
}
#endif /* STANDALONE*/

198
parse/toclexer.c Normal file
View File

@@ -0,0 +1,198 @@
/*
$Id: toclexer.c,v 1.1 2005/01/31 10:20:51 rocky Exp $
Copyright (C) 2005 Rocky Bernstein <rocky@panix.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Lexical scanner for cdrdao's TOC. */
#include "toclexer.h"
#include "toc.tab.h"
#include <ctype.h>
#define YYEOF 0
/* One entry of the keyword table: the spelling of a reserved word as it
   appears in a TOC file, and the parser token returned for it. */
typedef struct keyword_s
{
char const *psz_keyword; /* Keyword text, compared with strcmp(). */
token_t i_token; /* Token code handed to the parser. */
} keyword_t;
/* These are all of the words that might appear in a TOC file and
the token association that the parser will use.

toclex() looks words up in this table with bsearch(), so the entries
MUST stay sorted in strcmp() order.  Note that in ASCII '_' sorts after
both digits and upper-case letters, which is why "PREGAP" comes before
"PRE_EMPHASIS" and "MODE2_FORM2" before "MODE2_FORM_MIX".

The final {0, 0} entry is a terminator; the bsearch() call leaves it out
of the element count.
*/
const keyword_t keywords[] =
{
{"ARRANGER", ARRANGER},
{"AUDIO", AUDIO},
{"AUDIOFILE", AUDIOFILE},
{"CATALOG", CATALOG},
{"CD_DA", CD_DA},
{"CD_I", CD_I},
{"CD_ROM", CD_ROM},
{"CD_ROM_XA", CD_ROM_XA},
{"CD_TEXT", CD_TEXT},
{"COMPOSER", COMPOSER},
{"COPY", COPY},
{"DATAFILE", DATAFILE},
{"DISC_ID", DISC_ID},
{"EN", EN},
{"END", END},
{"FIFO", FIFO},
{"FILE", FILE_TOKEN},
{"FOUR_CHANNEL_AUDIO", FOUR_CHANNEL_AUDIO},
{"GENRE", GENRE},
{"INDEX", INDEX},
{"ISRC", ISRC},
{"LANGUAGE", LANGUAGE},
{"LANGUAGE_MAP", LANGUAGE_MAP},
{"MESSAGE", MESSAGE},
{"MODE0", MODE0},
{"MODE1", MODE1},
{"MODE1_RAW", MODE1_RAW},
{"MODE2", MODE2},
{"MODE2_FORM1", MODE2_FORM1},
{"MODE2_FORM2", MODE2_FORM2},
{"MODE2_FORM_MIX", MODE2_FORM_MIX},
{"MODE2_RAW", MODE2_RAW},
{"NO", NO},
{"PERFORMER", PERFORMER},
{"PREGAP", PREGAP},
{"PRE_EMPHASIS", PRE_EMPHASIS},
{"RESERVED1", RESERVED1},
{"RESERVED2", RESERVED2},
{"RESERVED3", RESERVED3},
{"RESERVED4", RESERVED4},
{"RW", RW},
{"RW_RAW", RW_RAW},
{"SILENCE", SILENCE},
{"SIZE_INFO", SIZE_INFO},
{"SONGWRITER", SONGWRITER},
{"START", START},
{"SWAP", SWAP},
{"TITLE", TITLE},
{"TOC_INFO1", TOC_INFO1},
{"TOC_INFO2", TOC_INFO2},
{"TRACK", TRACK},
{"TWO_CHANNEL_AUDIO", TWO_CHANNEL_AUDIO},
{"UPC_EAN", UPC_EAN},
{"ZERO", ZERO},
{0, 0}
};
static int
compare_keyword(const void *p_id, const void *p_keyword) {
char *psz_id = (char *) p_id;
char const *psz_keyword = ((keyword_t *) p_keyword)->psz_keyword;
return strcmp(psz_id, psz_keyword);
}
token_t
toclex (void)
{
int c;
start:
/* Skip white space. */
while ( isspace(c = fgetc (toc_in)) )
;
/* Process a number. */
if (isdigit (c))
{
ungetc (c, toc_in);
fscanf (toc_in, "%lu", &(toclval.val));
return Integer;
}
/* Process a comment. */
if ( '/' == c ) {
if ('/' == (c = fgetc (toc_in)) ) {
while ((c = fgetc (toc_in)) != EOF && c != '\n')
;
/* Return end-of-input. */
if (EOF == c) return YYEOF;
goto start;
}
/* Not a comment. So put back the character after the '/' and
return '/' */
ungetc (c, toc_in);
return '/';
}
/* Char starts an identifier => read the name. */
if (isalpha (c))
{
static char symbol[50] = "";
unsigned int i;
i = 0;
do
{
/* Add this character to the buffer. */
symbol[i++] = c;
/* Get another character. */
c = fgetc (toc_in);
}
while (isgraph (c));
ungetc (c, toc_in);
symbol[i] = '\0';
toclval.psz_str = symbol;
{
keyword_t *p_keyword;
p_keyword = bsearch(symbol, keywords,
(sizeof(keywords) / sizeof(keyword_t)) - 1,
sizeof(keyword_t), compare_keyword);
if (!p_keyword) return Id;
return p_keyword->i_token;
}
}
/* Process a string.
To do: save the value of the string and process octal numbers.
*/
if ( '"' == c ) {
int b_backslash = 0;
while ( EOF != (c = fgetc (toc_in))
&& (b_backslash || '"' != c ) ) {
b_backslash = ('\\' == c );
}
/* Return end-of-input. */
if (EOF == c) return YYEOF;
return String;
}
/* Return end-of-input. */
if (EOF == c) return YYEOF;
switch (c) {
case ':': return Colon;
case '{': return LeftBrace;
case '}': return RightBrace;
default:
/* Return a single char. */
return c;
}
}

44
parse/toclexer.h Normal file
View File

@@ -0,0 +1,44 @@
/*
$Id: toclexer.h,v 1.1 2005/01/31 10:20:51 rocky Exp $
Copyright (C) 2005 Rocky Bernstein <rocky@panix.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Common header between TOC lexer and parser. */
#include <stdio.h>
#include <stdlib.h>
#include "string.h"
/* Token code returned by the scanner to the parser. */
typedef int token_t;

/* Input stream the scanner reads from; the definition lives in toc.y. */
extern FILE *toc_in;

/* Semantic value attached to a token. */
typedef union {
  long unsigned int val;  /* For returning numbers. */
  char const *psz_str;    /* For strings. */
} tocval_t;

#define YYSTYPE tocval_t

/* Semantic value of the token most recently returned by toclex().
   Declared extern here so that including this header does not create a
   definition in every source file, which fails to link when the
   compiler does not merge common symbols.  The object itself is expected
   to be defined once by the Bison-generated parser, which is built with
   the "toc" prefix. */
extern YYSTYPE toclval;

/* Call to the TOC scanner */
token_t toclex (void);