First attempt at Bison TOC parser.
This commit is contained in:
@@ -2,5 +2,8 @@ cue.tab.c
|
||||
cue.tab.h
|
||||
cuelexer
|
||||
cueparser
|
||||
toc.tab.c
|
||||
toc.tab.h
|
||||
tocparser
|
||||
lex.cuelex.c
|
||||
lex.cue.c
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
INCLUDES = -I .
|
||||
all: cueparser cuelexer
|
||||
all: cueparser cuelexer tocparser
|
||||
|
||||
lex.cue.c: cue.L cue.tab.h
|
||||
flex -Pcue cue.L
|
||||
@@ -25,6 +25,20 @@ cueparser: lex.cue.o cue.tab.o
|
||||
cuelexer: lex.cuelex.o
|
||||
gcc -g lex.cuelex.o -lfl -o cuelexer
|
||||
|
||||
toc.tab.h: toc.tab.c
|
||||
|
||||
toc.tab.c: toc.y
|
||||
bison -p toc -d toc.y
|
||||
|
||||
# toclexer.c includes toclexer.h and the Bison-generated toc.tab.h, so both
# must be prerequisites: toc.tab.h has to exist before compiling, and a
# change to either header must trigger a rebuild.
toclexer.o: toclexer.c toclexer.h toc.tab.h
	gcc -g -Wall -c toclexer.c $(INCLUDES)
|
||||
|
||||
toc.tab.o: toc.tab.c toc.tab.h
|
||||
gcc -g -Wall -DSTANDALONE -c toc.tab.c $(INCLUDES)
|
||||
|
||||
tocparser: toc.tab.o toclexer.o
|
||||
gcc -g toclexer.o toc.tab.o -o tocparser
|
||||
|
||||
# Remove generated sources, objects and binaries for both the CUE and the
# TOC parsers.
clean:
	rm -f lex.cue.c lex.cuelex.c lex.cue.o lex.cuelex.o cue.tab.c \
	  cue.tab.o cueparser cuelexer \
	  toc.tab.c toc.tab.h toc.tab.o toclexer.o tocparser
|
||||
|
||||
295
parse/toc.y
Normal file
295
parse/toc.y
Normal file
@@ -0,0 +1,295 @@
|
||||
/*
|
||||
$Id: toc.y,v 1.1 2005/01/31 10:20:51 rocky Exp $
|
||||
|
||||
Copyright (C) 2005 Rocky Bernstein <rocky@panix.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
/* Yacc grammar for cdrdao TOC file */
|
||||
%{
|
||||
#include "toclexer.h"
|
||||
#include "errno.h"
|
||||
FILE *toc_in;
|
||||
int tocerror (char const *s);
|
||||
|
||||
#ifdef STANDALONE
|
||||
#include <getopt.h>
|
||||
#define YYDEBUG 1
|
||||
#endif
|
||||
|
||||
%}
|
||||
|
||||
/* BISON Declarations */
|
||||
|
||||
%token ARRANGER
|
||||
%token AUDIO
|
||||
%token AUDIOFILE
|
||||
%token CATALOG
|
||||
%token CD_DA
|
||||
%token CD_I
|
||||
%token CD_ROM
|
||||
%token CD_ROM_XA
|
||||
%token CD_TEXT
|
||||
%token COMPOSER
|
||||
%token COPY
|
||||
%token DATAFILE
|
||||
%token DISC_ID
|
||||
%token EN
|
||||
%token END
|
||||
%token FIFO
|
||||
%token FILE_TOKEN
|
||||
%token FOUR_CHANNEL_AUDIO
|
||||
%token GENRE
|
||||
%token INDEX
|
||||
%token ISRC
|
||||
%token LANGUAGE
|
||||
%token LANGUAGE_MAP
|
||||
%token MESSAGE
|
||||
%token MODE0
|
||||
%token MODE1
|
||||
%token MODE1_RAW
|
||||
%token MODE2
|
||||
%token MODE2_FORM1
|
||||
%token MODE2_FORM2
|
||||
%token MODE2_FORM_MIX
|
||||
%token MODE2_RAW
|
||||
%token NO
|
||||
%token PERFORMER
|
||||
%token PREGAP
|
||||
%token PRE_EMPHASIS
|
||||
%token RESERVED1
|
||||
%token RESERVED2
|
||||
%token RESERVED3
|
||||
%token RESERVED4
|
||||
%token RW
|
||||
%token RW_RAW
|
||||
%token SILENCE
|
||||
%token SIZE_INFO
|
||||
%token SONGWRITER
|
||||
%token START
|
||||
%token SWAP
|
||||
%token TITLE
|
||||
%token TOC_INFO1
|
||||
%token TOC_INFO2
|
||||
%token TRACK
|
||||
%token TWO_CHANNEL_AUDIO
|
||||
%token UPC_EAN
|
||||
%token ZERO
|
||||
|
||||
%token LeftBrace /* "{" */
|
||||
%token RightBrace /* "}" */
|
||||
%token Colon /* ":" */
|
||||
%token Error /* Error token return */
|
||||
%token Id /* Id but not one of the above keywords */
|
||||
%token Integer
|
||||
%token String
|
||||
|
||||
/* Semantic value type.  Kept member-for-member identical to tocval_t in
   toclexer.h, which is the type actually in force because that header
   does "#define YYSTYPE tocval_t" before this point. */
%union {
  long unsigned int val;   /* For returning numbers. */
  char const *psz_str;     /* For strings. */
}
|
||||
|
||||
/* Grammar follows */
|
||||
%%
|
||||
|
||||
/* We optionally allow spaces at the end of the TOC file.
|
||||
*/
|
||||
toc: catalog_or_tocType cdTextGlobal tracks ;
|
||||
|
||||
catalog_or_tocType: catalog_or_tocType CATALOG String
|
||||
| catalog_or_tocType tocType
|
||||
| /* empty */ ;
|
||||
|
||||
tracks: tracks track | track ;
|
||||
|
||||
track: TRACK trackMode opt_subChannelMode opt_track_flags
|
||||
cdTextTrack opt_pregap_msf subTracks_or_starts_or_ends opt_index_msfs
|
||||
;
|
||||
|
||||
opt_track_flags: opt_track_flags track_flag
|
||||
| /* empty */;
|
||||
|
||||
track_flag: ISRC String
|
||||
| opt_no COPY
|
||||
| opt_no PRE_EMPHASIS
|
||||
| TWO_CHANNEL_AUDIO
|
||||
| FOUR_CHANNEL_AUDIO ;
|
||||
|
||||
opt_no: NO
|
||||
| /* empty */;
|
||||
|
||||
opt_pregap_msf: PREGAP msf
|
||||
| /* empty */;
|
||||
|
||||
opt_index_msfs: opt_index_msfs INDEX msf
|
||||
| /* empty */ ;
|
||||
|
||||
subTrack_or_start_or_end: subTrack
|
||||
| START
|
||||
| END;
|
||||
|
||||
subTracks_or_starts_or_ends: subTracks_or_starts_or_ends
|
||||
subTrack_or_start_or_end
|
||||
| subTrack_or_start_or_end ;
|
||||
|
||||
subTrack:
|
||||
AudioFile String opt_swap opt_start_offset samples
|
||||
| DATAFILE String opt_start_length
|
||||
| FIFO String dataLength
|
||||
| SILENCE samples
|
||||
| ZERO opt_dataMode opt_subChannelMode dataLength
|
||||
;
|
||||
|
||||
AudioFile: AUDIOFILE | FILE_TOKEN ;
|
||||
|
||||
opt_swap: SWAP
|
||||
| /* empty */;
|
||||
|
||||
/* '#' reaches the parser as a plain character code (toclex() returns
   unrecognised single characters as themselves), so it has to be written
   as a character token, not as the string token "#". */
opt_start_offset: '#' sLong
        | /* empty */;
|
||||
|
||||
/* '#' reaches the parser as a plain character code (toclex() returns
   unrecognised single characters as themselves), so both alternatives use
   the character token '#'. */
opt_start_length: '#' sLong
        | '#' sLong dataLength
        | /* empty */;
|
||||
|
||||
opt_dataMode: dataMode
|
||||
| /* empty */ ;
|
||||
|
||||
opt_string: String
|
||||
| /* empty */ ;
|
||||
|
||||
uLong: Integer ;
|
||||
|
||||
sLong: Integer ;
|
||||
|
||||
msf: Integer Colon Integer Colon Integer ;
|
||||
|
||||
samples: msf | uLong ;
|
||||
|
||||
dataLength: msf | uLong ;
|
||||
|
||||
dataMode: AUDIO | MODE0 | MODE1 | MODE1_RAW | MODE2
|
||||
| MODE2_RAW | MODE2_FORM1 | MODE2_FORM2 | MODE2_FORM_MIX
|
||||
;
|
||||
|
||||
|
||||
trackMode: AUDIO | MODE1 | MODE1_RAW | MODE2
|
||||
| MODE2_RAW | MODE2_FORM1 | MODE2_FORM2 | MODE2_FORM_MIX
|
||||
;
|
||||
|
||||
|
||||
opt_subChannelMode: RW | RW_RAW
|
||||
| /* empty */;
|
||||
|
||||
tocType: CD_DA | CD_ROM | CD_ROM_XA | CD_I ;
|
||||
|
||||
packType: TITLE | PERFORMER | SONGWRITER | COMPOSER | ARRANGER
|
||||
| MESSAGE | DISC_ID | GENRE | TOC_INFO1 | TOC_INFO2
|
||||
| RESERVED1 | RESERVED2 | RESERVED3 | RESERVED4 | UPC_EAN
|
||||
| ISRC | SIZE_INFO ;
|
||||
|
||||
|
||||
binaryData: LeftBrace Integers RightBrace ;
|
||||
|
||||
/* Comma-separated list of integers.  ',' is delivered by toclex() as a
   plain character code, hence the character token. */
Integers: Integers ',' Integer | Integer ;
|
||||
|
||||
|
||||
cdTextItem: packType opt_string_or_binaryData | ;
|
||||
|
||||
opt_string_or_binaryData: opt_string | binaryData ;
|
||||
|
||||
cdTextBlock: LANGUAGE Integer LeftBrace cdTextItem RightBrace ;
|
||||
|
||||
opt_cdTextBlock: cdTextBlock
|
||||
| /* empty */;
|
||||
|
||||
opt_cdTextBlocks: opt_cdTextBlocks cdTextBlock
|
||||
| /* empty */ ;
|
||||
|
||||
opt_cdTextLanguageMap : LANGUAGE_MAP LeftBrace Language_mappings RightBrace
|
||||
| /* empty */;
|
||||
|
||||
Language_mappings: Language_mappings Language_mapping | Language_mapping ;
|
||||
|
||||
/* toclex() returns the named token Colon for ':' (as the msf rule already
   expects), so use it here rather than a string token. */
Language_mapping: Integer Colon Language_id ;
|
||||
|
||||
Language_id: Integer | EN;
|
||||
|
||||
cdTextTrack: CD_TEXT LeftBrace opt_cdTextBlocks RightBrace | ;
|
||||
|
||||
cdTextGlobal: CD_TEXT LeftBrace opt_cdTextLanguageMap opt_cdTextBlock
|
||||
RightBrace
|
||||
| /* empty */;
|
||||
|
||||
|
||||
%%
|
||||
|
||||
#ifdef STANDALONE
|
||||
/* The controlling function */
|
||||
|
||||
/*!
  Error callback, called by tocparse on error.

  Writes the message followed by a newline to stderr so that diagnostics
  do not get mixed into the verdict printed on stdout.

  @param s  the error message to report.
  @return   always 0.
*/
int
tocerror(char const *s)
{
  fprintf(stderr, "%s\n", s);
  return 0;
}
|
||||
|
||||
int
|
||||
main( int argc, char **argv )
|
||||
{
|
||||
int c;
|
||||
|
||||
tocdebug = 0;
|
||||
|
||||
while (1) {
|
||||
int option_index = 0;
|
||||
static struct option long_options[] = {
|
||||
{"debug", 0, 0, 'd'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
c = getopt_long (argc, argv, "d", long_options, &option_index);
|
||||
if (c == -1)
|
||||
break;
|
||||
|
||||
switch (c) {
|
||||
case 'd':
|
||||
tocdebug = 1;
|
||||
break;
|
||||
default:
|
||||
printf ("?? getopt returned character code 0%o ??\n", c);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if ( optind < argc ) {
|
||||
toc_in = fopen( argv[optind], "r" );
|
||||
if (!toc_in) {
|
||||
printf("unable to open %s for reading: %s\n", argv[optind],
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
} else
|
||||
toc_in = stdin;
|
||||
|
||||
|
||||
if (tocparse()==0) {
|
||||
printf("Is a TOC file\n");
|
||||
} else {
|
||||
printf("Isn't a TOC file\n");
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
#endif /* STANDALONE*/
|
||||
198
parse/toclexer.c
Normal file
198
parse/toclexer.c
Normal file
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
$Id: toclexer.c,v 1.1 2005/01/31 10:20:51 rocky Exp $
|
||||
|
||||
Copyright (C) 2005 Rocky Bernstein <rocky@panix.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
/* Lexical scanner for cdrdao's TOC. */
|
||||
#include "toclexer.h"
|
||||
#include "toc.tab.h"
|
||||
#include <ctype.h>
|
||||
|
||||
#define YYEOF 0
|
||||
|
||||
/* A structure for associating a word with a token. */
|
||||
typedef struct keyword_s
|
||||
{
|
||||
char const *psz_keyword;
|
||||
token_t i_token;
|
||||
} keyword_t;
|
||||
|
||||
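/* These are all of the words that might appear in a TOC file and
   the token association that the parser will use.

   The entries must stay sorted by keyword in strcmp() order, because
   toclex() looks words up with bsearch().  The final {0, 0} entry is a
   terminator and is excluded from the search.
*/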
|
||||
const keyword_t keywords[] =
|
||||
{
|
||||
{"ARRANGER", ARRANGER},
|
||||
{"AUDIO", AUDIO},
|
||||
{"AUDIOFILE", AUDIOFILE},
|
||||
{"CATALOG", CATALOG},
|
||||
{"CD_DA", CD_DA},
|
||||
{"CD_I", CD_I},
|
||||
{"CD_ROM", CD_ROM},
|
||||
{"CD_ROM_XA", CD_ROM_XA},
|
||||
{"CD_TEXT", CD_TEXT},
|
||||
{"COMPOSER", COMPOSER},
|
||||
{"COPY", COPY},
|
||||
{"DATAFILE", DATAFILE},
|
||||
{"DISC_ID", DISC_ID},
|
||||
{"EN", EN},
|
||||
{"END", END},
|
||||
{"FIFO", FIFO},
|
||||
{"FILE", FILE_TOKEN},
|
||||
{"FOUR_CHANNEL_AUDIO", FOUR_CHANNEL_AUDIO},
|
||||
{"GENRE", GENRE},
|
||||
{"INDEX", INDEX},
|
||||
{"ISRC", ISRC},
|
||||
{"LANGUAGE", LANGUAGE},
|
||||
{"LANGUAGE_MAP", LANGUAGE_MAP},
|
||||
{"MESSAGE", MESSAGE},
|
||||
{"MODE0", MODE0},
|
||||
{"MODE1", MODE1},
|
||||
{"MODE1_RAW", MODE1_RAW},
|
||||
{"MODE2", MODE2},
|
||||
{"MODE2_FORM1", MODE2_FORM1},
|
||||
{"MODE2_FORM2", MODE2_FORM2},
|
||||
{"MODE2_FORM_MIX", MODE2_FORM_MIX},
|
||||
{"MODE2_RAW", MODE2_RAW},
|
||||
{"NO", NO},
|
||||
{"PERFORMER", PERFORMER},
|
||||
{"PREGAP", PREGAP},
|
||||
{"PRE_EMPHASIS", PRE_EMPHASIS},
|
||||
{"RESERVED1", RESERVED1},
|
||||
{"RESERVED2", RESERVED2},
|
||||
{"RESERVED3", RESERVED3},
|
||||
{"RESERVED4", RESERVED4},
|
||||
{"RW", RW},
|
||||
{"RW_RAW", RW_RAW},
|
||||
{"SILENCE", SILENCE},
|
||||
{"SIZE_INFO", SIZE_INFO},
|
||||
{"SONGWRITER", SONGWRITER},
|
||||
{"START", START},
|
||||
{"SWAP", SWAP},
|
||||
{"TITLE", TITLE},
|
||||
{"TOC_INFO1", TOC_INFO1},
|
||||
{"TOC_INFO2", TOC_INFO2},
|
||||
{"TRACK", TRACK},
|
||||
{"TWO_CHANNEL_AUDIO", TWO_CHANNEL_AUDIO},
|
||||
{"UPC_EAN", UPC_EAN},
|
||||
{"ZERO", ZERO},
|
||||
{0, 0}
|
||||
};
|
||||
|
||||
static int
|
||||
compare_keyword(const void *p_id, const void *p_keyword) {
|
||||
char *psz_id = (char *) p_id;
|
||||
char const *psz_keyword = ((keyword_t *) p_keyword)->psz_keyword;
|
||||
return strcmp(psz_id, psz_keyword);
|
||||
}
|
||||
|
||||
token_t
|
||||
toclex (void)
|
||||
{
|
||||
int c;
|
||||
|
||||
start:
|
||||
/* Skip white space. */
|
||||
while ( isspace(c = fgetc (toc_in)) )
|
||||
;
|
||||
|
||||
/* Process a number. */
|
||||
if (isdigit (c))
|
||||
{
|
||||
ungetc (c, toc_in);
|
||||
fscanf (toc_in, "%lu", &(toclval.val));
|
||||
return Integer;
|
||||
}
|
||||
|
||||
/* Process a comment. */
|
||||
if ( '/' == c ) {
|
||||
if ('/' == (c = fgetc (toc_in)) ) {
|
||||
while ((c = fgetc (toc_in)) != EOF && c != '\n')
|
||||
;
|
||||
|
||||
/* Return end-of-input. */
|
||||
if (EOF == c) return YYEOF;
|
||||
goto start;
|
||||
}
|
||||
/* Not a comment. So put back the character after the '/' and
|
||||
return '/' */
|
||||
ungetc (c, toc_in);
|
||||
return '/';
|
||||
}
|
||||
|
||||
/* Char starts an identifier => read the name. */
|
||||
if (isalpha (c))
|
||||
{
|
||||
static char symbol[50] = "";
|
||||
unsigned int i;
|
||||
|
||||
i = 0;
|
||||
do
|
||||
{
|
||||
/* Add this character to the buffer. */
|
||||
symbol[i++] = c;
|
||||
/* Get another character. */
|
||||
c = fgetc (toc_in);
|
||||
}
|
||||
while (isgraph (c));
|
||||
|
||||
ungetc (c, toc_in);
|
||||
symbol[i] = '\0';
|
||||
|
||||
toclval.psz_str = symbol;
|
||||
|
||||
{
|
||||
keyword_t *p_keyword;
|
||||
p_keyword = bsearch(symbol, keywords,
|
||||
(sizeof(keywords) / sizeof(keyword_t)) - 1,
|
||||
sizeof(keyword_t), compare_keyword);
|
||||
if (!p_keyword) return Id;
|
||||
return p_keyword->i_token;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Process a string.
|
||||
To do: save the value of the string and process octal numbers.
|
||||
*/
|
||||
if ( '"' == c ) {
|
||||
int b_backslash = 0;
|
||||
while ( EOF != (c = fgetc (toc_in))
|
||||
&& (b_backslash || '"' != c ) ) {
|
||||
b_backslash = ('\\' == c );
|
||||
}
|
||||
|
||||
/* Return end-of-input. */
|
||||
if (EOF == c) return YYEOF;
|
||||
|
||||
return String;
|
||||
}
|
||||
|
||||
/* Return end-of-input. */
|
||||
if (EOF == c) return YYEOF;
|
||||
|
||||
switch (c) {
|
||||
case ':': return Colon;
|
||||
case '{': return LeftBrace;
|
||||
case '}': return RightBrace;
|
||||
default:
|
||||
/* Return a single char. */
|
||||
return c;
|
||||
}
|
||||
|
||||
}
|
||||
44
parse/toclexer.h
Normal file
44
parse/toclexer.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
$Id: toclexer.h,v 1.1 2005/01/31 10:20:51 rocky Exp $
|
||||
|
||||
Copyright (C) 2005 Rocky Bernstein <rocky@panix.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
/* Common header between TOC lexer and parser. */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "string.h"
|
||||
|
||||
/* Token codes returned by toclex(): named tokens from toc.tab.h or plain
   character codes. */
typedef int token_t;

/* Input stream read by toclex(); defined in toc.y. */
extern FILE *toc_in;

/* Semantic value attached to a token. */
typedef union {
  long unsigned int val;   /* For returning numbers. */
  char const *psz_str;     /* For strings. */
} tocval_t;

#define YYSTYPE tocval_t

/* Semantic value of the most recently scanned token.  Declared only:
   this header is included by more than one translation unit, and the
   Bison-generated parser (built with "-p toc") provides the single
   definition. */
extern YYSTYPE toclval;

/* Call to the TOC scanner */
token_t toclex (void);
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user