First attempt at Bison TOC parser.

2005-01-31 10:20:51 +00:00
parent 6c50829f71
commit 9664537b44
5 changed files with 555 additions and 1 deletions
--- a/parse/.cvsignore
+++ b/parse/.cvsignore
@@ -2,5 +2,8 @@ cue.tab.c
 cue.tab.h
 cuelexer
 cueparser
+toc.tab.c
+toc.tab.h
+tocparser
 lex.cuelex.c
 lex.cue.c
--- a/parse/Makefile
+++ b/parse/Makefile
@@ -1,5 +1,5 @@
 INCLUDES = -I .
-all: cueparser cuelexer
+all: cueparser cuelexer tocparser

 lex.cue.c: cue.L cue.tab.h
 	flex -Pcue cue.L
@@ -25,6 +25,20 @@ cueparser: lex.cue.o cue.tab.o
 cuelexer: lex.cuelex.o 
 	gcc -g lex.cuelex.o -lfl -o cuelexer

+toc.tab.h: toc.tab.c
+# bison -d writes toc.tab.h next to toc.tab.c, so the header needs no recipe.
+toc.tab.c: toc.y
+	bison -p toc -d toc.y
+# toclexer.c includes toclexer.h and the bison-generated toc.tab.h.
+toclexer.o: toclexer.c toclexer.h toc.tab.h
+	gcc -g -Wall -c toclexer.c $(INCLUDES)
+# toc.tab.c pulls in toclexer.h through the grammar prologue.
+toc.tab.o: toc.tab.c toc.tab.h toclexer.h
+	gcc -g -Wall -DSTANDALONE -c toc.tab.c $(INCLUDES)
+
+tocparser: toc.tab.o toclexer.o
+	gcc -g toclexer.o toc.tab.o -o tocparser
+
 clean: 
 	rm -f lex.cue.c lex.cuelex.c lex.cue.o lex.cuelex.o cue.tab.c \
 	    cue.tab.o cueparser cuelexer
--- a/parse/toc.y
+++ b/parse/toc.y
@@ -0,0 +1,295 @@
+/*
+    $Id: toc.y,v 1.1 2005/01/31 10:20:51 rocky Exp $
+
+    Copyright (C) 2005 Rocky Bernstein <rocky@panix.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+/* Yacc grammar for cdrdao TOC file */
+%{
+#include "toclexer.h"
+#include "errno.h"
+FILE *toc_in;
+int tocerror (char const *s);
+
+#ifdef STANDALONE
+#include <getopt.h>
+#define YYDEBUG 1
+#endif
+
+%}
+     
+/* BISON Declarations */
+
+%token ARRANGER
+%token AUDIO
+%token AUDIOFILE
+%token CATALOG
+%token CD_DA
+%token CD_I
+%token CD_ROM
+%token CD_ROM_XA
+%token CD_TEXT
+%token COMPOSER
+%token COPY
+%token DATAFILE
+%token DISC_ID
+%token EN
+%token END
+%token FIFO
+%token FILE_TOKEN
+%token FOUR_CHANNEL_AUDIO
+%token GENRE
+%token INDEX
+%token ISRC
+%token LANGUAGE
+%token LANGUAGE_MAP
+%token MESSAGE
+%token MODE0
+%token MODE1
+%token MODE1_RAW
+%token MODE2
+%token MODE2_FORM1
+%token MODE2_FORM2
+%token MODE2_FORM_MIX
+%token MODE2_RAW
+%token NO
+%token PERFORMER
+%token PREGAP
+%token PRE_EMPHASIS
+%token RESERVED1
+%token RESERVED2
+%token RESERVED3
+%token RESERVED4
+%token RW
+%token RW_RAW
+%token SILENCE
+%token SIZE_INFO
+%token SONGWRITER
+%token START
+%token SWAP
+%token TITLE
+%token TOC_INFO1
+%token TOC_INFO2
+%token TRACK
+%token TWO_CHANNEL_AUDIO
+%token UPC_EAN
+%token ZERO
+
+%token LeftBrace  /* "{" */
+%token RightBrace  /* "}" */
+%token Colon  /* ":" */
+%token Error  /* Error token return */
+%token Id     /* Id but not one of the above keywords */
+%token Integer
+%token String
+
+%union {
+  long unsigned int val;   /* For returning numbers.  */
+  char const *psz_str;     /* For strings and names; same layout as tocval_t.  */
+}
+
+/* Grammar follows */
+%%
+
+/* We optionally allow spaces at the end of the TOC file.
+ */
+toc: catalog_or_tocType cdTextGlobal tracks ;
+
+catalog_or_tocType: catalog_or_tocType CATALOG String 
+                    | catalog_or_tocType tocType
+                    | /* empty */ ;
+
+tracks: tracks track | track ;
+
+track: TRACK trackMode opt_subChannelMode opt_track_flags
+    cdTextTrack opt_pregap_msf subTracks_or_starts_or_ends opt_index_msfs
+    ;
+
+opt_track_flags: opt_track_flags track_flag 
+       | /* empty */;
+
+track_flag: ISRC String 
+          | opt_no COPY 
+          | opt_no PRE_EMPHASIS
+          | TWO_CHANNEL_AUDIO  
+          | FOUR_CHANNEL_AUDIO ;
+
+opt_no: NO 
+          | /* empty */;
+
+opt_pregap_msf: PREGAP msf 
+         | /* empty  */;
+
+opt_index_msfs: opt_index_msfs INDEX msf 
+         | /* empty */ ;
+
+subTrack_or_start_or_end: subTrack 
+         | START 
+         | END;
+
+subTracks_or_starts_or_ends: subTracks_or_starts_or_ends 
+                             subTrack_or_start_or_end 
+                           | subTrack_or_start_or_end ;
+
+subTrack: 
+    AudioFile String opt_swap opt_start_offset samples
+     | DATAFILE String opt_start_length
+     | FIFO String dataLength 
+     | SILENCE samples 
+     | ZERO opt_dataMode opt_subChannelMode dataLength 
+ ;
+
+AudioFile: AUDIOFILE | FILE_TOKEN ;
+
+opt_swap: SWAP 
+     | /* empty */;
+
+opt_start_offset: '#' sLong   /* char token: toclex returns '#' as its char code */
+     | /* empty */;
+
+opt_start_length: '#' sLong   /* char token: toclex returns '#' as its char code */
+     | '#' sLong dataLength 
+     | /* empty */;
+
+opt_dataMode: dataMode 
+     | /* empty */ ;
+
+opt_string: String 
+     | /* empty */  ;
+
+uLong: Integer ;
+
+sLong: Integer ;
+
+msf: Integer Colon Integer Colon Integer ;
+
+samples: msf | uLong ;
+
+dataLength:  msf | uLong  ;
+
+dataMode: AUDIO | MODE0 | MODE1 | MODE1_RAW | MODE2
+     | MODE2_RAW | MODE2_FORM1 | MODE2_FORM2 | MODE2_FORM_MIX
+     ;
+
+
+trackMode:  AUDIO | MODE1 | MODE1_RAW | MODE2
+     | MODE2_RAW  | MODE2_FORM1 | MODE2_FORM2 | MODE2_FORM_MIX
+     ;
+
+
+opt_subChannelMode: RW | RW_RAW 
+     | /* empty  */;
+
+tocType: CD_DA | CD_ROM | CD_ROM_XA | CD_I ;
+
+packType:  TITLE | PERFORMER | SONGWRITER | COMPOSER | ARRANGER  
+     | MESSAGE | DISC_ID | GENRE | TOC_INFO1 | TOC_INFO2  
+     | RESERVED1 | RESERVED2 | RESERVED3 | RESERVED4 | UPC_EAN
+     | ISRC | SIZE_INFO ;
+
+
+binaryData:  LeftBrace Integers RightBrace  ;
+
+Integers:  Integers ',' Integer | Integer ;  /* ',' reaches the parser as a char */
+
+         
+cdTextItem: packType opt_string_or_binaryData | ;
+
+opt_string_or_binaryData: opt_string | binaryData ;
+ 
+cdTextBlock:  LANGUAGE Integer LeftBrace cdTextItem RightBrace ;
+
+opt_cdTextBlock:  cdTextBlock 
+         | /* empty */;
+
+opt_cdTextBlocks:  opt_cdTextBlocks cdTextBlock 
+         | /* empty */ ;
+
+opt_cdTextLanguageMap : LANGUAGE_MAP LeftBrace Language_mappings RightBrace 
+         | /* empty */;
+
+Language_mappings: Language_mappings Language_mapping | Language_mapping ;
+
+Language_mapping: Integer Colon Language_id ;  /* toclex returns Colon for ':' */
+
+Language_id: Integer | EN;
+
+cdTextTrack:  CD_TEXT LeftBrace opt_cdTextBlocks  RightBrace | ;
+
+cdTextGlobal: CD_TEXT LeftBrace opt_cdTextLanguageMap opt_cdTextBlock  
+                      RightBrace 
+            | /* empty */;
+
+
+%%
+
+#ifdef STANDALONE
+/* The controlling function */
+
+int
+tocerror(char const *s)	/* error callback for tocparse: echo the message */
+{
+	printf("%s\n", s);
+	return 0;
+}
+
+int
+main( int argc, char **argv )	/* test driver: exit 0 only if input parses as TOC */
+{
+  int c;
+
+  tocdebug = 0;
+  
+  while (1) {
+    int option_index = 0;
+    static struct option long_options[] = {
+      {"debug", 0, 0, 'd'},
+      {0, 0, 0, 0}
+    };
+
+    c = getopt_long (argc, argv, "d", long_options, &option_index);
+    if (c == -1)   /* no more options */
+      break;
+    
+    switch (c) {
+    case 'd':   /* -d / --debug sets the parser's debug flag */
+      tocdebug = 1;
+      break;
+    default:
+      printf ("?? getopt returned character code 0%o ??\n", c);
+      exit(1);
+    }
+  }
+  
+  if ( optind < argc ) {   /* first non-option argument names the input */
+    toc_in = fopen( argv[optind], "r" );
+    if (!toc_in) {
+      printf("unable to open %s for reading: %s\n", argv[optind], 
+	     strerror(errno));
+      exit(1);
+    }
+  } else
+    toc_in = stdin;   /* no file given: parse standard input */
+
+
+  if (tocparse()==0) {
+    printf("Is a TOC file\n");
+    exit(0);
+  }
+  printf("Isn't a TOC file\n");
+  exit(1);   /* non-zero so callers can detect a rejected file */
+}
+#endif /* STANDALONE*/
--- a/parse/toclexer.c
+++ b/parse/toclexer.c
@@ -0,0 +1,198 @@
+/*
+    $Id: toclexer.c,v 1.1 2005/01/31 10:20:51 rocky Exp $
+
+    Copyright (C) 2005 Rocky Bernstein <rocky@panix.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* Lexical scanner for cdrdao's TOC. */
+#include "toclexer.h"
+#include "toc.tab.h"
+#include <ctype.h>
+
+#define YYEOF 0
+
+/* A structure for associating a word with a token. */
+typedef struct keyword_s
+{
+  char const *psz_keyword;
+  token_t i_token;
+} keyword_t;
+
+/* Every keyword a TOC file may contain, paired with its parser token.
+   Keep the entries in strcmp() order: toclex() looks them up with bsearch().
+*/
+const keyword_t keywords[] =
+  {
+    {"ARRANGER",           ARRANGER},
+    {"AUDIO",              AUDIO},
+    {"AUDIOFILE",          AUDIOFILE},
+    {"CATALOG",            CATALOG},
+    {"CD_DA",              CD_DA},
+    {"CD_I",               CD_I},
+    {"CD_ROM",             CD_ROM},
+    {"CD_ROM_XA",          CD_ROM_XA},
+    {"CD_TEXT",            CD_TEXT},
+    {"COMPOSER",           COMPOSER},
+    {"COPY",               COPY},
+    {"DATAFILE",           DATAFILE},
+    {"DISC_ID",            DISC_ID},
+    {"EN",                 EN},
+    {"END",                END},
+    {"FIFO",               FIFO},
+    {"FILE",               FILE_TOKEN},
+    {"FOUR_CHANNEL_AUDIO", FOUR_CHANNEL_AUDIO},
+    {"GENRE",              GENRE},
+    {"INDEX",              INDEX},
+    {"ISRC",               ISRC},
+    {"LANGUAGE",           LANGUAGE},
+    {"LANGUAGE_MAP",       LANGUAGE_MAP},
+    {"MESSAGE",            MESSAGE},
+    {"MODE0",              MODE0},
+    {"MODE1",              MODE1},
+    {"MODE1_RAW",          MODE1_RAW},
+    {"MODE2",              MODE2},
+    {"MODE2_FORM1",        MODE2_FORM1},
+    {"MODE2_FORM2",        MODE2_FORM2},
+    {"MODE2_FORM_MIX",     MODE2_FORM_MIX},
+    {"MODE2_RAW",          MODE2_RAW},
+    {"NO",                 NO},
+    {"PERFORMER",          PERFORMER},
+    {"PREGAP",             PREGAP},
+    {"PRE_EMPHASIS",       PRE_EMPHASIS},
+    {"RESERVED1",          RESERVED1},
+    {"RESERVED2",          RESERVED2},
+    {"RESERVED3",          RESERVED3},
+    {"RESERVED4",          RESERVED4},
+    {"RW",                 RW},
+    {"RW_RAW",             RW_RAW},
+    {"SILENCE",            SILENCE},
+    {"SIZE_INFO",          SIZE_INFO},
+    {"SONGWRITER",         SONGWRITER},
+    {"START",              START},
+    {"SWAP",               SWAP},
+    {"TITLE",              TITLE},
+    {"TOC_INFO1",          TOC_INFO1},
+    {"TOC_INFO2",          TOC_INFO2},
+    {"TRACK",              TRACK},
+    {"TWO_CHANNEL_AUDIO",  TWO_CHANNEL_AUDIO},
+    {"UPC_EAN",            UPC_EAN},
+    {"ZERO",               ZERO},
+    {0, 0}
+  };
+
+static int
+compare_keyword(const void *p_id, const void *p_keyword) {
+  /* bsearch() comparator: p_id is the scanned word, p_keyword a table row. */
+  const keyword_t *p_entry = p_keyword;
+  return strcmp((const char *) p_id, p_entry->psz_keyword);
+}
+
+token_t
+toclex (void)	/* next token read from toc_in; YYEOF (0) at end of input */
+{
+  int c;
+
+ start:  
+  /* Skip white space.  */
+  while  ( isspace(c = fgetc (toc_in)) )
+    ;
+
+  /* Process a number.  */
+  if (isdigit (c))
+    {
+      ungetc (c, toc_in);
+      fscanf (toc_in, "%lu", &(toclval.val));
+      return Integer;
+    }
+
+  /* Process a comment.  */
+  if ( '/' == c ) {
+    if ('/' == (c = fgetc (toc_in)) ) {
+      while ((c = fgetc (toc_in)) != EOF && c != '\n')
+	;
+
+      /* Return end-of-input.  */
+      if (EOF == c) return YYEOF;
+      goto start;
+    }
+    /* Not a comment. So put back the character after the '/' and
+       return '/'  */
+    ungetc (c, toc_in);
+    return '/';
+  }
+  
+  /* Char starts an identifier => read the name.       */
+  if (isalpha (c))
+    {
+      static char symbol[50] = "";
+      unsigned int i;
+      
+      i = 0;
+      do
+	{
+	  /* Keep the character unless the buffer is full (NUL needs a slot). */
+	  if (i < sizeof(symbol) - 1) symbol[i++] = c;
+	  /* Get another character.                    */
+	  c = fgetc (toc_in);
+	}
+      while (isalnum (c) || '_' == c);  /* stop at punctuation such as '}' */
+      
+      ungetc (c, toc_in);
+      symbol[i] = '\0';
+
+      toclval.psz_str = symbol;
+      
+      {
+	keyword_t *p_keyword;
+	p_keyword = bsearch(symbol, keywords, 
+			    (sizeof(keywords) / sizeof(keyword_t)) - 1, /* skip {0, 0} */
+			    sizeof(keyword_t), compare_keyword);
+	if (!p_keyword) return Id;
+	return p_keyword->i_token;
+      }
+      
+    }
+
+  /* Process a string.  
+     To do: save the value of the string and process octal numbers.
+   */
+  if ( '"' == c ) {
+    int b_backslash = 0;
+    while ( EOF != (c = fgetc (toc_in)) 
+	    && (b_backslash || '"' != c ) ) {
+      b_backslash = !b_backslash && ('\\' == c );  /* "\\" escapes only itself */
+    }
+
+    /* Unterminated string: report end-of-input.  */
+    if (EOF == c) return YYEOF;
+
+    return String;
+  }
+  
+  /* Return end-of-input.  */
+  if (EOF == c) return YYEOF;
+
+  switch (c) {
+  case ':': return Colon;
+  case '{': return LeftBrace;
+  case '}': return RightBrace;
+  default:
+    /* Return a single char.  */
+    return c;
+  }
+  
+}
--- a/parse/toclexer.h
+++ b/parse/toclexer.h
@@ -0,0 +1,44 @@
+/*
+    $Id: toclexer.h,v 1.1 2005/01/31 10:20:51 rocky Exp $
+
+    Copyright (C) 2005 Rocky Bernstein <rocky@panix.com>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* Common header between TOC lexer and parser. */
+#include <stdio.h>
+#include <stdlib.h>
+#include "string.h"
+
+typedef int token_t;
+
+extern FILE *toc_in;
+
+typedef union {
+  long unsigned int val;   /* For returning numbers.  */
+  char  const *psz_str;    /* For strings.  */
+} tocval_t;
+
+#define YYSTYPE tocval_t
+/* Declaration only: the bison-generated parser (toc.tab.c) defines toclval. */
+extern YYSTYPE toclval;
+
+/* Call to the TOC scanner */
+token_t toclex (void);
+
+
+
+