From b7ebc584398998d7c1b9471a4367991080905124 Mon Sep 17 00:00:00 2001 From: peg626 Date: Thu, 9 Apr 2026 19:23:20 -0300 Subject: [PATCH 1/5] Delete src directory --- src/__init__.py | 0 src/c/bison_callback.c | 233 ------------- src/c/bison_callback.h | 5 - src/c/bisondynlib-linux.c | 108 ------ src/c/bisondynlib-win32.c | 89 ----- src/c/bisondynlib.h | 20 -- src/c/win32test.c | 5 - src/pyrex/bison_.pyx | 689 -------------------------------------- src/python/__init__.py | 352 ------------------- src/python/convert.py | 377 --------------------- src/python/node.py | 168 ---------- src/python/xmlifier.py | 136 -------- 12 files changed, 2182 deletions(-) delete mode 100644 src/__init__.py delete mode 100644 src/c/bison_callback.c delete mode 100644 src/c/bison_callback.h delete mode 100644 src/c/bisondynlib-linux.c delete mode 100644 src/c/bisondynlib-win32.c delete mode 100644 src/c/bisondynlib.h delete mode 100644 src/c/win32test.c delete mode 100644 src/pyrex/bison_.pyx delete mode 100644 src/python/__init__.py delete mode 100644 src/python/convert.py delete mode 100644 src/python/node.py delete mode 100644 src/python/xmlifier.py diff --git a/src/__init__.py b/src/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/c/bison_callback.c b/src/c/bison_callback.c deleted file mode 100644 index b5b03be..0000000 --- a/src/c/bison_callback.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Callback functions called by bison. - * - * The original py_callback function is removed from bison_.pyx because Cython - * generated crappy code for that callback. Cython's generated code caused - * segfaults when python triggered its garbage collection. Thus, something was - * wrong with references. Debugging the generated code was hard and the - * callbacks are part of PyBison's core, so implementing the callbacks in C - * instead of generating them by Cython seems the right way to go. - * - * Written januari 2012 by Sander Mathijs van Veen - * Copyright (c) 2012 by Sander Mathijs van Veen, all rights reserved. - * - * Released under the GNU General Public License, a copy of which should appear - * in this distribution in the file called 'COPYING'. If this file is missing, - * then you can obtain a copy of the GPL license document from the GNU website - * at http://www.gnu.org. - * - * This software is released with no warranty whatsoever. Use it at your own - * risk. - * - * If you wish to use this software in a commercial application, and wish to - * depart from the GPL licensing requirements, please contact the author and - * apply for a commercial license. - */ - -#include "Python.h" - -#include -#include -#include - -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) - -static PyObject *py_attr_hook_handler_name; -static PyObject *py_attr_hook_read_after_name; -static PyObject *py_attr_hook_read_before_name; - -static PyObject *py_attr_handle_name; -static PyObject *py_attr_read_name; -static PyObject *py_attr_file_name; -static PyObject *py_attr_close_name; - -// Construct attribute names (only the first time) -// TODO: where do we Py_DECREF(handle_name) ?? -#define INIT_ATTR(variable, name, failure) \ - if (unlikely(!variable)) { \ - variable = PyString_FromString(name); \ - if (!variable) failure; \ - } - -#define debug_refcnt(variable, count) { \ - printf(#variable ": %d\n", Py_REFCNT(variable)); \ - assert(Py_REFCNT(variable) == count); \ - } - -/* - * Callback function which is invoked by target handlers within the C yyparse() - * function. This callback function will return parser._handle's python object - * or, on failure, NULL is returned. - */ -PyObject* py_callback(PyObject *parser, char *target, int option, int nargs, - ...) -{ - va_list ap; - int i; - - PyObject *res; - - PyObject *names = PyList_New(nargs), - *values = PyList_New(nargs); - - va_start(ap, nargs); - - // Construct the names and values list from the variable argument list. - for(i = 0; i < nargs; i++) { - PyObject *name = PyString_FromString(va_arg(ap, char *)); - PyList_SetItem(names, i, name); - - PyObject *value = va_arg(ap, PyObject *); - Py_INCREF(value); - PyList_SetItem(values, i, value); - } - - va_end(ap); - - INIT_ATTR(py_attr_handle_name, "_handle", return NULL); - INIT_ATTR(py_attr_hook_handler_name, "hook_handler", return NULL); - - // Call the handler with the arguments - PyObject *handle = PyObject_GetAttr(parser, py_attr_handle_name); - - if (unlikely(!handle)) return NULL; - - PyObject *arglist = Py_BuildValue("(siOO)", target, option, names, values); - if (unlikely(!arglist)) { Py_DECREF(handle); return NULL; } - - res = PyObject_CallObject(handle, arglist); - - Py_DECREF(handle); - Py_DECREF(arglist); - - if (unlikely(!res)) return res; - - // Check if the "hook_handler" callback exists - handle = PyObject_GetAttr(parser, py_attr_hook_handler_name); - - if (!handle) { - PyErr_Clear(); - return res; - } - - // XXX: PyObject_GetAttr increases the refcnt of py_attr_hook_handler_name - // by one. - //debug_refcnt(py_attr_hook_handler_name, 1); - - // Call the "hook_handler" callback - arglist = Py_BuildValue("(siOOO)", target, option, names, values, res); - if (unlikely(!arglist)) { Py_DECREF(handle); return res; } - - res = PyObject_CallObject(handle, arglist); - - Py_DECREF(handle); - Py_DECREF(arglist); - - return res; -} - -void py_input(PyObject *parser, char *buf, int *result, int max_size) -{ - PyObject *handle, *arglist, *res; - char *bufstr; - - INIT_ATTR(py_attr_hook_read_after_name, "hook_read_after", return); - INIT_ATTR(py_attr_hook_read_before_name, "hook_read_before", return); - INIT_ATTR(py_attr_read_name, "read", return); - INIT_ATTR(py_attr_file_name, "file", return); - INIT_ATTR(py_attr_close_name, "close", return); - - // Check if the "hook_READ_BEFORE" callback exists - if (PyObject_HasAttr(parser, py_attr_hook_read_before_name)) - { - handle = PyObject_GetAttr(parser, py_attr_hook_read_before_name); - if (unlikely(!handle)) return; - - // Call the "hook_READ_BEFORE" callback - arglist = PyTuple_New(0); - if (unlikely(!arglist)) { Py_DECREF(handle); return; } - - res = PyObject_CallObject(handle, arglist); - - Py_DECREF(handle); - Py_DECREF(arglist); - - if (unlikely(!res)) return; - } - - // Read the input string and catch keyboard interrupt exceptions. - handle = PyObject_GetAttr(parser, py_attr_read_name); - if (unlikely(!handle)) return; - - arglist = Py_BuildValue("(i)", max_size); - if (unlikely(!arglist)) { Py_DECREF(handle); return; } - - res = PyObject_CallObject(handle, arglist); - - Py_DECREF(handle); - Py_DECREF(arglist); - - if (unlikely(!res)) { - // Catch and reset KeyboardInterrupt exception - PyObject *given = PyErr_Occurred(); - if (given && PyErr_GivenExceptionMatches(given, - PyExc_KeyboardInterrupt)) { - - PyErr_Clear(); - } - - return; - } - - // Check if the "hook_read_after" callback exists - if (unlikely(!PyObject_HasAttr(parser, py_attr_hook_read_after_name))) - goto finish_input; - - handle = PyObject_GetAttr(parser, py_attr_hook_read_after_name); - if (unlikely(!handle)) return; - - // Call the "hook_READ_AFTER" callback - arglist = Py_BuildValue("(O)", res); - if (unlikely(!arglist)) { Py_DECREF(handle); return; } - - res = PyObject_CallObject(handle, arglist); - - Py_XDECREF(res); - Py_DECREF(handle); - Py_DECREF(arglist); - - if (unlikely(!res)) return; - -finish_input: - - // Copy the read python input string to the buffer - bufstr = PyString_AsString(res); - *result = strlen(bufstr); - memcpy(buf, bufstr, *result); - - // Close the read buffer if nothing is read. Marks the Python file object - // as being closed from Python's point of view. This does not close the - // associated C stream (which is not necessary here, otherwise use - // "os.close(0)"). - if (!*result && PyObject_HasAttr(parser, py_attr_file_name)) { - PyObject *file_handle = PyObject_GetAttr(parser, py_attr_file_name); - if (unlikely(!file_handle)) return; - - handle = PyObject_GetAttr(file_handle, py_attr_close_name); - Py_DECREF(file_handle); - if (unlikely(!handle)) return; - - arglist = PyTuple_New(0); - if (unlikely(!arglist)) { Py_DECREF(handle); return; } - - res = PyObject_CallObject(handle, arglist); - - Py_XDECREF(res); - Py_DECREF(handle); - Py_DECREF(arglist); - - // TODO: something went wrong while closing the buffer. - if (unlikely(!res)) return; - } -} diff --git a/src/c/bison_callback.h b/src/c/bison_callback.h deleted file mode 100644 index 239dab5..0000000 --- a/src/c/bison_callback.h +++ /dev/null @@ -1,5 +0,0 @@ -#include "Python.h" -#include "stdarg.h" - -PyObject* py_callback(PyObject *, char *, int, int,...); -void py_input(PyObject *, char *, int *, int); diff --git a/src/c/bisondynlib-linux.c b/src/c/bisondynlib-linux.c deleted file mode 100644 index 183f976..0000000 --- a/src/c/bisondynlib-linux.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Linux-specific dynamic library manipulation routines - */ - -#include "bisondynlib.h" -#include -#include - -void (*reset_flex_buffer)(void) = NULL; - -void *bisondynlib_open(char *filename) -{ - void *handle; - - handle = dlopen(filename, (RTLD_NOW|RTLD_GLOBAL)); - - dlerror(); - - if (!handle) - return NULL; - - reset_flex_buffer = dlsym(handle, "reset_flex_buffer"); - - dlerror(); - - return handle; -} - -int bisondynlib_close(void *handle) -{ - return dlclose(handle); -} - -void bisondynlib_reset(void) -{ - if (reset_flex_buffer) - reset_flex_buffer(); -} - -char *bisondynlib_err() -{ - return dlerror(); -} - -char *bisondynlib_lookup_hash(void *handle) -{ - char **hash; - - hash = dlsym(handle, "rules_hash"); - - dlerror(); - - return hash ? *hash : NULL; -} - -PyObject *bisondynlib_run(void *handle, PyObject *parser, void *cb, void *in, int debug) -{ - if(!handle) - return NULL; - - PyObject *(*pparser)(PyObject *, void *, void *, int); - - pparser = bisondynlib_lookup_parser(handle); - - if (!pparser) { - PyErr_SetString(PyExc_RuntimeError, - "bisondynlib_lookup_parser() returned NULL"); - return NULL; - } - - (*pparser)(parser, cb, in, debug); - - // Do not ignore a raised exception, but pass the exception through. - if (PyErr_Occurred()) - return NULL; - - Py_INCREF(Py_None); - return Py_None; - -} - -/* - * function(void *) returns a pointer to a function(PyObject *, char *) - * returning PyObject* - */ -PyObject *(*bisondynlib_lookup_parser(void *handle))(PyObject *, void *, void *, int) -{ - PyObject *(*do_parse)(PyObject *, void *, void *, int) = dlsym(handle, - "do_parse"); - - dlerror(); - - return do_parse; -} - -/* - * Runs the compiler commands which build the parser/lexer into a shared lib - */ - /* -int bisondynlib_build(char *libName, char *pyincdir) -{ - char buf[1024]; - sprintf(buf, "gcc -fPIC -shared -I%s tmp.bison.c tmp.lex.c -o %s", pyincdir, libName); - printf("Running linux build command: %s\n", buf); - system(buf); - return 0; -} -*/ diff --git a/src/c/bisondynlib-win32.c b/src/c/bisondynlib-win32.c deleted file mode 100644 index 0205700..0000000 --- a/src/c/bisondynlib-win32.c +++ /dev/null @@ -1,89 +0,0 @@ -//@+leo-ver=4 -//@+node:@file src/c/bisondynlib-win32.c -//@@language c -/* - * Linux-specific dynamic library manipulation routines - */ - -#include -#include "bisondynlib.h" - -#include "windows.h" - -//#include "dlluser.h" - -void * bisondynlib_open(char *filename) -{ - HINSTANCE hinstLib; - - hinstLib = LoadLibrary(filename); - - return (void *)hinstLib; -} - -int bisondynlib_close(void *handle) -{ - return FreeLibrary((HINSTANCE)handle); -} - -char * bisondynlib_err() -{ - return NULL; -} - - -char * bisondynlib_lookup_hash(void *handle) -{ - char *hash; - - hash = (char *)GetProcAddress((HINSTANCE)handle, "rules_hash"); - printf("bisondynlib_lookup_hash: hash=%s\n", hash); - return hash; -} - -PyObject * bisondynlib_run(void *handle, PyObject *parser, char *filename, void *cb) -{ - PyObject *(*pparser)(PyObject *, char *, void *); - - //printf("bisondynlib_run: looking up parser\n"); - pparser = bisondynlib_lookup_parser(handle); - //printf("bisondynlib_run: calling parser\n"); - - (*pparser)(parser, filename, cb); - - //printf("bisondynlib_run: back from parser\n"); - //return result; - Py_INCREF(Py_None); - return Py_None; - -} - -/* - * function(void *) returns a pointer to a function(PyObject *, char *) returning PyObject* - */ -PyObject *(*bisondynlib_lookup_parser(void *handle))(PyObject *, char *, void *) -{ - //void *pparser; - PyObject *(*pparser)(PyObject *, char *, void *); - - pparser = (PyObject *(*)(PyObject *, char *, void *))GetProcAddress((HINSTANCE)handle, "do_parse"); - - return pparser; -} - -/* - * Runs the compiler commands which build the parser/lexer into a shared lib - */ - /* -int bisondynlib_build(char *libName, char *pyincdir) -{ - char buf[1024]; - sprintf(buf, "gcc -fPIC -shared -I%s tmp.bison.c tmp.lex.c -o %s", pyincdir, libName); - printf("Running linux build command: %s\n", buf); - system(buf); - return 0; -} -*/ - -//@-node:@file src/c/bisondynlib-win32.c -//@-leo diff --git a/src/c/bisondynlib.h b/src/c/bisondynlib.h deleted file mode 100644 index ba0d3c6..0000000 --- a/src/c/bisondynlib.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * common interface to dynamic library routines - */ - -#include "Python.h" -#include - -void *bisondynlib_open(char *filename); -int bisondynlib_close(void *handle); -void bisondynlib_reset(void); -char *bisondynlib_err(void); - -PyObject *(*bisondynlib_lookup_parser(void *handle))(PyObject *, void *, void *, int); - -char *bisondynlib_lookup_hash(void *handle); - -PyObject *bisondynlib_run(void *handle, PyObject *parser, void *cb, void *in, int debug); -/* -int bisondynlib_build(char *libName, char *pyincdir); -*/ diff --git a/src/c/win32test.c b/src/c/win32test.c deleted file mode 100644 index 2e735ab..0000000 --- a/src/c/win32test.c +++ /dev/null @@ -1,5 +0,0 @@ -#include -int main(int argc, char *argv[]) -{ - printf("Hello, world\n"); -} diff --git a/src/pyrex/bison_.pyx b/src/pyrex/bison_.pyx deleted file mode 100644 index 293256f..0000000 --- a/src/pyrex/bison_.pyx +++ /dev/null @@ -1,689 +0,0 @@ -""" -Pyrex-generated portion of pybison -""" - -cdef extern from "Python.h": - object PyString_FromStringAndSize(char *, int) - object PyString_FromString(char *) - char *PyString_AsString(object o) - - object PyInt_FromLong(long ival) - long PyInt_AsLong(object io) - - object PyList_New(int len) - int PyList_SetItem(object list, int index, object item) - - void Py_INCREF(object o) - - object PyObject_GetAttrString(object o, char *attr_name) - object PyTuple_New(int len) - int PyTuple_SetItem(object p, int pos, object o) - object PyObject_Call(object callable_object, object args, object kw) - object PyObject_CallObject(object callable_object, object args) - int PyObject_SetAttrString(object o, char *attr_name, object v) - -# use libdl for now - easy and simple - maybe switch to -# glib or libtool if a keen windows dev sends in a patch - -#cdef extern from "dlfcn.h": -# void *dlopen(char *filename, int mode) -# int dlclose(void *handle) -# void *dlsym(void *handle, char *name) -# char *dlerror() -# -# ctypedef enum DL_MODES: -# RTLD_LAZY -# RTLD_NOW -# RTLD_BINDING_MASK -# RTLD_NOLOAD -# RTLD_GLOBAL - -cdef extern from "stdio.h": - int printf(char *format,...) - -cdef extern from "string.h": - void *memcpy(void *dest, void *src, long n) - -# Callback function which is invoked by target handlers -# within the C yyparse() function. -cdef extern from "../c/bison_callback.h": - object py_callback(object, char *, int, int,...) - void py_input(object, char *, int *, int) - -cdef extern from "../c/bisondynlib.h": - void *bisondynlib_open(char *filename) - int bisondynlib_close(void *handle) - void bisondynlib_reset() - char *bisondynlib_err() - object (*bisondynlib_lookup_parser(void *handle))(object, char *) - char *bisondynlib_lookup_hash(void *handle) - object bisondynlib_run(void *handle, object parser, void *cb, void *pyin, int debug) - - #int bisondynlib_build(char *libName, char *includedir) - - -import sys, os, sha, re, imp, traceback -import shutil -import distutils.sysconfig -import distutils.ccompiler - - -reSpaces = re.compile("\\s+") - -#unquoted = r"""^|[^'"]%s[^'"]?""" -unquoted = '[^\'"]%s[^\'"]?' - -cdef class ParserEngine: - """ - Wraps the interface to the binary bison/lex-generated parser engine dynamic - library. - - You shouldn't need to deal with this at all. - - Takes care of: - - building the library (if the parser rules have changed) - - loading the library and extracting the parser entry point - - calling the entry point - - closing the library - - Makes direct calls to the platform-dependent routines in - bisondynlib-[linux|windows].c - """ - cdef object parser - cdef object parserHash # hash of current python parser object - cdef object libFilename_py - - cdef void *libHandle - - # rules hash str embedded in bison parser lib - cdef char *libHash - - def __init__(self, parser): - """ - Creates a ParserEngine wrapper, and builds/loads the library. - - Arguments: - - parser - an instance of a subclass of Parser - - In the course of initialisation, we check the library against the - parser object's rules. If the lib doesn't exist, or can't be loaded, or - doesn't match, we build a new library. - - Either way, we end up with a binary parser engine which matches the - current rules in the parser object. - """ - self.parser = parser - - self.libFilename_py = parser.buildDirectory \ - + parser.bisonEngineLibName \ - + imp.get_suffixes()[0][0] - - self.parserHash = hashParserObject(self.parser) - - self.openCurrentLib() - - def reset(self): - """ - Reset Flex's buffer and state. - """ - bisondynlib_reset() - - def openCurrentLib(self): - """ - Tests if library exists and is current. If not, builds a fresh one. - - Opens the library and imports the parser entry point. - """ - parser = self.parser - verbose = parser.verbose - - if verbose: - distutils.log.set_verbosity(1) - - if not os.path.isfile(self.libFilename_py): - self.buildLib() - - self.openLib() - - # hash our parser spec, compare to hash val stored in lib - libHash = PyString_FromString(self.libHash) - if self.parserHash != libHash: - if verbose: - print "Hash discrepancy, need to rebuild bison lib" - print " current parser class: %s" % self.parserHash - print " bison library: %s" % libHash - self.closeLib() - self.buildLib() - self.openLib() - else: - if verbose: - print "Hashes match, no need to rebuild bison engine lib" - - def openLib(self): - """ - Loads the parser engine's dynamic library, and extracts the following - symbols: - - - void *do_parse() (runs parser) - - char *parserHash (contains hash of python parser rules) - - Returns lib handle, plus pointer to do_parse() function, as long ints - (which later need to be cast to pointers) - - Important note -this is totally linux-specific. - If you want windows support, you'll have to modify these funcs to - use glib instead (or create windows equivalents), in which case I'd - greatly appreciate you sending me a patch. - """ - cdef char *libFilename - cdef char *err - cdef void *handle - - # convert python filename string to c string - libFilename = PyString_AsString(self.libFilename_py) - - parser = self.parser - - if parser.verbose: - print 'Opening library %s' % self.libFilename_py - handle = bisondynlib_open(libFilename) - self.libHandle = handle - err = bisondynlib_err() - if err: - printf('ParserEngine.openLib: error "%s"\n', err) - return - - # extract symbols - self.libHash = bisondynlib_lookup_hash(handle) - - if parser.verbose: - print 'Successfully loaded library' - - def generate_exception_handler(self): - s = '' - - s += ' {\n' - s += ' PyObject* obj = PyErr_Occurred();\n' - s += ' if (obj) {\n' - s += ' //yyerror("exception raised");\n' - s += ' YYERROR;\n' - s += ' }\n' - s += ' }\n' - - return s - - def buildLib(self): - """ - Creates the parser engine lib - - This consists of: - 1. Ripping the tokens list, precedences, start target, handler docstrings - and lex script from this Parser instance's attribs and methods - 2. Creating bison and lex files - 3. Compiling bison/lex files to C - 4. Compiling the C files, and link into a dynamic lib - """ - - # ------------------------------------------------- - # rip the pertinent grammar specs from parser class - parser = self.parser - - # get target handler methods, in the order of appearance in the - # source file. - attribs = dir(parser) - gHandlers = [] - - for a in attribs: - if a.startswith('on_'): - method = getattr(parser, a) - gHandlers.append(method) - - gHandlers.sort(cmpLines) - - # get start symbol, tokens, precedences, lex script - gStart = parser.start - gTokens = parser.tokens - gPrecedences = parser.precedences - gLex = parser.lexscript - - buildDirectory = parser.buildDirectory - - # ------------------------------------------------ - # now, can generate the grammar file - if os.path.isfile(buildDirectory + parser.bisonFile): - os.unlink(buildDirectory + parser.bisonFile) - - if parser.verbose: - print 'generating bison file:', buildDirectory + parser.bisonFile - - f = open(buildDirectory + parser.bisonFile, "w") - write = f.write - #writelines = f.writelines - - # grammar file prologue - write('\n'.join([ - '%code top {', - '', - '#include "Python.h"', - 'extern FILE *yyin;', - #'extern int yylineno;' - 'extern char *yytext;', - '#define YYSTYPE void*', - #'extern void *py_callback(void *, char *, int, void*, ...);', - 'void *(*py_callback)(void *, char *, int, int, ...);', - 'void (*py_input)(void *, char *, int *, int);', - 'void *py_parser;', - 'char *rules_hash = "%s";' % self.parserHash, - '#define YYERROR_VERBOSE 1', - '', - '}', - '', - '%code requires {', - '', - '#define YYLTYPE YYLTYPE', - 'typedef struct YYLTYPE', - '{', - ' int first_line;', - ' int first_column;', - ' int last_line;', - ' int last_column;', - ' char *filename;', - '} YYLTYPE;', - #'', - #'YYLTYPE yylloc; /* location data */' - '', - '}', - '', - '%locations', - '', - ])) - - # write out tokens and start target dec - write('%%token %s\n\n' % ' '.join(gTokens)) - write('%%start %s\n\n' % gStart) - - # write out precedences - for p in gPrecedences: - write("%%%s %s\n" % (p[0], " ".join(p[1]))) - - write("\n\n%%\n\n") - - # carve up docstrings - rules = [] - for h in gHandlers: - - doc = h.__doc__.strip() - - # added by Eugene Oden - #target, options = doc.split(":") - doc = re.sub(unquoted % ";", "", doc) - - #print "---------------------" - - s = re.split(unquoted % ":", doc) - #print "s=%s" % s - - target, options = s - target = target.strip() - - options = options.strip() - tmp = [] - - #print "options = %s" % repr(options) - #opts = options.split("|") - ##print "opts = %s" % repr(opts) - r = unquoted % r"\|" - #print "r = <%s>" % r - opts1 = re.split(r, " " + options) - #print "opts1 = %s" % repr(opts1) - - for o in opts1: - o = o.strip() - - tmp.append(reSpaces.split(o)) - options = tmp - - rules.append((target, options)) - - # and render rules to grammar file - for rule in rules: - try: - write("%s\n : " % rule[0]) - options = [] - idx = 0 - for option in rule[1]: - nterms = len(option) - if nterms == 1 and option[0] == '': - nterms = 0 - option = [] - action = '\n {\n' - if 'error' in option: - action = action + " yyerrok;\n" - action = action + ' $$ = (*py_callback)(\n py_parser, "%s", %s, %%s' % \ - (rule[0], idx) # note we're deferring the substitution of 'nterms' (last arg) - args = [] - i = -1 - - if nterms == 0: - args.append('NULL') - else: - for i in range(nterms): - if option[i] == '%prec': - i = i - 1 - break # hack for rules using '%prec' - o = option[i].replace('"', '\\"') - args.append('"%s", $%d' % (o, i+1)) - - # now, we have the correct terms count - action = action % (i + 1) - - # assemble the full rule + action, add to list - action = action + ",\n " - action = action + ",\n ".join(args) + "\n );\n" - - if 'error' in option: - action = action + " PyObject_SetAttrString(py_parser, \"last_error\", Py_None);\n" - action = action + " Py_INCREF(Py_None);\n" - action = action + " yyclearin;\n" - - action = action + self.generate_exception_handler() - - action = action + ' }\n' - - options.append(" ".join(option) + action) - idx = idx + 1 - write(" | ".join(options) + " ;\n\n") - except: - traceback.print_exc() - - write('\n\n%%\n\n') - - # now generate C code - epilogue = '\n'.join([ - 'void do_parse(void *parser1,', - ' void *(*cb)(void *, char *, int, int, ...),', - ' void (*in)(void *, char*, int *, int),', - ' int debug', - ' )', - '{', - ' py_callback = cb;', - ' py_input = in;', - ' py_parser = parser1;', - ' yydebug = debug;', - ' yyparse();', - '}', - '', - 'int yyerror(char *msg)', - '{', - ' PyObject *fn = PyObject_GetAttrString((PyObject *)py_parser,', - ' "report_syntax_error");', - ' if (!fn)', - ' return 1;', - '', - ' PyObject *args;', - ' args = Py_BuildValue("(s,s,i,i,i,i)", msg, yytext,', - ' yylloc.first_line, yylloc.first_column,', - ' yylloc.last_line, yylloc.last_column);', - '', - ' if (!args)', - ' return 1;', - #'', - #' fprintf(stderr, "%d.%d-%d.%d: error: \'%s\' before \'%s\'.",', - #' yylloc.first_line, yylloc.first_column,', - #' yylloc.last_line, yylloc.last_column, msg, yytext);', - '', - ' PyObject *res = PyObject_CallObject(fn, args);', - ' Py_DECREF(args);', - '', - ' if (!res)', - ' return 1;', - '', - ' Py_DECREF(res);', - ' return 0;', - '}', - ]) + '\n' - write(epilogue) - - # done with grammar file - f.close() - - # ----------------------------------------------- - # now generate the lex script - if os.path.isfile(buildDirectory + parser.flexFile): - os.unlink(buildDirectory + parser.flexFile) - - lexLines = gLex.split("\n") - tmp = [] - for line in lexLines: - tmp.append(line.strip()) - f = open(buildDirectory + parser.flexFile, 'w') - f.write('\n'.join(tmp) + '\n') - f.close() - - # create and set up a compiler object - env = distutils.ccompiler.new_compiler(verbose=parser.verbose) - env.set_include_dirs([distutils.sysconfig.get_python_inc()]) - - # ----------------------------------------- - # Now run bison on the grammar file - #os.system('bison -d tmp.y') - bisonCmd = parser.bisonCmd + [buildDirectory + parser.bisonFile] - - if parser.verbose: - print 'bison cmd:', ' '.join(bisonCmd) - - env.spawn(bisonCmd) - - if parser.verbose: - print 'renaming bison output files' - print '%s => %s%s' % (parser.bisonCFile, buildDirectory, - parser.bisonCFile1) - print '%s => %s%s' % (parser.bisonHFile, buildDirectory, - parser.bisonHFile1) - - if os.path.isfile(buildDirectory + parser.bisonCFile1): - os.unlink(buildDirectory + parser.bisonCFile1) - - shutil.copy(parser.bisonCFile, buildDirectory + parser.bisonCFile1) - - if os.path.isfile(buildDirectory + parser.bisonHFile1): - os.unlink(buildDirectory + parser.bisonHFile1) - - shutil.copy(parser.bisonHFile, buildDirectory + parser.bisonHFile1) - - # ----------------------------------------- - # Now run lex on the lex file - #os.system('lex tmp.l') - flexCmd = parser.flexCmd + [buildDirectory + parser.flexFile] - - if parser.verbose: - print 'flex cmd:', ' '.join(flexCmd) - - env.spawn(flexCmd) - - if os.path.isfile(buildDirectory + parser.flexCFile1): - os.unlink(buildDirectory + parser.flexCFile1) - - if parser.verbose: - print '%s => %s%s' % (parser.flexCFile, buildDirectory, - parser.flexCFile1) - - shutil.copy(parser.flexCFile, buildDirectory + parser.flexCFile1) - - # ----------------------------------------- - # Now compile the files into a shared lib - - # compile bison and lex c sources - #bisonObj = env.compile([parser.bisonCFile1]) - #lexObj = env.compile([parser.flexCFile1]) - - #cl /DWIN32 /G4 /Gs /Oit /MT /nologo /W3 /WX bisondynlib-win32.c /Id:\python23\include - #cc.compile(['bisondynlib-win32.c'], - # extra_preargs=['/DWIN32', '/G4', '/Gs', '/Oit', '/MT', '/nologo', '/W3', '/WX', '/Id:\python23\include']) - - # link 'em into a shared lib - objs = env.compile([buildDirectory + parser.bisonCFile1, - buildDirectory + parser.flexCFile1], - extra_preargs=parser.cflags_pre, - extra_postargs=parser.cflags_post, - debug=parser.debugSymbols) - - libFileName = buildDirectory + parser.bisonEngineLibName \ - + imp.get_suffixes()[0][0] - - if os.path.isfile(libFileName+".bak"): - os.unlink(libFileName+".bak") - - if os.path.isfile(libFileName): - os.rename(libFileName, libFileName+".bak") - - if parser.verbose: - print 'linking: %s => %s' % (', '.join(objs), libFileName) - - if sys.platform.startswith('darwin'): - # on OSX, ld throws undefined symbol for shared library references - # however, we would like to link against libpython dynamically, so that - # the built .so will not depend on which python interpreter it runs on - env.linker_so += ['-undefined', 'dynamic_lookup'] - - env.link_shared_object(objs, libFileName) - - #cdef char *incdir - #incdir = PyString_AsString(get_python_inc()) - #bisondynlib_build(self.libFilename_py, incdir) - - # -------------------------------------------- - # clean up, if we succeeded - hitlist = objs[:] - hitlist.append("tmp.output") - - if os.path.isfile(libFileName): - for name in ['bisonFile', 'bisonCFile', 'bisonHFile', - 'bisonCFile1', 'bisonHFile1', 'flexFile', - 'flexCFile', 'flexCFile1', - ] + objs: - if hasattr(parser, name): - fname = buildDirectory + getattr(parser, name) - else: - fname = None - #print "want to delete %s" % fname - if fname and os.path.isfile(fname): - hitlist.append(fname) - - if not parser.keepfiles: - for f in hitlist: - try: - os.unlink(f) - except: - print "Warning: failed to delete temporary file %s" % f - - if parser.verbose: - print 'deleting temporary bison output files:' - - for f in [parser.bisonCFile, parser.bisonHFile, parser.flexCFile]: - if parser.verbose: - print 'rm %s' % f - - if os.path.isfile(f): - os.unlink(f) - - def closeLib(self): - """ - Does the necessary cleanups and closes the parser library - """ - bisondynlib_close(self.libHandle) - - def runEngine(self, debug=0): - """ - Runs the binary parser engine, as loaded from the lib - """ - cdef void *handle - - cdef void *cbvoid - cdef void *invoid - - handle = self.libHandle - parser = self.parser - - cbvoid = py_callback - invoid = py_input - - return bisondynlib_run(handle, parser, cbvoid, invoid, debug) - - def __del__(self): - """ - Clean up and bail - """ - self.closeLib() - - -def cmpLines(meth1, meth2): - """ - Used as a sort() argument for sorting parse target handler methods by - the order of their declaration in their source file. - """ - try: - line1 = meth1.func_code.co_firstlineno - line2 = meth2.func_code.co_firstlineno - except: - line1 = meth1.__init__.func_code.co_firstlineno - line2 = meth2.__init__.func_code.co_firstlineno - - return cmp(line1, line2) - - -def hashParserObject(parser): - """ - Calculates an sha1 hex 'hash' of the lex script - and grammar rules in a parser class instance. - - This is based on the raw text of the lex script attribute, - and the grammar rule docstrings within the handler methods. - - Used to detect if someone has changed any grammar rules or - lex script, and therefore, whether a shared parser lib rebuild - is required. - """ - hasher = sha.new() - - # add the lex script - hasher.update(parser.lexscript) - - # add the tokens - - # workaround pyrex weirdness - tokens = list(parser.tokens) - hasher.update(",".join(list(parser.tokens))) - - # add the precedences - for direction, tokens in parser.precedences: - hasher.update(direction + "".join(tokens)) - - # extract the parser target handler names - handlerNames = dir(parser) - - #handlerNames = filter(lambda m: m.startswith('on_'), dir(parser)) - tmp = [] - for name in handlerNames: - if name.startswith('on_'): - tmp.append(name) - handlerNames = tmp - handlerNames.sort() - - # extract method objects, filter down to callables - #handlers = [getattr(parser, m) for m in handlerNames] - #handlers = filter(lambda h: callable(h), handlers) - tmp = [] - for m in handlerNames: - attr = getattr(parser, m) - if callable(attr): - tmp.append(attr) - handlers = tmp - - # now add in the methods' docstrings - for h in handlers: - docString = h.__doc__ - hasher.update(docString) - - # done - return hasher.hexdigest() diff --git a/src/python/__init__.py b/src/python/__init__.py deleted file mode 100644 index 7851b17..0000000 --- a/src/python/__init__.py +++ /dev/null @@ -1,352 +0,0 @@ -""" -Wrapper module for interfacing with Bison (yacc) - -Written April 2004 by David McNab -Copyright (c) 2004 by David McNab, all rights reserved. - -Released under the GNU General Public License, a copy of which should appear in -this distribution in the file called 'COPYING'. If this file is missing, then -you can obtain a copy of the GPL license document from the GNU website at -http://www.gnu.org. - -This software is released with no warranty whatsoever. Use it at your own -risk. - -If you wish to use this software in a commercial application, and wish to -depart from the GPL licensing requirements, please contact the author and apply -for a commercial license. -""" - -import sys -import traceback - -from bison_ import ParserEngine -from .node import BisonNode -from .convert import bisonToPython - -class BisonSyntaxError(Exception): - def __init__(self, msg, args=[]): - super(BisonSyntaxError, self).__init__(msg) - - if args: - self.first_line, self.first_col, self.last_line, self.last_col, \ - self.message, self.token_value = args - -class TimeoutError(Exception): - pass - - -class BisonParser(object): - """ - Base parser class - - You should subclass this, and provide a bunch of methods called - 'on_TargetName', where 'TargetName' is the name of each target in - your grammar (.y) file. - """ - # --------------------------------------- - # override these if you need to - - # Command and options for running yacc/bison, except for filename arg - bisonCmd = ['bison', '-d', '-v', '-t'] - - bisonFile = 'tmp.y' - bisonCFile = 'tmp.tab.c' - - # Name of header file generated by bison cmd. - bisonHFile = 'tmp.tab.h' - - # C output file from bison gets renamed to this. - bisonCFile1 = 'tmp.bison.c' - - # Bison-generated header file gets renamed to this. - bisonHFile1 = 'tokens.h' - - # command and options for running [f]lex, except for filename arg. - flexCmd = ['flex', ] - flexFile = 'tmp.l' - flexCFile = 'lex.yy.c' - - # C output file from flex gets renamed to this. - flexCFile1 = 'tmp.lex.c' - - # CFLAGS added before all command line arguments. - cflags_pre = ['-fPIC'] - - # CFLAGS added after all command line arguments. - cflags_post = ['-O3', '-g'] - - # Directory used to store the generated / compiled files. - buildDirectory = './' - - # Add debugging symbols to the binary files. - debugSymbols = 1 - - # Enable verbose debug message sent to stdout. - verbose = 0 - - # Timeout in seconds after which the parser is terminated. - # TODO: this is currently not implemented. - timeout = 1 - - # Default to sys.stdin. - file = None - - # Last parsed target, top of parse tree. - last = None - - # Enable this to keep all temporary engine build files. - keepfiles = 0 - - # Prefix of the shared object / dll file. Defaults to 'modulename-engine'. - # If the module is executed directly, "__main__" will be used (since that - # that is the "module name", in that case). - bisonEngineLibName = None - - # Class to use by default for creating new parse nodes. If set to None, - # BisonNode will be used. - default_node_class = BisonNode - - error_threshold = 10 - - def __init__(self, **kw): - """ - Abstract representation of parser - - Keyword arguments: - - read - a callable accepting an int arg (nbytes) and returning a string, - default is this class' read() method - - file - a file object, or string of a pathname to open as a file, defaults - to sys.stdin. Note that you can leave this blank, and pass a file keyword - argument to the .run() method. - - verbose - set to 1 to enable verbose output messages, default 0 - - keepfiles - if non-zero, keeps any files generated in the - course of building the parser engine; by default, all these - files get deleted upon a successful engine build - - defaultNodeClass - the class to use for creating parse nodes, default - is self.defaultNodeClass (in this base class, BisonNode) - """ - # setup - read = kw.get('read', None) - if read: - self.read = read - - fileobj = kw.get('file', None) - if fileobj: - if isinstance(fileobj, str): - try: - fileobj = open(fileobj, 'rb') - except: - raise Exception('Cannot open input file %s' % fileobj) - self.file = fileobj - else: - self.file = sys.stdin - - nodeClass = kw.get('defaultNodeClass', None) - if nodeClass: - self.defaultNodeClass = nodeClass - - self.verbose = kw.get('verbose', 0) - - if kw.has_key('keepfiles'): - self.keepfiles = kw['keepfiles'] - - # if engine lib name not declared, invent ont - if not self.bisonEngineLibName: - self.bisonEngineLibName = self.__class__.__module__ + '-parser' - - # get an engine - self.engine = ParserEngine(self) - - def __getitem__(self, idx): - return self.last[idx] - - def _handle(self, targetname, option, names, values): - """ - Callback which receives a target from parser, as a targetname - and list of term names and values. - - Tries to dispatch to on_TargetName() methods if they exist, - otherwise wraps the target in a BisonNode object - """ - handler = getattr(self, 'on_' + targetname, None) - - if handler: - if self.verbose: - try: - hdlrline = handler.func_code.co_firstlineno - except: - hdlrline = handler.__init__.func_code.co_firstlineno - - print 'BisonParser._handle: call handler at line %s with: %s' \ - % (hdlrline, str((targetname, option, names, values))) - - self.last = handler(target=targetname, option=option, names=names, - values=values) - - #if self.verbose: - # print 'handler for %s returned %s' \ - # % (targetname, repr(self.last)) - else: - if self.verbose: - print 'no handler for %s, using default' % targetname - - cls = self.default_node_class - self.last = cls(target=targetname, option=option, names=names, - values=values) - - # assumedly the last thing parsed is at the top of the tree - return self.last - - def handle_timeout(self, signum, frame): - raise TimeoutError('Computation exceeded timeout limit.') - - def reset(self): - self.engine.reset() - - def run(self, **kw): - """ - Runs the parser, and returns the top-most parse target. - - Keywords: - - file - either a string, comprising a file to open and read input from, or - a Python file object - - debug - enables garrulous parser debugging output, default 0 - """ - if self.verbose: - print 'Parser.run: calling engine' - - # grab keywords - fileobj = kw.get('file', self.file) - if isinstance(fileobj, str): - filename = fileobj - try: - fileobj = open(fileobj, 'rb') - except: - raise Exception('Cannot open input file "%s"' % fileobj) - else: - filename = None - fileobj = None - - read = kw.get('read', self.read) - - debug = kw.get('debug', 0) - - # back up existing attribs - oldfile = self.file - oldread = self.read - - # plug in new ones, if given - if fileobj: - self.file = fileobj - if read: - self.read = read - - if self.verbose and self.file.closed: - print 'Parser.run(): self.file', self.file, 'is closed' - - error_count = 0 - - # TODO: add option to fail on first error. - while not self.file.closed: - # do the parsing job, spew if error - self.last = None - self.engine.reset() - - try: - self.engine.runEngine(debug) - except Exception as e: - error_count += 1 - - if error_count > self.error_threshold: - raise - - self.report_last_error(filename, e) - - if self.verbose: - print 'Parser.run: back from engine' - - if hasattr(self, 'hook_run'): - self.last = self.hook_run(filename, self.last) - - if self.verbose and not self.file.closed: - print 'last:', self.last - - if self.verbose: - print 'last:', self.last - - # restore old values - self.file = oldfile - self.read = oldread - - if self.verbose: - print '------------------ result=', self.last - - # TODO: return last result (see while loop): - # return self.last[:-1] - return self.last - - def read(self, nbytes): - """ - Override this in your subclass, if you desire. - - Arguments: - - nbytes - the maximum length of the string which you may return. - DO NOT return a string longer than this, or else Bad Things will - happen. - """ - # default to stdin - if self.verbose: - print 'Parser.read: want %s bytes' % nbytes - - bytes = self.file.readline(nbytes) - - if self.verbose: - print 'Parser.read: got %s bytes' % len(bytes) - - return bytes - - def report_last_error(self, filename, error): - """ - Report a raised exception. Depending on the mode in which the parser is - running, it will: - - - write a verbose message to stderr (verbose=True; interactive=True). - The written error message will include the type, value and traceback - of the raised exception. - - - write a minimal message to stderr (verbose=False; interactive=True). - The written error message will only include the type and value of - the raised exception. - - """ - - #if filename != None: - # msg = '%s:%d: "%s" near "%s"' \ - # % ((filename,) + error) - - # if not self.interactive: - # raise BisonSyntaxError(msg) - - # print >>sys.stderr, msg - #elif hasattr(error, '__getitem__') and isinstance(error[0], int): - # msg = 'Line %d: "%s" near "%s"' % error - - # if not self.interactive: - # raise BisonSyntaxError(msg) - - # print >>sys.stderr, msg - #else: - if not self.interactive: - raise - - if self.verbose: - traceback.print_exc() - - print 'ERROR:', error - - def report_syntax_error(self, msg, yytext, first_line, first_col, - last_line, last_col): - yytext = yytext.replace('\n', '\\n') - args = (first_line, first_col, last_line, last_col, msg, yytext) - raise BisonSyntaxError('%d.%d-%d.%d: "%s" near "%s".' % args, args) diff --git a/src/python/convert.py b/src/python/convert.py deleted file mode 100644 index 21b295f..0000000 --- a/src/python/convert.py +++ /dev/null @@ -1,377 +0,0 @@ -""" -Module for converting a bison file to a PyBison-python file. - -Written April 2004 by David McNab -Copyright (c) 2004 by David McNab, all rights reserved. - -Released under the GNU General Public License, a copy of which should appear in -this distribution in the file called 'COPYING'. If this file is missing, then -you can obtain a copy of the GPL license document from the GNU website at -http://www.gnu.org. - -This software is released with no warranty whatsoever. Use it at your own -risk. - -If you wish to use this software in a commercial application, and wish to -depart from the GPL licensing requirements, please contact the author and apply -for a commercial license. -""" -import re -import os - -from bison_ import unquoted - - -reSpaces = re.compile('\\s+') - - -def bisonToPython(bisonfileName, lexfileName, pyfileName, generateClasses=0): - """ - Rips the rules, tokens and precedences from a bison file, and the verbatim - text from a lex file and generates a boilerplate python file containing a - Parser class with handler methods and grammar attributes. - - Arguments: - * bisonfileName - name of input bison script - * lexfileName - name of input flex script - * pyfileName - name of output python file - * generateClasses - flag - default 0 - if 1, causes a unique class to - be defined for each parse target, and for the corresponding target - handler method in the main Parser class to use this class when creating - the node. - """ - # try to create output file - try: - pyfile = file(pyfileName, 'w') - except: - raise Exception('Cannot create output file "%s"' % pyfileName) - - # try to open/read the bison file - try: - rawBison = file(bisonfileName).read() - except: - raise Exception('Cannot open bison file "%s"' % bisonfileName) - - # try to open/read the lex file - try: - rawLex = file(lexfileName).read() - except: - raise Exception('Cannot open lex file %s' % lexfileName) - - # break up into the three '%%'-separated sections - try: - prologue, rulesRaw, epilogue = rawBison.split('\n%%\n') - except: - raise Exception( - 'File %s is not a properly formatted bison file' - ' (needs 3 sections separated by %%%%' % (bisonfileName) - ) - - # -------------------------------------- - # process prologue - - prologue = prologue.split('%}')[-1].strip() # ditch the C code - prologue = re.sub('\\n([\t ]+)', ' ', prologue) # join broken lines - - #prologueLines = [line.strip() for line in prologue.split('\n')] - lines = prologue.split('\n') - tmp = [] - - for line in lines: - tmp.append(line.strip()) - - prologueLines = tmp - - prologueLines = filter(None, prologueLines) - tokens = [] - precRules = [] - - for line in prologueLines: - words = reSpaces.split(line) - kwd = words[0] - args = words[1:] - - if kwd == '%token': - tokens.extend(args) - elif kwd in ['%left', '%right', '%nonassoc']: - precRules.append((kwd, args)) - elif kwd == '%start': - startTarget = args[0] - - # ------------------------------------------------------------- - # process rules - rulesRaw = re.sub('\\n([\t ]+)', ' ', rulesRaw) # join broken lines - rulesLines = filter(lambda s: s != '', map(str.strip, re.split(unquoted % ';', rulesRaw))) - - rules = [] - for rule in rulesLines: - #print '--' - #print repr(rule) - - #tgt, terms = rule.split(':') - try: - tgt, terms = re.split(unquoted % ':', rule) - except ValueError: - print 'Error in rule: %s' % rule - raise - - tgt, terms = tgt.strip(), terms.strip() - - #terms = [t.strip() for t in terms.split('|')] - #terms = [reSpaces.split(t) for t in terms] - - tmp = [] - #for t in terms.split('|'): - for t in re.split(unquoted % r'\|', terms): - - t = t.strip() - tmp.append(reSpaces.split(t)) - terms = tmp - - rules.append((tgt, terms)) - - # now we have our rulebase, we can churn out our skeleton Python file - pyfile.write('\n'.join([ - '#!/usr/bin/env python', - '', - '"""', - 'PyBison file automatically generated from grammar file %s' % bisonfileName, - 'You can edit this module, or import it and subclass the Parser class', - '"""', - '', - 'import sys', - '', - 'from bison import BisonParser, BisonNode, BisonSyntaxError', - '', - 'bisonFile = \'%s\' # original bison file' % bisonfileName, - 'lexFile = \'%s\' # original flex file' % lexfileName, - '\n', - ])) - - # if generating target classes - if generateClasses: - # create a base class for all nodes - pyfile.write("\n".join([ - 'class ParseNode(BisonNode):', - ' """', - ' This is the base class from which all your', - ' parse nodes are derived.', - ' Add methods to this class as you need them', - ' """', - ' def __init__(self, **kw):', - ' BisonNode.__init__(self, **kw)', - '', - ' def __str__(self):', - ' """Customise as needed"""', - ' return \'<%s instance at 0x%x>\' % (self.__class__.__name__, hash(self))', - '', - ' def __repr__(self):', - ' """Customise as needed"""', - ' return str(self)', - '', - ' def dump(self, indent=0):', - ' """', - ' Dump out human-readable, indented parse tree', - ' Customise as needed - here, or in the node-specific subclasses', - ' """', - ' BisonNode.dump(self, indent) # alter as needed', - '\n', - '# ------------------------------------------------------', - '# Define a node class for each grammar target', - '# ------------------------------------------------------', - '\n', - ])) - - # now spit out class decs for every parse target - for target, options in rules: - tmp = map(' '.join, options) - - # totally self-indulgent grammatical pedantry - if target[0].lower() in ['a', 'e', 'i', 'o', 'u']: - plural = 'n' - else: - plural = '' - - pyfile.write("\n".join([ - 'class %s_Node(ParseNode):' % target, - ' """', - ' Holds a%s "%s" parse target and its components.' % (plural, target), - ' """', - ' def __init__(self, **kw):', - ' ParseNode.__init__(self, **kw)', - '', - ' def dump(self, indent=0):', - ' ParseNode.dump(self, indent)', - '\n', - ])) - - # start churning out the class dec - pyfile.write('\n'.join([ - 'class Parser(BisonParser):', - ' """', - ' bison Parser class generated automatically by bison2py from the', - ' grammar file "%s" and lex file "%s"' % (bisonfileName, lexfileName), - '', - ' You may (and probably should) edit the methods in this class.', - ' You can freely edit the rules (in the method docstrings), the', - ' tokens list, the start symbol, and the precedences.', - '', - ' Each time this class is instantiated, a hashing technique in the', - ' base class detects if you have altered any of the rules. If any', - ' changes are detected, a new dynamic lib for the parser engine', - ' will be generated automatically.', - ' """', - '\n', - ])) - - # add the default node class - if not generateClasses: - pyfile.write('\n'.join([ - ' # -------------------------------------------------', - ' # Default class to use for creating new parse nodes', - ' # -------------------------------------------------', - ' defaultNodeClass = BisonNode', - '\n', - ])) - - # add the name of the dynamic library we need - libfileName = os.path.splitext(os.path.split(pyfileName)[1])[0] \ - + '-engine' - - pyfile.write('\n'.join([ - ' # --------------------------------------------', - ' # basename of binary parser engine dynamic lib', - ' # --------------------------------------------', - ' bisonEngineLibName = \'%s\'' % libfileName, - '\n', - ])) - - # add the tokens - #pyfile.write(' tokens = (%s,)\n\n' % ', '.join([''%s'' % t for t in tokens])) - #toks = ', '.join(tokens) - - pyfile.write(' # ----------------------------------------------------------------\n') - pyfile.write(' # lexer tokens - these must match those in your lex script (below)\n') - pyfile.write(' # ----------------------------------------------------------------\n') - pyfile.write(' tokens = %s\n\n' % tmp) - - # add the precedences - pyfile.write(' # ------------------------------\n') - pyfile.write(' # precedences\n') - pyfile.write(' # ------------------------------\n') - pyfile.write(' precedences = (\n') - for prec in precRules: - #precline = ', '.join(prec[1]) - pyfile.write(' (\'%s\', %s,),\n' % ( - prec[0][1:], # left/right/nonassoc, quote-wrapped, no '%s' - tmp, # quote-wrapped targets - ) - ) - pyfile.write(' )\n\n'), - - pyfile.write('\n'.join([ - ' # ---------------------------------------------------------------', - ' # Declare the start target here (by name)', - ' # ---------------------------------------------------------------', - ' start = \'%s\'' % startTarget, - '\n', - ])) - - # now the interesting bit - write the rule handler methods - pyfile.write('\n'.join([ - ' # ---------------------------------------------------------------', - ' # These methods are the python handlers for the bison targets.', - ' # (which get called by the bison code each time the corresponding', - ' # parse target is unambiguously reached)', - ' #', - ' # WARNING - don\'t touch the method docstrings unless you know what', - ' # you are doing - they are in bison rule syntax, and are passed', - ' # verbatim to bison to build the parser engine library.', - ' # ---------------------------------------------------------------', - '\n', - ])) - - for target, options in rules: - tmp = map(' '.join, options) - - if generateClasses: - nodeClassName = target + '_Node' - else: - nodeClassName = 'self.defaultNodeClass' - - pyfile.write('\n'.join([ - ' def on_%s(self, target, option, names, values):' % target, - ' """', - ' %s' % target, - ' : ' + '\n | '.join(tmp), - ' """', - ' return %s(' % nodeClassName, - ' target=\'%s\',' % target, - ' option=option,', - ' names=names,', - ' values=values)', - '\n', - ])) - - # now the ugly bit - add the raw lex script - pyfile.write('\n'.join([ - ' # -----------------------------------------', - ' # raw lex script, verbatim here', - ' # -----------------------------------------', - ' lexscript = r"""', - rawLex, - ' """', - ' # -----------------------------------------', - ' # end raw lex script', - ' # -----------------------------------------', - '', - '', - ])) - - # and now, create a main for testing which either reads stdin, or a filename arg - pyfile.write('\n'.join([ - 'def usage():', - ' print \'%s: PyBison parser derived from %s and %s\' % (sys.argv[0], bisonFile, lexFile)', - ' print \'Usage: %s [-k] [-v] [-d] [filename]\' % sys.argv[0]', - ' print \' -k Keep temporary files used in building parse engine lib\'', - ' print \' -v Enable verbose messages while parser is running\'', - ' print \' -d Enable garrulous debug messages from parser engine\'', - ' print \' filename path of a file to parse, defaults to stdin\'', - '', - 'def main(*args):', - ' """', - ' Unit-testing func', - ' """', - '', - ' keepfiles = 0', - ' verbose = 0', - ' debug = 0', - ' filename = None', - '', - ' for s in [\'-h\', \'-help\', \'--h\', \'--help\', \'-?\']:', - ' if s in args:', - ' usage()', - ' sys.exit(0)', - '', - ' if len(args) > 0:', - ' if \'-k\' in args:', - ' keepfiles = 1', - ' args.remove(\'-k\')', - ' if \'-v\' in args:', - ' verbose = 1', - ' args.remove(\'-v\')', - ' if \'-d\' in args:', - ' debug = 1', - ' args.remove(\'-d\')', - ' if len(args) > 0:', - ' filename = args[0]', - '', - ' p = Parser(verbose=verbose, keepfiles=keepfiles)', - ' tree = p.run(file=filename, debug=debug)', - ' return tree', - '', - 'if __name__ == \'__main__\':', - ' main(*(sys.argv[1:]))', - '', - '', - ])) diff --git a/src/python/node.py b/src/python/node.py deleted file mode 100644 index 62b2210..0000000 --- a/src/python/node.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -Generic module for wrapping parse targets. - -Written April 2004 by David McNab -Copyright (c) 2004 by David McNab, all rights reserved. - -Released under the GNU General Public License, a copy of which should appear in -this distribution in the file called 'COPYING'. If this file is missing, then -you can obtain a copy of the GPL license document from the GNU website at -http://www.gnu.org. - -This software is released with no warranty whatsoever. Use it at your own -risk. - -If you wish to use this software in a commercial application, and wish to -depart from the GPL licensing requirements, please contact the author and apply -for a commercial license. -""" -import xml - -class BisonNode: - """ - Generic class for wrapping parse targets. - - Arguments: - - targetname - the name of the parse target being wrapped. - - items - optional - a list of items comprising a clause - in the target rule - typically this will only be used - by the PyBison callback mechanism. - - Keywords: - - any keywords you want (except 'items'), with any type of value. - keywords will be stored as attributes in the constructed object. - """ - - def __init__(self, **kw): - - self.__dict__.update(kw) - - # ensure some default attribs - self.target = kw.get('target', 'UnnamedTarget') - self.names = kw.get('names', []) - self.values = kw.get('values', []) - self.option = kw.get('option', 0) - - # mirror this dict to simplify dumping - self.kw = kw - - def __str__(self): - return '' % self.target - - def __repr__(self): - return str(self) - - def __getitem__(self, item): - """ - Retrieves the ith value from this node, or child nodes - - If the subscript is a single number, it will be used as an - index into this node's children list. - - If the subscript is a list or tuple, we recursively fetch - the item by using the first element as an index into this - node's children, the second element as an index into that - child node's children, and so on - """ - if type(item) in [type(0), type(0L)]: - return self.values[item] - elif type(item) in [type(()), type([])]: - if len(item) == 0: - return self - return self.values[item[0]][item[1:]] - else: - raise TypeError('Can only index %s objects with an int or a' - ' list/tuple' % self.__class.__name__) - - def __len__(self): - - return len(self.values) - - def __getslice__(self, fromidx, toidx): - return self.values[fromidx:toidx] - - def __iter__(self): - return iter(self.values) - - def dump(self, indent=0): - """ - For debugging - prints a recursive dump of a parse tree node and its children - """ - specialAttribs = ['option', 'target', 'names', 'values'] - indents = ' ' * indent * 2 - #print "%s%s: %s %s" % (indents, self.target, self.option, self.names) - print '%s%s:' % (indents, self.target) - - for name, val in self.kw.items() + zip(self.names, self.values): - if name in specialAttribs or name.startswith('_'): - continue - - if isinstance(val, BisonNode): - val.dump(indent + 1) - else: - print indents + ' %s=%s' % (name, val) - - def toxml(self): - """ - Returns an xml serialisation of this node and its children, as a raw string - - Called on the toplevel node, the xml is a representation of the - entire parse tree. - """ - return self.toxmldoc().toxml() - - def toprettyxml(self, indent=' ', newl='\n', encoding=None): - """ - Returns a human-readable xml serialisation of this node and its - children. - """ - return self.toxmldoc().toprettyxml(indent=indent, - newl=newl, - encoding=encoding) - - def toxmldoc(self): - """ - Returns the node and its children as an xml.dom.minidom.Document - object. - """ - d = xml.dom.minidom.Document() - d.appendChild(self.toxmlelem(d)) - return d - - def toxmlelem(self, docobj): - """ - Returns a DOM Element object of this node and its children. - """ - specialAttribs = ['option', 'target', 'names', 'values'] - - # generate an xml element obj for this node - x = docobj.createElement(self.target) - - # set attribs - for name, val in self.kw.items(): - if name in ['names', 'values'] or name.startswith('_'): - continue - - x.setAttribute(name, str(val)) - #x.setAttribute('target', self.target) - #x.setAttribute('option', self.option) - - # and add the children - for name, val in zip(self.names, self.values): - if name in specialAttribs or name.startswith('_'): - continue - - if isinstance(val, BisonNode): - x.appendChild(val.toxmlelem(docobj)) - else: - sn = docobj.createElement(name) - sn.setAttribute('target', name) - tn = docobj.createTextNode(val) - sn.appendChild(tn) - x.appendChild(sn) - - # done - return x - - - diff --git a/src/python/xmlifier.py b/src/python/xmlifier.py deleted file mode 100644 index 1594b70..0000000 --- a/src/python/xmlifier.py +++ /dev/null @@ -1,136 +0,0 @@ -""" -Wrapper module for importing and exporting bison grammar from/to XML. - -Written April 2004 by David McNab -Copyright (c) 2004 by David McNab, all rights reserved. - -Released under the GNU General Public License, a copy of which should appear in -this distribution in the file called 'COPYING'. If this file is missing, then -you can obtain a copy of the GPL license document from the GNU website at -http://www.gnu.org. - -This software is released with no warranty whatsoever. Use it at your own -risk. - -If you wish to use this software in a commercial application, and wish to -depart from the GPL licensing requirements, please contact the author and apply -for a commercial license. -""" - -# TODO: use cElementTree instead of Python's xml module. -# TODO: test this module, since it is currently only moved to another file. - -import xml.dom -import xml.dom.minidom -import types - -class XMLifier(object): - - def __init__(self, parser): - self.parser = parser - - def toxml(self): - """ - Serialises the parse tree and returns it as a raw xml string - """ - return self.parser.last.toxml() - - def toxmldoc(self): - """ - Returns an xml.dom.minidom.Document object containing the parse tree - """ - return self.parser.last.toxmldoc() - - def toprettyxml(self): - """ - Returns a human-readable xml representation of the parse tree - """ - return self.parser.last.toprettyxml() - - def loadxml(self, raw, namespace=None): - """ - Loads a parse tree from raw xml text. - - Arguments: - - raw - string containing the raw xml - - namespace - a dict or module object, where the node classes required for - reconstituting the parse tree, can be found - - Returns: - - root node object of reconstituted parse tree - """ - doc = xml.dom.minidom.parseString(raw) - tree = self.loadxmldoc(doc, namespace) - return tree - - def loadxmldoc(self, xmldoc, namespace=None): - """ - Returns a reconstituted parse tree, loaded from an - xml.dom.minidom.Document instance - - Arguments: - - xmldoc - an xml.dom.minidom.Document instance - - namespace - a dict from which to find the classes needed - to translate the document into a tree of parse nodes - """ - return self.loadxmlobj(xmldoc.childNodes[0], namespace) - - def loadxmlobj(self, xmlobj, namespace=None): - """ - Returns a node object, being a parse tree, reconstituted from an - xml.dom.minidom.Element object - - Arguments: - - xmlobj - an xml.dom.minidom.Element instance - - namespace - a namespace from which the node classes - needed for reconstituting the tree, can be found - """ - # check on namespace - if type(namespace) is types.ModuleType: - namespace = namespace.__dict__ - elif namespace == None: - namespace = globals() - - objname = xmlobj.tagName - classname = objname + '_Node' - classobj = namespace.get(classname, None) - - namespacekeys = namespace.keys() - - # barf if node is not a known parse node or token - if (not classobj) and objname not in self.tokens: - raise Exception('Cannot reconstitute %s: can\'t find required' - ' node class or token %s' % (objname, classname)) - - if classobj: - nodeobj = classobj() - - # add the attribs - for k, v in xmlobj.attributes.items(): - setattr(nodeobj, k, v) - else: - nodeobj = None - - #print '----------------' - #print 'objname=%s' % repr(objname) - #print 'classname=%s' % repr(classname) - #print 'classobj=%s' % repr(classobj) - #print 'nodeobj=%s' % repr(nodeobj) - - # now add the children - for child in xmlobj.childNodes: - #print '%s attributes=%s' % (child, child.attributes.items()) - childname = child.attributes['target'].value - #print 'childname=%s' % childname - if childname + '_Node' in namespacekeys: - #print 'we have a node for class %s' % classname - childobj = self.loadxmlobj(child, namespace) - else: - # it's a token - childobj = child.childNodes[0].nodeValue - #print 'got token %s=%s' % (childname, childobj) - - nodeobj.names.append(childname) - nodeobj.values.append(childobj) - - return nodeobj From 4997521c99de1bba80b59cf9582a428d9101c6a5 Mon Sep 17 00:00:00 2001 From: peg626 Date: Thu, 9 Apr 2026 19:23:30 -0300 Subject: [PATCH 2/5] Delete utils directory --- utils/bison2py | 68 ----------------------------------------------- utils/bison2py.py | 67 ---------------------------------------------- 2 files changed, 135 deletions(-) delete mode 100644 utils/bison2py delete mode 100644 utils/bison2py.py diff --git a/utils/bison2py b/utils/bison2py deleted file mode 100644 index 3abce9d..0000000 --- a/utils/bison2py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python -""" -Utility which creates a boilerplate pybison-compatible -python file from a yacc file and lex file - -Run it with 2 arguments - filename.y and filename.l -Output is filename.py -""" - -import sys, re - -from bison import bisonToPython - -argv = sys.argv -argc = len(argv) -progname = argv[0] - -reSpaces = re.compile("\\s+") - -def usage(s=None): - """ - Display usage info and exit - """ - if s: - print progname+": "+s - - print "\n".join([ - "Usage: %s [-c] basefilename" % progname, - " or: %s [-c] grammarfile.y lexfile.l pyfile.py" % progname, - "(generates a boilerplate python file from a grammar and lex file)", - "The first form uses 'basefilename' as base for all files, so:", - " %s fred" % progname, - "is equivalent to:", - " %s fred.y fred.l fred.py" % progname, - '', - 'The "-c" argument causes the creation of a unique node class', - 'for each parse target - highly recommended for complex grammars', - ]) - - sys.exit(1) - -def main(): - """ - Command-line interface for bison2py - """ - global argc, argv - - if '-c' in argv: - generateClasses = 1 - argv.remove('-c') - argc = argc - 1 - else: - generateClasses = 0 - - if argc == 2: - basename = argv[1] - bisonfile = basename+".y" - lexfile = basename+".l" - pyfile = basename+".py" - elif argc == 4: - bisonfile, lexfile, pyfile = argv[1:4] - else: - usage("Bad argument count") - - bisonToPython(bisonfile, lexfile, pyfile, generateClasses) - -if __name__ == '__main__': - main() diff --git a/utils/bison2py.py b/utils/bison2py.py deleted file mode 100644 index 915a313..0000000 --- a/utils/bison2py.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python -""" -Utility which creates a boilerplate pybison-compatible -python file from a yacc file and lex file - -Run it with 2 arguments - filename.y and filename.l -Output is filename.py -""" -import sys - -from bison import bisonToPython - - -def usage(s=None): - """ - Display usage info and exit - """ - progname = sys.argv[0] - - if s: - print progname + ': ' + s - - print '\n'.join([ - 'Usage: %s [-c] basefilename' % progname, - ' or: %s [-c] grammarfile.y lexfile.l pyfile.py' % progname, - '(generates a boilerplate python file from a grammar and lex file)', - 'The first form uses "basefilename" as base for all files, so:', - ' %s fred' % progname, - 'is equivalent to:', - ' %s fred.y fred.l fred.py' % progname, - '', - 'The "-c" argument causes the creation of a unique node class', - 'for each parse target - highly recommended for complex grammars', - ]) - - sys.exit(1) - - -def main(): - """ - Command-line interface for bison2py - """ - argv = sys.argv - argc = len(argv) - - if '-c' in argv: - generateClasses = 1 - argv.remove('-c') - argc = argc - 1 - else: - generateClasses = 0 - - if argc == 2: - basename = argv[1] - bisonfile = basename + '.y' - lexfile = basename + '.l' - pyfile = basename + '.py' - elif argc == 4: - bisonfile, lexfile, pyfile = argv[1:4] - else: - usage('Bad argument count') - - bisonToPython(bisonfile, lexfile, pyfile, generateClasses) - - -if __name__ == '__main__': - main() From aa2d657bb692a888d322a747ec33238fc04f7773 Mon Sep 17 00:00:00 2001 From: peg626 Date: Thu, 9 Apr 2026 19:27:56 -0300 Subject: [PATCH 3/5] Create a --- docs/a | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/a diff --git a/docs/a b/docs/a new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/a @@ -0,0 +1 @@ + From 9e118352ddc9caf0aaadcb61fc7b996851be83e8 Mon Sep 17 00:00:00 2001 From: peg626 Date: Thu, 9 Apr 2026 19:28:40 -0300 Subject: [PATCH 4/5] Delete docs directory --- docs/a | 1 - 1 file changed, 1 deletion(-) delete mode 100644 docs/a diff --git a/docs/a b/docs/a deleted file mode 100644 index 8b13789..0000000 --- a/docs/a +++ /dev/null @@ -1 +0,0 @@ - From f5637f040023db4d66ee501de96a52e85c7c0e2f Mon Sep 17 00:00:00 2001 From: peg626 Date: Thu, 9 Apr 2026 19:33:27 -0300 Subject: [PATCH 5/5] Delete examples directory --- examples/C/README | 16 - examples/C/c.l | 189 ------ examples/C/c.y | 464 --------------- examples/calc/README | 7 - examples/calc/calc.py | 142 ----- examples/calc/run.py | 10 - examples/calc1/README | 30 - examples/calc1/calc1.py | 252 -------- examples/java/CREDITS | 12 - examples/java/HelloWorldApp.java | 13 - examples/java/README | 20 - examples/java/javaparser.l | 180 ------ examples/java/javaparser.y | 954 ------------------------------- examples/java/run.py | 38 -- examples/java/table.h | 68 --- examples/template/README | 15 - examples/template/template.py | 202 ------- 17 files changed, 2612 deletions(-) delete mode 100644 examples/C/README delete mode 100644 examples/C/c.l delete mode 100644 examples/C/c.y delete mode 100644 examples/calc/README delete mode 100755 examples/calc/calc.py delete mode 100755 examples/calc/run.py delete mode 100644 examples/calc1/README delete mode 100755 examples/calc1/calc1.py delete mode 100644 examples/java/CREDITS delete mode 100644 examples/java/HelloWorldApp.java delete mode 100644 examples/java/README delete mode 100644 examples/java/javaparser.l delete mode 100644 examples/java/javaparser.y delete mode 100755 examples/java/run.py delete mode 100644 examples/java/table.h delete mode 100644 examples/template/README delete mode 100755 examples/template/template.py diff --git a/examples/C/README b/examples/C/README deleted file mode 100644 index 298f95c..0000000 --- a/examples/C/README +++ /dev/null @@ -1,16 +0,0 @@ -This is a parser for ANSI C code. - -To generate your python parser, type: - bison2py c -or, if you want parse node classes to be generated as well: - bison2py c -c - -Either of those commands will generate an executable c.py file, -containing a Parser class, plus unit test code which by default -reads C source from stdin. - -Grammar and scanner scripts were sourced from: - - * http://www.lysator.liu.se/c/ANSI-C-grammar-y.html - * http://www.lysator.liu.se/c/ANSI-C-grammar-l.html - diff --git a/examples/C/c.l b/examples/C/c.l deleted file mode 100644 index 29310f4..0000000 --- a/examples/C/c.l +++ /dev/null @@ -1,189 +0,0 @@ -D [0-9] -L [a-zA-Z_] -H [a-fA-F0-9] -E [Ee][+-]?{D}+ -FS (f|F|l|L) -IS (u|U|l|L)* - - -%{ - -/* this scanner sourced from: http://www.lysator.liu.se/c/ANSI-C-grammar-l.html */ - -void count(); -int yylineno = 0; -#include -#include -#include "Python.h" -#define YYSTYPE void * -#include "tokens.h" -extern void *py_parser; -extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); -#define returntoken(tok) /*printf("%d=%s\n", tok, yytext);*/ yylval = PyString_FromString(strdup(yytext)); return (tok); -#define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } - -%} - - -%% -"/*" { comment(); } - -"auto" { count(); returntoken(AUTO); } -"break" { count(); returntoken(BREAK); } -"case" { count(); returntoken(CASE); } -"char" { count(); returntoken(CHAR); } -"const" { count(); returntoken(CONST); } -"continue" { count(); returntoken(CONTINUE); } -"default" { count(); returntoken(DEFAULT); } -"do" { count(); returntoken(DO); } -"double" { count(); returntoken(DOUBLE); } -"else" { count(); returntoken(ELSE); } -"enum" { count(); returntoken(ENUM); } -"extern" { count(); returntoken(EXTERN); } -"float" { count(); returntoken(FLOAT); } -"for" { count(); returntoken(FOR); } -"goto" { count(); returntoken(GOTO); } -"if" { count(); returntoken(IF); } -"int" { count(); returntoken(INT); } -"long" { count(); returntoken(LONG); } -"register" { count(); returntoken(REGISTER); } -"return" { count(); returntoken(RETURN); } -"short" { count(); returntoken(SHORT); } -"signed" { count(); returntoken(SIGNED); } -"sizeof" { count(); returntoken(SIZEOF); } -"static" { count(); returntoken(STATIC); } -"struct" { count(); returntoken(STRUCT); } -"switch" { count(); returntoken(SWITCH); } -"typedef" { count(); returntoken(TYPEDEF); } -"union" { count(); returntoken(UNION); } -"unsigned" { count(); returntoken(UNSIGNED); } -"void" { count(); returntoken(VOID); } -"volatile" { count(); returntoken(VOLATILE); } -"while" { count(); returntoken(WHILE); } - -{L}({L}|{D})* { count(); returntoken(check_type()); } - -0[xX]{H}+{IS}? { count(); returntoken(CONSTANT); } -0{D}+{IS}? { count(); returntoken(CONSTANT); } -{D}+{IS}? { count(); returntoken(CONSTANT); } -L?'(\\.|[^\\'])+' { count(); returntoken(CONSTANT); } - -{D}+{E}{FS}? { count(); returntoken(CONSTANT); } -{D}*"."{D}+({E})?{FS}? { count(); returntoken(CONSTANT); } -{D}+"."{D}*({E})?{FS}? { count(); returntoken(CONSTANT); } - -L?\"(\\.|[^\\"])*\" { count(); returntoken(STRING_LITERAL); } - -"..." { count(); returntoken(ELLIPSIS); } -">>=" { count(); returntoken(RIGHT_ASSIGN); } -"<<=" { count(); returntoken(LEFT_ASSIGN); } -"+=" { count(); returntoken(ADD_ASSIGN); } -"-=" { count(); returntoken(SUB_ASSIGN); } -"*=" { count(); returntoken(MUL_ASSIGN); } -"/=" { count(); returntoken(DIV_ASSIGN); } -"%=" { count(); returntoken(MOD_ASSIGN); } -"&=" { count(); returntoken(AND_ASSIGN); } -"^=" { count(); returntoken(XOR_ASSIGN); } -"|=" { count(); returntoken(OR_ASSIGN); } -">>" { count(); returntoken(RIGHT_OP); } -"<<" { count(); returntoken(LEFT_OP); } -"++" { count(); returntoken(INC_OP); } -"--" { count(); returntoken(DEC_OP); } -"->" { count(); returntoken(PTR_OP); } -"&&" { count(); returntoken(BOOL_AND_OP); } -"||" { count(); returntoken(BOOL_OR_OP); } -"<=" { count(); returntoken(LE_OP); } -">=" { count(); returntoken(GE_OP); } -"==" { count(); returntoken(EQ_OP); } -"!=" { count(); returntoken(NE_OP); } -";" { count(); returntoken(SEMICOLON); } -("{"|"<%") { count(); returntoken(LBRACE); } -("}"|"%>") { count(); returntoken(RBRACE); } -"," { count(); returntoken(COMMA); } -":" { count(); returntoken(COLON); } -"=" { count(); returntoken(ASSIGN); } -"(" { count(); returntoken(LPAREN); } -")" { count(); returntoken(RPAREN); } -("["|"<:") { count(); returntoken(LBRACKET); } -("]"|":>") { count(); returntoken(RBRACKET); } -"." { count(); returntoken(PERIOD); } -"&" { count(); returntoken(AND_OP); } -"!" { count(); returntoken(BANG); } -"~" { count(); returntoken(TILDE); } -"-" { count(); returntoken(MINUS); } -"+" { count(); returntoken(PLUS); } -"*" { count(); returntoken(STAR); } -"/" { count(); returntoken(SLASH); } -"%" { count(); returntoken(PERCENT); } -"<" { count(); returntoken(LT_OP); } -">" { count(); returntoken(GT_OP); } -"^" { count(); returntoken(CIRCUMFLEX); } -"|" { count(); returntoken(OR_OP); } -"?" { count(); returntoken(QUESTIONMARK); } - -[ \t\v\n\f] { count(); } -. { /* ignore bad characters */ } - -%% - -yywrap() -{ - return(1); -} - - -comment() -{ - char c, c1; - -loop: - while ((c = input()) != '*' && c != 0) - /*putchar(c)*/; - - if ((c1 = input()) != '/' && c != 0) - { - unput(c1); - goto loop; - } - - if (c != 0) - /*putchar(c1)*/; -} - - -int column = 0; - -void count() -{ - int i; - - for (i = 0; yytext[i] != '\0'; i++) - if (yytext[i] == '\n') - column = 0; - else if (yytext[i] == '\t') - column += 8 - (column % 8); - else - column++; - - /*ECHO*/; -} - - -int check_type() -{ -/* -* pseudo code --- this is what it should check -* -* if (yytext == type_name) -* return(TYPE_NAME); -* -* return(IDENTIFIER); -*/ - -/* -* it actually will only return IDENTIFIER -*/ - - return(IDENTIFIER); -} - diff --git a/examples/C/c.y b/examples/C/c.y deleted file mode 100644 index e00f765..0000000 --- a/examples/C/c.y +++ /dev/null @@ -1,464 +0,0 @@ -/* - * This grammar sourced from: - * http://www.lysator.liu.se/c/ANSI-C-grammar-y.html - */ - -%{ - -%} - -%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF -%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP -%token BOOL_AND_OP BOOL_OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN -%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN -%token XOR_ASSIGN OR_ASSIGN TYPE_NAME - -%token LPAREN RPAREN LBRACKET RBRACKET LBRACE RBRACE -%token PERIOD COMMA COLON SEMICOLON QUESTIONMARK -%token PLUS MINUS STAR SLASH ASSIGN AND_OP OR_OP -%token BANG TILDE PERCENT CIRCUMFLEX -%token GT_OP LT_OP - -%token TYPEDEF EXTERN STATIC AUTO REGISTER -%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID -%token STRUCT UNION ENUM ELLIPSIS - -%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN - - -%left COMMA -%right ASSIGN ADD_ASSIGN SUB_ASSIGN MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN -%right QUESTIONMARK COLON -%left BOOL_OR_OP -%left BOOL_AND_OP -%left OR_OP -%left CIRCUMFLEX -%left AND_OP -%left EQ_OP NE_OP -%left LT_OP GT_OP LE_OP GE_OP -%left LEFT_OP RIGHT_OP -%left PLUS MINUS -%left STAR SLASH PERCENT -%right NOT NEG -%right INC_OP SIZEOF DEC_OP -%left LBRACKET LPAREN PERIOD PTR_OP - - -%start translation_unit - -%% - -primary_expression - : IDENTIFIER - | CONSTANT - | STRING_LITERAL - | LPAREN expression RPAREN - ; - -postfix_expression - : primary_expression - | postfix_expression LBRACKET expression RBRACKET - | postfix_expression LPAREN RPAREN - | postfix_expression LPAREN argument_expression_list RPAREN - | postfix_expression PERIOD IDENTIFIER - | postfix_expression PTR_OP IDENTIFIER - | postfix_expression INC_OP - | postfix_expression DEC_OP - ; - -argument_expression_list - : assignment_expression - | argument_expression_list COMMA assignment_expression - ; - -unary_expression - : postfix_expression - | INC_OP unary_expression - | DEC_OP unary_expression - | unary_operator cast_expression - | SIZEOF unary_expression - | SIZEOF LPAREN type_name RPAREN - ; - -unary_operator - : AND_OP - | STAR - | PLUS - | MINUS - | TILDE - | BANG - ; - -cast_expression - : unary_expression - | LPAREN type_name RPAREN cast_expression - ; - -multiplicative_expression - : cast_expression - | multiplicative_expression STAR cast_expression - | multiplicative_expression SLASH cast_expression - | multiplicative_expression PERCENT cast_expression - ; - -additive_expression - : multiplicative_expression - | additive_expression PLUS multiplicative_expression - | additive_expression MINUS multiplicative_expression - ; - -shift_expression - : additive_expression - | shift_expression LEFT_OP additive_expression - | shift_expression RIGHT_OP additive_expression - ; - -relational_expression - : shift_expression - | relational_expression LT_OP shift_expression - | relational_expression GT_OP shift_expression - | relational_expression LE_OP shift_expression - | relational_expression GE_OP shift_expression - ; - -equality_expression - : relational_expression - | equality_expression EQ_OP relational_expression - | equality_expression NE_OP relational_expression - ; - -and_expression - : equality_expression - | and_expression AND_OP equality_expression - ; - -exclusive_or_expression - : and_expression - | exclusive_or_expression CIRCUMFLEX and_expression - ; - -inclusive_or_expression - : exclusive_or_expression - | inclusive_or_expression OR_OP exclusive_or_expression - ; - -logical_and_expression - : inclusive_or_expression - | logical_and_expression BOOL_AND_OP inclusive_or_expression - ; - -logical_or_expression - : logical_and_expression - | logical_or_expression BOOL_OR_OP logical_and_expression - ; - -conditional_expression - : logical_or_expression - | logical_or_expression QUESTIONMARK expression COLON conditional_expression - ; - -assignment_expression - : conditional_expression - | unary_expression assignment_operator assignment_expression - ; - -assignment_operator - : ASSIGN - | MUL_ASSIGN - | DIV_ASSIGN - | MOD_ASSIGN - | ADD_ASSIGN - | SUB_ASSIGN - | LEFT_ASSIGN - | RIGHT_ASSIGN - | AND_ASSIGN - | XOR_ASSIGN - | OR_ASSIGN - ; - -expression - : assignment_expression - | expression COMMA assignment_expression - ; - -constant_expression - : conditional_expression - ; - -declaration - : declaration_specifiers SEMICOLON - | declaration_specifiers init_declarator_list SEMICOLON - ; - -declaration_specifiers - : storage_class_specifier - | storage_class_specifier declaration_specifiers - | type_specifier - | type_specifier declaration_specifiers - | type_qualifier - | type_qualifier declaration_specifiers - ; - -init_declarator_list - : init_declarator - | init_declarator_list COMMA init_declarator - ; - -init_declarator - : declarator - | declarator ASSIGN initializer - ; - -storage_class_specifier - : TYPEDEF - | EXTERN - | STATIC - | AUTO - | REGISTER - ; - -type_specifier - : VOID - | CHAR - | SHORT - | INT - | LONG - | FLOAT - | DOUBLE - | SIGNED - | UNSIGNED - | struct_or_union_specifier - | enum_specifier - | TYPE_NAME - ; - -struct_or_union_specifier - : struct_or_union IDENTIFIER LBRACE struct_declaration_list RBRACE - | struct_or_union LBRACE struct_declaration_list RBRACE - | struct_or_union IDENTIFIER - ; - -struct_or_union - : STRUCT - | UNION - ; - -struct_declaration_list - : struct_declaration - | struct_declaration_list struct_declaration - ; - -struct_declaration - : specifier_qualifier_list struct_declarator_list SEMICOLON - ; - -specifier_qualifier_list - : type_specifier specifier_qualifier_list - | type_specifier - | type_qualifier specifier_qualifier_list - | type_qualifier - ; - -struct_declarator_list - : struct_declarator - | struct_declarator_list COMMA struct_declarator - ; - -struct_declarator - : declarator - | COLON constant_expression - | declarator COLON constant_expression - ; - -enum_specifier - : ENUM LBRACE enumerator_list RBRACE - | ENUM IDENTIFIER LBRACE enumerator_list RBRACE - | ENUM IDENTIFIER - ; - -enumerator_list - : enumerator - | enumerator_list COMMA enumerator - ; - -enumerator - : IDENTIFIER - | IDENTIFIER ASSIGN constant_expression - ; - -type_qualifier - : CONST - | VOLATILE - ; - -declarator - : pointer direct_declarator - | direct_declarator - ; - -direct_declarator - : IDENTIFIER - | LPAREN declarator RPAREN - | direct_declarator LBRACKET constant_expression RBRACKET - | direct_declarator LBRACKET RBRACKET - | direct_declarator LPAREN parameter_type_list RPAREN - | direct_declarator LPAREN identifier_list RPAREN - | direct_declarator LPAREN RPAREN - ; - -pointer - : STAR - | STAR type_qualifier_list - | STAR pointer - | STAR type_qualifier_list pointer - ; - -type_qualifier_list - : type_qualifier - | type_qualifier_list type_qualifier - ; - - -parameter_type_list - : parameter_list - | parameter_list COMMA ELLIPSIS - ; - -parameter_list - : parameter_declaration - | parameter_list COMMA parameter_declaration - ; - -parameter_declaration - : declaration_specifiers declarator - | declaration_specifiers abstract_declarator - | declaration_specifiers - ; - -identifier_list - : IDENTIFIER - | identifier_list COMMA IDENTIFIER - ; - -type_name - : specifier_qualifier_list - | specifier_qualifier_list abstract_declarator - ; - -abstract_declarator - : pointer - | direct_abstract_declarator - | pointer direct_abstract_declarator - ; - -direct_abstract_declarator - : LPAREN abstract_declarator RPAREN - | LBRACKET RBRACKET - | LBRACKET constant_expression RBRACKET - | direct_abstract_declarator LBRACKET RBRACKET - | direct_abstract_declarator LBRACKET constant_expression RBRACKET - | LPAREN RPAREN - | LPAREN parameter_type_list RPAREN - | direct_abstract_declarator LPAREN RPAREN - | direct_abstract_declarator LPAREN parameter_type_list RPAREN - ; - -initializer - : assignment_expression - | LBRACE initializer_list RBRACE - | LBRACE initializer_list COMMA RBRACE - ; - -initializer_list - : initializer - | initializer_list COMMA initializer - ; - -statement - : labeled_statement - | compound_statement - | expression_statement - | selection_statement - | iteration_statement - | jump_statement - ; - -labeled_statement - : IDENTIFIER COLON statement - | CASE constant_expression COLON statement - | DEFAULT COLON statement - ; - -compound_statement - : LBRACE RBRACE - | LBRACE statement_list RBRACE - | LBRACE declaration_list RBRACE - | LBRACE declaration_list statement_list RBRACE - ; - -declaration_list - : declaration - | declaration_list declaration - ; - -statement_list - : statement - | statement_list statement - ; - -expression_statement - : SEMICOLON - | expression SEMICOLON - ; - -selection_statement - : IF LPAREN expression RPAREN statement - | IF LPAREN expression RPAREN statement ELSE statement - | SWITCH LPAREN expression RPAREN statement - ; - -iteration_statement - : WHILE LPAREN expression RPAREN statement - | DO statement WHILE LPAREN expression RPAREN SEMICOLON - | FOR LPAREN expression_statement expression_statement RPAREN statement - | FOR LPAREN expression_statement expression_statement expression RPAREN statement - ; - -jump_statement - : GOTO IDENTIFIER SEMICOLON - | CONTINUE SEMICOLON - | BREAK SEMICOLON - | RETURN SEMICOLON - | RETURN expression SEMICOLON - ; - -translation_unit - : external_declaration - | translation_unit external_declaration - ; - -external_declaration - : function_definition - | declaration - ; - -function_definition - : declaration_specifiers declarator declaration_list compound_statement - | declaration_specifiers declarator compound_statement - | declarator declaration_list compound_statement - | declarator compound_statement - ; - -%% -#include - -extern char yytext[]; -extern int column; - -yyerror(s) -char *s; -{ - fflush(stdout); - printf("\n%*s\n%*s\n", column, "^", column, s); -} - diff --git a/examples/calc/README b/examples/calc/README deleted file mode 100644 index 1643b39..0000000 --- a/examples/calc/README +++ /dev/null @@ -1,7 +0,0 @@ -This is a minimal calculator example, as discussed in the walkthrough -document. - -Provided you have installed PyBison, running the run.py script should -trigger the creation of the calc-parser.so parser engine lib, and -run a calculator parser which accepts expressions you type in, and -prints out results. diff --git a/examples/calc/calc.py b/examples/calc/calc.py deleted file mode 100755 index 9a27348..0000000 --- a/examples/calc/calc.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python -""" -A simple pybison parser program implementing a calculator -""" -from bison import BisonParser - - -class Parser(BisonParser): - """ - Implements the calculator parser. Grammar rules are defined in the method - docstrings. Scanner rules are in the 'lexscript' attribute. - """ - # ---------------------------------------------------------------- - # lexer tokens - these must match those in your lex script (below) - # ---------------------------------------------------------------- - tokens = ['NUMBER', - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'POW', - 'LPAREN', 'RPAREN', - 'NEWLINE', 'QUIT'] - - # ------------------------------ - # precedences - # ------------------------------ - precedences = ( - ('left', ('MINUS', 'PLUS')), - ('left', ('TIMES', 'DIVIDE')), - ('left', ('NEG', )), - ('right', ('POW', )), - ) - - # ------------------------------------------------------------------ - # override default read method with a version that prompts for input - # ------------------------------------------------------------------ - def read(self, nbytes): - try: - return raw_input("> ") + "\n" - except EOFError: - return '' - - # --------------------------------------------------------------- - # These methods are the python handlers for the bison targets. - # (which get called by the bison code each time the corresponding - # parse target is unambiguously reached) - # - # WARNING - don't touch the method docstrings unless you know what - # you are doing - they are in bison rule syntax, and are passed - # verbatim to bison to build the parser engine library. - # --------------------------------------------------------------- - - # Declare the start target here (by name) - start = "input" - - def on_input(self, target, option, names, values): - """ - input : - | input line - """ - return - - def on_line(self, target, option, names, values): - """ - line : NEWLINE - | exp NEWLINE - """ - if option == 1: - print values[0] - - def on_exp(self, target, option, names, values): - """ - exp : NUMBER - | exp PLUS exp - | exp MINUS exp - | exp TIMES exp - | exp DIVIDE exp - | MINUS exp %prec NEG - | exp POW exp - | LPAREN exp RPAREN - """ - #print "on_exp: got %s %s %s %s" % (target, option, names, values) - if option == 0: - return float(values[0]) - elif option == 1: - return values[0] + values[2] - elif option == 2: - return values[0] - values[2] - elif option == 3: - return values[0] * values[2] - elif option == 4: - return values[0] / values[2] - elif option == 5: - return - values[1] - elif option == 6: - return values[0] ** values[2] - elif option == 7: - return values[1] - - # ----------------------------------------- - # raw lex script, verbatim here - # ----------------------------------------- - lexscript = r""" - %{ - //int yylineno = 0; - #include - #include - #include "Python.h" - #define YYSTYPE void * - #include "tokens.h" - extern void *py_parser; - extern void (*py_input)(PyObject *parser, char *buf, int *result, - int max_size); - #define returntoken(tok) \ - yylval = PyString_FromString(strdup(yytext)); return (tok); - #define YY_INPUT(buf,result,max_size) { \ - (*py_input)(py_parser, buf, &result, max_size); \ - } - %} - - %% - - [0-9]+ { returntoken(NUMBER); } - "(" { returntoken(LPAREN); } - ")" { returntoken(RPAREN); } - "+" { returntoken(PLUS); } - "-" { returntoken(MINUS); } - "*" { returntoken(TIMES); } - "**" { returntoken(POW); } - "/" { returntoken(DIVIDE); } - "quit" { printf("lex: got QUIT\n"); yyterminate(); returntoken(QUIT); } - - [ \t\v\f] {} - [\n] {yylineno++; returntoken(NEWLINE); } - . { printf("unknown char %c ignored, yytext=0x%lx\n", yytext[0], - yytext); /* ignore bad chars */} - - %% - - yywrap() { return(1); } - """ - -if __name__ == '__main__': - p = Parser() - p.run() diff --git a/examples/calc/run.py b/examples/calc/run.py deleted file mode 100755 index f796000..0000000 --- a/examples/calc/run.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python - -import sys - -sys.path.insert(0, '../../build/lib.linux-x86_64-2.7/') - -import calc - -parser = calc.Parser(verbose=1, keepfiles=0) -parser.run() diff --git a/examples/calc1/README b/examples/calc1/README deleted file mode 100644 index ab716e9..0000000 --- a/examples/calc1/README +++ /dev/null @@ -1,30 +0,0 @@ -A slightly more powerful calculator that supports variables, -plus some scientific functions like log(), sqr(), sin() etc. - -Most notably, this example demonstrates error handling. - -If/when one of your rule handlers comes across a condition -which constitutes an error, but which the parser doesn't or can't -pick up (eg division by zero), the handler should do: - - return self.error("string-describing-the-error") - -This will cause the parser to go into error handling mode. - -Also, your rules can pick up errors, by using the magic -target name 'error'. Within error handling code, the -'lasterror' attribute of the parser object will be a -3-tuple: - - (line-num, msg, near-token) - -where: - - 'line-num' is the input line at which the error (most likely) - occured, - - - 'msg' is the error message text (supplied by either your own - prior 'self.error(somestring)' call, or by the parser itself - - - 'near-token' is a string, the input token which triggered the - error condition - diff --git a/examples/calc1/calc1.py b/examples/calc1/calc1.py deleted file mode 100755 index ef2b365..0000000 --- a/examples/calc1/calc1.py +++ /dev/null @@ -1,252 +0,0 @@ -#!/usr/bin/env python -""" -A more advanced calculator example, with variable storage and scientific -functions (courtesy of python 'math' module) -""" -import math - -from bison import BisonParser - - -class Parser(BisonParser): - """ - Implements the calculator parser. Grammar rules are defined in the method docstrings. - Scanner rules are in the 'lexscript' attribute. - """ - # ---------------------------------------------------------------- - # lexer tokens - these must match those in your lex script (below) - # ---------------------------------------------------------------- - tokens = ['NUMBER', - 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD', 'POW', - 'LPAREN', 'RPAREN', - 'NEWLINE', 'QUIT', - 'EQUALS', 'PI', 'E', - 'IDENTIFIER', - 'HELP'] - - # ------------------------------ - # precedences - # ------------------------------ - precedences = ( - ('left', ('MINUS', 'PLUS')), - ('left', ('TIMES', 'DIVIDE', 'MOD')), - ('left', ('NEG', )), - ('right', ('POW', )), - ) - - # -------------------------------------------- - # basename of binary parser engine dynamic lib - # -------------------------------------------- - bisonEngineLibName = "calc1-engine" - - # ------------------------------------------------------------------ - # override default read method with a version that prompts for input - # ------------------------------------------------------------------ - def read(self, nbytes): - try: - return raw_input("> ") + "\n" - except EOFError: - return '' - - # ----------------------------------------------------------- - # override default run method to set up our variables storage - # ----------------------------------------------------------- - def run(self, *args, **kw): - self.vars = {} - BisonParser.run(self, *args, **kw) - - # --------------------------------------------------------------- - # These methods are the python handlers for the bison targets. - # (which get called by the bison code each time the corresponding - # parse target is unambiguously reached) - # - # WARNING - don't touch the method docstrings unless you know what - # you are doing - they are in bison rule syntax, and are passed - # verbatim to bison to build the parser engine library. - # --------------------------------------------------------------- - - # Declare the start target here (by name) - start = "input" - - def on_input(self, target, option, names, values): - """ - input : - | input line - """ - if option == 1: - return values[0] - - def on_line(self, target, option, names, values): - """ - line : NEWLINE - | exp NEWLINE - | IDENTIFIER EQUALS exp NEWLINE - | HELP - | error - """ - if option == 1: - print values[0] - return values[0] - elif option == 2: - self.vars[values[0]] = values[2] - return values[2] - elif option == 3: - self.show_help() - elif option == 4: - line, msg, near = self.lasterror - print "Line %s: \"%s\" near %s" % (line, msg, repr(near)) - - def on_exp(self, target, option, names, values): - """ - exp : number | plusexp | minusexp | timesexp | divexp | modexp - | negexp | powexp | parenexp | varexp | functioncall | constant - """ - return values[0] - - def on_number(self, target, option, names, values): - """ - number : NUMBER - """ - return float(values[0]) - - def on_plusexp(self, target, option, names, values): - """ - plusexp : exp PLUS exp - """ - return values[0] + values[2] - - def on_minusexp(self, target, option, names, values): - """ - minusexp : exp MINUS exp - """ - return values[0] - values[2] - - def on_timesexp(self, target, option, names, values): - """ - timesexp : exp TIMES exp - """ - return values[0] * values[2] - - def on_divexp(self, target, option, names, values): - """ - divexp : exp DIVIDE exp - """ - try: - return values[0] / values[2] - except: - return self.error("Division by zero error") - - def on_modexp(self, target, option, names, values): - """ - modexp : exp MOD exp - """ - try: - return values[0] % values[2] - except: - return self.error("Modulus by zero error") - - def on_powexp(self, target, option, names, values): - """ - powexp : exp POW exp - """ - return values[0] ** values[2] - - def on_negexp(self, target, option, names, values): - """ - negexp : MINUS exp %prec NEG - """ - return values[1] - - def on_parenexp(self, target, option, names, values): - """ - parenexp : LPAREN exp RPAREN - """ - return values[1] - - def on_varexp(self, target, option, names, values): - """ - varexp : IDENTIFIER - """ - if self.vars.has_key(values[0]): - return self.vars[values[0]] - else: - return self.error("No such variable '%s'" % values[0]) - - def on_functioncall(self, target, option, names, values): - """ - functioncall : IDENTIFIER LPAREN exp RPAREN - """ - func = getattr(math, values[0], None) - if not callable(func): - return self.error("No such function '%s'" % values[0]) - try: - return func(values[2]) - except Exception, e: - return self.error(e.args[0]) - - def on_constant(self, target, option, names, values): - """ - constant : PI - | E - """ - return getattr(math, values[0]) - - # ----------------------------------------- - # Display help - # ----------------------------------------- - def show_help(self): - print "This PyBison parser implements a basic scientific calculator" - print " * scientific notation now works for numbers, eg '2.3e+12'" - print " * you can assign values to variables, eg 'x = 23.2'" - print " * the constants 'pi' and 'e' are supported" - print " * all the python 'math' module functions are available, eg 'sin(pi/6)'" - print " * errors, such as division by zero, are now reported" - - # ----------------------------------------- - # raw lex script, verbatim here - # ----------------------------------------- - lexscript = r""" - %{ - #include - #include - #include "Python.h" - #define YYSTYPE void * - #include "tokens.h" - extern void *py_parser; - extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); - #define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok); - #define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } - %} - - %% - - ([0-9]*\.?)([0-9]+)(e[-+]?[0-9]+)? { returntoken(NUMBER); } - ([0-9]+)(\.?[0-9]*)(e[-+]?[0-9]+)? { returntoken(NUMBER); } - "(" { returntoken(LPAREN); } - ")" { returntoken(RPAREN); } - "+" { returntoken(PLUS); } - "-" { returntoken(MINUS); } - "*" { returntoken(TIMES); } - "**" { returntoken(POW); } - "/" { returntoken(DIVIDE); } - "%" { returntoken(MOD); } - "quit" { printf("lex: got QUIT\n"); yyterminate(); returntoken(QUIT); } - "=" { returntoken(EQUALS); } - "e" { returntoken(E); } - "pi" { returntoken(PI); } - "help" { returntoken(HELP); } - [a-zA-Z_][0-9a-zA-Z_]* { returntoken(IDENTIFIER); } - - [ \t\v\f] {} - [\n] {yylineno++; returntoken(NEWLINE); } - . { printf("unknown char %c ignored, yytext=0x%lx\n", yytext[0], yytext); /* ignore bad chars */} - - %% - - yywrap() { return(1); } - """ - -if __name__ == '__main__': - p = Parser(keepfiles=0) - print "Scientific calculator example. Type 'help' for help" - p.run() diff --git a/examples/java/CREDITS b/examples/java/CREDITS deleted file mode 100644 index 0809c18..0000000 --- a/examples/java/CREDITS +++ /dev/null @@ -1,12 +0,0 @@ -The javaparser.l and javaparser.y files herein were NOT written by -myself (David McNab). - -javaparser.l was taken almost verbatim from an apparently public -domain version, found on the website of -Professor Dave Binkley -(http://www.cs.loyola.edu/~binkley/) -at the Department of Computer Science at Loyola College in Maryland -(http://www.cs.loyola.edu/) - -javaparser.y was taken almost verbatim from the source code to -the GNU gcj java compiler. diff --git a/examples/java/HelloWorldApp.java b/examples/java/HelloWorldApp.java deleted file mode 100644 index bfd7243..0000000 --- a/examples/java/HelloWorldApp.java +++ /dev/null @@ -1,13 +0,0 @@ -public class HelloWorldApp { - - public HelloWorldApp() {} - - public static void main(String[] args) { - System.out.println("Hello, world"); - } - - public static void main() { - main(new String[]{}); - } -} - diff --git a/examples/java/README b/examples/java/README deleted file mode 100644 index e91729d..0000000 --- a/examples/java/README +++ /dev/null @@ -1,20 +0,0 @@ -This is a somewhat more advanced example, containing -bison and lex scripts for a java source code parser. - -Note that there is no python file containing a Parser class. We -have to generate one. - -To do this, run: - - $ bison2py javaparser.y javaparser.l javaparser.py - -or just: - - $ bison2py javaparser - -You should now see a file javaparser.py - -Then, just run the run-javaparser.py script. The -script will instantiate a parser and run it on the -whatever source file you give as an argument. - diff --git a/examples/java/javaparser.l b/examples/java/javaparser.l deleted file mode 100644 index f0199d2..0000000 --- a/examples/java/javaparser.l +++ /dev/null @@ -1,180 +0,0 @@ -D [0-9] -N [1-9] -L [a-zA-Z_$] -H [a-fA-F0-9] -OC [0-7] -E [Ee][+-]?{D}+ -LS [fFdD] -Escape \\[ntbrf\\'"] -Escchar \\{D}({D}?{D})? -Escunichar \\u{H}{H}{H}{H} - -%{ -int yylineno = 0; -#include -#include -#include "Python.h" -#define YYSTYPE void * -#include "tokens.h" -extern void *py_parser; -extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); -#define returntoken(tok) /*printf("%d=%s\n", tok, yytext);*/ yylval = PyString_FromString(strdup(yytext)); return (tok); -#define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } - -#include "table.h" - -%} - -%% - - -{N}{D}*(l|L)? { returntoken(INTEGER_LITERAL_TOKEN); } -{N}{D}*(d|D)? { returntoken(INTEGER_LITERAL_TOKEN); } -0[xX]{H}+(l|L)? { returntoken(INTEGER_LITERAL_TOKEN); } -0{OC}*(l|L)? { returntoken(INTEGER_LITERAL_TOKEN); } - -{D}+"."{D}*({E})?{LS}? { returntoken(FLOATING_POINT_LITERAL_TOKEN); } -"."{D}+({E})?{LS}? { returntoken(FLOATING_POINT_LITERAL_TOKEN); } -{D}+{E}{LS}? { returntoken(FLOATING_POINT_LITERAL_TOKEN); } -{D}{LS} { returntoken(FLOATING_POINT_LITERAL_TOKEN); } - - -"true" { returntoken(BOOLEAN_LITERAL_TOKEN); } -"false" { returntoken(BOOLEAN_LITERAL_TOKEN); } - -{L}({L}|{D})* { returntoken(Table_Lookup(KeywordTable));} - -'[^'\\]' { returntoken(CHARACTER_LITERAL_TOKEN);} -'{Escape}' { returntoken(CHARACTER_LITERAL_TOKEN);} - - - \"([^\"\\]|{Escape}|{Escchar}|{Escunichar})*\" { returntoken(STRING_LITERAL_TOKEN); } - -\/\/.*$ {} -\/\* { comment();} - -"(" { returntoken(OPEN_PAREN_TOKEN); } -")" { returntoken(CLOSE_PAREN_TOKEN); } -"{" { returntoken(OPEN_BRACE_TOKEN); } -"}" { returntoken(CLOSE_BRACE_TOKEN); } -"[" { returntoken(OPEN_BRACKET_TOKEN); } -"]" { returntoken(CLOSE_BRACKET_TOKEN); } -";" { returntoken(SEMICOLON_TOKEN); } -"," { returntoken(COMMA_TOKEN); } -"." { returntoken(PERIOD_TOKEN); } -"=" { returntoken(ASSIGNS_TOKEN); } -">" { returntoken(GREATER_TOKEN); } -"<" { returntoken(LESS_TOKEN); } -"!" { returntoken(NOT_TOKEN); } -"~" { returntoken(TILDE_TOKEN); } -"?" { returntoken(CONDITIONAL_TOKEN); } -":" { returntoken(COLON_TOKEN); } -"==" { returntoken(EQ_TOKEN); } -"<=" { returntoken(LE_TOKEN); } -">=" { returntoken(GE_TOKEN); } -"!=" { returntoken(NE_OP_TOKEN); } -"||" { returntoken(LOGICAL_OR_TOKEN); } -"&&" { returntoken(LOGICAL_AND_TOKEN); } -"++" { returntoken(INC_TOKEN); } -"--" { returntoken(DEC_TOKEN); } -"+" { returntoken(PLUS_TOKEN); } -"-" { returntoken(MINUS_TOKEN); } -"*" { returntoken(MUL_TOKEN); } -"/" { returntoken(DIV_TOKEN); } -"&" { returntoken(AND_TOKEN); } -"|" { returntoken(OR_TOKEN); } -"^" { returntoken(XOR_TOKEN); } -"%" { returntoken(MOD_TOKEN); } -"<<" { returntoken(SHL_TOKEN); } -">>" { returntoken(SAR_TOKEN); } -">>>" { returntoken(SHL_TOKEN); } -"+=" { returntoken(ADD_ASSIGN_TOKEN); } -"-=" { returntoken(SUB_ASSIGN_TOKEN); } -"*=" { returntoken(MUL_ASSIGN_TOKEN); } -"/=" { returntoken(DIV_ASSIGN_TOKEN); } -"&=" { returntoken(AND_ASSIGN_TOKEN); } -"|=" { returntoken(OR_ASSIGN_TOKEN); } -"^=" { returntoken(XOR_ASSIGN_TOKEN); } -"%=" { returntoken(MOD_ASSIGN_TOKEN); } -"<<=" { returntoken(SHL_ASSIGN_TOKEN); } -">>=" { returntoken(SAR_ASSIGN_TOKEN); } -">>>=" { returntoken(SHR_ASSIGN_TOKEN); } - -[ \t\v\f] {} -[\n] {yylineno++;} - -. { printf("unknown char %c ignored\n", yytext[0]); /* ignore bad chars */} - - -%% - -yywrap() { return(1); } - -/* test_main() -{ - int t; - - for(t = yylex(), t > 0, t = yylex()) - { - printf("%s\t%d\n", yytext,t); - } - - return 0; -} -*/ - - -/* input: Table of tokens - * returns TokenCode of keyword if matched or - * ID_TOKEN if no match is found - */ -int Table_Lookup(struct KeywordToken Table[]) -{ - struct KeywordToken *Curr; - int i = 0; - - for (Curr = Table; Curr->Keyword != ""; Curr++) - { - //printf("Table_Lookup: yytext='%s', Curr->Keyword='%s', idx=%d\n", yytext, Curr->Keyword, i); - - if (strcmp(Curr->Keyword, yytext)==0) - { - //printf("Table_Lookup: '%s' => %d\n", yytext, Curr->TokenCode); - return (Curr->TokenCode); - } - i++; - } - return ID_TOKEN; -} - -commentold() -{ - char c = -1, c1; - while(c != 0) - { - for(c = input(); c != '*' && c!=0; c = input()) - ; - - /* now we have a star or no more chars */ - if(c == 0 || (c1 = input() == '/')) - return; - - if (c1 == '*') - unput(c1); - } -} - -comment() -{ - int prev=-1, cur=-1; - - while (1) - { - cur = input(); - if (cur == '/' && prev == '*') - return; - else if (cur == 0) - return; - prev = cur; - } -} diff --git a/examples/java/javaparser.y b/examples/java/javaparser.y deleted file mode 100644 index b200fd0..0000000 --- a/examples/java/javaparser.y +++ /dev/null @@ -1,954 +0,0 @@ -/* yacc grammar for JAVA language */ -/* print with psf -p 10 -L50 -l 50 -w -E j.y > xx */ - -%{ -#define REDUCE /* will display the reduce rules */ -#undef REDUCE - -#define PRNT_SYM -#undef PRNT_SYM - -#define YYDEBUG 1 - -#include - -extern FILE *yyin; -extern int yylineno; -extern int yydebug; -extern char yytext[]; - -#ifdef REDUCE -# define reduce(a) printf("%s\n",a) -#else -# define reduce(a) -#endif - -#define YYSTYPE PyObject * - -%} - -/* Things defined here have to match the order of what's in the - binop_lookup table. */ - -%token PLUS_TOKEN MINUS_TOKEN MUL_TOKEN DIV_TOKEN MOD_TOKEN -%token SHL_TOKEN SHR_TOKEN SAR_TOKEN -%token AND_TOKEN XOR_TOKEN OR_TOKEN -%token LOGICAL_AND_TOKEN LOGICAL_OR_TOKEN -%token EQ_TOKEN NE_OP_TOKEN GREATER_TOKEN GE_TOKEN LESS_TOKEN LE_TOKEN - -/* This maps to the same binop_lookup entry than the token above */ - -%token ADD_ASSIGN_TOKEN SUB_ASSIGN_TOKEN MUL_ASSIGN_TOKEN DIV_ASSIGN_TOKEN -%token MOD_ASSIGN_TOKEN -%token SHL_ASSIGN_TOKEN SHR_ASSIGN_TOKEN SAR_ASSIGN_TOKEN -%token AND_ASSIGN_TOKEN XOR_ASSIGN_TOKEN OR_ASSIGN_TOKEN - - -/* Modifier TOKEN have to be kept in this order. Don't scramble it */ - -%token PUBLIC_TOKEN PRIVATE_TOKEN PROTECTED_TOKEN -%token STATIC_TOKEN FINAL_TOKEN SYNCHRONIZED_TOKEN -%token VOLATILE_TOKEN TRANSIENT_TOKEN NATIVE_TOKEN -%token PAD_TOKEN ABSTRACT_TOKEN MODIFIER_TOKEN -%token STRICT_TOKEN STRICTFP_TOKEN - -/* Keep those two in order, too */ -%token DEC_TOKEN INC_TOKEN - -/* From now one, things can be in any order */ - -%token DEFAULT_TOKEN IF_TOKEN THROW_TOKEN -%token BOOLEAN_TOKEN DO_TOKEN IMPLEMENTS_TOKEN -%token THROWS_TOKEN BREAK_TOKEN IMPORT_TOKEN -%token ELSE_TOKEN INSTANCEOF_TOKEN RETURN_TOKEN -%token VOID_TOKEN CATCH_TOKEN INTERFACE_TOKEN -%token CASE_TOKEN EXTENDS_TOKEN FINALLY_TOKEN -%token SUPER_TOKEN WHILE_TOKEN CLASS_TOKEN -%token SWITCH_TOKEN CONST_TOKEN TRY_TOKEN -%token FOR_TOKEN NEW_TOKEN CONTINUE_TOKEN -%token GOTO_TOKEN PACKAGE_TOKEN THIS_TOKEN -%token ASSERT_TOKEN - -%token BYTE_TOKEN SHORT_TOKEN INT_TOKEN LONG_TOKEN -%token CHAR_TOKEN - -%token FLOAT_TOKEN DOUBLE_TOKEN - -%token ID_TOKEN - -%token CONDITIONAL_TOKEN COLON_TOKEN TILDE_TOKEN NOT_TOKEN - -%token ASSIGN_ANY_TOKEN ASSIGNS_TOKEN -%token OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN OPEN_BRACE_TOKEN CLOSE_BRACE_TOKEN OPEN_BRACKET_TOKEN CLOSE_BRACKET_TOKEN SEMICOLON_TOKEN COMMA_TOKEN PERIOD_TOKEN - -%token INTEGER_LITERAL_TOKEN FLOATING_POINT_LITERAL_TOKEN BOOLEAN_LITERAL_TOKEN STRING_LITERAL_TOKEN -%token CHARACTER_LITERAL_TOKEN NULL_TOKEN - - -%right SHL_ASSIGN_TOKEN SAR_ASSIGN_TOKEN AND_ASSIGN_TOKEN OR_ASSIGN_TOKEN XOR_ASSIGN_TOKEN - ASSIGNS_TOKEN ADD_ASSIGN_TOKEN SUB_ASSIGN_TOKEN MUL_ASSIGN_TOKEN DIV_ASSIGN_TOKEN MOD_ASSIGN_TOKEN -%left LOGICAL_OR_TOKEN -%left LOGICAL_AND_TOKEN -%left OR_TOKEN -%left XOR_TOKEN -%left AND_TOKEN -%left RELATIVEQEUAL_TOKEN NE_OP_TOKEN -%left GREATER_TOKEN LESS_TOKEN GE_TOKEN LE_TOKEN -%left SHL_TOKEN SAR_TOKEN SHR_TOKEN -%left PLUS_TOKEN MINUS_TOKEN -%left MUL_TOKEN DIV_TOKEN MOD_TOKEN -%nonassoc NOT_TOKEN TILDE_TOKEN - - -%start goal - -%% - -goal -: compilation_unit -; - -literal -: INTEGER_LITERAL_TOKEN -| FLOATING_POINT_LITERAL_TOKEN -| BOOLEAN_LITERAL_TOKEN -| CHARACTER_LITERAL_TOKEN -| STRING_LITERAL_TOKEN -| NULL_TOKEN -; - -type -: primitive_type -| reference_type -; - -primitive_type -: INT_TOKEN -| LONG_TOKEN -| FLOAT_TOKEN -| DOUBLE_TOKEN -| BOOLEAN_TOKEN -| BYTE_TOKEN -| CHAR_TOKEN -| SHORT_TOKEN -; - -reference_type -: class_or_interface_type -| array_type -; - -class_or_interface_type -: name -; - -class_type -: class_or_interface_type -; - -interface_type -: class_or_interface_type -; - -array_type -: primitive_type dims -| name dims -; - -name -: simple_name -| qualified_name -; - -simple_name -: identifier -; - -qualified_name -: name PERIOD_TOKEN identifier -; - -identifier -: ID_TOKEN -; - -compilation_unit -: -| package_declaration -| import_declarations -| type_declarations -| package_declaration import_declarations -| package_declaration type_declarations -| import_declarations type_declarations -| package_declaration import_declarations type_declarations -; - -import_declarations -: import_declaration -| import_declarations import_declaration -; - -type_declarations -: type_declaration -| type_declarations type_declaration -; - -package_declaration -: PACKAGE_TOKEN name SEMICOLON_TOKEN -; - -import_declaration -: single_type_import_declaration -| type_import_on_demand_declaration -; - -single_type_import_declaration -: IMPORT_TOKEN name SEMICOLON_TOKEN -; - -type_import_on_demand_declaration -: IMPORT_TOKEN name PERIOD_TOKEN MUL_TOKEN SEMICOLON_TOKEN -; - -type_declaration -: class_declaration -| interface_declaration -| empty_statement -; - -modifiers -: modifier -| modifiers modifier -; - -modifier -: STATIC_TOKEN -| PUBLIC_TOKEN -| PROTECTED_TOKEN -| PRIVATE_TOKEN -| ABSTRACT_TOKEN -| FINAL_TOKEN -| NATIVE_TOKEN -| SYNCHRONIZED_TOKEN -| TRANSIENT_TOKEN -| VOLATILE_TOKEN -; - -class_declaration -: modifiers CLASS_TOKEN identifier super interfaces class_body -| CLASS_TOKEN identifier super interfaces class_body -; - -super -: -| EXTENDS_TOKEN class_type -; - -interfaces -: -| IMPLEMENTS_TOKEN interface_type_list -; - -interface_type_list -: interface_type -| interface_type_list COMMA_TOKEN interface_type -; - -class_body -: OPEN_BRACE_TOKEN CLOSE_BRACE_TOKEN -| OPEN_BRACE_TOKEN class_body_declarations CLOSE_BRACE_TOKEN -; - -class_body_declarations -: class_body_declaration -| class_body_declarations class_body_declaration -; - -class_body_declaration -: class_member_declaration -| static_initializer -| constructor_declaration -| block -; - -class_member_declaration -: field_declaration -| method_declaration -| class_declaration -| interface_declaration -| empty_statement -; - -field_declaration -: type variable_declarators SEMICOLON_TOKEN -| modifiers type variable_declarators SEMICOLON_TOKEN -; - -variable_declarators -: variable_declarator -| variable_declarators COMMA_TOKEN variable_declarator -; - -variable_declarator -: variable_declarator_id -| variable_declarator_id ASSIGNS_TOKEN variable_initializer -; - -variable_declarator_id -: identifier -| variable_declarator_id OPEN_BRACKET_TOKEN CLOSE_BRACKET_TOKEN -; - -variable_initializer -: expression -| array_initializer -; - -method_declaration -: method_header method_body -; - -method_header -: type method_declarator throws -| VOID_TOKEN method_declarator throws -| modifiers type method_declarator throws -| modifiers VOID_TOKEN method_declarator throws -; - -method_declarator -: identifier OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN -| identifier OPEN_PAREN_TOKEN formal_parameter_list CLOSE_PAREN_TOKEN -| method_declarator OPEN_BRACKET_TOKEN CLOSE_BRACKET_TOKEN -; - -formal_parameter_list -: formal_parameter -| formal_parameter_list COMMA_TOKEN formal_parameter -; - -formal_parameter -: type variable_declarator_id -| modifiers type variable_declarator_id -; - -throws -: -| THROWS_TOKEN class_type_list -; - -class_type_list -: class_type -| class_type_list COMMA_TOKEN class_type -; - -method_body -: block -| SEMICOLON_TOKEN -; - -static_initializer -: static block -; - -static -: modifiers -; - -constructor_declaration -: constructor_header constructor_body -; - -constructor_header -: constructor_declarator throws -| modifiers constructor_declarator throws -; - -constructor_declarator -: simple_name OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN -| simple_name OPEN_PAREN_TOKEN formal_parameter_list CLOSE_PAREN_TOKEN -; - -constructor_body -: block_begin constructor_block_end -| block_begin explicit_constructor_invocation constructor_block_end -| block_begin block_statements constructor_block_end -| block_begin explicit_constructor_invocation block_statements constructor_block_end -; - -constructor_block_end -: block_end -; - -block_begin -: OPEN_BRACE_TOKEN -; - -block_end -: CLOSE_BRACE_TOKEN -; - -explicit_constructor_invocation -: this_or_super OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN SEMICOLON_TOKEN -| this_or_super OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN SEMICOLON_TOKEN -| name PERIOD_TOKEN SUPER_TOKEN OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN SEMICOLON_TOKEN -| name PERIOD_TOKEN SUPER_TOKEN OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN SEMICOLON_TOKEN -; - -this_or_super -: THIS_TOKEN -| SUPER_TOKEN -; - -interface_declaration -: INTERFACE_TOKEN identifier interface_body -| modifiers INTERFACE_TOKEN identifier interface_body -| INTERFACE_TOKEN identifier extends_interfaces interface_body -| modifiers INTERFACE_TOKEN identifier extends_interfaces interface_body -; - -extends_interfaces -: EXTENDS_TOKEN interface_type -| extends_interfaces COMMA_TOKEN interface_type -; - -interface_body -: OPEN_BRACE_TOKEN CLOSE_BRACE_TOKEN -| OPEN_BRACE_TOKEN interface_member_declarations CLOSE_BRACE_TOKEN -; - -interface_member_declarations -: interface_member_declaration -| interface_member_declarations interface_member_declaration -; - -interface_member_declaration -: constant_declaration -| abstract_method_declaration -| class_declaration -| interface_declaration -; - -constant_declaration -: field_declaration -; - -abstract_method_declaration -: method_header SEMICOLON_TOKEN -; - -array_initializer -: OPEN_BRACE_TOKEN CLOSE_BRACE_TOKEN -| OPEN_BRACE_TOKEN COMMA_TOKEN CLOSE_BRACE_TOKEN -| OPEN_BRACE_TOKEN variable_initializers CLOSE_BRACE_TOKEN -| OPEN_BRACE_TOKEN variable_initializers COMMA_TOKEN CLOSE_BRACE_TOKEN -; - -variable_initializers -: variable_initializer -| variable_initializers COMMA_TOKEN variable_initializer -; - -block -: OPEN_BRACE_TOKEN CLOSE_BRACE_TOKEN -| OPEN_BRACE_TOKEN block_statements CLOSE_BRACE_TOKEN -; - -block_statements -: block_statement -| block_statements block_statement -; - -block_statement -: local_variable_declaration_statement -| statement -| class_declaration -; - -local_variable_declaration_statement -: local_variable_declaration SEMICOLON_TOKEN -; - -local_variable_declaration -: type variable_declarators -| modifiers type variable_declarators -; - -statement -: statement_without_trailing_substatement -| labeled_statement -| if_then_statement -| if_then_else_statement -| while_statement -| for_statement -; - -statement_nsi -: statement_without_trailing_substatement -| labeled_statement_nsi -| if_then_else_statement_nsi -| while_statement_nsi -| for_statement_nsi -; - -statement_without_trailing_substatement -: block -| empty_statement -| expression_statement -| switch_statement -| do_statement -| break_statement -| continue_statement -| return_statement -| synchronized_statement -| throw_statement -| try_statement -| assert_statement -; - -empty_statement -: SEMICOLON_TOKEN -; - -label_decl -: identifier COLON_TOKEN -; - -labeled_statement -: label_decl statement -; - -labeled_statement_nsi -: label_decl statement_nsi -; - -expression_statement -: statement_expression SEMICOLON_TOKEN -| SYNCHRONIZED_TOKEN OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN block -; - -statement_expression -: assignment -| primary -| pre_increment_expression -| pre_decrement_expression -| post_increment_expression -| post_decrement_expression -| method_invocation -| class_instance_creation_expression -; - -if_then_statement -: IF_TOKEN OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN statement -; - -if_then_else_statement -: IF_TOKEN OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN statement_nsi ELSE_TOKEN statement -; - -if_then_else_statement_nsi -: IF_TOKEN OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN statement_nsi ELSE_TOKEN statement_nsi -; - -switch_statement -: switch_expression switch_block -; - -switch_expression -: SWITCH_TOKEN OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN -; - -switch_block -: OPEN_BRACE_TOKEN CLOSE_BRACE_TOKEN -| OPEN_BRACE_TOKEN switch_labels CLOSE_BRACE_TOKEN -| OPEN_BRACE_TOKEN switch_block_statement_groups CLOSE_BRACE_TOKEN -| OPEN_BRACE_TOKEN switch_block_statement_groups switch_labels CLOSE_BRACE_TOKEN -; - -switch_block_statement_groups -: switch_block_statement_group -| switch_block_statement_groups switch_block_statement_group -; - -switch_block_statement_group -: switch_labels block_statements -; - -switch_labels -: switch_label -| switch_labels switch_label -; - -switch_label -: CASE_TOKEN constant_expression COLON_TOKEN -| DEFAULT_TOKEN COLON_TOKEN -; - -while_expression -: WHILE_TOKEN OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN -; - -while_statement -: while_expression statement -; - -while_statement_nsi -: while_expression statement_nsi -; - -do_statement_begin -: DO_TOKEN -; - -do_statement -: do_statement_begin statement WHILE_TOKEN OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN SEMICOLON_TOKEN -; - -for_statement -: for_begin SEMICOLON_TOKEN expression SEMICOLON_TOKEN for_update CLOSE_PAREN_TOKEN statement -| for_begin SEMICOLON_TOKEN SEMICOLON_TOKEN for_update CLOSE_PAREN_TOKEN statement -; - -for_statement_nsi -: for_begin SEMICOLON_TOKEN expression SEMICOLON_TOKEN for_update CLOSE_PAREN_TOKEN statement_nsi -| for_begin SEMICOLON_TOKEN SEMICOLON_TOKEN for_update CLOSE_PAREN_TOKEN statement_nsi -; - -for_header -: FOR_TOKEN OPEN_PAREN_TOKEN -; - -for_begin -: for_header for_init -; - -for_init -: -| statement_expression_list -| local_variable_declaration -; - -for_update -: -| statement_expression_list -; - -statement_expression_list -: statement_expression -| statement_expression_list COMMA_TOKEN statement_expression -; - -break_statement -: BREAK_TOKEN SEMICOLON_TOKEN -| BREAK_TOKEN identifier SEMICOLON_TOKEN -; - -continue_statement -: CONTINUE_TOKEN SEMICOLON_TOKEN -| CONTINUE_TOKEN identifier SEMICOLON_TOKEN -; - -return_statement -: RETURN_TOKEN SEMICOLON_TOKEN -| RETURN_TOKEN expression SEMICOLON_TOKEN -; - -throw_statement -: THROW_TOKEN expression SEMICOLON_TOKEN -; - -assert_statement -: ASSERT_TOKEN expression COLON_TOKEN expression SEMICOLON_TOKEN -| ASSERT_TOKEN expression SEMICOLON_TOKEN -| ASSERT_TOKEN error -| ASSERT_TOKEN expression error -; -synchronized_statement -: synchronized OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN block -| synchronized OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN error -; - -synchronized -: MODIFIER_TOKEN -; - -try_statement -: TRY_TOKEN block catches -| TRY_TOKEN block finally -| TRY_TOKEN block catches finally -; - -catches -: catch_clause -| catches catch_clause -; - -catch_clause -: CATCH_TOKEN OPEN_PAREN_TOKEN formal_parameter CLOSE_PAREN_TOKEN block -; - -finally -: FINALLY_TOKEN block -; - -primary -: primary_no_new_array -| array_creation_expression -; - -primary_no_new_array -: literal -| THIS_TOKEN -| OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN -| class_instance_creation_expression -| field_access -| method_invocation -| array_access -| type_literals -| name PERIOD_TOKEN THIS_TOKEN -; - -type_literals -: name PERIOD_TOKEN CLASS_TOKEN -| array_type PERIOD_TOKEN CLASS_TOKEN -| primitive_type PERIOD_TOKEN CLASS_TOKEN -| VOID_TOKEN PERIOD_TOKEN CLASS_TOKEN -; - -class_instance_creation_expression -: NEW_TOKEN class_type OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN -| NEW_TOKEN class_type OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN -| anonymous_class_creation -| something_dot_new identifier OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN -| something_dot_new identifier OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN class_body -| something_dot_new identifier OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN -| something_dot_new identifier OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN class_body -; - -anonymous_class_creation -: NEW_TOKEN class_type OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN class_body -| NEW_TOKEN class_type OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN class_body -; - -something_dot_new -: name PERIOD_TOKEN NEW_TOKEN -| primary PERIOD_TOKEN NEW_TOKEN -; - -argument_list -: expression -| argument_list COMMA_TOKEN expression -| argument_list COMMA_TOKEN error -; - -array_creation_expression -: NEW_TOKEN primitive_type dim_exprs -| NEW_TOKEN class_or_interface_type dim_exprs -| NEW_TOKEN primitive_type dim_exprs dims -| NEW_TOKEN class_or_interface_type dim_exprs dims -| NEW_TOKEN class_or_interface_type dims array_initializer -| NEW_TOKEN primitive_type dims array_initializer -; - -dim_exprs -: dim_expr -| dim_exprs dim_expr -; - -dim_expr -: OPEN_BRACKET_TOKEN expression CLOSE_BRACKET_TOKEN -; - -dims -: OPEN_BRACKET_TOKEN CLOSE_BRACKET_TOKEN -| dims OPEN_BRACKET_TOKEN CLOSE_BRACKET_TOKEN -; - -field_access -: primary PERIOD_TOKEN identifier -| SUPER_TOKEN PERIOD_TOKEN identifier -; - -method_invocation -: name OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN -| name OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN -| primary PERIOD_TOKEN identifier OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN -| primary PERIOD_TOKEN identifier OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN -| SUPER_TOKEN PERIOD_TOKEN identifier OPEN_PAREN_TOKEN CLOSE_PAREN_TOKEN -| SUPER_TOKEN PERIOD_TOKEN identifier OPEN_PAREN_TOKEN argument_list CLOSE_PAREN_TOKEN -; - -array_access -: name OPEN_BRACKET_TOKEN expression CLOSE_BRACKET_TOKEN -| primary_no_new_array OPEN_BRACKET_TOKEN expression CLOSE_BRACKET_TOKEN -; - -postfix_expression -: primary -| name -| post_increment_expression -| post_decrement_expression -; - -post_increment_expression -: postfix_expression INC_TOKEN -; - -post_decrement_expression -: postfix_expression DEC_TOKEN -; - -trap_overflow_corner_case -: pre_increment_expression -| pre_decrement_expression -| PLUS_TOKEN unary_expression -| unary_expression_not_plus_minus -; - -unary_expression -: trap_overflow_corner_case -| MINUS_TOKEN trap_overflow_corner_case -| MINUS_TOKEN error -; - -pre_increment_expression -: INC_TOKEN unary_expression -; - -pre_decrement_expression -: DEC_TOKEN unary_expression -; - -unary_expression_not_plus_minus -: postfix_expression -| TILDE_TOKEN unary_expression -| NOT_TOKEN unary_expression -| cast_expression -; - -cast_expression -: OPEN_PAREN_TOKEN primitive_type dims CLOSE_PAREN_TOKEN unary_expression -| OPEN_PAREN_TOKEN primitive_type CLOSE_PAREN_TOKEN unary_expression -| OPEN_PAREN_TOKEN expression CLOSE_PAREN_TOKEN unary_expression_not_plus_minus -| OPEN_PAREN_TOKEN name dims CLOSE_PAREN_TOKEN unary_expression_not_plus_minus -; - -multiplicative_expression -: unary_expression -| multiplicative_expression MUL_TOKEN unary_expression -| multiplicative_expression DIV_TOKEN unary_expression -| multiplicative_expression MOD_TOKEN unary_expression -; - -additive_expression -: multiplicative_expression -| additive_expression PLUS_TOKEN multiplicative_expression -| additive_expression MINUS_TOKEN multiplicative_expression -; - -shift_expression -: additive_expression -| shift_expression SHL_TOKEN additive_expression -| shift_expression SHR_TOKEN additive_expression -| shift_expression SAR_TOKEN additive_expression -; - -relational_expression -: shift_expression -| relational_expression LESS_TOKEN shift_expression -| relational_expression GREATER_TOKEN shift_expression -| relational_expression LE_TOKEN shift_expression -| relational_expression GE_TOKEN shift_expression -| relational_expression INSTANCEOF_TOKEN reference_type -; - -equality_expression -: relational_expression -| equality_expression EQ_TOKEN relational_expression -| equality_expression NE_OP_TOKEN relational_expression -; - -and_expression -: equality_expression -| and_expression AND_TOKEN equality_expression -; - -exclusive_or_expression -: and_expression -| exclusive_or_expression XOR_TOKEN and_expression -; - -inclusive_or_expression -: exclusive_or_expression -| inclusive_or_expression OR_TOKEN exclusive_or_expression -; - -conditional_and_expression -: inclusive_or_expression -| conditional_and_expression LOGICAL_AND_TOKEN inclusive_or_expression -; - -conditional_or_expression -: conditional_and_expression -| conditional_or_expression LOGICAL_OR_TOKEN conditional_and_expression -; - -conditional_expression -: conditional_or_expression -| conditional_or_expression CONDITIONAL_TOKEN expression COLON_TOKEN conditional_expression -; - -assignment_expression -: conditional_expression -| assignment -; - -assignment -: left_hand_side assignment_operator assignment_expression -; - -left_hand_side -: name -| field_access -| array_access -; - -assignment_operator -: ASSIGNS_TOKEN -| ADD_ASSIGN_TOKEN -| SUB_ASSIGN_TOKEN -| MUL_ASSIGN_TOKEN -| DIV_ASSIGN_TOKEN -| MOD_ASSIGN_TOKEN -| SHL_ASSIGN_TOKEN -| SHR_ASSIGN_TOKEN -| SAR_ASSIGN_TOKEN -| AND_ASSIGN_TOKEN -| XOR_ASSIGN_TOKEN -| OR_ASSIGN_TOKEN -; - -expression -: assignment_expression -; - -constant_expression -: expression -; - -%% - - -void print_prototype(void) -{ -} - -void generate_default_constructor(char *name) -{ -} - -int yyerror(char *mesg) -{ - printf("line %d: %s before %s\n", yylineno, mesg, yytext); - exit(0); -} - diff --git a/examples/java/run.py b/examples/java/run.py deleted file mode 100755 index 94ba609..0000000 --- a/examples/java/run.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python -""" -Runs the java parser on a small java source file -""" -import sys - -import javaparser - -#src = "tst.java" - -argv = sys.argv -argc = len(argv) - -if '-v' in argv: - argv.remove('-v') - argc -= 1 - verbose = 1 -else: - verbose = 0 - -if argc == 2: - src = argv[1] -else: - src = None - -src = "I2PClient.java" - -p = javaparser.Parser(verbose=verbose) - -print "delmebld.py: running parser on HelloWorldApp.java" -res = p.run(file=src) -print "back from engine, parse tree dump follows:" -if 0: - print "------------------------------------------" - res.dump() - print "------------------------------------------" - print "end of parse tree dump" - diff --git a/examples/java/table.h b/examples/java/table.h deleted file mode 100644 index d18ba75..0000000 --- a/examples/java/table.h +++ /dev/null @@ -1,68 +0,0 @@ -//@+leo-ver=4 -//@+node:@file examples/java/table.h -//@@language c -#include "tokens.h" - -struct KeywordToken -{ - char * Keyword; - int TokenCode; - -}; - -struct KeywordToken KeywordTable [] = -{ - - {"abstract", ABSTRACT_TOKEN }, - {"boolean", BOOLEAN_TOKEN }, - {"break", BREAK_TOKEN }, - {"byte", BYTE_TOKEN }, - {"case", CASE_TOKEN }, - {"catch", CATCH_TOKEN }, - {"char", CHAR_TOKEN }, - {"class", CLASS_TOKEN }, - {"const", CONST_TOKEN }, - {"continue", CONTINUE_TOKEN }, - {"default", DEFAULT_TOKEN }, - {"do", DO_TOKEN }, - {"double", DOUBLE_TOKEN }, - {"else", ELSE_TOKEN }, - {"extends", EXTENDS_TOKEN }, - {"final", FINAL_TOKEN }, - {"finally", FINALLY_TOKEN }, - {"float", FLOAT_TOKEN }, - {"for", FOR_TOKEN }, - {"goto", GOTO_TOKEN }, - {"if", IF_TOKEN }, - {"implements", IMPLEMENTS_TOKEN }, - {"import", IMPORT_TOKEN }, - {"instanceof", INSTANCEOF_TOKEN }, - {"int", INT_TOKEN }, - {"interface", INTERFACE_TOKEN }, - {"long", LONG_TOKEN }, - {"native", NATIVE_TOKEN }, - {"new", NEW_TOKEN }, - {"null", NULL_TOKEN }, - {"package", PACKAGE_TOKEN }, - {"private", PRIVATE_TOKEN }, - {"protected", PROTECTED_TOKEN }, - {"public", PUBLIC_TOKEN }, - {"return", RETURN_TOKEN }, - {"short", SHORT_TOKEN }, - {"static", STATIC_TOKEN }, - {"strictfp", STRICTFP_TOKEN }, - {"super", SUPER_TOKEN }, - {"switch", SWITCH_TOKEN }, - {"synchronized", SYNCHRONIZED_TOKEN }, - {"this", THIS_TOKEN }, - {"throw", THROW_TOKEN }, - {"throws", THROWS_TOKEN }, - {"transient", TRANSIENT_TOKEN }, - {"try", TRY_TOKEN }, - {"void", VOID_TOKEN }, - {"volatile", VOLATILE_TOKEN }, - {"while", WHILE_TOKEN }, - {"", ID_TOKEN } -}; -//@-node:@file examples/java/table.h -//@-leo diff --git a/examples/template/README b/examples/template/README deleted file mode 100644 index b8654e1..0000000 --- a/examples/template/README +++ /dev/null @@ -1,15 +0,0 @@ -The 'template.py' file in this directory is a working -but very minimal parser. - -You can execute the file as is - the first time you -do so, it will automatically build and load a parser -engine library. - -This parser does nothing except read a bunch of words -from its input stream, where a word is an alphanumeric -string. - -Feel free to copy this file somewhere, and tinker away -to your heart's content. This may be for you the fastest -way to get a comfortable 'feel' of how PyBison does things. - diff --git a/examples/template/template.py b/examples/template/template.py deleted file mode 100755 index 9d87973..0000000 --- a/examples/template/template.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python -""" -Template of a pyBison parser file - -This is actually a working parser, but so -minimal as to be totally useless. Refer to the -'.on_someTarget()' rule handler in the class -'Parser' below for more info. - -You can do much worse than to copy this file -somewhere, and tinker away to your heart's content. -""" - -import sys - -from bison import BisonParser, BisonNode - -# ------------------------------------------- -# Our own custom base class for all objects -# which get inserted into the parse tree -# ------------------------------------------- - -class ParseNode(BisonNode): - """ - This is the base class from which all your - parse nodes are derived. - Add methods to this class as you need them - """ - def __init__(self, **kw): - BisonNode.__init__(self, **kw) - - def __str__(self): - """Customise as needed""" - return "<%s instance at 0x%x>" % (self.__class__.__name__, hash(self)) - - def __repr__(self): - """Customise as needed""" - return str(self) - - def dump(self, indent=0): - """ - Dump out human-readable, indented parse tree - Customise as needed - here, or in the node-specific subclasses - """ - BisonNode.dump(self, indent) # alter as needed - -# ---------------------------------------------------- -# Now, we need to define a node class for each parse -# target. (This is completely optional, but it can -# turn out to be a PITA if you don't). -# ---------------------------------------------------- - -class someTarget_Node(ParseNode): - """ - Holds a "someTarget" parse target and its components. - """ - def __init__(self, **kw): - ParseNode.__init__(self, **kw) - - def dump(self, indent=0): - ParseNode.dump(self, indent) - - -# ---------------------------------------------------- -# Now, at last, we get to the main Parser class itself -# ---------------------------------------------------- - -class Parser(BisonParser): - """ - Describe your parser here - """ - - # basename of binary parser engine dynamic lib - bisonEngineLibName = "template-engine" - - # ---------------------------------------------------------------- - # lexer tokens - these must match those in your lex script (below) - # ---------------------------------------------------------------- - tokens = [ 'WORD' ] - - # ------------------------------ - # precedences - # ------------------------------ - precedences = ( - #('left', ('aTarget1', 'aTarget2',..., 'aTargetn')), - #('right', ('another_target1', 'another_target2',..., 'another_targetn')), - ) - - # --------------------------------------------------------------- - # These methods are the python handlers for the bison targets. - # (which get called by the bison code each time the corresponding - # parse target is unambiguously reached) - # - # WARNING - don't touch the method docstrings unless you know what - # you are doing - they are in bison rule syntax, and are passed - # verbatim to bison to build the parser engine library. - # --------------------------------------------------------------- - - # Declare the start target here (by name) - start = "someTarget" - - def on_someTarget(self, target, option, names, values): - """ - someTarget - : - | someTarget WORD - """ - print "on_someTarget: %s %s" % (option, repr(values)) - node = someTarget_Node(target=target, - option=option, - names=names, - values=values) - return node - - # ----------------------------------------- - # raw lex script, verbatim here - # - # the script used here in this template is one which - # breaks up the input stream into strings of - # alphanumeric 'words' and discards everything else - # ----------------------------------------- - lexscript = r""" -%{ -#include -#include -#include "Python.h" -#define YYSTYPE void * -#include "tokens.h" -int yylineno = 0; -int yywrap() { return(1); } -extern void *py_parser; -extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size); -#define returntoken(tok) yylval = PyString_FromString(strdup(yytext)); return (tok); -#define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); } -%} - -%% - -[a-zA-Z0-9\.]+ { returntoken(WORD); } -[ \t\n] { /* ignore spaces/tabs/newlines */ } -. { printf("unknown char %c ignored\n", yytext[0]); /* ignore bad chars */} - -%% -//yywrap() { return(1); } - - """ - # ----------------------------------------- - # end raw lex script - # ----------------------------------------- - -# -------------------------------------------------- -# global functions to add in unit-testing our parser -# (same as what gets generated by bison2py) -# -------------------------------------------------- - -def usage(): - print "%s: PyBison template parser" % sys.argv[0] - print "Usage: %s [-k] [-v] [-d] [filename]" % sys.argv[0] - print " -k Keep temporary files used in building parse engine lib" - print " -v Enable verbose messages while parser is running" - print " -d Enable garrulous debug messages from parser engine" - print " filename path of a file to parse, defaults to stdin" - -def main(*args): - """ - Unit-testing func - """ - - keepfiles = 0 - verbose = 0 - debug = 0 - filename = None - - for s in ["-h", "-help", "--h", "--help", "-?"]: - if s in args: - usage() - sys.exit(0) - - if len(args) > 0: - if "-k" in args: - keepfiles = 1 - args.remove("-k") - if "-v" in args: - verbose = 1 - args.remove("-v") - if "-d" in args: - debug = 1 - args.remove("-d") - if len(args) > 0: - filename = args[0] - - p = Parser(verbose=verbose, keepfiles=keepfiles) - - if filename == None: - print "(Reading from standard input - please type stuff)" - - tree = p.run(file=filename, debug=debug) - return tree - -if __name__ == "__main__": - main(*(sys.argv[1:])) -