gwenn / lemon-rs Goto Github PK
View Code? Open in Web Editor NEW
LALR(1) parser generator for Rust based on Lemon + SQL parser
License: The Unlicense
LALR(1) parser generator for Rust based on Lemon + SQL parser
License: The Unlicense
% rg -FN 'sqlite3ErrorMsg(pParse, "' sqlite3.c
sqlite3ErrorMsg(pParse, "misuse of aliased aggregate %s", zAs);
sqlite3ErrorMsg(pParse, "misuse of aliased window function %s",zAs);
sqlite3ErrorMsg(pParse, "row value misused");
sqlite3ErrorMsg(pParse, "%s: %s.%s.%s", zErr, zDb, zTab, zCol);
sqlite3ErrorMsg(pParse, "%s: %s.%s", zErr, zTab, zCol);
sqlite3ErrorMsg(pParse, "%s: %s", zErr, zCol);
sqlite3ErrorMsg(pParse, "%s prohibited in %s", zMsg, zIn);
sqlite3ErrorMsg(pParse, "not authorized to use function: %#T",
sqlite3ErrorMsg(pParse, "misuse of %s function %#T()",zType,pExpr);
sqlite3ErrorMsg(pParse, "no such function: %#T", pExpr);
sqlite3ErrorMsg(pParse, "row value misused");
sqlite3ErrorMsg(pParse, "too many terms in ORDER BY clause");
sqlite3ErrorMsg(pParse, "%r ORDER BY term does not match any "
sqlite3ErrorMsg(pParse, "too many terms in %s BY clause", zType);
sqlite3ErrorMsg(pParse, "HAVING clause on a non-aggregate query");
sqlite3ErrorMsg(pParse, "aggregate functions are not allowed in "
sqlite3ErrorMsg(pParse, "row value misused");
sqlite3ErrorMsg(pParse, "IN(...) element has %d term%s - expected %d",
sqlite3ErrorMsg(pParse, "too many arguments on function %T", pToken);
sqlite3ErrorMsg(pParse, "unsafe use of %#T()", pExpr);
sqlite3ErrorMsg(pParse, "variable number must be between ?1 and ?%d",
sqlite3ErrorMsg(pParse, "too many SQL variables");
sqlite3ErrorMsg(pParse, "%d columns assigned %d values",
sqlite3ErrorMsg(pParse, "too many columns in %s", zObject);
sqlite3ErrorMsg(pParse, "row value misused");
sqlite3ErrorMsg(pParse, "oversized integer: %s%#T", negFlag?"-":"",pExpr);
sqlite3ErrorMsg(pParse, "hex literal too big: %s%#T",
sqlite3ErrorMsg(pParse, "generated column loop on \"%s\"",
sqlite3ErrorMsg(pParse, "generated column loop on \"%s\"",
sqlite3ErrorMsg(pParse, "misuse of aggregate: %#T()", pExpr);
sqlite3ErrorMsg(pParse, "unknown function: %#T()", pExpr);
sqlite3ErrorMsg(pParse, "%d columns assigned %d values",
sqlite3ErrorMsg(pParse, "row value misused");
sqlite3ErrorMsg(pParse, "table %s may not be altered", pTab->zName);
sqlite3ErrorMsg(pParse, "view %s may not be altered", pTab->zName);
sqlite3ErrorMsg(pParse, "Cannot add a PRIMARY KEY column");
sqlite3ErrorMsg(pParse, "virtual tables may not be altered");
sqlite3ErrorMsg(pParse, "Cannot add a column to a view");
sqlite3ErrorMsg(pParse, "cannot %s %s \"%s\"",
sqlite3ErrorMsg(pParse, "no such column: \"%T\"", pOld);
sqlite3ErrorMsg(pParse, "no such column: \"%T\"", pName);
sqlite3ErrorMsg(pParse, "cannot drop %s column: \"%s\"",
sqlite3ErrorMsg(pParse, "cannot drop column \"%s\": no other columns exist",zCol);
sqlite3ErrorMsg(pParse, "authorizer malfunction");
sqlite3ErrorMsg(pParse, "access to %z is prohibited", z);
sqlite3ErrorMsg(pParse, "not authorized");
sqlite3ErrorMsg(pParse, "user not authenticated");
sqlite3ErrorMsg(pParse, "%s: %s.%s", zMsg, zDbase, zName);
sqlite3ErrorMsg(pParse, "%s: %s", zMsg, zName);
sqlite3ErrorMsg(pParse, "corrupt database");
sqlite3ErrorMsg(pParse, "unknown database %T", pName1);
sqlite3ErrorMsg(pParse, ""); /* corruptSchema() will supply the error */
sqlite3ErrorMsg(pParse, "object name reserved for internal use: %s",
sqlite3ErrorMsg(pParse, "temporary table name must be unqualified");
sqlite3ErrorMsg(pParse, "%s %T already exists",
sqlite3ErrorMsg(pParse, "there is already an index named %s", zName);
sqlite3ErrorMsg(pParse, "cannot use RETURNING in a trigger");
sqlite3ErrorMsg(pParse, "too many columns on %s", p->zName);
sqlite3ErrorMsg(pParse, "duplicate column name: %s", z); // <--
sqlite3ErrorMsg(pParse, "default value of column [%s] is not constant",
sqlite3ErrorMsg(pParse, "cannot use DEFAULT on a generated column");
sqlite3ErrorMsg(pParse, "AUTOINCREMENT is only allowed on an "
sqlite3ErrorMsg(pParse, "virtual tables cannot use computed columns");
sqlite3ErrorMsg(pParse, "error in generated column \"%s\"",
sqlite3ErrorMsg(pParse, "generated columns not supported");
sqlite3ErrorMsg(pParse, "");
sqlite3ErrorMsg(pParse, "missing datatype for %s.%s",
sqlite3ErrorMsg(pParse, "PRIMARY KEY missing on table %s", p->zName);
sqlite3ErrorMsg(pParse, "must have at least one non-generated column");
sqlite3ErrorMsg(pParse, "parameters are not allowed in views");
sqlite3ErrorMsg(pParse, "view %s is circularly defined", pTable->zName);
if( iTable<2 ) sqlite3ErrorMsg(pParse, "corrupt schema");
sqlite3ErrorMsg(pParse, "table %s may not be dropped", pTab->zName);
sqlite3ErrorMsg(pParse, "use DROP TABLE to delete table %s", pTab->zName);
sqlite3ErrorMsg(pParse, "use DROP VIEW to delete view %s", pTab->zName);
sqlite3ErrorMsg(pParse, "foreign key on %s"
sqlite3ErrorMsg(pParse, "unsupported use of NULLS %s",
sqlite3ErrorMsg(pParse, "table %s may not be indexed", pTab->zName);
sqlite3ErrorMsg(pParse, "views may not be indexed");
sqlite3ErrorMsg(pParse, "virtual tables may not be indexed");
sqlite3ErrorMsg(pParse, "there is already a table named %s", zName);
sqlite3ErrorMsg(pParse, "index %s already exists", zName);
sqlite3ErrorMsg(pParse, "expressions prohibited in PRIMARY KEY and "
sqlite3ErrorMsg(pParse, "invalid rootpage");
sqlite3ErrorMsg(pParse, "no such index: %S", pName->a);
sqlite3ErrorMsg(pParse, "index associated with UNIQUE "
sqlite3ErrorMsg(pParse, "too many FROM clause terms, max: %d",
sqlite3ErrorMsg(pParse, "a JOIN clause is required before %s",
sqlite3ErrorMsg(pParse, "unable to open a temporary database "
sqlite3ErrorMsg(pParse, "unable to identify the object to be reindexed");
sqlite3ErrorMsg(pParse, "duplicate WITH table name: %s", zName);
sqlite3ErrorMsg(pParse, "no such collation sequence: %s", zName);
sqlite3ErrorMsg(pParse, "table %s may not be modified", pTab->zName);
sqlite3ErrorMsg(pParse, "ORDER BY without LIMIT on %s", zStmtType);
sqlite3ErrorMsg(pParse, "generated column loop on \"%s\"", pRedo->zCnName);
sqlite3ErrorMsg(pParse, "table %S has no column named %s",
sqlite3ErrorMsg(pParse, "%d values for %d columns", nColumn, pColumn->nId);
sqlite3ErrorMsg(pParse, "UPSERT not implemented for virtual table \"%s\"",
sqlite3ErrorMsg(pParse, "cannot UPSERT a view");
sqlite3ErrorMsg(pParse, "temporary storage cannot be changed "
sqlite3ErrorMsg(pParse, "%s", aFcntl[0]);
sqlite3ErrorMsg(pParse, "not a writable directory");
sqlite3ErrorMsg(pParse, "not a writable directory");
sqlite3ErrorMsg(pParse, "failed to set lock proxy file");
sqlite3ErrorMsg(pParse, "unsupported encoding: %s", zRight);
if( db->mallocFailed ) sqlite3ErrorMsg(pParse, "out of memory");
sqlite3ErrorMsg(pParse, "unknown join type: "
sqlite3ErrorMsg(pParse, "a NATURAL join may not have "
sqlite3ErrorMsg(pParse, "cannot join using column %s - column "
sqlite3ErrorMsg(pParse, "ambiguous reference to %s in USING()",
sqlite3ErrorMsg(pParse, "cannot use window functions in recursive queries");
sqlite3ErrorMsg(pParse, "recursive aggregate queries not supported");
sqlite3ErrorMsg(pParse, "all VALUES must have the same number of terms");
sqlite3ErrorMsg(pParse, "SELECTs to the left and right of %s"
sqlite3ErrorMsg(pParse, "no such index: %s", zIndexedBy, 0);
sqlite3ErrorMsg(pParse, "'%s' is not a function", pFrom->zName);
sqlite3ErrorMsg(pParse, "no such index: \"%s\"", pFrom->u1.zIndexedBy);
sqlite3ErrorMsg(pParse, "table %s has %d values for %d columns",
sqlite3ErrorMsg(pParse, "too many references to \"%s\": max 65535",
sqlite3ErrorMsg(pParse, "access to view \"%s\" prohibited",
sqlite3ErrorMsg(pParse, "unsafe use of virtual table \"%s\"",
sqlite3ErrorMsg(pParse, "no such table: %s", zTName);
sqlite3ErrorMsg(pParse, "no tables specified");
sqlite3ErrorMsg(pParse, "too many columns in result set");
sqlite3ErrorMsg(pParse, "DISTINCT aggregates must have exactly one "
sqlite3ErrorMsg(pParse, "expected %d columns for '%s' but got %d",
sqlite3ErrorMsg(pParse, "temporary trigger may not have qualified name");
sqlite3ErrorMsg(pParse, "cannot create triggers on virtual tables");
sqlite3ErrorMsg(pParse, "trigger %T already exists", pName);
sqlite3ErrorMsg(pParse, "cannot create trigger on system table");
sqlite3ErrorMsg(pParse, "cannot create %s trigger on view: %S",
sqlite3ErrorMsg(pParse, "cannot create INSTEAD OF"
sqlite3ErrorMsg(pParse, "no such trigger: %S", pName->a);
sqlite3ErrorMsg(pParse, "RETURNING may not use \"TABLE.*\" wildcards");
sqlite3ErrorMsg(pParse, "ORDER BY without LIMIT on UPDATE");
sqlite3ErrorMsg(pParse, "no such column: %s", pChanges->a[i].zEName);
sqlite3ErrorMsg(pParse, "%sON CONFLICT clause does not match any "
sqlite3ErrorMsg(pParse, "too many columns on %s", pTable->zName);
sqlite3ErrorMsg(pParse, "no such module: %s", zModule);
sqlite3ErrorMsg(pParse, "%s", zErr);
sqlite3ErrorMsg(pParse, "%s", zErr);
sqlite3ErrorMsg(pParse, "ON clause references tables to its right");
sqlite3ErrorMsg(pParse, "ON clause references tables to its right");
sqlite3ErrorMsg(pParse, "too many arguments on %s() - max %d",
sqlite3ErrorMsg(pParse, "out of memory");
sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc));
sqlite3ErrorMsg(pParse, "%s", pVtab->zErrMsg);
sqlite3ErrorMsg(pParse, "no query solution");
sqlite3ErrorMsg(pParse, "at most %d tables in a join", BMS);
sqlite3ErrorMsg(pParse, "no such window: %s", zName);
sqlite3ErrorMsg(pParse, "unsupported frame specification");
sqlite3ErrorMsg(pParse, "syntax error near \"ORDER BY\"");
sqlite3ErrorMsg(pParse, "syntax error near \"LIMIT\"");
sqlite3ErrorMsg(pParse, "too many terms in compound SELECT");
sqlite3ErrorMsg(pParse, "syntax error after column name \"%.*s\"",
sqlite3ErrorMsg(pParse, "parser stack overflow");
sqlite3ErrorMsg(pParse, "unknown table option: %.*s", yymsp[0].minor.yy0.n, yymsp[0].minor.yy0.z);
sqlite3ErrorMsg(pParse, "unknown table option: %.*s", yymsp[0].minor.yy0.n, yymsp[0].minor.yy0.z);
sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &t);
sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &TOKEN);
sqlite3ErrorMsg(pParse, "incomplete input");
sqlite3ErrorMsg(pParse, "unrecognized token: \"%T\"", &x);
The following input causes a panic: CREATE TABLE L(x)L
Not sure what to do about this one, it hits an unreachable statement in parse.rs:
fn yy222(self) -> Name {
if let YYMINORTYPE::yy222(v) = self.minor {
v
} else {
unreachable!()
}
}
I have many more inputs that trigger that as well, but they all are relatively similar to that one.
See https://github.com/gwenn/lemon-rs/blob/master/src/lexer/sql/mod.rs#L23-L24
// TODO Extract scanning stuff and move this into the parser crate
// to make possible to use the tokenizer without depending on the parser...
See here.
See translate_code
Maybe we can keep original C code untouched: access YYMINORTYPE
like a union, but then translate this pseudo-code into valid Rust code with a proc_macro which wraps the yy_reduce body:
Raw action -> Pseudo code -> Rust code
The following input crashes the parser because of an invalid UTF8 sequence
bad-utf8.txt
We need to refactor:
Result
Hi! The following legal statement in sqlite:
SELECT * FROM sqlite_master a LEFT OUTER JOIN sqlite_master b;
causes the parser to panic on unreachable code:
2023-03-07T13:53:10.036847Z DEBUG scanner: scan(line: 1, column: 41)
2023-03-07T13:53:10.036856Z DEBUG scanner: consume(1)
2023-03-07T13:53:10.036866Z DEBUG scanner: consume(4)
2023-03-07T13:53:10.036875Z DEBUG sqlite3Parser: Input 'JOIN' with pending reduce 32
2023-03-07T13:53:10.036883Z DEBUG sqlite3Parser: Reduce 32 [nm ::= JOIN_KW], pop back to state 235.
2023-03-07T13:53:10.036893Z DEBUG sqlite3Parser: ... then shift 'nm', go to state 234
2023-03-07T13:53:10.036902Z DEBUG sqlite3Parser: Shift 'JOIN', pending reduce Some(142)
2023-03-07T13:53:10.036929Z DEBUG sqlite3Parser: Return. Stack=[SELECT distinct selcollist FROM seltablist JOIN_KW nm JOIN]
2023-03-07T13:53:10.036941Z DEBUG scanner: scan(line: 1, column: 46)
2023-03-07T13:53:10.036950Z DEBUG scanner: consume(1)
2023-03-07T13:53:10.036962Z DEBUG scanner: consume(13)
2023-03-07T13:53:10.036971Z DEBUG sqlite3Parser: Input 'ID' with pending reduce 142
2023-03-07T13:53:10.036980Z DEBUG sqlite3Parser: Reduce 142 [joinop ::= JOIN_KW nm JOIN], pop back to state 278.
thread 'tokio-runtime-worker' panicked at 'internal error: entered unreachable code', /home/sarna/.cargo/registry/src/github.com-1ecc6299db9ec823/sqlite3-parser-0.6.0/src/parser/ast/mod.rs:1712:17
I am wondering how hard it would be to have spanned AST nodes. It would be very useful to retrieve the subslice of the original string that an AST node corresponds to.
This is quite common in parsers, such as
Looks super neat!
(a) from official test suite, we need to have both DDL and statement.
(b) introduce rusqlite
as a test dependency ?
(c) check Cmd#column_count
vs sqlite3_column_count
(d) check Cmd#readonly
vs sqlite3_stmt_readonly
As a passive onlooker: It's a bit strange that this project happens to contain both the lemon implementation and the SQLite-compatible SQL syntax parser in the same crate.
I feel like it might be cleaner if you separated these, so that someone who just wanted to use lemon without using it to parse SQL wouldn't have to bring that in.
Alternatively, just doing it as a feature would avoid that too (although I think it makes more sense to have lemon and SQL parsing separated).
Splitting would also fix this issue I think, since these would just be features on the generated parser/lexer: https://github.com/gwenn/lemon-rs/blob/master/Cargo.toml#L20
If you want to parse a huge SQL dump, slice-deque
makes sense.
But if you only want to parse a few queries, like here, it seems useless.
I've been fuzzing the parser and fixed a common cause of panic in tursodatabase/libsql#1209
If the fix suits you, open a PR here.
I have the following statement:
CREATE VIRTUAL TABLE t3 using fts5(a,b,c);
It gets parsed to the Cmd
:
Stmt(CreateVirtualTable { if_not_exists: false, tbl_name: QualifiedName { db_name: None, name: Name("t3"), alias: None }, module_name: Name("fts5"), args: None })
As you can see, the arguments disappeared.
If we call to_string() on it, indeed, we see:
CREATE VIRTUAL TABLE t3 USING fts5 (); # no arguments
See https://github.com/IreneKnapp/language-sqlite/blob/master/language-sqlite.cabal#L16-L17
It is well-typed in the sense that it is possible to construct an abstract syntax tree if and only if it corresponds to valid SQL.
warning: the following packages contain code that will be rejected by a future version of Rust: buf_redux v0.8.4
We must compile C lemon
and run it on grammar file(s) to generate rust parser(s).
So if we want to distribute (deploy) this crate, we must either:
build.rs
script.cc
does not support C executable. We can still use cc
with a workaround, but if we do, the crate cannot be cross-compiled anymore.
Hey @gwenn,
would it be possible to cut a release from main? We'd like to publish on crates.io, but then we rely on the upstream patch 🙂
Have a nice day
I'm trying to build an online yacc/lex (LALR(1)) grammar editor/tester to help develop/debug/document grammars. The main repository is here: https://github.com/mingodad/parsertl-playground and the online playground, with several non-trivial examples, is here: https://mingodad.github.io/parsertl-playground/playground/ .
Select a grammar/example from "Examples" select box and then click "Parse" to see a parser tree for the source in "Input source" editor.
It's based on https://github.com/BenHanson/gram_grep and https://github.com/BenHanson/lexertl14 .
Any feedback is welcome !
The grammars available so far (with varying state of correctness):
Just stumbled on this lemon-rs
repo while looking through some of your other (Go) SQLite code again. 🙂
At the bottom of the Readme, there's a mention of lemon_rust.
As a data point, the author of lemon_rust seems to have created a 2nd implementation, called Pomelo:
  https://github.com/rodrigorc/pomelo
Not sure if it's a useful source of ideas, but mentioning just in case. 🙂
To fix grammar ambiguities, SQLite tokenizer does some arbitrary lookahead without any allocation.
But we cannot do the same with our streaming version.
https://github.com/gwenn/lemon-rs/blob/master/src/lexer/sql/mod.rs#L30
See if we can prevent the scanner from consuming the input stream while performing lookahead.
Or even better, consume buffer only when advancing to the next statement. So we can do zero-copy (except buffer) parsing.
Currently we support only Read
input and the lexer has its own mutable buffer.
It would be nice to support string / bytes &str
/ &[u8]
directly.
Or memory-mapped I/O for file (mmap).
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.