In every explanation of parsing that I've seen, operator precedence is handled by assigning a precedence number to each operator. Operators with higher precedence numbers bind tighter eg a + b * c
would parse as a + (b * c)
because *
has a higher precedence than +
. We want this because it reduces the number of parentheses needed, and we can get away with it because the precedence rules of arithmetic are familiar to most people.
But what about expressions like a + b | c
or a * b | c
? (Where |
is bitwise-or). It's not obvious to most people what precedence to expect. But in a system that handles precedence by assigning numbers to every operator, one of those expressions must parse without producing an error because the precedence of |
can't be equal to the precedences of both +
and *
.
I think this is confusing.
An entirely unrelated problem: In Pratt parsers mixing left-associative and right-associative operators is possible (eg, eg) but I find the existing solutions a little tricky to work with.
There is a technique that solves both problems! Rather than mapping operators to a precedence number and comparing the numbers, we can compare operators directly.
const Precedence = enum {
LeftBindsTighter,
RightBindsTighter,
Ambiguous,
};
fn comparePrecedence(left: Operator, right: Operator) Precedence {
...
}
Given an expression like a left_op b right_op c
, we call comparePrecedence(left_op, right_op)
. If the result is...
- ...LeftBindsTighter, the parse is
(a left_op b) right_op c
- ...RightBindsTighter, the parse is
a left_op (b right_op c)
- ...Ambiguous, we raise a parse error asking the user to add parentheses to disambiguate
Associativity then is easy. If comparePrecedence(op, op)
returns...
- ...LeftBindsTighter, then
op
is left-associative - ...RightBindsTighter, then
op
is right-associative - ...Ambiguous, then
op
always requires parentheses
There are a few conditions that comparePrecedence
needs to satisfy to avoid being confusing. Since the number of operators is finite, we can test these exhaustively:
for (ops) |a| {
for (ops) |b| {
const ab = comparePrecedence(a, b);
const ba = comparePrecedence(b, a);
// symmetric (except when a == b)
if (ab == .Ambiguous) try expectEqual(ba, .Ambiguous);
if (a != b and ab == .LeftBindsTighter) try expectEqual(ba, .RightBindsTighter);
if (a != b and ab == .RightBindsTighter) try expectEqual(ba, .LeftBindsTighter);
for (ops) |c| {
const bc = comparePrecedence(b, c);
const ac = comparePrecedence(a, b);
// transitive
if (ab == .LeftBindsTighter and bc == .LeftBindsTighter) try expectEqual(ac, .LeftBindsTighter);
if (ab == .RightBindsTighter and bc == .RightBindsTighter) try expectEqual(ac, .RightBindsTighter);
}
}
}
For languages with only a few operators it's easy to just write the comparePrecedence
function by hand. For languages with many operators it might be easier to start with an N*N matrix filled in with Ambiguous, set the few comparisons we care about and then take the symmetric and transitive closure.
Here is a full example for a very simple arithmetic language:
// > zig version
// 0.8.0
// > zig test ./precedence.zig
// All 2 tests passed.
const std = @import("std");
const panic = std.debug.panic;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = &gpa.allocator;
const Operator = enum {
Add,
Multiply,
BitwiseOr,
};
const Precedence = enum {
LeftBindsTighter,
RightBindsTighter,
Ambiguous,
};
fn comparePrecedence(left: Operator, right: Operator) Precedence {
switch (left) {
.Add => switch (right) {
.Add => return .LeftBindsTighter,
.Multiply => return .RightBindsTighter,
.BitwiseOr => return .Ambiguous,
},
.Multiply => switch (right) {
.Add, .Multiply => return .LeftBindsTighter,
.BitwiseOr => return .Ambiguous,
},
.BitwiseOr => switch (right) {
.Add, .Multiply => return .Ambiguous,
.BitwiseOr => return .RightBindsTighter,
},
}
}
test "precedence" {
// list all operators
const info = @typeInfo(Operator).Enum;
var ops = try allocator.alloc(Operator, info.fields.len);
inline for (info.fields) |field, i|
ops[i] = @intToEnum(Operator, field.value);
for (ops) |a| {
for (ops) |b| {
const ab = comparePrecedence(a, b);
const ba = comparePrecedence(b, a);
// symmetric (except when a == b)
if (ab == .Ambiguous) try expectEqual(ba, .Ambiguous);
if (a != b and ab == .LeftBindsTighter) try expectEqual(ba, .RightBindsTighter);
if (a != b and ab == .RightBindsTighter) try expectEqual(ba, .LeftBindsTighter);
for (ops) |c| {
const bc = comparePrecedence(b, c);
const ac = comparePrecedence(a, c);
// transitive
if (ab == .LeftBindsTighter and bc == .LeftBindsTighter) try expectEqual(ac, .LeftBindsTighter);
if (ab == .RightBindsTighter and bc == .RightBindsTighter) try expectEqual(ac, .RightBindsTighter);
}
}
}
}
const Token = enum {
Number,
Add,
Multiply,
BitwiseOr,
OpenParen,
CloseParen,
};
const Expr = union(enum) {
Number,
BinaryOp: struct {
op: Operator,
left: *const Expr,
right: *const Expr,
},
fn box(self: Expr) !*const Expr {
const box_expr = try allocator.create(Expr);
box_expr.* = self;
return box_expr;
}
fn isEqualTo(self: Expr, other: Expr) bool {
return switch (self) {
.Number => other == .Number,
.BinaryOp => other == .BinaryOp and
self.BinaryOp.op == other.BinaryOp.op and
self.BinaryOp.left.isEqualTo(other.BinaryOp.left.*) and
self.BinaryOp.right.isEqualTo(other.BinaryOp.right.*),
};
}
};
const Parser = struct {
tokens: []const Token,
position: usize = 0,
const Error = error{
ExpectedNumber,
ExpectedCloseParen,
ExpectedStartOfExpression,
ExpectedEnd,
AmbiguousPrecedence,
OutOfMemory,
};
fn nextToken(self: *Parser) ?Token {
if (self.position >= self.tokens.len) return null;
const token = self.tokens[self.position];
self.position += 1;
return token;
}
fn parseExprInner(self: *Parser) Error!Expr {
const token = self.nextToken() orelse return error.ExpectedStartOfExpression;
switch (token) {
.Number => return Expr.Number,
.OpenParen => {
const expr = try self.parseExprOuter(null);
const close_token = self.nextToken() orelse return error.ExpectedCloseParen;
if (close_token != .CloseParen) return error.ExpectedCloseParen;
return expr;
},
else => return error.ExpectedNumber,
}
}
fn parseExprOuter(self: *Parser, prev_op_o: ?Operator) Error!Expr {
var left = try self.parseExprInner();
while (true) {
const start = self.position;
const token = self.nextToken() orelse return left;
const op = switch (token) {
.Add => Operator.Add,
.Multiply => Operator.Multiply,
.BitwiseOr => Operator.BitwiseOr,
else => {
self.position = start;
return left;
},
};
const precedence = if (prev_op_o) |prev_op|
comparePrecedence(prev_op, op)
else
.RightBindsTighter;
switch (precedence) {
.LeftBindsTighter => {
self.position = start;
return left;
},
.RightBindsTighter => {
const right = try self.parseExprOuter(op);
const new_left = Expr{ .BinaryOp = .{
.op = op,
.left = try left.box(),
.right = try right.box(),
} };
left = new_left;
},
.Ambiguous => return error.AmbiguousPrecedence,
}
}
}
};
fn parse(tokens: []const Token) Parser.Error!Expr {
var parser = Parser{ .tokens = tokens };
const expr = try parser.parseExprOuter(null);
if (parser.position < tokens.len) return error.ExpectedEnd;
return expr;
}
test "parse" {
// Multiply has higher precedence than Add
try expect(Expr.isEqualTo(
try parse(&.{ .Number, .Add, .Number, .Multiply, .Number }),
try parse(&.{ .Number, .Add, .OpenParen, .Number, .Multiply, .Number, .CloseParen }),
));
// Parens trump precedence
try expect(Expr.isEqualTo(
try parse(&.{ .OpenParen, .Number, .Add, .Number, .CloseParen, .Multiply, .Number }),
Expr{ .BinaryOp = .{
.op = .Multiply,
.left = try Expr.box(.{ .BinaryOp = .{
.op = .Add,
.left = try Expr.box(.Number),
.right = try Expr.box(.Number),
} }),
.right = try Expr.box(.Number),
} },
));
// Add/Multiply have ambiguous precedence against BitwiseOr
try expectEqual(
parse(&.{ .Number, .Add, .Number, .BitwiseOr, .Number }),
error.AmbiguousPrecedence,
);
try expectEqual(
parse(&.{ .Number, .Multiply, .Number, .BitwiseOr, .Number }),
error.AmbiguousPrecedence,
);
// Add is left-associative
try expect(Expr.isEqualTo(
try parse(&.{ .Number, .Add, .Number, .Add, .Number }),
try parse(&.{ .OpenParen, .Number, .Add, .Number, .CloseParen, .Add, .Number }),
));
// BitwiseOr is right-associative
try expect(Expr.isEqualTo(
try parse(&.{ .Number, .BitwiseOr, .Number, .BitwiseOr, .Number }),
try parse(&.{ .Number, .BitwiseOr, .OpenParen, .Number, .BitwiseOr, .Number, .CloseParen }),
));
}