Better operator precedence

Published 2021-10-09

In every explanation of parsing that I've seen, operator precedence is handled by assigning a precedence number to each operator. Operators with higher precedence numbers bind tighter eg a + b * c would parse as a + (b * c) because * has a higher precedence than +. We want this because it reduces the number of parentheses needed, and we can get away with it because the precedence rules of arithmetic are familiar to most people.

But what about expressions like a + b | c or a * b | c? (Where | is bitwise-or). It's not obvious to most people what precedence to expect. But in a system that handles precedence by assigning numbers to every operator, one of those expressions must parse without producing an error because the precedence of | can't be equal to the precedences of both + and *.

I think this is confusing.


An entirely unrelated problem: In Pratt parsers mixing left-associative and right-associative operators is possible (eg, eg) but I find the existing solutions a little tricky to work with.


There is a technique that solves both problems! Rather than mapping operators to a precedence number and comparing the numbers, we can compare operators directly.

const Precedence = enum {
    LeftBindsTighter,
    RightBindsTighter,
    Ambiguous,
};

fn comparePrecedence(left: Operator, right: Operator) Precedence {
    ...
}

Given an expression like a left_op b right_op c, we call comparePrecedence(left_op, right_op). If the result is...

Associativity then is easy. If comparePrecedence(op, op) returns...

There are a few conditions that comparePrecedence needs to satisfy to avoid being confusing. Since the number of operators is finite, we can test these exhaustively:

for (ops) |a| {
    for (ops) |b| {
        const ab = comparePrecedence(a, b);
        const ba = comparePrecedence(b, a);
        
        // symmetric (except when a == b)
        if (ab == .Ambiguous) try expectEqual(ba, .Ambiguous);
        if (a != b and ab == .LeftBindsTighter) try expectEqual(ba, .RightBindsTighter);
        if (a != b and ab == .RightBindsTighter) try expectEqual(ba, .LeftBindsTighter);
        
        for (ops) |c| {
            const bc = comparePrecedence(b, c);
            const ac = comparePrecedence(a, b);
            
            // transitive
            if (ab == .LeftBindsTighter and bc == .LeftBindsTighter) try expectEqual(ac, .LeftBindsTighter);
            if (ab == .RightBindsTighter and bc == .RightBindsTighter) try expectEqual(ac, .RightBindsTighter);
        }
    }
}

For languages with only a few operators it's easy to just write the comparePrecedence function by hand. For languages with many operators it might be easier to start with an N*N matrix filled in with Ambiguous, set the few comparisons we care about and then take the symmetric and transitive closure.


Here is a full example for a very simple arithmetic language:

// > zig version
// 0.8.0
// > zig test ./precedence.zig
// All 2 tests passed.

const std = @import("std");
const panic = std.debug.panic;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;

var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = &gpa.allocator;

const Operator = enum {
    Add,
    Multiply,
    BitwiseOr,
};

const Precedence = enum {
    LeftBindsTighter,
    RightBindsTighter,
    Ambiguous,
};

fn comparePrecedence(left: Operator, right: Operator) Precedence {
    switch (left) {
        .Add => switch (right) {
            .Add => return .LeftBindsTighter,
            .Multiply => return .RightBindsTighter,
            .BitwiseOr => return .Ambiguous,
        },
        .Multiply => switch (right) {
            .Add, .Multiply => return .LeftBindsTighter,
            .BitwiseOr => return .Ambiguous,
        },
        .BitwiseOr => switch (right) {
            .Add, .Multiply => return .Ambiguous,
            .BitwiseOr => return .RightBindsTighter,
        },
    }
}

test "precedence" {
    // list all operators
    const info = @typeInfo(Operator).Enum;
    var ops = try allocator.alloc(Operator, info.fields.len);
    inline for (info.fields) |field, i|
        ops[i] = @intToEnum(Operator, field.value);

    for (ops) |a| {
        for (ops) |b| {
            const ab = comparePrecedence(a, b);
            const ba = comparePrecedence(b, a);

            // symmetric (except when a == b)
            if (ab == .Ambiguous) try expectEqual(ba, .Ambiguous);
            if (a != b and ab == .LeftBindsTighter) try expectEqual(ba, .RightBindsTighter);
            if (a != b and ab == .RightBindsTighter) try expectEqual(ba, .LeftBindsTighter);

            for (ops) |c| {
                const bc = comparePrecedence(b, c);
                const ac = comparePrecedence(a, b);

                // transitive
                if (ab == .LeftBindsTighter and bc == .LeftBindsTighter) try expectEqual(ac, .LeftBindsTighter);
                if (ab == .RightBindsTighter and bc == .RightBindsTighter) try expectEqual(ac, .RightBindsTighter);
            }
        }
    }
}

const Token = enum {
    Number,
    Add,
    Multiply,
    BitwiseOr,
    OpenParen,
    CloseParen,
};

const Expr = union(enum) {
    Number,
    BinaryOp: struct {
        op: Operator,
        left: *const Expr,
        right: *const Expr,
    },

    fn box(self: Expr) !*const Expr {
        const box_expr = try allocator.create(Expr);
        box_expr.* = self;
        return box_expr;
    }

    fn isEqualTo(self: Expr, other: Expr) bool {
        return switch (self) {
            .Number => other == .Number,
            .BinaryOp => other == .BinaryOp and
                self.BinaryOp.op == other.BinaryOp.op and
                self.BinaryOp.left.isEqualTo(other.BinaryOp.left.*) and
                self.BinaryOp.right.isEqualTo(other.BinaryOp.right.*),
        };
    }
};

const Parser = struct {
    tokens: []const Token,
    position: usize = 0,

    const Error = error{
        ExpectedNumber,
        ExpectedCloseParen,
        ExpectedStartOfExpression,
        ExpectedEnd,
        AmbiguousPrecedence,
        OutOfMemory,
    };

    fn nextToken(self: *Parser) ?Token {
        if (self.position >= self.tokens.len) return null;
        const token = self.tokens[self.position];
        self.position += 1;
        return token;
    }

    fn parseExprInner(self: *Parser) Error!Expr {
        const token = self.nextToken() orelse return error.ExpectedStartOfExpression;
        switch (token) {
            .Number => return Expr.Number,
            .OpenParen => {
                const expr = try self.parseExprOuter(null);
                const close_token = self.nextToken() orelse return error.ExpectedCloseParen;
                if (close_token != .CloseParen) return error.ExpectedCloseParen;
                return expr;
            },
            else => return error.ExpectedNumber,
        }
    }

    fn parseExprOuter(self: *Parser, prev_op_o: ?Operator) Error!Expr {
        var left = try self.parseExprInner();
        while (true) {
            const start = self.position;
            const token = self.nextToken() orelse return left;
            const op = switch (token) {
                .Add => Operator.Add,
                .Multiply => Operator.Multiply,
                .BitwiseOr => Operator.BitwiseOr,
                else => {
                    self.position = start;
                    return left;
                },
            };
            const precedence = if (prev_op_o) |prev_op|
                comparePrecedence(prev_op, op)
            else
                .RightBindsTighter;
            switch (precedence) {
                .LeftBindsTighter => {
                    self.position = start;
                    return left;
                },
                .RightBindsTighter => {
                    const right = try self.parseExprOuter(op);
                    const new_left = Expr{ .BinaryOp = .{
                        .op = op,
                        .left = try left.box(),
                        .right = try right.box(),
                    } };
                    left = new_left;
                },
                .Ambiguous => return error.AmbiguousPrecedence,
            }
        }
    }
};

fn parse(tokens: []const Token) Parser.Error!Expr {
    var parser = Parser{ .tokens = tokens };
    const expr = try parser.parseExprOuter(null);
    if (parser.position < tokens.len) return error.ExpectedEnd;
    return expr;
}

test "parse" {
    // Multiply has higher precedence than Add
    try expect(Expr.isEqualTo(
        try parse(&.{ .Number, .Add, .Number, .Multiply, .Number }),
        try parse(&.{ .Number, .Add, .OpenParen, .Number, .Multiply, .Number, .CloseParen }),
    ));

    // Parens trump precedence
    try expect(Expr.isEqualTo(
        try parse(&.{ .OpenParen, .Number, .Add, .Number, .CloseParen, .Multiply, .Number }),
        Expr{ .BinaryOp = .{
            .op = .Multiply,
            .left = try Expr.box(.{ .BinaryOp = .{
                .op = .Add,
                .left = try Expr.box(.Number),
                .right = try Expr.box(.Number),
            } }),
            .right = try Expr.box(.Number),
        } },
    ));

    // Add/Multiply have ambiguous precedence against BitwiseOr
    try expectEqual(
        parse(&.{ .Number, .Add, .Number, .BitwiseOr, .Number }),
        error.AmbiguousPrecedence,
    );
    try expectEqual(
        parse(&.{ .Number, .Multiply, .Number, .BitwiseOr, .Number }),
        error.AmbiguousPrecedence,
    );

    // Add is left-associative
    try expect(Expr.isEqualTo(
        try parse(&.{ .Number, .Add, .Number, .Add, .Number }),
        try parse(&.{ .OpenParen, .Number, .Add, .Number, .CloseParen, .Add, .Number }),
    ));

    // BitwiseOr is right-associative
    try expect(Expr.isEqualTo(
        try parse(&.{ .Number, .BitwiseOr, .Number, .BitwiseOr, .Number }),
        try parse(&.{ .Number, .BitwiseOr, .OpenParen, .Number, .BitwiseOr, .Number, .CloseParen }),
    ));
}