Comptime Queries - Part 1
Recently I have been working on a new project and decided to use Zig, a new language with better syntax than anything I have seen before, and a great philosophy to match.
The main thing Zig is famous for is its comptime system, which allows users to run arbitrary code at compile time, enabling insane metaprogramming capabilities.
The project I'm working on requires me to store values in a DB (duh). As I was figuring out how to use sqlite3 with Zig, I had a weird idea: can I generate queries at compile time using struct reflection (i.e., look at the fields and types of a struct and create queries based on them)?
All the code here was compiled and tested in Zig 0.16.0-dev.2984+cb7d2b056
Let's start from the basics and see how we can generate table creation queries using structs.
Consider a simple struct Person (this will later become the name of our table), with fields: id (integer, u64), name (string), and age (integer, u8).
/// Example row type; its fields are what the reflection code below walks over.
const Person = struct {
/// Unsigned 64-bit integer identifier.
id: u64,
/// Name as a read-only byte slice (Zig's usual string representation).
name: []const u8,
/// Age as an unsigned 8-bit integer.
age: u8,
};
Now, how do we get the fields from the struct?
Zig has a nice built-in function called @typeInfo which provides type reflection.
When you call @typeInfo on a type, it returns a Type union that contains details about the type you provided. In our case, we can access the .@"struct" member of Type to access the fields of Person.
// main.zig
const std = @import("std");
/// Struct inspected via `@typeInfo` in `main`; the declaration order of the
/// fields is the order in which their names are printed.
const Person = struct {
/// Unsigned 64-bit integer identifier.
id: u64,
/// Name as a read-only byte slice.
name: []const u8,
/// Age as an unsigned 8-bit integer.
age: u8,
};
/// Entry point: prints the field names of `Person`, each followed by a space.
pub fn main(init: std.process.Init) !void {
    _ = init;

    // Fold the reflected field names into one string at compile time, so that
    // only a single runtime print is needed.
    const names: []const u8 = comptime joined: {
        const person_fields = @typeInfo(Person).@"struct".fields;
        var acc: []const u8 = "";
        var i: usize = 0;
        while (i < person_fields.len) : (i += 1) {
            acc = acc ++ person_fields[i].name ++ " ";
        }
        break :joined acc;
    };

    std.debug.print("{s}\n", .{names});
}
Here I have concatenated the field names into a single string, since that is much easier than printing each one at comptime.
# zig run main.zig
id name age
Now let's make a simple query generator with the following rules:
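- Any integer will be treated as INTEGER
- Any pointer to u8 (or slice of u8) will be treated as TXT (we are generating SQLite SQL queries). Note that SQLite's own name for this type is TEXT: a column declared as TXT does not match SQLite's text-affinity rule and ends up with NUMERIC affinity, so use TEXT in a real schema.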
/// Entry point: builds the `CREATE TABLE` statement for `Person` at compile
/// time and prints it.
pub fn main(init: std.process.Init) !void {
    _ = init;

    const create_sql: []const u8 = comptime build: {
        const columns = @typeInfo(Person).@"struct".fields;

        // The table name is still hard-coded to 'Person' at this stage.
        var sql: []const u8 = "CREATE TABLE IF NOT EXISTS Person (";

        for (columns, 0..) |column, i| {
            // Map the Zig field type to a column type. Integers of any width
            // become INTEGER; a pointer (which includes slices such as
            // []const u8) becomes TXT only when it points at u8. Everything
            // else contributes no type at all.
            const column_type: []const u8 = switch (@typeInfo(column.type)) {
                .int => "INTEGER",
                .pointer => |ptr| if (ptr.child == u8) "TXT" else "",
                else => "",
            };

            // Every column except the last is followed by a separator; a
            // trailing comma would break the SQL syntax.
            const separator: []const u8 = if (i + 1 < columns.len) ", " else "";

            sql = sql ++ column.name ++ " " ++ column_type ++ separator;
        }

        // Close the bracket
        break :build sql ++ ")";
    };

    std.debug.print("{s}\n", .{create_sql});
}
Running this gives us:
# zig run main.zig
CREATE TABLE IF NOT EXISTS Person (id INTEGER, name TXT, age INTEGER)
We are still hard-coding the name of the struct. To get the name of a type at comptime, we need to call @typeName.
One issue with @typeName is that the returned name looks like this:
main.Person
We only want the last part - "Person". We can extract it like this:
// Illustrative fragment, not a complete program: `type` stands in for the
// struct type being inspected.
table_name = @typeName(type)
// Find the last dot (the `.?` unwrap assumes there is one) and slice from the
// character right after it, dropping a prefix such as "main."
table_name[std.mem.findLast(u8, table_name, ".").? + 1 ..]
Putting it all together and factoring query generation into a function, we get something like this:
const std = @import("std");
/// Row type passed to `generateCreateQuery`; the last component of its type
/// name is used as the table name and each field becomes a column.
const Person = struct {
/// Unsigned 64-bit integer; mapped to an INTEGER column.
id: u64,
/// Read-only byte slice; mapped to a TXT column.
name: []const u8,
/// Unsigned 8-bit integer; mapped to an INTEGER column.
age: u8,
};
/// Builds a SQLite `CREATE TABLE IF NOT EXISTS` statement for a struct,
/// entirely at compile time.
///
/// `t` may be an instance of the struct or the struct type itself; only the
/// type is inspected, field values are never read. Passing anything that is
/// not a struct (or struct type) is a compile error.
///
/// Column mapping, in field declaration order:
///   - any integer type (u8, u16, i64, ...) -> `INTEGER`
///   - any pointer or slice whose child is `u8` -> `TXT`
///   - anything else -> the column name only, with no declared type
///
/// The table name is the last dot-separated component of `@typeName`
/// (`main.Person` -> `Person`); a name without a dot is used as-is.
pub inline fn generateCreateQuery(comptime t: anytype) []const u8 {
    return comptime blk: {
        // Accept both `generateCreateQuery(Person)` and `generateCreateQuery(p1)`.
        const T: type = if (@TypeOf(t) == type) t else @TypeOf(t);

        const fields = switch (@typeInfo(T)) {
            .@"struct" => |info| info.fields,
            else => @compileError("generateCreateQuery expects a struct, found " ++ @typeName(T)),
        };

        // Strip the namespace prefix. Capturing the optional instead of
        // unwrapping with `.?` keeps dot-less type names working.
        const full_name = @typeName(T);
        const table_name: []const u8 = if (std.mem.findLast(u8, full_name, ".")) |dot|
            full_name[dot + 1 ..]
        else
            full_name;

        var query_string: []const u8 = "CREATE TABLE IF NOT EXISTS " ++ table_name ++ " (";

        for (fields, 0..) |field, idx| {
            // field.type is the raw type of the field; switch on its info.
            const column_type: []const u8 = switch (@typeInfo(field.type)) {
                .int => "INTEGER",
                // []const u8 is a pointer whose child type is u8
                .pointer => |p| if (p.child == u8) "TXT" else "",
                else => "",
            };
            query_string = query_string ++ field.name ++ " " ++ column_type;

            // Don't add a comma after the last field — it would break SQL syntax
            if (idx != fields.len - 1) {
                query_string = query_string ++ ", ";
            }
        }

        // Close the bracket
        break :blk query_string ++ ")";
    };
}
/// Entry point: prints the `CREATE TABLE` statement derived from a `Person`
/// value.
pub fn main(init: std.process.Init) !void {
    _ = init;

    // The generator takes a comptime argument, so the sample is built purely
    // from literals.
    const sample = Person{ .id = 10, .name = "Adwaith", .age = 21 };

    std.debug.print("{s}\n", .{generateCreateQuery(sample)});
}
# zig run main.zig
CREATE TABLE IF NOT EXISTS Person (id INTEGER, name TXT, age INTEGER)
This is pretty great, but we are still missing something very important: how do we mark a field as a primary key, unique, or not nullable?
The best approach I could come up with is to create custom types - Int and Txt - and attach properties to them. Let's look at a simple example.
Let's define some basic types and a properties type to go along with them:
/// Per-column properties attached to the custom column types.
const Options = struct {
/// Whether the column is the primary key; off by default.
primary_key: bool = false,
/// Whether the column may hold NULL; on by default.
nullable: bool = true,
/// Whether the column must be unique; off by default.
unique: bool = false,
};
/// Integer column type: a value paired with its column properties.
const Int = struct {
/// The integer payload.
value: i64, // Using i64 for now
/// Column properties; every option keeps its default unless overridden.
options: Options = .{},
};
/// Text column type: a string value paired with its column properties.
const Txt = struct {
/// The text payload as a read-only byte slice.
value: []const u8,
/// Column properties; every option keeps its default unless overridden.
options: Options = .{},
};
We can update generateCreateQuery to check for our custom types and add column constraints based on Options:
/// Column constraints read by `generateCreateQuery` for each field.
const Options = struct {
/// When true, the column is emitted with PRIMARY KEY.
primary_key: bool = false,
/// When false, the column is emitted with NOT NULL.
nullable: bool = true,
/// When true, the column is emitted with UNIQUE.
unique: bool = false,
};
/// Column wrapper that `generateCreateQuery` maps to an INTEGER column.
const Int = struct {
/// The integer payload; not read by the query generator.
value: i64, // Using i64 for now
/// Constraints for this column; defaults apply when omitted.
options: Options = .{},
};
/// Column wrapper that `generateCreateQuery` maps to a TXT column.
const Txt = struct {
/// The text payload; not read by the query generator.
value: []const u8,
/// Constraints for this column; defaults apply when omitted.
options: Options = .{},
};
/// Row type built from the column wrapper types, so every field carries its
/// own `options`.
const Person = struct {
/// Integer column.
id: Int,
/// Text column.
name: Txt,
/// Integer column.
age: Int,
};
/// Builds a SQLite `CREATE TABLE IF NOT EXISTS` statement from a struct
/// instance, entirely at compile time.
///
/// Each field of `t` is expected to be one of the column wrapper types:
/// `Int` becomes `INTEGER` and `Txt` becomes `TXT`. Any other field type gets
/// no declared type, and must still expose an `options: Options` member or
/// compilation fails.
///
/// Constraints are read from each field's `options` and appended in the order
/// PRIMARY KEY, UNIQUE, NOT NULL, separated by single spaces. A column with no
/// constraints gets no trailing space.
///
/// The table name is the last dot-separated component of `@typeName`
/// (`main.Person` -> `Person`); a name without a dot is used as-is.
pub inline fn generateCreateQuery(comptime t: anytype) []const u8 {
    return comptime blk: {
        const T = @TypeOf(t);

        // Strip the namespace prefix. Capturing the optional instead of
        // unwrapping with `.?` keeps dot-less type names working.
        const full_name = @typeName(T);
        const table_name: []const u8 = if (std.mem.findLast(u8, full_name, ".")) |dot|
            full_name[dot + 1 ..]
        else
            full_name;

        var query_string: []const u8 = "CREATE TABLE IF NOT EXISTS " ++ table_name ++ " (";

        const fields = @typeInfo(T).@"struct".fields;
        for (fields, 0..) |field, idx| {
            // All our custom types are structs; pick the SQL type by identity.
            const column_type: []const u8 = switch (@typeInfo(field.type)) {
                .@"struct" => switch (field.type) {
                    Int => "INTEGER",
                    Txt => "TXT",
                    else => "",
                },
                else => "",
            };
            query_string = query_string ++ field.name ++ " " ++ column_type;

            // @field allows us to access a field by its comptime-known name
            const options: Options = @field(t, field.name).options;

            // Each constraint brings its own leading space, so combinations
            // such as PRIMARY KEY + UNIQUE stay separated.
            if (options.primary_key) {
                query_string = query_string ++ " PRIMARY KEY";
            }
            if (options.unique) {
                query_string = query_string ++ " UNIQUE";
            }
            if (!options.nullable) {
                query_string = query_string ++ " NOT NULL";
            }

            // Don't add a comma after the last field, it would break SQL syntax
            if (idx != fields.len - 1) {
                query_string = query_string ++ ", ";
            }
        }

        // Close the bracket
        break :blk query_string ++ ")";
    };
}
Update the Person instance in main with the new type values:
/// Entry point: builds a `Person` whose fields carry column options and
/// prints the generated `CREATE TABLE` statement.
pub fn main(init: std.process.Init) !void {
    _ = init;

    // One constraint per column: id is the primary key, name is unique and
    // age is not nullable.
    const sample: Person = .{
        .id = .{ .value = 10, .options = .{ .primary_key = true } },
        .name = .{ .value = "Adwaith", .options = .{ .unique = true } },
        .age = .{ .value = 23, .options = .{ .nullable = false } },
    };

    std.debug.print("{s}\n", .{generateCreateQuery(sample)});
}
And boom:
# zig run main.zig
CREATE TABLE IF NOT EXISTS Person (id INTEGER PRIMARY KEY, name TXT UNIQUE, age INTEGER NOT NULL)
We have a query.
Now, is this really comptime? One simple way to verify is to check the final binary, or the intermediate assembly that is generated. We should be able to see the complete query string embedded in the binary.
# zig build-exe src/main.zig -femit-asm -target aarch64-linux
# grep -A3 -B3 "CREATE TABLE" main.s
.type __anon_32256,@object
.section .rodata.str1.1,"aMS",@progbits,1
__anon_32256:
.asciz "CREATE TABLE IF NOT EXISTS Person (id INTEGER PRIMARY KEY, name TXT UNIQUE, age INTEGER NOT NULL)"
.size __anon_32256, 98
.type os.linux.elf_aux_maybe,@object
As you can see, the complete query is embedded directly in the binary - created at comptime.
That's it, see you in the next post. Byeee.