Files
pandas-markdown-parser/src/document.zig
2026-04-14 21:31:44 +12:00

3070 lines
78 KiB
Zig

//! Line-oriented tokenizer: splits markdown-style source text into a flat list of `Token`s.
const std = @import("std");
/// Drops leading spaces and tabs from `white_space`.
///
/// The result is a sub-slice of the input (nothing is copied), so the number
/// of removed bytes can be recovered from the pointer or length difference.
pub fn stripWhiteSpace(white_space: []const u8) []const u8 {
    var first_kept: usize = 0;
    while (first_kept < white_space.len) : (first_kept += 1) {
        const byte = white_space[first_kept];
        if (byte != ' ' and byte != '\t') break;
    }
    return white_space[first_kept..];
}
/// Classification tag stored in `Token.type`.
pub const TokenType = enum {
// macros
// NOTE(review): `inbulit` looks like a misspelling of `inbuilt`; renaming it
// would change the public enum, so it is left as is.
inbulit,
macro,
macro_arg,
arg_sep,
replace,
unknown, // this is the default token type and should never appear
// utils
bracket_close,
bracket_open,
raw_text,
newline,
comment,
// in text
heading,
text,
inline_style,
unordered_list,
ordered_list,
embed,
link,
table_row_sep,
table_sep,
quote,
code_block,
tickbox,
rule,
footnote,
footnote_def,
sup,
sub,
};
/// A single lexical unit: a slice of text plus the kind of thing it is.
pub const Token = struct {
    /// Text carried by the token; empty by default.
    data: []const u8 = "",
    /// Kind of token; stays `.unknown` until something sets it.
    type: TokenType = .unknown,
    // this is to allow the ast generation to make sure that things are
    // within the right subtrees
    padding: usize = 0,

    /// Writes a multi-line debug rendering of the token to `writer`.
    ///
    /// A `.newline` token prints its data as the two characters `\n` instead
    /// of the stored `data`, so the rendering stays on predictable lines.
    pub fn format(self: Token, writer: *std.io.Writer) !void {
        if (self.type == .newline) {
            return writer.print(
                ".{{\n .data = \"\\n\"\n .type = {}\n}}",
                .{self.type},
            );
        }
        return writer.print(
            ".{{\n .data = \"{s}\"\n .type = {}\n}}",
            .{ self.data, self.type },
        );
    }
};
/// Result of a successful line parser: the token it recognised and how far
/// into the line the caller should advance before parsing continues.
pub const LineParserReturn = struct {
// the new updated pos of the token after its been parsed
// (a byte offset into the line that was handed to the parser)
pointer_offset: usize = 0,
// the token itself
token: Token = .{},
};
/// A line parser inspects `line` and, when it recognises its construct,
/// returns the token together with the offset at which the rest of the line
/// starts. It returns null when the line does not match.
pub const LineParser = *const fn (line: []const u8) ?LineParserReturn;
/// Multi-line parsers take the line iterator and the token list, and append
/// the tokens they produce to the list themselves.
///
/// `alloc` is the allocator passed along for appending to `list`.
pub const MultiLineParser = *const fn (
lines: *std.mem.SplitIterator(u8, .scalar),
alloc: std.mem.Allocator,
list: *std.ArrayList(Token),
) anyerror!void;
/// Marker that is repeated to open a heading.
const heading_id = "#";
/// Deepest heading level that is recognised.
const heading_depth: comptime_int = 6;

/// Recognises a heading marker at the very start of `line`.
///
/// A heading is one to `heading_depth` repetitions of `heading_id` followed by
/// either the end of the line (a blank heading) or a single space/tab.
/// Returns null for anything else, including a marker glued to other text
/// such as `#word` or `#[label](path)`; the previous version skipped the byte
/// after the marker unconditionally and silently dropped that character.
///
/// On success the token data is the marker itself and `pointer_offset` points
/// just past the marker and its separator.
pub fn headingParser(line: []const u8) ?LineParserReturn {
    // Try the longest marker first so "###" is not reported as "#".
    const heading_end_index: usize = blk: {
        comptime var i = heading_depth;
        inline while (i > 0) : (i -= 1) {
            if (std.mem.startsWith(u8, line, heading_id ** i)) break :blk i * heading_id.len;
        }
        break :blk 0;
    };
    // if no heading was detected
    if (heading_end_index == 0) return null;

    const separator_len: usize = blk: {
        // allows for blank headings
        if (line.len == heading_end_index) break :blk 0;
        switch (line[heading_end_index]) {
            ' ', '\t' => break :blk 1,
            // marker is part of something else (inline style, embed, a 7th '#', ...)
            else => return null,
        }
    };

    return LineParserReturn{
        .pointer_offset = heading_end_index + separator_len,
        .token = .{
            .type = .heading,
            .data = line[0..heading_end_index],
        },
    };
}
/// Fallback parser: treats all of `line` as plain text.
///
/// Always succeeds; the returned offset equals `line.len`, i.e. the whole
/// line is consumed.
pub fn text(line: []const u8) ?LineParserReturn {
    var result: LineParserReturn = .{};
    result.token.type = .text;
    result.token.data = line;
    result.pointer_offset = line.len;
    return result;
}
/// Recognises an ordered-list marker (`1. `, `a. `, `ii. `, ...) at the start
/// of `line`, after optional leading spaces/tabs.
///
/// The marker is everything before the first `". "`. It must be non-empty,
/// contain no whitespace, and either start with a letter (letters and roman
/// numerals are not validated further yet) or parse as a base-10 `usize`.
///
/// On success the token data is `line` up to and including the dot (leading
/// indentation included) and `pointer_offset` points past the dot and the
/// space. Returns null when the line is not an ordered-list item.
pub fn listOrderedParser(line: []const u8) ?LineParserReturn {
    const clean_line = stripWhiteSpace(line);
    // stripWhiteSpace only trims the front, so the length difference is the indent
    const whitespace_chars_removed = line.len - clean_line.len;
    // check for the min length of chars
    if (clean_line.len < 2) return null;

    const index_of_dot = std.mem.indexOf(u8, clean_line, ". ") orelse return null;
    // A line that starts with ". " has no marker at all; without this guard
    // the `number[0]` lookup below would index an empty slice.
    if (index_of_dot == 0) return null;

    const number = clean_line[0..index_of_dot];
    // if it has any spaces its not a order list id
    if (std.mem.indexOfAny(u8, number, " \t") != null) return null;

    // Letter and roman-numeral markers are accepted on their first character;
    // anything else has to be a number.
    if (!std.ascii.isAlphabetic(number[0])) {
        _ = std.fmt.parseInt(usize, number, 10) catch return null;
    }

    // + 1 to make sure the dot is the last char of the token
    const marker_end = whitespace_chars_removed + number.len + 1;
    return LineParserReturn{
        // one more to also skip the space after the dot
        .pointer_offset = marker_end + 1,
        .token = .{
            .data = line[0..marker_end],
            .type = .ordered_list,
        },
    };
}
/// Recognises an unordered-list bullet (`-`, `=`, `+` or `*` followed by a
/// space) at the start of `line`, after optional leading spaces/tabs.
///
/// On success the token data is the single bullet character and
/// `pointer_offset` points past the indentation, the bullet and the space.
pub fn listUnorderedParser(line: []const u8) ?LineParserReturn {
    const trimmed = stripWhiteSpace(line);
    // need at least the bullet and the separating space
    if (trimmed.len < 2 or trimmed[1] != ' ') return null;

    switch (trimmed[0]) {
        '-', '=', '+', '*' => {},
        else => return null,
    }

    const indent = @intFromPtr(trimmed.ptr) - @intFromPtr(line.ptr);
    return LineParserReturn{
        // +2 skips the bullet and the space after it
        .pointer_offset = indent + 2,
        .token = .{
            .data = trimmed[0..1],
            .type = .unordered_list,
        },
    };
}
// pub fn styleParser(line: []const u8) ?LineParserReturn {}
// pub fn embedParser(line: []const u8) ?LineParserReturn {}
// pub fn tableParser(line: []const u8) ?LineParserReturn {}
/// Recognises a quote marker (`| ` or `> `) at the very start of `line`.
///
/// On success the token data is the marker character alone and
/// `pointer_offset` is 2, skipping the marker and its space.
pub fn quoteParser(line: []const u8) ?LineParserReturn {
    const markers = [_][]const u8{ "| ", "> " };
    for (markers) |marker| {
        if (!std.mem.startsWith(u8, line, marker)) continue;
        return LineParserReturn{
            .pointer_offset = 2,
            .token = .{
                .data = line[0..1],
                .type = .quote,
            },
        };
    }
    return null;
}
// // multiline might have to add some extra to this
/// Parses a fenced code block that opens on `current_line`.
///
/// Returns false without touching `list` or `lines` when `current_line` does
/// not start with a triple-backtick fence. Otherwise it consumes lines from
/// `lines` up to and including the closing fence and appends, in order:
/// the opening fence, the language name (only if present), a newline, the raw
/// block body, a newline, and the closing fence line.
///
/// `current_line` must be a slice of `lines.buffer` (it is located by pointer
/// arithmetic); the raw body token is a slice of that same buffer.
///
/// Errors: `error.No_closing_tag` when no closing fence follows, plus any
/// allocation error from `list.append`. On error every token this call
/// appended is removed again, so `list` is left as it was found.
pub fn codeBlockParser(
    current_line: []const u8,
    lines: *std.mem.SplitIterator(u8, .scalar),
    alloc: std.mem.Allocator,
    list: *std.ArrayList(Token),
) anyerror!bool {
    const fence = "```";
    if (!std.mem.startsWith(u8, current_line, fence)) return false;

    // Roll back partially emitted tokens if anything below fails.
    const tokens_before = list.items.len;
    errdefer list.shrinkRetainingCapacity(tokens_before);

    try list.append(alloc, .{
        .data = fence,
        .type = .code_block,
    });
    const lang_name = std.mem.trimEnd(u8, current_line[fence.len..], " \t\n");
    if (lang_name.len != 0) {
        try list.append(alloc, .{
            .data = lang_name,
            .type = .raw_text,
        });
    }
    try list.append(alloc, .{
        .data = "\n",
        .type = .newline,
    });

    // The body starts right after the newline that ends the opening line.
    // Measuring from the end of `current_line` (not from the trimmed language
    // name) keeps this correct when the fence line has trailing whitespace.
    const buffer_address = @intFromPtr(lines.buffer.ptr);
    const raw_text_start_index =
        @intFromPtr(current_line.ptr) - buffer_address + current_line.len + 1;

    const ending_line = find_end: {
        while (lines.next()) |line| {
            if (std.mem.startsWith(u8, line, fence)) break :find_end line;
        }
        return error.No_closing_tag;
    };

    // -1 drops the newline that precedes the closing fence. When the closing
    // fence directly follows the opening line the body is empty and that
    // position lies before the start, so clamp instead of slicing backwards.
    const closing_index = @intFromPtr(ending_line.ptr) - buffer_address;
    const raw_text_end_index = @max(raw_text_start_index, closing_index - 1);

    try list.append(alloc, .{
        .data = lines.buffer[raw_text_start_index..raw_text_end_index],
        .type = .raw_text,
    });
    try list.append(alloc, .{
        .data = "\n",
        .type = .newline,
    });
    try list.append(alloc, .{
        .data = ending_line,
        .type = .code_block,
    });
    return true;
}
// pub fn codeBlockParser(line: []const u8) ?LineParserReturn {}
/// Recognises a tick box (`- [ ] `, `- [x] `, `- [/] ` or `- [~] `) at the
/// very start of `line`.
///
/// All six bytes are checked: the `- [` opener, one of the four state
/// characters, the closing `]` and the trailing space. The previous condition
/// mixed `or` and `and` without parentheses, so the `]` and space were only
/// verified for the `~` state and input like `- [x foo` was accepted.
///
/// On success the token data is the five-byte box (`- [x]`) and
/// `pointer_offset` is 6, skipping the space after it.
pub fn tickBoxParser(line: []const u8) ?LineParserReturn {
    if (!std.mem.startsWith(u8, line, "- [")) return null;
    // not long enough for a tick box
    if (line.len < 6) return null;

    const known_state = switch (line[3]) {
        ' ', 'x', '/', '~' => true,
        else => false,
    };
    if (!known_state or line[4] != ']' or line[5] != ' ') return null;

    return LineParserReturn{
        .pointer_offset = 6,
        .token = .{
            .type = .tickbox,
            .data = line[0..5],
        },
    };
}
// might make this anylength
/// Measures a horizontal rule drawn with `rule_char` at the start of `line`.
///
/// The line must begin with three consecutive copies of `rule_char`;
/// otherwise 0 is returned. After that, the rule extends over every following
/// rule character, space or tab. The return value is the index of the first
/// byte that is none of those, or `line.len` when the rule fills the line.
pub fn ruleCheck(comptime rule_char: []const u8, line: []const u8) usize {
    if (!std.mem.startsWith(u8, line, rule_char ** 3)) return 0;
    const rule_bytes = [_]u8{ rule_char[0], ' ', '\t' };
    return std.mem.indexOfNone(u8, line, &rule_bytes) orelse line.len;
}
/// Recognises a horizontal rule at the start of `line`, drawn with any one
/// of `-`, `=`, `*`, `.`, `&` or `^`.
///
/// The lengths reported by `ruleCheck` for each character are added up; a
/// total of zero means no rule was found and null is returned. On success the
/// token data is the rule text and `pointer_offset` points just past it.
pub fn horizontalRuleParser(line: []const u8) ?LineParserReturn {
    const rule_chars = [_][]const u8{ "-", "=", "*", ".", "&", "^" };
    var rule_length: usize = 0;
    inline for (rule_chars) |rule_char| {
        rule_length += ruleCheck(rule_char, line);
    }
    if (rule_length == 0) return null;

    return LineParserReturn{
        .pointer_offset = rule_length,
        .token = .{
            .data = line[0..rule_length],
            .type = .rule,
        },
    };
}
// pub fn footnoteParser(line: []const u8) ?LineParserReturn {}
// pub fn MacroParser(line: []const u8) ?LineParserReturn {}
/// Marker that starts a comment; the trailing space is part of the marker.
const comment_id = "// ";

/// Finds the first comment marker in `line` that is not escaped.
///
/// A marker directly preceded by a backslash is treated as escaped and
/// skipped, and the search continues with later markers. The previous version
/// had this test inverted (it rejected unescaped markers that were not at
/// column 0 and accepted escaped ones) and only ever looked at the first
/// occurrence.
///
/// On success the token data is the marker itself, located inside `line`, and
/// `pointer_offset` points at the first byte after the marker. Returns null
/// when the line contains no unescaped marker.
pub fn commentParser(line: []const u8) ?LineParserReturn {
    var search_from: usize = 0;
    while (std.mem.indexOfPos(u8, line, search_from, comment_id)) |index| {
        const escaped = index > 0 and line[index - 1] == '\\';
        if (!escaped) {
            return LineParserReturn{
                .pointer_offset = index + comment_id.len,
                .token = .{
                    .type = .comment,
                    .data = line[index .. index + comment_id.len],
                },
            };
        }
        // escaped marker: keep looking further along the line
        search_from = index + 1;
    }
    return null;
}
// const LineParsers : [_]LineParser = {};
/// Splits `str` into lines and turns each line into tokens.
///
/// Per line, in order:
/// 1. a fenced code block is handed to `codeBlockParser`, which consumes all
///    of its lines and emits its own tokens;
/// 2. a comment, if any, is cut off the end of the line;
/// 3. at most one start-of-line token is taken (heading, tick box, ordered
///    list, unordered list, quote, horizontal rule — first match wins);
/// 4. whatever is left, if non-empty, becomes a `.text` token;
/// 5. the comment marker and the raw comment text are appended;
/// 6. a `.newline` token closes the line.
///
/// Token data slices point into `str` (or at string literals), so `str` must
/// outlive the returned list. The caller owns the list and releases it with
/// `deinit(alloc)`.
///
/// Errors: anything `codeBlockParser` returns (for example
/// `error.No_closing_tag`) and allocation failures. The partially built list
/// is freed before an error is returned.
pub fn tokenize(
    // comptime start_parsers : []LineParser,
    // comptime text_parsers : []LineParser,
    // comptime end_parsers : []LineParser,
    // comptime multiline_parsers : []MultiLineParser,
    str: []const u8,
    alloc: std.mem.Allocator,
) !std.ArrayList(Token) {
    var lines = std.mem.splitScalar(u8, str, '\n');
    var token_list: std.ArrayList(Token) = .empty;
    // Without this the list leaks whenever a `try` below fails.
    errdefer token_list.deinit(alloc);

    while (lines.next()) |current_line| {
        // multiline parsing
        if (try codeBlockParser(current_line, &lines, alloc, &token_list)) continue;

        var line = current_line;

        // end parsing: find the comment first so the other parsers never see it
        const comment_token = commentParser(line);
        if (comment_token) |token| {
            // sets the end of the line to just before the comment
            line = line[0 .. token.pointer_offset - token.token.data.len];
        }

        // start parsing: the first parser that matches wins
        const start_token: ?LineParserReturn =
            headingParser(line) orelse
            tickBoxParser(line) orelse
            listOrderedParser(line) orelse
            listUnorderedParser(line) orelse
            quoteParser(line) orelse
            horizontalRuleParser(line);
        if (start_token) |val| {
            line = line[val.pointer_offset..];
            try token_list.append(alloc, val.token);
        }

        // mid parsing
        if (line.len != 0) { // makes sure that i dont add blank lines
            if (text(line)) |token| {
                try token_list.append(alloc, token.token);
            }
        }

        if (comment_token) |token| {
            // append the comment token, then everything after the marker
            try token_list.append(alloc, token.token);
            try token_list.append(alloc, .{
                .type = .raw_text,
                .data = current_line[token.pointer_offset..],
            });
        }

        try token_list.append(alloc, .{
            .type = .newline,
            .data = "\n",
        });
    }
    return token_list;
}
const expect = std.testing.expect;
test "heading_1" {
    // Expects one `#` plus a space to yield a heading marker, the text, and a newline.
    const input = "# hello this is a heading\n";
    const parsed = [_]Token{
        .{ .type = .heading, .data = "#" },
        .{ .type = .text, .data = "hello this is a heading" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator adds a leak report to the real failure.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "heading_2" {
    // Expects `##` plus a space to yield a heading marker, the text, and a newline.
    const input = "## hello this is a heading\n";
    const parsed = [_]Token{
        .{ .type = .heading, .data = "##" },
        .{ .type = .text, .data = "hello this is a heading" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "heading_3" {
    // Expects `###` plus a space to yield a heading marker, the text, and a newline.
    const input = "### hello this is a heading\n";
    const parsed = [_]Token{
        .{ .type = .heading, .data = "###" },
        .{ .type = .text, .data = "hello this is a heading" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "heading_4" {
    // Expects `####` plus a space to yield a heading marker, the text, and a newline.
    const input = "#### hello this is a heading\n";
    const parsed = [_]Token{
        .{ .type = .heading, .data = "####" },
        .{ .type = .text, .data = "hello this is a heading" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "heading_5" {
    // Expects `#####` plus a space to yield a heading marker, the text, and a newline.
    const input = "##### hello this is a heading\n";
    const parsed = [_]Token{
        .{ .type = .heading, .data = "#####" },
        .{ .type = .text, .data = "hello this is a heading" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "heading_6" {
    // Expects `######` plus a space to yield a heading marker, the text, and a newline.
    const input = "###### hello this is a heading\n";
    const parsed = [_]Token{
        .{ .type = .heading, .data = "######" },
        .{ .type = .text, .data = "hello this is a heading" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "header_no_text" {
    // A heading marker with nothing after it is expected to stand alone,
    // followed only by the newline.
    const input = "######\n";
    const parsed = [_]Token{
        .{ .type = .heading, .data = "######" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text" {
    // A line with no markers is expected to become one text token and a newline.
    const input = "plain test\n";
    const parsed = [_]Token{
        .{ .type = .text, .data = "plain test" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_code" {
    // Expects text wrapped in backticks to be split into style, text, style.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "`plain test`\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "`" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "`" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_bold" {
    // Expects text wrapped in `!` to be split into style, text, style.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "!plain test!\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "!" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "!" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_italic" {
    // Expects text wrapped in `*` to be split into style, text, style.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "*plain test*\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "*" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "*" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_underline" {
    // Expects text wrapped in `_` to be split into style, text, style.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "_plain test_\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "_" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "_" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_strikethrough" {
    // Expects text wrapped in `~` to be split into style, text, style.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "~plain test~\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "~" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "~" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_highlighted" {
    // Expects text wrapped in `|` to be split into style, text, style.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "|plain test|\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "|" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "|" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_&" {
    // Expects text wrapped in `&` to be split into style, text, style.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "&plain test&\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "&" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "&" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_%" {
    // Expects text wrapped in `%` to be split into style, text, style.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "%plain test%\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "%" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "%" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_$" {
    // Expects text wrapped in `$` to be split into style, text, style.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "$plain test$\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "$" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "$" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_#" {
    // Expects text wrapped in `#` (no space after the first `#`) to be an
    // inline style rather than a heading.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "#plain test#\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "#" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "#" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_-" {
    // Expects text wrapped in `-` (no space after the first `-`) to be an
    // inline style rather than a list item.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "-plain test-\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "-" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "-" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_+" {
    // Expects text wrapped in `+` (no space after the first `+`) to be an
    // inline style rather than a list item.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "+plain test+\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "+" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "+" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_=" {
    // Expects text wrapped in `=` (no space after the first `=`) to be an
    // inline style rather than a list item.
    // NOTE(review): `tokenize` as visible in this file never emits
    // `.inline_style`, so this reads as a spec for pending work — confirm.
    const input = "=plain test=\n";
    const parsed = [_]Token{
        .{ .type = .inline_style, .data = "=" },
        .{ .type = .text, .data = "plain test" },
        .{ .type = .inline_style, .data = "=" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_super_script" {
    // Expects `^` between two pieces of text to be split out as a `.sup` token.
    // NOTE(review): `tokenize` as visible in this file never emits `.sup`,
    // so this reads as a spec for pending work — confirm.
    const input = "sometext^2\n";
    const parsed = [_]Token{
        .{ .type = .text, .data = "sometext" },
        .{ .type = .sup, .data = "^" },
        .{ .type = .text, .data = "2" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_style_sub_script" {
    // Expects `_` between two pieces of text to be split out as a `.sub` token.
    // NOTE(review): `tokenize` as visible in this file never emits `.sub`,
    // so this reads as a spec for pending work — confirm.
    const input = "sometext_2\n";
    const parsed = [_]Token{
        .{ .type = .text, .data = "sometext" },
        .{ .type = .sub, .data = "_" },
        .{ .type = .text, .data = "2" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "list_ordered_number" {
    // Three numbered items; each is expected as marker, text, newline.
    const input = "1. ordered list\n2. ordered list\n3. ordered list\n";
    const parsed = [_]Token{
        .{ .type = .ordered_list, .data = "1." },
        .{ .type = .text, .data = "ordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .ordered_list, .data = "2." },
        .{ .type = .text, .data = "ordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .ordered_list, .data = "3." },
        .{ .type = .text, .data = "ordered list" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "list_ordered_i" {
    // Three items marked `i.`, `ii.`, `iii.`; each is expected as marker, text, newline.
    const input = "i. ordered list\nii. ordered list\niii. ordered list\n";
    const parsed = [_]Token{
        .{ .type = .ordered_list, .data = "i." },
        .{ .type = .text, .data = "ordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .ordered_list, .data = "ii." },
        .{ .type = .text, .data = "ordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .ordered_list, .data = "iii." },
        .{ .type = .text, .data = "ordered list" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "list_ordered_letter" {
    // Three lettered items; each is expected as marker, text, newline.
    const input = "a. ordered list\nb. ordered list\nc. ordered list\n";
    const parsed = [_]Token{
        .{ .type = .ordered_list, .data = "a." },
        .{ .type = .text, .data = "ordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .ordered_list, .data = "b." },
        .{ .type = .text, .data = "ordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .ordered_list, .data = "c." },
        .{ .type = .text, .data = "ordered list" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "list_unordered_dashed" {
    // Three `-` bullets; each is expected as bullet, text, newline.
    const input = "- unordered list\n- unordered list\n- unordered list\n";
    const parsed = [_]Token{
        .{ .type = .unordered_list, .data = "-" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .unordered_list, .data = "-" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .unordered_list, .data = "-" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "list_unordered_eq" {
    // Three `=` bullets; each is expected as bullet, text, newline.
    const input = "= unordered list\n= unordered list\n= unordered list\n";
    const parsed = [_]Token{
        .{ .type = .unordered_list, .data = "=" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .unordered_list, .data = "=" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .unordered_list, .data = "=" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "list_unordered_plus" {
    // Three `+` bullets; each is expected as bullet, text, newline.
    const input = "+ unordered list\n+ unordered list\n+ unordered list\n";
    const parsed = [_]Token{
        .{ .type = .unordered_list, .data = "+" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .unordered_list, .data = "+" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .unordered_list, .data = "+" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "list_unordered_dot" {
    // Three `*` bullets (the "dot" in the test name refers to the asterisk
    // bullet); each is expected as bullet, text, newline.
    const input = "* unordered list\n* unordered list\n* unordered list\n";
    const parsed = [_]Token{
        .{ .type = .unordered_list, .data = "*" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .unordered_list, .data = "*" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .unordered_list, .data = "*" },
        .{ .type = .text, .data = "unordered list" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_image" {
    // Expects `![label](path)` to split into an embed marker, a bracketed
    // label and a parenthesised raw path.
    // NOTE(review): `tokenize` as visible in this file never emits `.embed`
    // or bracket tokens, so this reads as a spec for pending work — confirm.
    const input = "![figure text](relitive path to image)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "!" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "figure text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "relitive path to image" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_table" {
    // Expects `#[label](path)` to split into an embed marker, a bracketed
    // label and a parenthesised raw path (not a heading).
    // NOTE(review): `tokenize` as visible in this file never emits `.embed`
    // or bracket tokens, so this reads as a spec for pending work — confirm.
    const input = "#[table text](relitive path to tabulated data)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "#" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "table text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "relitive path to tabulated data" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };
    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails.
    defer parsing.deinit(std.testing.allocator);
    try expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_file" {
    // `$[label](target)`: the target, including its `#start:end` suffix, is
    // expected to come through untouched as a single `.raw_text` token.
    const input = "$[file text](path to file#starting_line:ending_line)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "$" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "path to file#starting_line:ending_line" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_*" {
    // `*[label](target)`: a `*` directly followed by `[` is expected as an
    // `.embed` token, then the bracketed label and the raw-text target.
    const input = "*[file text](file_path)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "*" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "file_path" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed__" {
    // `_[label](target)`: a `_` directly followed by `[` is expected as an
    // `.embed` token, then the bracketed label and the raw-text target.
    const input = "_[file text](file_path)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "_" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "file_path" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_~" {
    // `~[label](target)`: a `~` directly followed by `[` is expected as an
    // `.embed` token, then the bracketed label and the raw-text target.
    const input = "~[file text](file_path)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "~" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "file_path" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_|" {
    // `|[label](target)`: a `|` directly followed by `[` is expected as an
    // `.embed` token, then the bracketed label and the raw-text target.
    const input = "|[file text](file_path)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "|" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "file_path" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_&" {
    // `&[label](target)`: a `&` directly followed by `[` is expected as an
    // `.embed` token, then the bracketed label and the raw-text target.
    const input = "&[file text](file_path)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "&" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "file_path" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_%" {
    // `%[label](target)`: a `%` directly followed by `[` is expected as an
    // `.embed` token, then the bracketed label and the raw-text target.
    const input = "%[file text](file_path)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "%" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "file_path" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_-" {
    // `-[label](target)`: a `-` directly followed by `[` is expected as an
    // `.embed` token, then the bracketed label and the raw-text target.
    const input = "-[file text](file_path)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "-" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "file_path" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_+" {
    // `+[label](target)`: a `+` directly followed by `[` is expected as an
    // `.embed` token, then the bracketed label and the raw-text target.
    const input = "+[file text](file_path)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "+" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "file_path" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_=" {
    // `=[label](target)`: a `=` directly followed by `[` is expected as an
    // `.embed` token, then the bracketed label and the raw-text target.
    const input = "=[file text](file_path)\n";
    const parsed = [_]Token{
        .{ .type = .embed, .data = "=" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "file_path" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "embed_link" {
    // `/[label](target)`: unlike the other sigils, `/` is expected to yield a
    // `.link` token (not `.embed`); the label and target tokens are the same
    // shape as for embeds.
    const input = "/[file text](link to something)\n";
    const parsed = [_]Token{
        .{ .type = .link, .data = "/" },
        .{ .type = .bracket_open, .data = "[" },
        .{ .type = .text, .data = "file text" },
        .{ .type = .bracket_close, .data = "]" },
        .{ .type = .bracket_open, .data = "(" },
        .{ .type = .raw_text, .data = "link to something" },
        .{ .type = .bracket_close, .data = ")" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
// test "table" {
// const input = "| col | col | col |\n| --- | --- | --- |\n| row | row | row |\n| row | row | row |\n| row | row | row |\n| row | row | row |\n";
// const parsed = [_]Token{
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " col ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " col ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " col ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .table_row_sep,
// .data = " --- ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .table_row_sep,
// .data = " --- ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .table_row_sep,
// .data = " --- ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .text,
// .data = " row ",
// },
// Token{
// .type = .table_sep,
// .data = "|",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// };
// var parsing = try tokenize(input, std.testing.allocator);
// try expect(parsing.items.len >= parsed.len);
// for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
// try expect(expected_token.type == actual_token.type);
// try expect(std.mem.eql(u8, expected_token.data, actual_token.data));
// }
// parsing.deinit(std.testing.allocator);
// }
test "quote_arrow" {
    // Three `> quote` lines: each is expected as a `.quote` marker, a `.text`
    // token without the space that follows the marker, and a newline.
    const input = "> quote\n> quote\n> quote\n";
    const parsed = [_]Token{
        .{ .type = .quote, .data = ">" },
        .{ .type = .text, .data = "quote" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .quote, .data = ">" },
        .{ .type = .text, .data = "quote" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .quote, .data = ">" },
        .{ .type = .text, .data = "quote" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated. The typed expectations print both values on a
    // mismatch, replacing the old hand-written debug prints.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "quote_bar" {
    // Three `| quote` lines: each is expected as a `.quote` marker carrying
    // `|`, a `.text` token without the space after the marker, and a newline.
    const input = "| quote\n| quote\n| quote\n";
    const parsed = [_]Token{
        .{ .type = .quote, .data = "|" },
        .{ .type = .text, .data = "quote" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .quote, .data = "|" },
        .{ .type = .text, .data = "quote" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .quote, .data = "|" },
        .{ .type = .text, .data = "quote" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated. The typed expectations print both values on a
    // mismatch, replacing the old hand-written debug prints.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "text_block" {
    // A fenced block with no language tag: the opening fence is followed
    // directly by a newline, the body line is `.raw_text`, and the closing
    // fence is another `.code_block` token.
    const input = "```\nsome text or code\n```\n";
    const parsed = [_]Token{
        .{ .type = .code_block, .data = "```" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .raw_text, .data = "some text or code" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .code_block, .data = "```" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Guard the slice below: without this check a short token list triggers
    // an out-of-bounds panic instead of an ordinary test failure. Only the
    // leading `parsed.len` tokens are compared.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "code_block" {
    // A fenced block with a language tag: the tag after the opening fence is
    // expected as its own `.raw_text` token before the newline.
    const input = "```lang_name\nsome text or code\n```\n";
    const parsed = [_]Token{
        .{ .type = .code_block, .data = "```" },
        .{ .type = .raw_text, .data = "lang_name" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .raw_text, .data = "some text or code" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .code_block, .data = "```" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "passthrough_block" {
    // A fenced block tagged `passthrough`: at the token level the tag is
    // expected as plain `.raw_text`, exactly like any other language tag.
    const input = "```passthrough\nsome text or code\n```\n";
    const parsed = [_]Token{
        .{ .type = .code_block, .data = "```" },
        .{ .type = .raw_text, .data = "passthrough" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .raw_text, .data = "some text or code" },
        .{ .type = .newline, .data = "\n" },
        .{ .type = .code_block, .data = "```" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "comment" {
    // A `// ` line: the `.comment` token is expected to carry the marker
    // including its trailing space, and the remainder is `.raw_text`.
    const input = "// this is a comment\n";
    const parsed = [_]Token{
        .{ .type = .comment, .data = "// " },
        .{ .type = .raw_text, .data = "this is a comment" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "tick_box_empty" {
    // `- [ ] label`: the whole `- [ ]` marker is expected as one `.tickbox`
    // token, followed by the label as `.text` without the separating space.
    const input = "- [ ] something to check off\n";
    const parsed = [_]Token{
        .{ .type = .tickbox, .data = "- [ ]" },
        .{ .type = .text, .data = "something to check off" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated. The typed expectations print both values on a
    // mismatch, replacing the old hand-written debug prints.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "tick_box_checked" {
    // `- [x] label`: the whole `- [x]` marker is expected as one `.tickbox`
    // token, followed by the label as `.text` without the separating space.
    const input = "- [x] something checked off\n";
    const parsed = [_]Token{
        .{ .type = .tickbox, .data = "- [x]" },
        .{ .type = .text, .data = "something checked off" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "tick_box_ticked" {
    // `- [/] label`: the whole `- [/]` marker is expected as one `.tickbox`
    // token, followed by the label as `.text` without the separating space.
    const input = "- [/] something ticked off\n";
    const parsed = [_]Token{
        .{ .type = .tickbox, .data = "- [/]" },
        .{ .type = .text, .data = "something ticked off" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "tick_box_dashed" {
    // `- [~] label`: the whole `- [~]` marker is expected as one `.tickbox`
    // token, followed by the label as `.text` without the separating space.
    const input = "- [~] something dashed off\n";
    const parsed = [_]Token{
        .{ .type = .tickbox, .data = "- [~]" },
        .{ .type = .text, .data = "something dashed off" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "horizontal_rule_straight_line" {
    // A line consisting of `---` is expected as a single `.rule` token
    // followed by the newline.
    const input = "---\n";
    const parsed = [_]Token{
        .{ .type = .rule, .data = "---" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated. The typed expectations print both values on a
    // mismatch, replacing the old hand-written debug prints.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "horizontal_rule_double_straight_line" {
    // A line consisting of `===` is expected as a single `.rule` token
    // followed by the newline.
    const input = "===\n";
    const parsed = [_]Token{
        .{ .type = .rule, .data = "===" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "horizontal_rule_big_dotted_straight_line" {
    // A line consisting of `***` is expected as a single `.rule` token
    // followed by the newline.
    const input = "***\n";
    const parsed = [_]Token{
        .{ .type = .rule, .data = "***" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "horizontal_rule_small_dotted_line" {
    // A line consisting of `...` is expected as a single `.rule` token
    // followed by the newline.
    const input = "...\n";
    const parsed = [_]Token{
        .{ .type = .rule, .data = "..." },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "horizontal_rule_squiggly_line" {
    // A line consisting of `&&&` is expected as a single `.rule` token
    // followed by the newline.
    const input = "&&&\n";
    const parsed = [_]Token{
        .{ .type = .rule, .data = "&&&" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
test "horizontal_rule_jagged_line" {
    // A line consisting of `^^^` is expected as a single `.rule` token
    // followed by the newline.
    const input = "^^^\n";
    const parsed = [_]Token{
        .{ .type = .rule, .data = "^^^" },
        .{ .type = .newline, .data = "\n" },
    };

    var parsing = try tokenize(input, std.testing.allocator);
    // Deferred so the list is released even when an expectation below fails;
    // otherwise the testing allocator reports a leak on top of the failure.
    defer parsing.deinit(std.testing.allocator);

    // Only the leading `parsed.len` tokens are compared; extra trailing
    // tokens are tolerated.
    try std.testing.expect(parsing.items.len >= parsed.len);
    for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
        try std.testing.expectEqual(expected_token.type, actual_token.type);
        try std.testing.expectEqualStrings(expected_token.data, actual_token.data);
    }
}
// test "footnote" {
// const input = "sometext [^1]\n\n[^1]: footnote\n";
// const parsed = [_]Token{
// Token{
// .type = .text,
// .data = "sometext ",
// },
// Token{
// .type = .footnote,
// .data = "[^1]",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// Token{
// .type = .footnote_def,
// .data = "[^1]:",
// },
// Token{
// .type = .text,
// .data = "footnote",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// };
// var parsing = try tokenize(input, std.testing.allocator);
// try expect(parsing.items.len >= parsed.len);
// for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
// try expect(expected_token.type == actual_token.type);
// try expect(std.mem.eql(u8, expected_token.data, actual_token.data));
// }
// parsing.deinit(std.testing.allocator);
// }
// test "macro" {
// const input = "@macro(arg, arg2)\n";
// const parsed = [_]Token{
// Token{
// .type = .macro,
// .data = "macro",
// },
// Token{
// .type = .bracket_open,
// .data = "(",
// },
// Token{
// .type = .macro_arg,
// .data = "arg",
// },
// Token{
// .type = .arg_sep,
// .data = ",",
// },
// Token{
// .type = .macro_arg,
// .data = "arg2",
// },
// Token{
// .type = .bracket_close,
// .data = ")",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// };
// var parsing = try tokenize(input, std.testing.allocator);
// try expect(parsing.items.len >= parsed.len);
// for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
// try expect(expected_token.type == actual_token.type);
// try expect(std.mem.eql(u8, expected_token.data, actual_token.data));
// }
// parsing.deinit(std.testing.allocator);
// }
// test "replace" {
// const input = "@replace\n";
// const parsed = [_]Token{
// Token{
// .type = .replace,
// .data = "replace",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// };
// var parsing = try tokenize(input, std.testing.allocator);
// try expect(parsing.items.len >= parsed.len);
// for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
// try expect(expected_token.type == actual_token.type);
// try expect(std.mem.eql(u8, expected_token.data, actual_token.data));
// }
// parsing.deinit(std.testing.allocator);
// }
// test "inbulit" {
// const input = "@.macro(arg, arg2)\n";
// const parsed = [_]Token{
// Token{
// .type = .inbulit,
// .data = "macro",
// },
// Token{
// .type = .bracket_open,
// .data = "(",
// },
// Token{
// .type = .macro_arg,
// .data = "arg",
// },
// Token{
// .type = .arg_sep,
// .data = ",",
// },
// Token{
// .type = .macro_arg,
// .data = "arg2",
// },
// Token{
// .type = .bracket_close,
// .data = ")",
// },
// Token{
// .type = .newline,
// .data = "\n",
// },
// };
// var parsing = try tokenize(input, std.testing.allocator);
// try expect(parsing.items.len >= parsed.len);
// for (parsed, parsing.items[0..parsed.len]) |expected_token, actual_token| {
// try expect(expected_token.type == actual_token.type);
// try expect(std.mem.eql(u8, expected_token.data, actual_token.data));
// }
// parsing.deinit(std.testing.allocator);
// }