From 1e1c8e2ca7df02a8f5882e92234ab9a15d2101ff Mon Sep 17 00:00:00 2001 From: Sam Nystrom Date: Thu, 22 Jun 2023 23:21:58 -0400 Subject: parse and format XML in subtitle content --- main.ha | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 165 insertions(+), 31 deletions(-) diff --git a/main.ha b/main.ha index 8de4287..6d97ae9 100644 --- a/main.ha +++ b/main.ha @@ -5,22 +5,30 @@ use bufio; use encoding::utf8; use getopt; use io; +use format::xml; use fmt; use fs; use os; use strconv; use strings; +use strio; use time; use unix::poll; use unix::tty; -type segment = struct { +type subtitle = struct { index: uint, start: time::duration, end: time::duration, - text: str, + text: []text, }; +type text = (str | setbold | setitalic | setunderline | setcolor); +type setbold = bool; +type setitalic = bool; +type setunderline = bool; +type setcolor = str; + export fn main() void = { let help: []getopt::help = [ "Play a .srt subtitle file", @@ -45,10 +53,10 @@ export fn main() void = { fmt::fatalf("Error reading '{}': {}\n", path, fs::strerror(err)); }; - let segments = parse_srt(file); - defer free(segments); - defer for (let i = 0z; i < len(segments); i += 1) { - free(segments[i].text); + let subtitles = parse_srt(file); + defer free(subtitles); + defer for (let i = 0z; i < len(subtitles); i += 1) { + free_text(subtitles[i].text); }; fmt::fprint(os::stdout_file, "\x1b[?25l")!; @@ -66,9 +74,9 @@ export fn main() void = { ]; let end: time::duration = 0; - for (let i = 0z; i < len(segments); i += 1) { - if (segments[i].end > end) { - end = segments[i].end; + for (let i = 0z; i < len(subtitles); i += 1) { + if (subtitles[i].end > end) { + end = subtitles[i].end; }; }; let start = time::now(time::clock::REALTIME); @@ -94,8 +102,8 @@ export fn main() void = { let timeout = end; - for (let i = 0z; i < len(segments); i += 1) { - let seg = segments[i]; + for (let i = 0z; i < len(subtitles); i += 1) { + let seg = subtitles[i]; if (seg.start > elapsed && seg.start < timeout) { timeout = seg.start; }; @@ -103,7 +111,7 @@ export fn main() void = { timeout = seg.end; }; if (seg.start <= elapsed && elapsed <= seg.end) { - fmt::println(seg.text)!; + print_subtitle(os::stdout, seg.text)!; }; }; @@ -166,9 +174,12 @@ type state = enum { TEXT, }; -fn parse_srt(file: io::handle) []segment = { - let segments: []segment = []; - let current = segment { ... }; +fn parse_srt(file: io::handle) []subtitle = { + let subtitles: []subtitle = []; + let current = subtitle { ... }; + let content = bufio::dynamic(io::mode::RDWR); + defer io::close(&content)!; + io::writeall(&content, strings::toutf8("\n\n"))!; let state = state::INDEX; for (let nr = 0; true; nr += 1) { @@ -204,38 +215,161 @@ fn parse_srt(file: io::handle) []segment = { }; state = state::TEXT; case state::TEXT => - if (len(line) == 0) { - state = state::INDEX; - append(segments, current); - current = segment { ... }; + if (len(line) > 0) { + io::writeall(&content, strings::toutf8(line))!; + strio::appendrune(&content, '\n')!; continue; }; - let text = strings::concat(current.text, line, "\r\n"); - free(current.text); - current.text = text; + + io::writeall(&content, strings::toutf8(""))!; + content.pos = 0; + current.text = match (parse_text(&content)) { + case let t: []text => + yield t; + case let err: io::error => + fmt::fatal("Error parsing subtitles:", io::strerror(err)); + case let err: xml::error => + fmt::fatal("Error parsing subtitles:", xml::strerror(err)); + }; + + append(subtitles, current); + current = subtitle { ... }; + + bufio::reset(&content); + io::writeall(&content, strings::toutf8("\n\n"))!; + + state = state::INDEX; }; }; - return segments; + return subtitles; }; -fn parse_timecode(line: str) ((time::duration, time::duration) | strconv::invalid | strconv::overflow) = { - let (start, end) = strings::cut(line, " --> "); - let start = parse_part(start)?; - let end = parse_part(end)?; +fn parse_timecode(timecode: str) ((time::duration, time::duration) | strconv::invalid | strconv::overflow) = { + let (start, end) = strings::cut(timecode, " --> "); + let start = parse_time(start)?; + let end = parse_time(end)?; return (start, end); }; -fn parse_part(text: str) (time::duration | strconv::invalid | strconv::overflow) = { +fn parse_time(time: str) (time::duration | strconv::invalid | strconv::overflow) = { let dur: time::duration = 0; - let (text, ms) = strings::cut(text, ","); + let (time, ms) = strings::cut(time, ","); dur += strconv::stoi64(ms)? * time::MILLISECOND; - let (hrs, text) = strings::cut(text, ":"); + let (hrs, time) = strings::cut(time, ":"); dur += strconv::stoi64(hrs)? * time::HOUR; - let (mins, secs) = strings::cut(text, ":"); + let (mins, secs) = strings::cut(time, ":"); dur += strconv::stoi64(mins)? * time::MINUTE; dur += strconv::stoi64(secs)? * time::SECOND; return dur; }; + +fn parse_text(in: io::handle) ([]text | io::error | xml::error) = { + let parser = xml::parse(in)?; + defer xml::parser_free(parser); + + let text: []text = []; + let bold = false; + let italic = false; + let underline = false; + let colorstack: []str = []; + defer strings::freeall(colorstack); + for (true) { + let tok = match (xml::scan(parser)?) { + case let tok: xml::token => + yield tok; + case void => + break; + }; + + match (tok) { + case let start: xml::elementstart => + switch (start) { + case "b" => + if (!bold) { + append(text, true: setbold); + bold = true; + }; + case "i" => + if (!italic) { + append(text, true: setitalic); + italic = true; + }; + case "u" => + if (!underline) { + append(text, true: setunderline); + underline = true; + }; + case "font" => + append(text, "": setcolor); + case => void; + }; + case let end: xml::elementend => + switch (end) { + case "b" => + if (bold) { + append(text, false: setbold); + bold = false; + }; + case "i" => + if (italic) { + append(text, false: setitalic); + italic = false; + }; + case "u" => + if (underline) { + append(text, false: setunderline); + underline = false; + }; + case "font" => + if (len(colorstack) > 0) { + append(text, colorstack[len(colorstack) - 1]: setcolor); + delete(colorstack[len(colorstack) - 1]); + }; + case => void; + }; + case let attr: xml::attribute => + if (attr.0 == "color" && text[len(text) - 1] is setcolor) { + text[len(text) - 1] = strings::dup(attr.1): setcolor; + append(colorstack, strings::dup(attr.1)); + }; + case let t: xml::text => + // Necessary because of raw mode + append(text, strings::replace(t, "\n", "\r\n")); + }; + }; + return text; +}; + +fn print_subtitle(out: io::handle, text: []text) (void | io::error) = { + for (let i = 0z; i < len(text); i += 1) { + let output = match (text[i]) { + case let s: str => + yield s; + case let bold: setbold => + yield if (bold) "\x1b[1m" else "\x1b[22m"; + case let italic: setitalic => + yield if (italic) "\x1b[3m" else "\x1b[23m"; + case let underline: setunderline => + yield if (underline) "\x1b[4m" else "\x1b[24m"; + case let color: setcolor => + // TODO: implement this + // fmt::fprintf(out, "\x1b[38;2;{};{};{}m", r, g, b)?; + continue; + }; + io::writeall(out, strings::toutf8(output))?; + }; +}; + +fn free_text(text: []text) void = { + for (let i = 0z; i < len(text); i += 1) { + match (text[i]) { + case let s: str => + free(s); + case => void; + }; + }; + free(text); +}; -- cgit v1.2.3