You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

72 lines
2.1 KiB
Rust

use std::{env, fs::File, io::{BufReader, BufRead, Write}, path::Path};
use utfdump_core::{chardata::CharData, encoded::DataBuf};
const UNICODE_DATA_PATH: &str = "unicode_data_latest.txt";
const OUT_DATA_PATH: &str = "unicode_data_encoded";
fn main() {
println!("cargo:rerun-if-changed={}", UNICODE_DATA_PATH);
let out_dir = env::var_os("OUT_DIR").unwrap();
let out_path = Path::new(&out_dir).join(OUT_DATA_PATH);
let data_file = File::open(UNICODE_DATA_PATH)
.expect("failed to open unicode data file");
let buf_reader = BufReader::new(data_file);
let mut data = DataBuf::new();
let mut start_codepoint = None;
for line in buf_reader.lines() {
let line = line.unwrap();
let (codepoint, char_data) = CharData::from_row(&line).unwrap();
match start_codepoint {
Some(start_codepoint_inner) => {
let prefix = char_data.name()
.strip_suffix(", Last>")
.expect("expected end of codepoint block");
let name = {
let mut buf = String::with_capacity(prefix.len() + 1);
buf.push_str(prefix);
buf.push('>');
buf
};
let char_data = char_data.with_name(&name);
data.insert(char_data, start_codepoint_inner..(codepoint + 1))
.unwrap();
start_codepoint = None;
},
None => {
if char_data.name().ends_with(", First>") {
start_codepoint = Some(codepoint);
} else {
data.insert(char_data, codepoint..(codepoint + 1))
.unwrap();
}
},
}
}
let (strings_len, [strings, data]) = data
.as_ref_type()
.to_bytes()
.unwrap();
let mut out_file = File::create(&out_path)
.expect("failed to open output file");
out_file.write_all(&strings_len).unwrap();
out_file.write_all(strings).unwrap();
out_file.write_all(data).unwrap();
drop(out_file);
}