From 1b4be0862cd43f7f31bfd74acce2ff3401f409aa Mon Sep 17 00:00:00 2001 From: Pantonshire Date: Wed, 25 May 2022 20:52:23 +0100 Subject: [PATCH] Atom feed, GUID for RSS feed --- Cargo.lock | 18 +++--- blog_server/src/main.rs | 33 +++++++++-- blog_server/src/post.rs | 38 +++++++++---- blog_server/src/posts_store.rs | 18 +++--- blog_server/src/render.rs | 17 ++++-- blog_server/src/service/atom.rs | 82 +++++++++++++++++++++++++++ blog_server/src/service/index.rs | 2 +- blog_server/src/service/mod.rs | 1 + blog_server/src/service/posts_list.rs | 2 +- blog_server/src/service/rss.rs | 54 +++++++++++------- blog_server/src/service/site.rs | 6 +- blog_server/src/time.rs | 5 ++ blog_server/src/uuid.rs | 60 ++++++++++++++++++++ 13 files changed, 276 insertions(+), 60 deletions(-) create mode 100644 blog_server/src/service/atom.rs create mode 100644 blog_server/src/time.rs create mode 100644 blog_server/src/uuid.rs diff --git a/Cargo.lock b/Cargo.lock index 80e823c..a7b404a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -485,9 +485,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ff4f84919677303da5f147645dbea6b1881f368d03ac84e1dc09031ebd7b2c6" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", "http", @@ -650,7 +650,7 @@ checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" [[package]] name = "libshire" version = "0.1.0" -source = "git+https://github.com/pantonshire/libshire#62dae931409c14531cf338d73269e415c638dede" +source = "git+https://github.com/pantonshire/libshire#bd2b3a8a29b34ffeecd658b74ef81a1431fc91f0" [[package]] name = "line-wrap" @@ -920,9 +920,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.10.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" +checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" [[package]] name = "onig" @@ -1108,9 +1108,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.5.5" +version = "1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1" dependencies = [ "aho-corasick", "memchr", @@ -1119,9 +1119,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.25" +version = "0.6.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64" [[package]] name = "rss" diff --git a/blog_server/src/main.rs b/blog_server/src/main.rs index e9d59ec..5d4430a 100644 --- a/blog_server/src/main.rs +++ b/blog_server/src/main.rs @@ -5,8 +5,10 @@ mod posts_store; mod render; mod service; mod template; +mod time; +mod uuid; -use std::{env, fs, path::PathBuf, thread}; +use std::{env, fs, path::PathBuf, sync::Arc, thread}; use axum::Server; use miette::{IntoDiagnostic, Context}; @@ -26,8 +28,22 @@ pub struct Config { posts_dir: PathBuf, #[knuffel(child, unwrap(argument))] static_dir: PathBuf, + #[knuffel(child, unwrap(argument))] + namespace_uuid: uuid::Uuid, + #[knuffel(child)] + self_ref: SelfRefConfig, #[knuffel(child)] rss: RssConfig, + #[knuffel(child)] + atom: AtomConfig, +} + +#[derive(knuffel::Decode, Clone, Debug)] +pub struct SelfRefConfig { + #[knuffel(child, unwrap(argument))] + protocol: String, + #[knuffel(child, unwrap(argument))] + domain: String, } #[derive(knuffel::Decode, Clone, Debug)] @@ -38,10 +54,14 @@ pub struct RssConfig { title: String, #[knuffel(child, unwrap(argument))] ttl: u32, +} + +#[derive(knuffel::Decode, Clone, Debug)] +pub struct AtomConfig { #[knuffel(child, unwrap(argument))] - protocol: String, + num_posts: usize, #[knuffel(child, unwrap(argument))] - domain: String, + title: String, } fn main() -> miette::Result<()> { @@ -49,7 +69,7 @@ fn main() -> miette::Result<()> { // Load the configuration from the KDL config file specified by the first command-line // argument. - let config = { + let config = Arc::new({ let config_path = env::args().nth(1) .ok_or_else(|| miette::Error::msg("No config file specified"))?; @@ -61,7 +81,7 @@ fn main() -> miette::Result<()> { knuffel::parse::(&config_path, &contents) .wrap_err_with(|| format!("Failed to parse config file {}", config_path))? - }; + }); // Create the data structure used to store the rendered posts. This uses an `Arc` internally, // so clones will point to the same underlying data. @@ -71,6 +91,7 @@ fn main() -> miette::Result<()> { // Create the post renderer and the mpsc channel that will be used to communicate with it. let (renderer, tx) = Renderer::new( + config.clone(), posts_store.clone(), code_renderer, config.posts_dir.clone() @@ -96,7 +117,7 @@ fn main() -> miette::Result<()> { } async fn run( - config: Config, + config: Arc, posts_store: ConcurrentPostsStore, ) -> miette::Result<()> { diff --git a/blog_server/src/post.rs b/blog_server/src/post.rs index 9b7b13e..f398971 100644 --- a/blog_server/src/post.rs +++ b/blog_server/src/post.rs @@ -1,7 +1,7 @@ use std::{borrow, error, fmt, ops}; use chrono::{DateTime, Utc}; -use libshire::strings::ShString22; +use libshire::{strings::ShString22, uuid::{Uuid, UuidV5Error}}; use maud::{Markup, PreEscaped}; use crate::codeblock::CodeBlockRenderer; @@ -73,6 +73,7 @@ impl fmt::Display for PostId { } pub struct Post { + uuid: Uuid, id: PostId, title: String, author: String, @@ -83,8 +84,8 @@ pub struct Post { } impl Post { - pub fn id_str(&self) -> &str { - &self.id + pub fn uuid(&self) -> Uuid { + self.uuid } pub fn id(&self) -> &PostId { @@ -117,6 +118,7 @@ impl Post { pub fn parse( code_renderer: &CodeBlockRenderer, + namespace: Uuid, post_id: PostId, file_name: &str, created: DateTime, @@ -124,17 +126,25 @@ impl Post { source: &str, ) -> Result { - let mdpost = MdPost::parse(file_name, source)?; - Ok(Self::from_mdpost(code_renderer, post_id, created, updated, mdpost)) + MdPost::parse(file_name, source) + .and_then(|post| Self::from_mdpost( + code_renderer, + namespace, + post_id, + created, + updated, + post + )) } fn from_mdpost( code_renderer: &CodeBlockRenderer, + namespace: Uuid, id: PostId, created: DateTime, updated: DateTime, mdpost: MdPost, - ) -> Self + ) -> Result { use pulldown_cmark::{Options, Parser, html::push_html}; @@ -142,6 +152,11 @@ impl Post { .union(Options::ENABLE_FOOTNOTES) .union(Options::ENABLE_STRIKETHROUGH); + let uuid = Uuid::new_v5(namespace, &*id) + .map_err(|err| match err { + UuidV5Error::NameTooLong(len) => ParseError::IdTooLong(len), + })?; + let mut parser = PostMdParser::new( Parser::new_ext(&mdpost.markdown, PARSER_OPTIONS), code_renderer @@ -150,7 +165,8 @@ impl Post { let mut html_buf = String::new(); push_html(&mut html_buf, parser.by_ref()); - Self { + Ok(Self { + uuid, id, title: mdpost.title, author: mdpost.author, @@ -158,7 +174,7 @@ impl Post { tags: mdpost.tags, created, updated, - } + }) } } @@ -281,13 +297,15 @@ impl MdPost { pub enum ParseError { MissingHeader, InvalidHeader(Box), + IdTooLong(usize), } impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - ParseError::MissingHeader => write!(f, "Post file has no header"), - ParseError::InvalidHeader(err) => fmt::Display::fmt(err, f), + Self::MissingHeader => write!(f, "post file has no header"), + Self::InvalidHeader(err) => fmt::Display::fmt(err, f), + Self::IdTooLong(len) => write!(f, "post id too long ({} bytes)", len), } } } diff --git a/blog_server/src/posts_store.rs b/blog_server/src/posts_store.rs index ac243ae..a4e996c 100644 --- a/blog_server/src/posts_store.rs +++ b/blog_server/src/posts_store.rs @@ -29,7 +29,7 @@ impl ConcurrentPostsStore { } pub async fn get(&self, id: &str) -> Option> { - self.read().await.get(id) + self.read().await.get(id).cloned() } } @@ -56,12 +56,12 @@ impl PostsStore { } } - pub fn get(&self, id: &str) -> Option> { - self.posts.get(id).cloned() + pub fn get(&self, id: &str) -> Option<&Arc> { + self.posts.get(id) } pub fn insert(&mut self, post: Post) -> Option> { - let old_post = self.remove(post.id_str()); + let old_post = self.remove(post.id()); // Insert the post into each of the tag indexes. for tag in post.tags() { @@ -109,19 +109,23 @@ impl PostsStore { self.posts.clear(); } + pub fn last_updated(&self) -> Option> { + self.iter().map(|post| post.updated()).max() + } + pub fn iter(&self) -> impl '_ - + Iterator> + + Iterator> + ExactSizeIterator + FusedIterator + Clone { - self.posts.values().cloned() + self.posts.values() } pub fn iter_by_created(&self) -> impl '_ - + Iterator> + + Iterator> + DoubleEndedIterator + ExactSizeIterator + FusedIterator diff --git a/blog_server/src/render.rs b/blog_server/src/render.rs index fad6e88..36b08e0 100644 --- a/blog_server/src/render.rs +++ b/blog_server/src/render.rs @@ -3,20 +3,23 @@ use std::{ fs, io::{self, Read}, path::PathBuf, - sync::mpsc, + sync::{Arc, mpsc}, }; -use chrono::{DateTime, Utc}; +use chrono::DateTime; use notify::DebouncedEvent; use tracing::{info, warn, error}; use crate::{ codeblock::CodeBlockRenderer, + Config, post::{ParseError, Post, PostId}, posts_store::ConcurrentPostsStore, + time::unix_epoch, }; pub struct Renderer { + config: Arc, posts: ConcurrentPostsStore, code_renderer: CodeBlockRenderer, posts_dir_path: PathBuf, @@ -25,6 +28,7 @@ pub struct Renderer { impl Renderer { pub fn new( + config: Arc, posts: ConcurrentPostsStore, code_renderer: CodeBlockRenderer, posts_dir_path: PathBuf, @@ -38,6 +42,7 @@ impl Renderer { tx.send(DebouncedEvent::Rescan).unwrap(); (Self { + config, posts, code_renderer, posts_dir_path, @@ -185,10 +190,11 @@ impl Renderer { let (created, updated) = metadata.created() .and_then(|created| metadata.modified() - .map(|modified| (DateTime::::from(created), DateTime::::from(modified)))) + .map(|modified| (DateTime::from(created), DateTime::from(modified)))) + // If created / modified metadata is not available, default to the UNIX epoch. .unwrap_or_else(|_| { - let now = Utc::now(); - (now, now) + let epoch = unix_epoch(); + (epoch, epoch) }); let contents = { @@ -202,6 +208,7 @@ impl Renderer { Post::parse( &self.code_renderer, + *self.config.namespace_uuid, target.id.clone(), &target.path.to_string_lossy(), created, diff --git a/blog_server/src/service/atom.rs b/blog_server/src/service/atom.rs new file mode 100644 index 0000000..66b18c4 --- /dev/null +++ b/blog_server/src/service/atom.rs @@ -0,0 +1,82 @@ +use std::sync::Arc; + +use atom_syndication as atom; +use axum::{ + body::Bytes, + extract::Extension, +}; + +use super::response::Atom; +use crate::{ + Config, + posts_store::ConcurrentPostsStore, + time::unix_epoch, +}; + +pub async fn handle( + Extension(config): Extension>, + Extension(posts): Extension, +) -> Atom { + let (atom_entries, updated) = { + let guard = posts.read().await; + + let atom_entries = guard.iter_by_created() + .take(config.atom.num_posts) + .map(|post| { + atom::EntryBuilder::default() + .id(format!("urn:uuid:{}", post.uuid())) + .title(post.title().to_owned()) + .updated(post.updated()) + .links(vec![ + atom::LinkBuilder::default() + .href(format!( + "{}://{}/articles/{}", + config.self_ref.protocol, + config.self_ref.domain, + post.id() + )) + .rel("alternate".to_owned()) + .mime_type(Some("text/html".to_owned())) + .build() + ]) + .author(atom::PersonBuilder::default() + .name(post.author().to_owned()) + .build()) + .build() + }) + .collect::>(); + + let updated = guard.last_updated() + .unwrap_or_else(unix_epoch); + + (atom_entries, updated) + }; + + Atom(atom::FeedBuilder::default() + .id(format!("urn:uuid:{}", *config.namespace_uuid)) + .title(config.atom.title.clone()) + .updated(updated) + .links(vec![ + atom::LinkBuilder::default() + .href(format!( + "{}://{}/atom.xml", + config.self_ref.protocol, + config.self_ref.domain + )) + .rel("self".to_owned()) + .build(), + atom::LinkBuilder::default() + .href(format!( + "{}://{}/articles/", + config.self_ref.protocol, + config.self_ref.domain + )) + .rel("alternate".to_owned()) + .mime_type(Some("text/html".to_owned())) + .build() + ]) + .entries(atom_entries) + .build() + .to_string() + .into()) +} diff --git a/blog_server/src/service/index.rs b/blog_server/src/service/index.rs index b84796f..ea76c0d 100644 --- a/blog_server/src/service/index.rs +++ b/blog_server/src/service/index.rs @@ -43,7 +43,7 @@ pub async fn handle(Extension(posts): Extension) -> Html { ul { @for post in posts.read().await.iter_by_created().rev().take(5) { li { - a href={"/articles/" (post.id_str())} { (post.title()) } + a href={"/articles/" (post.id())} { (post.title()) } } } } diff --git a/blog_server/src/service/mod.rs b/blog_server/src/service/mod.rs index 54f1faf..02941eb 100644 --- a/blog_server/src/service/mod.rs +++ b/blog_server/src/service/mod.rs @@ -1,3 +1,4 @@ +mod atom; mod contact; mod index; mod post; diff --git a/blog_server/src/service/posts_list.rs b/blog_server/src/service/posts_list.rs index 19e3acc..6a0b563 100644 --- a/blog_server/src/service/posts_list.rs +++ b/blog_server/src/service/posts_list.rs @@ -23,7 +23,7 @@ pub async fn handle(Extension(posts): Extension) -> Html { ul { @for post in posts.read().await.iter_by_created().rev() { li { - a href={"/articles/" (post.id_str())} { (post.title()) } + a href={"/articles/" (post.id())} { (post.title()) } span class="quiet" { " — " (post.created().format("%Y/%m/%d")) } diff --git a/blog_server/src/service/rss.rs b/blog_server/src/service/rss.rs index 09cd1d1..3e6dee8 100644 --- a/blog_server/src/service/rss.rs +++ b/blog_server/src/service/rss.rs @@ -6,37 +6,53 @@ use axum::{ }; use super::response::Rss; -use crate::{posts_store::ConcurrentPostsStore, Config}; +use crate::{ + Config, + posts_store::ConcurrentPostsStore, + time::unix_epoch, +}; pub async fn handle( Extension(config): Extension>, Extension(posts): Extension, ) -> Rss { - let rss_items = posts.read() - .await - .iter_by_created() - .take(config.rss.num_posts) - .map(|post| { - rss::ItemBuilder::default() - .title(Some(post.title().to_owned())) - .link(Some(format!( - "{}://{}/articles/{}", - config.rss.protocol, - config.rss.domain, - post.id() - ))) - .pub_date(Some(post.created().to_rfc2822())) - .build() - }) - .collect::>(); + let (rss_items, updated) = { + let guard = posts.read().await; + + let rss_items = guard.iter_by_created() + .take(config.rss.num_posts) + .map(|post| { + rss::ItemBuilder::default() + .title(Some(post.title().to_owned())) + .guid(Some(rss::GuidBuilder::default() + .value(post.uuid().to_string()) + .permalink(false) + .build())) + .link(Some(format!( + "{}://{}/articles/{}", + config.self_ref.protocol, + config.self_ref.domain, + post.id() + ))) + .pub_date(Some(post.created().to_rfc2822())) + .build() + }) + .collect::>(); + + let updated = guard.last_updated() + .unwrap_or_else(unix_epoch); + + (rss_items, updated) + }; Rss(rss::ChannelBuilder::default() .title(config.rss.title.clone()) .link(format!( "{}://{}", - config.rss.protocol, config.rss.domain + config.self_ref.protocol, config.self_ref.domain )) .ttl(Some(config.rss.ttl.to_string())) + .last_build_date(Some(updated.to_rfc2822())) .items(rss_items) .build() .to_string() diff --git a/blog_server/src/service/site.rs b/blog_server/src/service/site.rs index 74fa9c9..e4ff228 100644 --- a/blog_server/src/service/site.rs +++ b/blog_server/src/service/site.rs @@ -16,6 +16,7 @@ use crate::{ posts_store::ConcurrentPostsStore }; use super::{ + atom, contact, index, post, @@ -26,13 +27,14 @@ use super::{ }; pub fn service( - config: Config, + config: Arc, posts_store: ConcurrentPostsStore, ) -> Router { Router::new() .route("/", get(index::handle)) .route("/rss.xml", get(rss::handle)) + .route("/atom.xml", get(atom::handle)) .route("/contact", get(contact::handle)) .route("/articles", get(posts_list::handle)) .route("/articles/:post_id", get(post::handle)) @@ -40,7 +42,7 @@ pub fn service( .fallback(handle_fallback.into_service()) .layer(ConcurrencyLimitLayer::new(config.concurrency_limit)) .layer(TraceLayer::new_for_http()) - .layer(Extension(Arc::new(config))) + .layer(Extension(config)) .layer(Extension(posts_store)) } diff --git a/blog_server/src/time.rs b/blog_server/src/time.rs new file mode 100644 index 0000000..f59bb36 --- /dev/null +++ b/blog_server/src/time.rs @@ -0,0 +1,5 @@ +use chrono::{DateTime, NaiveDateTime, Utc}; + +pub fn unix_epoch() -> DateTime { + DateTime::from_utc(NaiveDateTime::from_timestamp(0, 0), Utc) +} diff --git a/blog_server/src/uuid.rs b/blog_server/src/uuid.rs new file mode 100644 index 0000000..aa455ce --- /dev/null +++ b/blog_server/src/uuid.rs @@ -0,0 +1,60 @@ +use std::ops; + +use knuffel::{ + ast::{Literal, TypeName}, + decode::{Context, Kind}, + errors::{DecodeError, ExpectedType}, + span::Spanned, + traits::ErrorSpan, + DecodeScalar, +}; + +#[derive(Clone, Copy, Default, Debug)] +#[repr(transparent)] +pub struct Uuid(pub libshire::uuid::Uuid); + +impl Uuid { + pub fn as_inner(&self) -> &libshire::uuid::Uuid { + &self.0 + } +} + +impl ops::Deref for Uuid { + type Target = libshire::uuid::Uuid; + + fn deref(&self) -> &Self::Target { + self.as_inner() + } +} + +impl DecodeScalar for Uuid { + fn type_check(type_name: &Option>, ctx: &mut Context) { + if let Some(type_name) = type_name { + ctx.emit_error(DecodeError::TypeName { + span: type_name.span().clone(), + found: Some((&**type_name).clone()), + expected: ExpectedType::no_type(), + rust_type: "Uuid", + }); + } + } + + fn raw_decode( + value: &Spanned, + ctx: &mut Context, + ) -> Result> { + match &**value { + Literal::String(s) => match s.parse() { + Ok(uuid) => Ok(Self(uuid)), + Err(err) => { + ctx.emit_error(DecodeError::conversion(value, err)); + Ok(Default::default()) + } + }, + _ => { + ctx.emit_error(DecodeError::scalar_kind(Kind::String, value)); + Ok(Default::default()) + } + } + } +}