Mirror of https://github.com/Brandon-Rozek/website-toots.git, synced 2024-11-20 23:46:28 -05:00
Rust implementation
TODO: Make sure that JSON file writes are equivalent for less commit noise
commit bc01b85f87 (parent d0571cfc2a)
7 changed files with 1989 additions and 7 deletions
22  .github/workflows/refresh.yml  (vendored)

@@ -5,15 +5,12 @@ on:
   schedule:
     - cron: "12 23 * * *"

-#concurrency:
-#  cancel-in-progress: true
-
 defaults:
   run:
     shell: bash

 jobs:
-  build_and_publish:
+  build_and_sync:
     runs-on: ubuntu-latest

     steps:
@@ -25,9 +22,20 @@ jobs:
           git config user.name "GitHub Actions Bot"
           git config user.email "<>"

-      - name: Grab latest toots
+      - name: Install Rust
         run: |
-          ./.scripts/refreshtoots.py
+          sudo apt update
+          sudo apt install -y rustc cargo
+
+      - name: Build
+        run: |
+          cd .scripts
+          cargo build --release
+
+      - name: Retrieve toots
+        run: |
+          cd .scripts
+          ./target/release/retrieve_toots

       - name: Check if there's any changes
         id: verify_diff
@@ -37,6 +45,6 @@ jobs:
       - name: Commit and push
         if: steps.verify_diff.outputs.changed == 'true'
         run: |
-          git add *.md
+          git add .data
           git commit -m "New/Modified Toots"
           git push origin main
1  .scripts/.gitignore  (vendored, new file)

@@ -0,0 +1 @@
target/
1711  .scripts/Cargo.lock  (generated, new file)

File diff suppressed because it is too large.
20  .scripts/Cargo.toml  (new file)

@@ -0,0 +1,20 @@
[package]
name = "website-toots"
version = "0.1.0"
edition = "2021"

[dependencies]
tera = "1"
reqwest = { version = "0.11", features = ["json"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
regex = "1.10.5"

[[bin]]
name = "retrieve_toots"
path = "src/retrieve_toots.rs"

[[bin]]
name = "generate_md"
path = "src/generate_md.rs"
64  .scripts/src/generate_md.rs  (new file)

@@ -0,0 +1,64 @@
use std::fs::File;
use std::io::Write;

use tera::Tera;
use tera::Context;

fn main() {
    println!("Hello, world!");

    let mut tera = Tera::default();
    tera.add_template_file("src/toot.md", None).unwrap();

    let data_dir = std::fs::read_dir("../.data")
        .expect("Failed to open data directory");

    for entry in data_dir {

        let opath = entry.ok().map(|v| v.path());

        // Skip if we encountered an error
        if opath.is_none() {
            continue;
        }

        let path = opath.unwrap();

        // Skip if we're not looking at a JSON file
        if !path.is_file() || !path.extension().map(|x| x == "json").unwrap_or(false) {
            continue;
        }

        let contents = std::fs::read_to_string(&path)
            .expect(format!("Failed to read file {}", path.to_string_lossy()).as_ref());

        let mut json_response: serde_json::Value = serde_json::from_str(contents.as_ref())
            .expect("JSON parse error");

        let content: serde_json::Value = json_response
            .get("content")
            .expect("Failed to get content from JSON response")
            .to_owned();
        json_response.as_object_mut().unwrap().remove("content");

        let frontmatter = serde_json::to_string(&json_response)
            .expect("Failed to serialize to string");

        let mut context = Context::new();
        context.insert("frontmatter", &frontmatter);
        context.insert("body", content.as_str().unwrap());

        let output = tera.render("src/toot.md", &context)
            .expect("Failed to render template.");

        let new_path = format!("../{}.md", path.file_stem().unwrap().to_string_lossy());
        let mut file = File::create(new_path)
            .expect("Failed to create new markdown file");

        file.write_all(output.as_bytes())
            .expect("Failed to write to markdown file");

    }

}
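For orientation, here is a minimal, self-contained sketch of the split that the loop above performs on each .data/*.json file: the "content" field becomes the page body, and everything else is serialized as the frontmatter line. The sample toot values are hypothetical, not taken from the repository's .data files.

use serde_json::json;

fn main() {
    // Hypothetical toot record, shaped like the JSON files retrieve_toots writes into .data/
    let mut toot = json!({
        "id": "1",
        "date": "2024-01-01T00:00:00.000Z",
        "syndication": "https://fosstodon.org/@example/1",
        "content": "<p>Hello from Mastodon</p>"
    });

    // generate_md pulls "content" out for the page body...
    let body = toot
        .get("content")
        .and_then(|v| v.as_str())
        .map(str::to_owned)
        .expect("content should be a string");
    toot.as_object_mut().unwrap().remove("content");

    // ...and serializes everything that remains into the single JSON line
    // that the toot.md template places between the --- markers.
    let frontmatter = serde_json::to_string(&toot).unwrap();

    println!("---\n{}\n---\n\n{}", frontmatter, body);
}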
168  .scripts/src/retrieve_toots.rs  (new file)

@@ -0,0 +1,168 @@
use regex::Regex;
use reqwest::header::HeaderValue;
use std::collections::HashMap;
use std::fs::File;
use std::io::Write;

static RETRIEVE_NUM_TOOTS: usize = 1000;
static SERVER: &str = "fosstodon.org";
static MUID: &str = "108219415927856966";

fn reformat_toot(x: &mut serde_json::Value) -> Result<(), String> {
    let toot = x
        .as_object_mut()
        .ok_or_else(|| "JSON not an object".to_string())?;

    // URL -> Syndication
    let toot_url: serde_json::Value = toot
        .get_mut("url")
        .ok_or_else(|| "Missing URL".to_string())?
        .to_owned();
    toot.remove("uri");
    toot.remove("url");
    toot.insert("syndication".to_string(), toot_url.to_owned());

    // Created At -> Date
    let toot_date: serde_json::Value = toot
        .get_mut("created_at")
        .ok_or_else(|| "Missing created_at".to_string())?
        .to_owned();
    // Note: Already checked whether created_at exists
    toot.remove("created_at");
    toot.insert("date".to_string(), toot_date.to_owned());

    // Strip out highly dynamic account information
    let account: &mut tera::Map<String, serde_json::Value> = toot
        .get_mut("account")
        .ok_or_else(|| "Missing account field".to_string())?
        .as_object_mut()
        .ok_or_else(|| "Account field not an object".to_string())?;

    // Doesn't matter if we fail to remove these
    account.remove("avatar_static");
    account.remove("header_static");
    account.remove("noindex");
    account.remove("roles");
    account.remove("locked");
    account.remove("bot");
    account.remove("discoverable");
    account.remove("group");
    account.remove("created_at");
    account.remove("note");
    account.remove("followers_count");
    account.remove("following_count");
    account.remove("statuses_count");
    account.remove("last_status_at");
    account.remove("emojis");
    account.remove("fields");

    Ok(())
}

fn parse_link_header(header: &HeaderValue) -> Result<HashMap<String, String>, String> {
    let mut links = HashMap::new();
    let re = Regex::new(r#"<([^>]*)>;\s*rel="([^"]*)""#)
        .map_err(|_| "Regex compilation failed".to_string())?;

    let header_str = header
        .to_str()
        .map_err(|v| v.to_string())?;

    for cap in re.captures_iter(header_str) {
        if let (Some(url), Some(rel)) = (cap.get(1), cap.get(2)) {
            links.insert(rel.as_str().to_owned(), url.as_str().to_owned());
        }
    }

    Ok(links)
}

// TODO: Make sure that the JSON blobs aren't
// the same!
fn write_json_to_file(item: &serde_json::Value, path: &str) -> Result<(), String> {
    let item_str = serde_json::to_string(&item)
        .map_err(|x| x.to_string())?;

    let mut file = File::create(path)
        .map_err(|x| x.to_string())?;

    // Write the content to the file
    file.write_all(item_str.as_bytes())
        .map_err(|x| x.to_string())?;

    Ok(())
}

#[tokio::main]
async fn main() {
    let client = reqwest::Client::new();

    let mut url = format!(
        "https://{}/api/v1/accounts/{}/statuses?limit={}",
        SERVER, MUID, RETRIEVE_NUM_TOOTS
    );

    let mut more_toots_exist = true;

    while more_toots_exist {
        let response = client
            .get(url.clone())
            .send()
            .await
            .expect("Unable to reach server");

        // Note: Cannot clone entire response
        let headers = response.headers().clone();

        // Note: .text() consumes response
        let text_response = response
            .text()
            .await
            .expect("Unable to grab response text");

        let mut json_response: serde_json::Value =
            serde_json::from_str(&text_response).expect("JSON parse error");

        let json_array = json_response
            .as_array_mut()
            .expect("Expected JSON Array");

        for item in json_array.iter_mut() {
            reformat_toot(item).unwrap();

            let toot_id = item
                .get("id")
                .and_then(|x| x.as_str())
                .expect("Failed to get toot id");

            let data_dir = "../.data";
            let data_dir_exists = std::fs::metadata(data_dir)
                .map(|metadata| metadata.is_dir())
                .unwrap_or(false);
            if !data_dir_exists {
                std::fs::create_dir(data_dir)
                    .expect("Failed to create directory");
            }

            let path = format!("{}/{}.json", data_dir, toot_id);
            write_json_to_file(&item, path.as_ref())
                .expect("Failed to write to file");
        }
        println!("Retrieved {} toots from server", json_array.len());

        let next_url_result: Result<String, String> = headers
            .get("link")
            .ok_or_else(|| "No header link".to_string())
            .and_then(parse_link_header)
            .and_then(|v| {
                v.get("next")
                    .cloned()
                    .ok_or_else(|| "No next tag".to_string())
            });

        match next_url_result {
            Ok(next_url) => url = next_url,
            Err(_) => more_toots_exist = false,
        }
    }
}
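On the TODO above write_json_to_file (and in the commit message about equivalent JSON writes): one minimal way to cut down on commit noise is to skip the write when the serialized JSON already matches what is on disk. A sketch, assuming a byte-for-byte comparison of the serialized string is an acceptable equality check; the helper name write_json_if_changed is hypothetical, not part of the commit.

use std::fs::File;
use std::io::Write;

// Hypothetical variant of write_json_to_file: leave the file untouched when
// its current contents already equal the new serialization, so unchanged
// toots do not show up as modified in git.
fn write_json_if_changed(item: &serde_json::Value, path: &str) -> Result<(), String> {
    let item_str = serde_json::to_string(item).map_err(|x| x.to_string())?;

    // If an identical file already exists, there is nothing to do.
    if let Ok(existing) = std::fs::read_to_string(path) {
        if existing == item_str {
            return Ok(());
        }
    }

    let mut file = File::create(path).map_err(|x| x.to_string())?;
    file.write_all(item_str.as_bytes()).map_err(|x| x.to_string())?;
    Ok(())
}

Since serde_json serializes map keys in sorted order by default (the preserve_order feature is not enabled in the Cargo.toml above), re-serializing an unchanged toot should yield the same bytes; if key order ever varied, the comparison would need to parse both sides into serde_json::Value instead.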
10  .scripts/src/toot.md  (new file)

@@ -0,0 +1,10 @@
---
{{ frontmatter }}
---
{% raw -%}
{{< unsafe >}}
{% endraw -%}
{{ body }}
{%- raw %}
{{< /unsafe >}}
{%- endraw -%}
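A note on the {% raw %} blocks above: without them, Tera would try to interpret Hugo's {{< unsafe >}} shortcode as one of its own expressions and fail to parse the template. A minimal sketch rendering the same template text in isolation; the frontmatter and body values are hypothetical.

use tera::{Context, Tera};

fn main() {
    // Same shape as .scripts/src/toot.md: the raw blocks pass Hugo's
    // {{< unsafe >}} shortcodes through untouched instead of letting Tera
    // try to evaluate them.
    let template = concat!(
        "---\n{{ frontmatter }}\n---\n",
        "{% raw -%}\n{{< unsafe >}}\n{% endraw -%}\n",
        "{{ body }}\n",
        "{%- raw %}\n{{< /unsafe >}}\n{%- endraw -%}",
    );

    let mut context = Context::new();
    context.insert("frontmatter", r#"{"id":"1"}"#); // hypothetical values
    context.insert("body", "<p>Hello</p>");

    let output = Tera::one_off(template, &context, false).expect("Failed to render template");
    println!("{}", output);
}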