From 6a0d71c121b18b7046ccdf70b314b6fb78fc1022 Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Sat, 14 May 2022 22:58:49 -0400 Subject: [PATCH] New version of toot grabber which creates a hugo markdown file --- refreshtoots_v2.py | 186 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100755 refreshtoots_v2.py diff --git a/refreshtoots_v2.py b/refreshtoots_v2.py new file mode 100755 index 0000000..b9e5cbf --- /dev/null +++ b/refreshtoots_v2.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python +""" +Script to create Hugo markdown +files from Mastodon Toots +""" + +from urllib import request +from glob import glob +from http.client import HTTPResponse +from pathlib import Path +from typing import Any, Dict, Optional +import json +import sys + +TOOT_CONTENT_LOCATION = "content/toots" +SERVER="https://fosstodon.org" +# Quick way to find user id: https://prouser123.me/mastodon-userid-lookup/ +MUID=108219415927856966 +# Server default (when < 0) is 20 +# max allowed is 40 +RETRIEVE_NUM_TOOTS=40 + + +def retrieve_toots_from_server(): + """ + Grabs toots from Mastodon server + """ + # Grab toots from Mastodon + limit_param = "?limit=" + str(RETRIEVE_NUM_TOOTS) \ + if RETRIEVE_NUM_TOOTS > 0 else "" + url = SERVER + "/api/v1/accounts/" + str(MUID) + "/statuses" + limit_param + response: Optional[HTTPResponse] = None + + try: + response = request.urlopen(url) + except Exception: + print("Unable to grab toots from Mastodon.") + + if response is None: + sys.exit(-1) + + # Parse server response + server_data: Optional[list] = None + try: + server_data = json.loads(response.read()) + except Exception: + print("Malformed JSON response from server.") + + if server_data is None: + sys.exit(-1) + + if not isinstance(server_data, list): + print("Unexpected JSON response, should be of form list.") + sys.exit(-1) + + print(f"Successfully grabbed {len(server_data)} toots from server") + return server_data + + +def findall(p, s): + """ + Yields all the positions of + the pattern p in the string s. + Source: https://stackoverflow.com/a/34445090 + """ + i = s.find(p) + while i != -1: + yield i + i = s.find(p, i+1) + + +def read_json_frontmatter(markdown_contents) -> Optional[Dict[Any, Any]]: + """ + Take the contents from a Hugo markdown + file and read the JSON frontmatter if it + exists. + """ + front_matter_indices = list(findall('---', markdown_contents)) + if len(front_matter_indices) < 2: + return None + front_matter = markdown_contents[(front_matter_indices[0] + 3):front_matter_indices[1]] + front_matter_json = None + try: + front_matter_json = json.loads(front_matter) + except Exception: + pass + if not isinstance(front_matter_json, dict): + front_matter_json = None + front_matter_json['content'] = markdown_contents[front_matter_indices[1] + 19:-17] + return front_matter_json + +def reformat_toot(toot_json): + """ + Takes a toot_json and + slightly modifies it to match + some of the fields Hugo expects. + """ + # Turn URL -> Syndication + toot_url = toot_json['url'] + del toot_json['uri'] + del toot_json['url'] + toot_json['syndication'] = toot_url + # Turn Created At -> Date + toot_date = toot_json['created_at'] + del toot_json['created_at'] + toot_json['date'] = toot_date + +def create_toot(toot_json): + """ + Takes a JSON toot from Mastodon + and creates a string representing + the contents of a Hugo markdown + file. + """ + toot_content = toot_json['content'] + del toot_json['content'] + return "---\n" + \ + f"{json.dumps(toot_json)}\n" +\ + "---\n" +\ + "{{< unsafe >}}\n" +\ + f"{toot_content}\n" +\ + "{{< /unsafe >}}\n" + +def toot_file_from_id(tootid): + """Returns toot filename from id""" + return f"{TOOT_CONTENT_LOCATION}/{tootid}.md" + +def read_toot(tootid) -> Optional[Dict[Any, Any]]: + """ + Given a toot id, return + the markdown file contents + of the toot stored in Hugo + if it exists. + """ + try: + with open(toot_file_from_id(tootid), "r", encoding="UTF-8") as toot_file: + toot_data = read_json_frontmatter(toot_file.read()) + return toot_data + except Exception: + return None + +def write_toot(toot): + """ + Takes a toot json + and writes it to a hugo + markdown content file. + """ + toot_id = toot['id'] + try: + with open(toot_file_from_id(toot_id), "w", encoding="UTF-8") as toot_file: + toot_file.write(create_toot(toot)) + except Exception as e: + print("Failed to write toot", toot_id) + +# Read in saved toot data +toot_filenames = glob(TOOT_CONTENT_LOCATION + "/*.md") +toot_ids = { Path(fname).stem for fname in toot_filenames } + +server_toots = retrieve_toots_from_server() + +for stoot in server_toots: + # Skip boosts for now + if stoot['content'] == '': + continue + + reformat_toot(stoot) + stoot_id = stoot['id'] + + + # If the toot already exists + if stoot_id in toot_ids: + saved_tootdata = read_toot(stoot_id) + if saved_tootdata is None: + print("Unable to read saved toot id", stoot_id) + + # Only update if toot has changed + elif saved_tootdata != stoot: + print("Updating toot id", stoot_id) + write_toot(stoot) + + # New toot found + else: + print("Creating toot id", stoot_id) + write_toot(stoot) + +print("Completed")