New version of toot grabber which creates a hugo markdown file

This commit is contained in:
Brandon Rozek 2022-05-14 22:58:49 -04:00
parent 5f85a31a71
commit 6a0d71c121

186
refreshtoots_v2.py Executable file
View file

@ -0,0 +1,186 @@
#!/usr/bin/env python
"""
Script to create Hugo markdown
files from Mastodon Toots
"""
from urllib import request
from glob import glob
from http.client import HTTPResponse
from pathlib import Path
from typing import Any, Dict, Optional
import json
import sys
# Directory that holds one "<toot id>.md" file per toot.
# The path is relative, so it is resolved against the
# current working directory when the script runs.
TOOT_CONTENT_LOCATION = "content/toots"
# Base URL of the Mastodon server the toots are fetched from
SERVER="https://fosstodon.org"
# Account id whose statuses are fetched.
# Quick way to find user id: https://prouser123.me/mastodon-userid-lookup/
MUID=108219415927856966
# Number of toots requested per run. When this is <= 0 no
# limit is sent and the server default (20) applies.
# max allowed is 40
RETRIEVE_NUM_TOOTS=40
def retrieve_toots_from_server():
    """
    Grabs toots from Mastodon server.

    Builds the account statuses URL from SERVER, MUID and
    RETRIEVE_NUM_TOOTS, downloads it and decodes the JSON body.

    Returns the decoded list of toots.

    Prints a message and exits the process with status -1 when
    the request fails, the body is not valid JSON, or the
    decoded value is not a list.
    """
    # Only send a limit when one is configured; otherwise the
    # server applies its own default.
    limit_param = f"?limit={RETRIEVE_NUM_TOOTS}" if RETRIEVE_NUM_TOOTS > 0 else ""
    url = f"{SERVER}/api/v1/accounts/{MUID}/statuses{limit_param}"

    # Grab toots from Mastodon. The context manager closes the
    # connection even if reading the body fails part way.
    try:
        with request.urlopen(url) as response:
            body = response.read()
    except Exception:
        print("Unable to grab toots from Mastodon.")
        sys.exit(-1)

    # Parse server response. Both invalid JSON and undecodable
    # bytes surface as ValueError subclasses.
    try:
        server_data = json.loads(body)
    except ValueError:
        print("Malformed JSON response from server.")
        sys.exit(-1)

    # Also rejects a JSON `null` body, which decodes to None
    if not isinstance(server_data, list):
        print("Unexpected JSON response, should be of form list.")
        sys.exit(-1)

    print(f"Successfully grabbed {len(server_data)} toots from server")
    return server_data
def findall(p, s):
    """
    Generator over every index at which
    the pattern p starts inside the string s,
    in ascending order. Overlapping matches
    are reported too.
    Source: https://stackoverflow.com/a/34445090
    """
    search_from = 0
    while True:
        hit = s.find(p, search_from)
        if hit < 0:
            return
        yield hit
        # Resume one character later so overlaps are found
        search_from = hit + 1
def read_json_frontmatter(markdown_contents) -> Optional[Dict[Any, Any]]:
    """
    Take the contents from a Hugo markdown
    file and read the JSON frontmatter if it
    exists.

    The frontmatter is the text between the first '---' and a
    later '---'. The toot body that follows the closing marker
    is stored under the 'content' key of the returned dict.

    Returns None when there is no frontmatter or when it does
    not decode to a JSON object.
    """
    delimiter = '---'
    # Text that create_toot() wraps around the toot body:
    # the closing delimiter plus the opening shortcode line
    # come before it, the closing shortcode line comes after.
    body_prefix_len = len("---\n{{< unsafe >}}\n")
    body_suffix_len = len("\n{{< /unsafe >}}\n")

    opening = markdown_contents.find(delimiter)
    if opening == -1:
        return None
    front_matter_start = opening + len(delimiter)

    # A '---' may also occur inside a JSON string value, so try
    # each later occurrence as the closing marker. A cut made in
    # the middle of the JSON cannot decode, so the first
    # candidate that yields an object is the real one.
    closing = markdown_contents.find(delimiter, opening + 1)
    while closing != -1:
        try:
            front_matter_json = json.loads(
                markdown_contents[front_matter_start:closing]
            )
        except ValueError:
            front_matter_json = None
        if isinstance(front_matter_json, dict):
            body_start = closing + body_prefix_len
            front_matter_json['content'] = \
                markdown_contents[body_start:-body_suffix_len]
            return front_matter_json
        closing = markdown_contents.find(delimiter, closing + 1)
    return None
def reformat_toot(toot_json):
    """
    Renames, in place, the fields of a
    Mastodon toot that Hugo expects under
    a different key. Returns nothing.
    """
    # URL -> Syndication (the 'uri' field is dropped as well)
    syndication_link = toot_json['url']
    for stale_key in ('uri', 'url'):
        toot_json.pop(stale_key)
    toot_json['syndication'] = syndication_link
    # Created At -> Date
    toot_json['date'] = toot_json.pop('created_at')
def create_toot(toot_json):
    """
    Takes a JSON toot from Mastodon
    and creates a string representing
    the contents of a Hugo markdown
    file.

    Every field except 'content' is serialised as JSON
    frontmatter between '---' lines. The 'content' value is
    placed after it, wrapped in the 'unsafe' shortcode.

    The given dict is not modified. Raises KeyError when the
    toot has no 'content' field.
    """
    toot_content = toot_json['content']
    # Build a filtered copy instead of deleting the key, so the
    # caller's toot is left intact.
    front_matter = {
        key: value
        for key, value in toot_json.items()
        if key != 'content'
    }
    return "".join([
        "---\n",
        f"{json.dumps(front_matter)}\n",
        "---\n",
        "{{< unsafe >}}\n",
        f"{toot_content}\n",
        "{{< /unsafe >}}\n",
    ])
def toot_file_from_id(tootid):
    """Builds the markdown file path for the given toot id"""
    return "{}/{}.md".format(TOOT_CONTENT_LOCATION, tootid)
def read_toot(tootid) -> Optional[Dict[Any, Any]]:
    """
    Loads the toot with the given id
    from its Hugo markdown file and
    returns the parsed frontmatter.
    Returns None if the file cannot
    be read or parsed.
    """
    toot_path = toot_file_from_id(tootid)
    try:
        with open(toot_path, "r", encoding="UTF-8") as handle:
            raw_markdown = handle.read()
            parsed = read_json_frontmatter(raw_markdown)
    except Exception:
        # Any failure is reported to the caller as "no data"
        return None
    return parsed
def write_toot(toot):
    """
    Takes a toot json
    and writes it to a hugo
    markdown content file.

    The file path is derived from the toot's 'id'. Failures
    while rendering or writing are reported on stdout and not
    raised. A toot without an 'id' raises KeyError.
    """
    toot_id = toot['id']
    try:
        # Render before opening the file: opening in "w" mode
        # truncates it, so a rendering failure must not be able
        # to wipe a previously saved toot.
        contents = create_toot(toot)
        with open(toot_file_from_id(toot_id), "w", encoding="UTF-8") as toot_file:
            toot_file.write(contents)
    except Exception as error:
        print(f"Failed to write toot {toot_id}: {error}")
# Collect the ids of the toots already saved on disk;
# the file name without extension is the toot id
toot_filenames = glob(TOOT_CONTENT_LOCATION + "/*.md")
toot_ids = set(Path(fname).stem for fname in toot_filenames)

server_toots = retrieve_toots_from_server()

for stoot in server_toots:
    # Skip boosts for now (they arrive with empty content)
    if stoot['content'] == '':
        continue

    reformat_toot(stoot)
    stoot_id = stoot['id']

    # New toot found
    if stoot_id not in toot_ids:
        print("Creating toot id", stoot_id)
        write_toot(stoot)
        continue

    # The toot already exists on disk
    saved_tootdata = read_toot(stoot_id)
    if saved_tootdata is None:
        print("Unable to read saved toot id", stoot_id)
        continue

    # Only rewrite the file if the toot has changed
    if saved_tootdata != stoot:
        print("Updating toot id", stoot_id)
        write_toot(stoot)

print("Completed")