mirror of
https://github.com/Brandon-Rozek/website-observations.git
synced 2024-12-22 07:23:26 +00:00
GH Action to sync iNaturalist Observations
This commit is contained in:
parent
d382a9411c
commit
3db68f43ad
2 changed files with 293 additions and 0 deletions
35
.github/workflows/refresh.yml
vendored
Normal file
35
.github/workflows/refresh.yml
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
# Runs the iNaturalist sync script and commits any new or changed
# observation markdown files back to the repository.
name: Sync Observations from iNaturalist

on:
  # Allow manual runs from the Actions tab
  workflow_dispatch:
  schedule:
    # Every day at 23:12 (GitHub evaluates cron schedules in UTC)
    - cron: "12 23 * * *"

jobs:
  build_and_publish:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Setup git config
        run: |
          git config user.name "GitHub Actions Bot"
          git config user.email "<>"

      - name: Grab latest observations
        run: |
          ./.scripts/refreshobservations.py

      - name: Check if there's any changes
        id: verify_diff
        run: |
          # `git diff` ignores untracked files, so a run that only created
          # new observation files would go unnoticed. `git status --porcelain`
          # lists modified and untracked files alike.
          if [ -n "$(git status --porcelain -- *.md)" ]; then
            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Commit and push
        if: steps.verify_diff.outputs.changed == 'true'
        run: |
          git add *.md
          git commit -m "New/Modified Observations"
          git push origin gh_actions
|
258
.scripts/refreshobservations.py
Executable file
258
.scripts/refreshobservations.py
Executable file
|
@ -0,0 +1,258 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Script to create Hugo markdown
|
||||
files from iNaturalist Observations
|
||||
"""
|
||||
|
||||
import json
import sys
import time
from glob import glob
from http.client import HTTPResponse
from pathlib import Path
from typing import Any, Dict, Optional, Tuple
from urllib import request
|
||||
|
||||
# Directory holding the generated markdown files: it is globbed for existing
# "*.md" files and used as the prefix of every file that is read or written.
CONTENT_LOCATION = "./"
|
||||
# iNaturalist user whose observations are synced (sent as the user_id query parameter).
USER_ID = "brandonrozek"
|
||||
# Pagination cursor for retrieve_obs_ids_from_server(): sent as id_below when
# positive and overwritten after every page; -1 means "no lower bound yet".
MIN_OBS_ID = -1
|
||||
|
||||
|
||||
def retrieve_data_from_server():
    """
    Download the observations listed by retrieve_obs_ids_from_server().

    Each id is requested individually from the iNaturalist API and the
    first entry of the response's 'results' list is passed through
    reformat_obs(). Observations that cannot be downloaded or parsed
    are reported on stdout and skipped.

    Returns a list of dicts as produced by reformat_obs().
    """
    server_data = []

    for id_num in retrieve_obs_ids_from_server():
        # Grab observation from iNaturalist
        url = f"https://api.inaturalist.org/v1/observations/{id_num}"
        raw_body = None

        try:
            # The context manager closes the connection even when
            # reading the body fails part-way through.
            with request.urlopen(url) as response:
                raw_body = response.read()
        except Exception:
            print(f"Unable to grab observation {id_num} from iNaturalist.")

        # Sleep after failed requests too: they also count as a request.
        time.sleep(1)  # Rate Limit: 1 request/sec

        if raw_body is None:
            continue

        # Parse server response
        try:
            # Note: there is only one observation as a result
            obs_json = json.loads(raw_body)['results'][0]
        except Exception:
            print(f"Malformed JSON response for observation {id_num}.")
            continue

        server_data.append(reformat_obs(id_num, obs_json))

    print(f"Successfully obtained {len(server_data)} observations from the server.")
    return server_data
|
||||
|
||||
|
||||
def retrieve_obs_ids_from_server():
    """
    Grabs observation ids from iNaturalist server

    Requests the observations of USER_ID ordered by creation time,
    newest first, one page at a time. The last id of each page is
    stored in the module-level MIN_OBS_ID and sent as the ``id_below``
    filter of the next request. Paging stops when the server returns
    an empty 'results' list.

    Returns the list of ids in the order the server returned them.
    Calls sys.exit(-1) if a request fails or the response does not
    have the expected shape.
    """
    global MIN_OBS_ID
    server_data = []

    while True:
        # Grab observations from iNaturalist
        id_below = f"&id_below={MIN_OBS_ID}" if MIN_OBS_ID > 0 else ""
        url = "https://api.inaturalist.org/v1/observations?order=desc&order_by=created_at&only_id=true&user_id=" + USER_ID + id_below

        try:
            # The context manager makes sure the connection is closed.
            with request.urlopen(url) as response:
                raw_body = response.read()
        except Exception:
            print("Unable to grab observations from iNaturalist.")
            sys.exit(-1)

        time.sleep(1)  # Rate Limit: 1 request/sec

        # Parse server response
        try:
            server_data_part = json.loads(raw_body)['results']
        except Exception:
            print("Malformed JSON response from server.")
            sys.exit(-1)

        if not isinstance(server_data_part, list):
            print("Unexpected JSON response, should be of form list.")
            sys.exit(-1)

        # No more to retrieve
        if not server_data_part:
            break

        page_ids = [d['id'] for d in server_data_part]
        server_data.extend(page_ids)

        # The next request only asks for ids below the last one seen.
        MIN_OBS_ID = page_ids[-1]

    print(f"Parsed a total of {len(server_data)} ids from server")
    return server_data
|
||||
|
||||
|
||||
def reformat_obs(obsid, obs_json):
    """
    Takes a obs_json and
    slightly modifies it to match
    some of the fields Hugo expects.

    obsid is stored as a string under 'id'. obs_json is the decoded
    JSON object of a single observation; it must contain 'uri',
    'time_observed_at' and every key listed in desired_fields below
    (a KeyError is raised otherwise).

    Returns a dict with the keys 'id', 'metadata' and 'content',
    where 'content' is always the empty string.
    """
    # Grab only a few fields
    desired_fields = [
        'quality_grade', 'identifications_most_agree',
        'species_guess', 'identifications_most_disagree',
        'captive', 'project_ids',
        'community_taxon_id', 'geojson',
        'owners_identification_from_vision',
        'identifications_count', 'obscured',
        'num_identification_agreements',
        'num_identification_disagreements',
        'place_guess', "photos"
    ]

    # 'taxon' may be absent or null, and a taxon may lack
    # 'preferred_common_name' (presumably unidentified observations and
    # taxa without a common name -- confirm against the API). Fall back
    # to None instead of aborting the whole sync on one observation.
    taxon = obs_json.get('taxon') or {}

    metadata = {
        # Turn URL -> Syndication
        'syndication': obs_json['uri'],
        # Turn observation time -> Date
        'date': obs_json['time_observed_at'],
        # Grab some taxonomy information about the organism
        'taxon': dict(
            name=taxon.get('name'),
            common_name=taxon.get('preferred_common_name')
        ),
    }
    for key in desired_fields:
        metadata[key] = obs_json[key]

    return dict(
        id=str(obsid),
        metadata=metadata,
        content=""
    )
|
||||
|
||||
############################################################################
|
||||
|
||||
def findall(p, s):
    """
    Generate, in increasing order, every index at
    which the pattern p occurs in the string s.
    Overlapping occurrences are reported as well.
    Source: https://stackoverflow.com/a/34445090
    """
    search_from = 0
    while True:
        hit = s.find(p, search_from)
        if hit == -1:
            return
        yield hit
        # Resume one character past the hit so overlaps are found too
        search_from = hit + 1
|
||||
|
||||
def hugo_markdown_to_json(markdown_contents) -> Optional[Tuple[Optional[Dict[Any, Any]], str]]:
    """
    Take the contents from a Hugo markdown
    file and read the JSON frontmatter if it
    exists.

    Returns None when no pair of '---' delimiters is found.
    Otherwise returns a tuple (frontmatter, html): frontmatter is
    the decoded JSON object, or None when the text between the
    delimiters is not a JSON object; html is the text between the
    unsafe shortcodes, located by the fixed layout that
    create_markdown_str writes.
    """
    delimiter = '---'
    # Text written between the front matter and the content, and after it.
    content_prefix = delimiter + "\n{{< unsafe >}}\n"
    content_suffix = "\n{{< /unsafe >}}\n"

    opening = markdown_contents.find(delimiter)
    if opening == -1:
        return None
    front_matter_start = opening + len(delimiter)

    # Prefer a closing delimiter that starts a line: the JSON is written
    # on a single line, so a '---' inside one of its string values can
    # never be mistaken for the end of the front matter.
    line_break = markdown_contents.find("\n" + delimiter, front_matter_start)
    if line_break != -1:
        closing = line_break + 1
    else:
        # No delimiter at the start of a line: use the next occurrence.
        closing = markdown_contents.find(delimiter, opening + 1)
    if closing == -1:
        return None

    front_matter = markdown_contents[front_matter_start:closing]
    try:
        json_contents = json.loads(front_matter)
    except ValueError:
        json_contents = None
    if not isinstance(json_contents, dict):
        json_contents = None

    html_contents = markdown_contents[closing + len(content_prefix):-len(content_suffix)]
    return json_contents, html_contents
|
||||
|
||||
def create_markdown_str(frontmatter, content):
    """
    Render the text of a Hugo markdown file:
    the frontmatter serialized as JSON between
    two '---' lines, followed by the content
    wrapped in the unsafe shortcode.
    """
    rendered_lines = [
        "---",
        json.dumps(frontmatter),
        "---",
        "{{< unsafe >}}",
        f"{content}",
        "{{< /unsafe >}}",
    ]
    # Every line, including the last one, ends with a newline
    return "".join(line + "\n" for line in rendered_lines)
|
||||
|
||||
def file_from_id(idnum):
    """Build the path of the markdown file that belongs to idnum."""
    return "{}/{}.md".format(CONTENT_LOCATION, idnum)
|
||||
|
||||
def read_hugo_markdown(idnum) -> Optional[Tuple[Optional[Dict[Any, Any]], str]]:
    """
    Given an id, return the markdown file
    frontmatter and contents stored in Hugo
    if it exists.

    Returns None when the file cannot be read or when
    hugo_markdown_to_json finds no front matter in it. Otherwise
    returns the (frontmatter, contents) tuple produced by
    hugo_markdown_to_json, whose frontmatter may itself be None.
    """
    try:
        with open(file_from_id(idnum), "r", encoding="UTF-8") as hugo_file:
            raw_markdown = hugo_file.read()
    except (OSError, UnicodeDecodeError):
        # Missing/unreadable file, or one that is not valid UTF-8
        return None

    # Passed through unchanged: either None or (frontmatter, contents)
    return hugo_markdown_to_json(raw_markdown)
|
||||
|
||||
def write_markdown(id_num, frontmatter, contents):
    """
    Takes a frontmatter json
    and writes it to a hugo
    markdown content file.

    The target path comes from file_from_id(id_num) and the text
    from create_markdown_str(frontmatter, contents). Failures are
    reported on stdout and not re-raised.
    """
    try:
        # Render before opening: mode "w" truncates the file, so a
        # serialization error after open() would leave it empty.
        rendered = create_markdown_str(frontmatter, contents)
        with open(file_from_id(id_num), "w", encoding="UTF-8") as hugo_file:
            hugo_file.write(rendered)
    except Exception as e:
        print("Failed to write", id_num, "-", e)
|
||||
|
||||
|
||||
############################################################################
|
||||
|
||||
# Read in saved data
saved_filenames = glob(CONTENT_LOCATION + "/*.md")
saved_ids = { Path(fname).stem for fname in saved_filenames }

server_data = retrieve_data_from_server()

# Data is structured like [{id: '', metadata: {}, content: ''}]
# Where metadata is a JSON object and content is HTML

for data in server_data:
    id_num = data['id']

    # New observation found
    if id_num not in saved_ids:
        print("Creating id", id_num)
        write_markdown(id_num, data['metadata'], data['content'])
        continue

    # The observation already exists.
    # read_hugo_markdown returns None (not a tuple) when the file cannot
    # be read, so the result has to be checked before it is unpacked.
    saved = read_hugo_markdown(id_num)
    saved_fm, saved_content = saved if saved is not None else (None, None)

    if saved_fm is None:
        print("Unable to read saved data id", id_num)

    # Only update if observation has changed
    elif saved_fm != data['metadata']:
        print("Updating id", id_num)
        write_markdown(id_num, data['metadata'], data['content'])

print("Completed")
|
Loading…
Reference in a new issue