From 8ab0da958620f5a54123aaadc8a2009500ac044e Mon Sep 17 00:00:00 2001 From: Brandon Rozek Date: Sun, 27 Apr 2025 14:02:20 -0400 Subject: [PATCH] New Post --- .../filesystem-as-persistent-kvs-python.md | 258 ++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 content/blog/filesystem-as-persistent-kvs-python.md diff --git a/content/blog/filesystem-as-persistent-kvs-python.md b/content/blog/filesystem-as-persistent-kvs-python.md new file mode 100644 index 0000000..8bda61f --- /dev/null +++ b/content/blog/filesystem-as-persistent-kvs-python.md @@ -0,0 +1,258 @@ +--- +title: "Filesystem as a persistent key-value store in Python" +date: 2025-04-27T13:39:49-04:00 +draft: false +tags: [] +math: false +medium_enabled: false +--- + +The first level within the [five levels of configuration languages](https://beza1e1.tuxen.de/config_levels.html) is a string in a file. The example that Andreas gives in his blog post is enabling/disabling energy aware scheduling in the kernel. + +```bash +# Read the current value +cat /proc/sys/kernel/sched_energy_aware +``` + +This approach does not require using any custom serializers or deserializers. Instead, the user or program only needs to navigate and read/write the filesystem.[^1] + +[^1]: You likely want to make sure that your program is sufficiently isolated. Either by running it within it's own container or setting up custom permissions on the files. + +However, having to write filesystem code is tedious. What we want is a dictionary that we can write and read from that handles all of the filesystem procedures for us. + +In this post, we'll go over how to achieve this in Python. By the end we'll be able to do the following: + +```bash +# Create key-value store +mkdir /tmp/kvs +# Add value "Brandon" to key "name" +echo "Brandon" | tee /tmp/kvs/name +``` + +```python +# Set /tmp/kvs as the root of our key-value store +kvs = FSDict("/tmp/kvs") +# Read the key 'name' +print("Name:", kvs['name']) +# Name: Brandon +``` + +## Creating the library + +To emulate reading and writing from a dictionary, we will create a custom class and implement the `__setitem__` and `__getitem__` methods. + +```python +class FSDict: + def __init__(self, base_directory: Optional[str] = None): + self.base_directory: Optional[str] = None + if base_directory is not None: + self.set_base_directory(base_directory) + + def set_base_directory(self, base_directory: str): + assert isinstance(base_directory, str) + if os.path.isfile(base_directory): + raise Exception("base_directory is an existing file not a folder.") + + self.base_directory = base_directory + if not os.path.exists(base_directory): + os.makedirs(base_directory) + + # ... +``` + +From here, we can declare a folder as the root of our key-value store and if it does not exist then the code will create it for us. + +```python +kvs = FSDict("/tmp/kvs") +``` + +When we write `kvs['name']`, we want it to read from the file or folder `/tmp/kvs/name`. Our next method then will create the path. + +```python +def get_path(self, key: str) -> str: + assert self.base_directory is not None + if not isinstance(key, str): + raise ValueError("key must be of type str") + return os.path.join(self.base_directory, key) +``` + +Just like in our initial kernel example, we want a way to have nested folders. Therefore, in this API when we read and write from the KVS, if the value is a `str` then we're writing it's contents to a file, otherwise if it is a `FSDict` then we'll create the folder if it does not already exist. + +To write to our key-value store: + +```python +def __setitem__(self, key: str, value: Union[str, "FSDict"]): + assert isinstance(value, (str, FSDict)), "Value must either be of type str or FSDict" + path = self.get_path(key) + + # If we're writing a FSDict, then create + # a folder if it doesn't already exist. + if isinstance(value, FSDict): + value.set_base_directory(path) + return None + + # Otherwise, assume we're writing a + # string to a file + with open(path, "w") as f: + f.write(value) +``` + +To read from our key-value store: + +```python +def __getitem__(self, key: str) -> Union[str, "FSDict"]: + path = self.get_path(key) + + if not os.path.exists(path): + raise KeyError(key) + + # Return a FSDict if it's a folder + if os.path.isdir(path): + return FSDict(path) + + # Otherwise, assume we're reading a file + with open(path, "r") as f: + return f.read() +``` + +With that, we have the minimum viable product for reading and writing to our dictionary-like class! + +```python +kvs = FSDict("/tmp/kvs") +kvs['name'] = 'Brandon' +print(kvs['name']) +# Prints 'Brandon' +``` + +The base dictionary class has other methods that make our lives easier. Let's implement a few more. + +```python +def __delitem__(self, key: str): + path = self.get_path(key) + + if not os.path.exists(path): + raise KeyError(key) + + # If the key is a folder, recursively + # remove all contents of that folder + if os.path.isdir(path): + shutil.rmtree(path) + return None + + # Otherwise, remove the file + os.remove(path) + +def __contains__(self, key: str) -> bool: + path = self.get_path(key) + return os.path.exists(path) + +def keys(self) -> List[str]: + assert self.base_directory is not None + return os.listdir(self.base_directory) + +def __repr__(self) -> str: + return f"FSDict(base_directory={self.base_directory})" +``` + +Extending our last example, this gives us: + +```python +# Show all keys +print(kvs.keys()) +# Result: ['name'] + +# Check if a key is in the key-value store +print('name' in kvs) +# Result: True + +# Delete a key-value by it's key +del kvs['name'] + +# Check if it still exists in our key-value store +print('name' in kvs) +# Result: False +``` + +## Conclusion + +The class `FSDict` provides a simple way in Python to use the filesystem as a persistent key-value store. Unlike storing everything in a single file or database, we do not need special serializers or deserializers to access to data. Outside the program we can use `echo` and `cat` in our terminals to interact with the key-value store. + +The full code is as follows: + +```python +from typing import List, Optional, Union +import os +import shutil + +class FSDict: + def __init__(self, base_directory: Optional[str] = None): + self.base_directory: Optional[str] = None + if base_directory is not None: + self.set_base_directory(base_directory) + + def set_base_directory(self, base_directory: str): + assert isinstance(base_directory, str) + if os.path.isfile(base_directory): + raise Exception("base_directory is an existing file not a folder.") + + self.base_directory = base_directory + if not os.path.exists(base_directory): + os.makedirs(base_directory) + + def get_path(self, key: str) -> str: + assert self.base_directory is not None + if not isinstance(key, str): + raise ValueError("Key must be of type str") + return os.path.join(self.base_directory, key) + + def __setitem__(self, key: str, value: Union[str, "FSDict"]): + assert isinstance(value, (str, FSDict)), "Value must either be of type str of FSDict" + path = self.get_path(key) + + if isinstance(value, FSDict): + value.set_base_directory(path) + return None + + # Assuming value is a str + with open(path, "w") as f: + f.write(value) + + def __getitem__(self, key: str) -> Union[str, "FSDict"]: + path = self.get_path(key) + + if not os.path.exists(path): + raise KeyError(key) + + if os.path.isdir(path): + return FSDict(path) + + # Assume it's a file + with open(path, "r") as f: + return f.read() + + def __delitem__(self, key: str): + path = self.get_path(key) + + if not os.path.exists(path): + raise KeyError(key) + + if os.path.isdir(path): + shutil.rmtree(path) + return None + + # Assume it's a file + os.remove(path) + + def __contains__(self, key: str) -> bool: + path = self.get_path(key) + return os.path.exists(path) + + def keys(self) -> List[str]: + assert self.base_directory is not None + return os.listdir(self.base_directory) + + def __repr__(self) -> str: + return f"FSDict(base_directory={self.base_directory})" + +``` +