Source code for pangea_api.file_system_cache

import os
import logging
import json
from time import time
from glob import glob
from hashlib import sha256
from random import randint

# Library-style logging: the logger is named after this module and gets a
# NullHandler, so nothing is emitted unless the application configures logging.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

# Default lifetime of a cached blob: 3 hours, expressed in seconds.
CACHED_BLOB_TIME = 3 * 60 * 60
# Directory under which cache files are stored; overridable through the
# PANGEA_API_CACHE_DIR environment variable, defaulting to the working directory.
CACHE_DIR = os.getenv('PANGEA_API_CACHE_DIR', '.')


def hash_obj(obj):
    """Return the SHA-256 hex digest identifying `obj`.

    Strings are hashed directly. Any other object must provide a
    `pre_hash()` method whose return value is encoded and hashed instead.
    """
    text = obj if isinstance(obj, str) else obj.pre_hash()
    return sha256(text.encode()).hexdigest()


def time_since_file_cached(blob_filepath):
    """Return the age, in whole seconds, of a cache file.

    The creation time is read from the file name itself: it is the integer
    found between the last ``__`` separator and the ``.json`` extension.
    """
    name_tail = blob_filepath.split('__')[-1]
    cached_at = int(name_tail.split('.json')[0])
    return int(time()) - cached_at


class FileSystemCache:
    """File-backed cache of JSON-serializable blobs.

    Each blob is stored in a file named
    ``{CACHE_DIR}/.pangea_api_cache/v1/pangea_api_cache__{hash}__{timestamp}.json``
    where ``hash`` comes from `hash_obj` and ``timestamp`` is the integer
    time (in seconds) at which the file path was generated. The timestamp in
    the name is what later determines the blob's age.
    """

    def __init__(self, timeout=CACHED_BLOB_TIME):
        """Create a cache whose blobs go stale after roughly `timeout` seconds."""
        self.timeout = timeout

    def clear_blob(self, obj):
        """Delete the most recent cache file for `obj`, if one exists.

        A file that disappears between lookup and removal is not an error.
        """
        blob_filepath, path_exists = self.get_cached_blob_filepath(obj)
        if path_exists:
            logger.debug(f'Clearing cached blob. {blob_filepath}')
            try:
                os.remove(blob_filepath)
            except FileNotFoundError:
                logger.debug(f'Blob was deleted before it could be removed. {obj}')

    def get_cached_blob_filepath(self, obj):
        """Locate the cache file for `obj`.

        Creates the cache directory if needed.

        Returns:
            A ``(filepath, exists)`` tuple. When at least one cache file for
            `obj` is present, `filepath` is the last one in sorted order and
            `exists` is True. Otherwise `filepath` is a new path stamped with
            the current time and `exists` is False; the file is not created.
        """
        path_base = f'{CACHE_DIR}/.pangea_api_cache/v1/pangea_api_cache__{hash_obj(obj)}'
        os.makedirs(os.path.dirname(path_base), exist_ok=True)
        paths = sorted(glob(f'{path_base}__*.json'))
        if paths:
            return paths[-1], True
        timestamp = int(time())
        blob_filepath = f'{path_base}__{timestamp}.json'
        return blob_filepath, False

    def get_cached_blob(self, obj):
        """Return the cached blob for `obj`, or None if it is missing or stale.

        A blob is stale once its age exceeds `self.timeout` plus a random
        jitter of up to a tenth of the timeout; stale files are deleted.
        """
        logger.debug(f'Getting cached blob. {obj}')
        blob_filepath, path_exists = self.get_cached_blob_filepath(obj)
        if not path_exists:  # cache not found
            logger.debug(f'No cached blob found. {obj}')
            return None
        elapsed_time = time_since_file_cached(blob_filepath)
        if elapsed_time > (self.timeout + randint(0, self.timeout // 10)):  # cache is stale
            logger.debug(f'Found stale cached blob. {obj}')
            try:
                os.remove(blob_filepath)
            except FileNotFoundError:
                # Already removed elsewhere; the outcome (no stale file) is the same.
                logger.debug(f'Blob was deleted before it could be removed. {obj}')
            return None
        logger.debug(f'Found good cached blob. {obj}')
        try:
            # Context manager guarantees the file handle is closed after reading.
            with open(blob_filepath) as f:
                return json.load(f)
        except FileNotFoundError:
            logger.debug(f'Blob was deleted before it could be returned. {obj}')
            return None

    def cache_blob(self, obj, blob):
        """Write `blob` as JSON to the cache file for `obj`.

        If a cache file already exists and is younger than half the timeout
        (plus a random jitter of up to a tenth of the timeout), nothing is
        written. An older file is cleared and the blob is cached afresh.
        """
        logger.debug(f'Caching blob. {obj} {blob}')
        blob_filepath, path_exists = self.get_cached_blob_filepath(obj)
        if path_exists:  # save a new cache if an old one exists
            elapsed_time = time_since_file_cached(blob_filepath)
            if elapsed_time < ((self.timeout / 2) + randint(0, self.timeout // 10)):
                # Only reload a file if it is old enough
                return
            self.clear_blob(obj)
            # Retry: the lookup now yields either a fresh path or a remaining older file.
            return self.cache_blob(obj, blob)
        with open(blob_filepath, 'w') as f:
            f.write(json.dumps(blob))