Added functionality for --after, more code cleanup

This commit is contained in:
Amber McCloughan 2026-01-01 02:18:39 -05:00
parent 5e1ff1c245
commit 590277d7ee
2 changed files with 61 additions and 18 deletions

View File

@ -1,7 +1,7 @@
# tumblr-stats # tumblr-stats
## Usage ## Usage
``` ```
usage: tumblr_stats.py [-h] -b BLOG [-t TAGS [TAGS ...]] OPERATION [OPERATION ...] usage: tumblr_stats.py [-h] -b BLOG [-t TAGS [TAGS ...]] [-i INPUT] [--after AFTER] OPERATION [OPERATION ...]
Use pytumblr to calculate stats after setting these enviroment variables: $TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET Use pytumblr to calculate stats after setting these enviroment variables: $TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET
@ -13,6 +13,8 @@ options:
-b, --blog BLOG blog name for which to calculate stats -b, --blog BLOG blog name for which to calculate stats
-t, --tags TAGS [TAGS ...] -t, --tags TAGS [TAGS ...]
tag(s) to focus on in status (if applicable) tag(s) to focus on in status (if applicable)
-i, --input INPUT Don't make API calls, just use a JSON input file
--after AFTER only gather posts after YYYY-MM-DD
— Be gay and do crime — Be gay and do crime
``` ```

View File

@ -2,10 +2,12 @@
import argparse import argparse
import csv import csv
from dataclasses import asdict from dataclasses import asdict
from datetime import datetime
import json import json
import os import os
from pathlib import Path
import sys import sys
from typing import Any, Dict, List, Tuple from typing import Any, Callable, Dict, List, Tuple
import pytumblr import pytumblr
@ -16,7 +18,7 @@ from stats_model import StatsModel
def get_args() -> Dict[str, Any]: def get_args() -> Dict[str, Any]:
"""Pull arguments from command line, turn them into a dictionary of <arg, value>""" """Pull arguments from command line, turn them into a dictionary of <arg, value>"""
parser = argparse.ArgumentParser( parser: argparse.ArgumentParser = argparse.ArgumentParser(
prog='tumblr_stats.py', prog='tumblr_stats.py',
description='Use pytumblr to calculate stats after setting these enviroment variables: ' description='Use pytumblr to calculate stats after setting these enviroment variables: '
+ '$TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET', + '$TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET',
@ -28,12 +30,13 @@ def get_args() -> Dict[str, Any]:
help='blog name for which to calculate stats') help='blog name for which to calculate stats')
parser.add_argument('-t', '--tags', type=str, nargs='+', parser.add_argument('-t', '--tags', type=str, nargs='+',
help='tag(s) to focus on in status (if applicable)') help='tag(s) to focus on in status (if applicable)')
# TODO: Make 'before' work, but it actually depends on https://github.com/tumblr/pytumblr/issues/174. parser.add_argument('-i', '--input', type=str,
help='Don\'t make API calls, just use a JSON input file')
# TODO: Make 'before' work, but it depends on https://github.com/tumblr/pytumblr/issues/174.
# parser.add_argument('--before', type=lambda s: datetime.strptime(s, '%Y-%m-%d'), # parser.add_argument('--before', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
# help='only gather posts before YYYY-MM-DD') # help='only gather posts before YYYY-MM-DD')
# TODO: Make 'after' work if they add it to pytumblr. parser.add_argument('--after', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
# parser.add_argument('--after', type=lambda s: datetime.strptime(s, '%Y-%m-%d'), help='only gather posts after YYYY-MM-DD')
# help='only gather posts after YYYY-MM-DD')
return vars(parser.parse_args()) return vars(parser.parse_args())
@ -61,6 +64,18 @@ def init_client() -> pytumblr.TumblrRestClient:
) )
def filter_posts_for_after(post_list: List[Dict[str, Any]],
                           after: datetime) -> List[Dict[str, Any]]:
    """Return the posts in post_list dated strictly later than `after`.

    Each post's 'date' value is parsed with the format
    '%Y-%m-%d %H:%M:%S %Z' and compared against `after`.

    Args:
        post_list: Post dictionaries, each carrying a 'date' string.
        after: Cutoff datetime; posts at or before it are dropped. If it is
            None, no filtering is applied and all posts are returned.

    Returns:
        A new list with the surviving posts, in their original order.

    Raises:
        KeyError: If a post has no 'date' key.
        ValueError: If a 'date' string does not match the expected format.
    """
    # Nothing to filter.
    if not post_list:
        return []
    # No cutoff means "keep everything", not "drop everything".
    if after is None:
        return list(post_list)

    date_format = '%Y-%m-%d %H:%M:%S %Z'
    # strptime yields a naive datetime here (%Z does not attach a tzinfo),
    # so `after` must be naive as well or the comparison raises TypeError.
    return [post for post in post_list
            if datetime.strptime(post['date'], date_format) > after]
def build_post_maps(client: pytumblr.TumblrRestClient, def build_post_maps(client: pytumblr.TumblrRestClient,
args: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]: args: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
og_post_map: Dict[str, Any] = {} og_post_map: Dict[str, Any] = {}
@ -75,10 +90,6 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
# if args['before']: # if args['before']:
# before: datetime = args['before'] # before: datetime = args['before']
# params.update({'before': int(before.timestamp())}) # params.update({'before': int(before.timestamp())})
# TODO: Make 'after' work.
# if args['after']:
# after: datetime = args['after']
# params.update({'after': str(int(after.timestamp()))})
total: int = 0 total: int = 0
offset: int = 0 offset: int = 0
@ -94,7 +105,8 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
**params) **params)
# Stop the presses if we found no posts. # Stop the presses if we found no posts.
if not data['posts']: curr_posts: List[Dict[str, Any]] = data['posts']
if not curr_posts or len(curr_posts) < 1:
print('Stopping, as no posts were found.') print('Stopping, as no posts were found.')
break break
@ -105,8 +117,15 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
print(f"I'm working with {total_posts} total posts...") print(f"I'm working with {total_posts} total posts...")
total = total_posts total = total_posts
# Behavior for 'after'.
if args['after']:
after: datetime = args['after']
curr_posts = filter_posts_for_after(curr_posts, after)
if not curr_posts:
print(f"All posts after {after.year}-{after.month} processed.")
return (og_post_map, un_og_post_map)
# This block populates the local post_maps from the raw response data. # This block populates the local post_maps from the raw response data.
curr_posts: List[Dict[str, Any]] = data['posts']
local_og_post_map: Dict[str, Any] = { local_og_post_map: Dict[str, Any] = {
item['id_string']: item for item in curr_posts if 'parent_post_url' not in item item['id_string']: item for item in curr_posts if 'parent_post_url' not in item
} }
@ -120,7 +139,9 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
# The increment and status printing. Should always end the loop! # The increment and status printing. Should always end the loop!
offset += limit offset += limit
print(f"Processed batch {offset // limit} of {(total // 20) + 1}...") if not args['after']:
print(
f"Processed batch {offset // limit} of {(total // 20) + 1}...")
# End LOOP # End LOOP
# Return (og_posts, not_og_posts). # Return (og_posts, not_og_posts).
@ -128,11 +149,31 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
def main() -> None: def main() -> None:
args = get_args() args: Dict[str, Any] = get_args()
client = init_client() client: pytumblr.TumblrRestClient = init_client()
# Get the og_post_map (original posts) and un_og_post_map (not original posts). # Handle JSON input (if you don't want to make API calls.)
og_post_map, un_og_post_map = build_post_maps(args=args, client=client) if 'input' in args and args['input']:
input_path = Path(args['input'])
with open(input_path, "r") as f:
data = json.load(f)
og_post_map = data['original_post_map']
un_og_post_map = data['unoriginal_post_map']
for post_key in og_post_map.copy():
post = og_post_map[post_key]
date: datetime = datetime.strptime(
post['date'], '%Y-%m-%d %H:%M:%S %Z')
if date.year != 2025:
del og_post_map[post_key]
for post_key in un_og_post_map.copy():
post = un_og_post_map[post_key]
date: datetime = datetime.strptime(
post['date'], '%Y-%m-%d %H:%M:%S %Z')
if date.year != 2025:
del un_og_post_map[post_key]
else:
# Get the og_post_map (original posts) and un_og_post_map (not original posts).
og_post_map, un_og_post_map = build_post_maps(args=args, client=client)
# Pick a stats model, which will determine output. # Pick a stats model, which will determine output.
stats_model: StatsModel stats_model: StatsModel