Added functionality for --after, more code cleanup

This commit is contained in:
Amber McCloughan 2026-01-01 02:18:39 -05:00
parent 5e1ff1c245
commit 590277d7ee
2 changed files with 61 additions and 18 deletions

View File

@ -1,7 +1,7 @@
# tumblr-stats
## Usage
```
usage: tumblr_stats.py [-h] -b BLOG [-t TAGS [TAGS ...]] OPERATION [OPERATION ...]
usage: tumblr_stats.py [-h] -b BLOG [-t TAGS [TAGS ...]] [-i INPUT] [--after AFTER] OPERATION [OPERATION ...]
Use pytumblr to calculate stats after setting these enviroment variables: $TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET
@ -13,6 +13,8 @@ options:
-b, --blog BLOG blog name for which to calculate stats
-t, --tags TAGS [TAGS ...]
tag(s) to focus on in status (if applicable)
-i, --input INPUT Don't make API calls, just use a JSON input file
--after AFTER only gather posts after YYYY-MM-DD
— Be gay and do crime
```

View File

@ -2,10 +2,12 @@
import argparse
import csv
from dataclasses import asdict
from datetime import datetime
import json
import os
from pathlib import Path
import sys
from typing import Any, Dict, List, Tuple
from typing import Any, Callable, Dict, List, Tuple
import pytumblr
@ -16,7 +18,7 @@ from stats_model import StatsModel
def get_args() -> Dict[str, Any]:
"""Pull arguments from command line, turn them into a dictionary of <arg, value>"""
parser = argparse.ArgumentParser(
parser: argparse.ArgumentParser = argparse.ArgumentParser(
prog='tumblr_stats.py',
description='Use pytumblr to calculate stats after setting these enviroment variables: '
+ '$TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET',
@ -28,12 +30,13 @@ def get_args() -> Dict[str, Any]:
help='blog name for which to calculate stats')
parser.add_argument('-t', '--tags', type=str, nargs='+',
help='tag(s) to focus on in status (if applicable)')
# TODO: Make 'before' work, but it actually depends on https://github.com/tumblr/pytumblr/issues/174.
parser.add_argument('-i', '--input', type=str,
help='Don\'t make API calls, just use a JSON input file')
# TODO: Make 'before' work, but it depends on https://github.com/tumblr/pytumblr/issues/174.
# parser.add_argument('--before', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
# help='only gather posts before YYYY-MM-DD')
# TODO: Make 'after' work if they add it to pytumblr.
# parser.add_argument('--after', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
# help='only gather posts after YYYY-MM-DD')
parser.add_argument('--after', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
help='only gather posts after YYYY-MM-DD')
return vars(parser.parse_args())
@ -61,6 +64,18 @@ def init_client() -> pytumblr.TumblrRestClient:
)
def filter_posts_for_after(post_list: List[Dict[str, Any]],
                           after: datetime) -> List[Dict[str, Any]]:
    """Return the posts from ``post_list`` dated strictly later than ``after``.

    Each post's ``'date'`` string is parsed with the format
    ``'%Y-%m-%d %H:%M:%S %Z'`` and compared against ``after``.

    Args:
        post_list: Post dictionaries; every entry must have a ``'date'``
            string in the format above.
        after: Cutoff moment. Posts dated at or before it are dropped.
            When no cutoff is given (a falsy value such as ``None``), no
            filtering is applied.

    Returns:
        A new list with the surviving posts in their original order. An
        empty list when ``post_list`` is empty or ``None``.

    Raises:
        KeyError: If a post has no ``'date'`` key.
        ValueError: If a post's ``'date'`` does not match the format.
    """
    # Nothing to filter.
    if not post_list:
        return []

    # No cutoff means "keep everything"; hand back a copy so callers can
    # mutate the result without touching the input list.
    if not after:
        return list(post_list)

    # NOTE(review): the parsed date is compared directly against `after`;
    # this assumes both sides are naive datetimes in the same zone.
    date_format = '%Y-%m-%d %H:%M:%S %Z'
    return [
        post for post in post_list
        if datetime.strptime(post['date'], date_format) > after
    ]
def build_post_maps(client: pytumblr.TumblrRestClient,
args: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
og_post_map: Dict[str, Any] = {}
@ -75,10 +90,6 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
# if args['before']:
# before: datetime = args['before']
# params.update({'before': int(before.timestamp())})
# TODO: Make 'after' work.
# if args['after']:
# after: datetime = args['after']
# params.update({'after': str(int(after.timestamp()))})
total: int = 0
offset: int = 0
@ -94,7 +105,8 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
**params)
# Stop the presses if we found no posts.
if not data['posts']:
curr_posts: List[Dict[str, Any]] = data['posts']
if not curr_posts or len(curr_posts) < 1:
print('Stopping, as no posts were found.')
break
@ -105,8 +117,15 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
print(f"I'm working with {total_posts} total posts...")
total = total_posts
# Behavior for 'after'.
if args['after']:
after: datetime = args['after']
curr_posts = filter_posts_for_after(curr_posts, after)
if not curr_posts:
print(f"All posts after {after.year}-{after.month} processed.")
return (og_post_map, un_og_post_map)
# This block populates the local post_maps from the raw response data.
curr_posts: List[Dict[str, Any]] = data['posts']
local_og_post_map: Dict[str, Any] = {
item['id_string']: item for item in curr_posts if 'parent_post_url' not in item
}
@ -120,7 +139,9 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
# The increment and status printing. Should always end the loop!
offset += limit
print(f"Processed batch {offset // limit} of {(total // 20) + 1}...")
if not args['after']:
print(
f"Processed batch {offset // limit} of {(total // 20) + 1}...")
# End LOOP
# Return (og_posts, not_og_posts).
@ -128,11 +149,31 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
def main() -> None:
args = get_args()
client = init_client()
args: Dict[str, Any] = get_args()
client: pytumblr.TumblrRestClient = init_client()
# Get the og_post_map (original posts) and un_og_post_map (not original posts).
og_post_map, un_og_post_map = build_post_maps(args=args, client=client)
# Handle JSON input (if you don't want to make API calls.)
if 'input' in args and args['input']:
input_path = Path(args['input'])
with open(input_path, "r") as f:
data = json.load(f)
og_post_map = data['original_post_map']
un_og_post_map = data['unoriginal_post_map']
for post_key in og_post_map.copy():
post = og_post_map[post_key]
date: datetime = datetime.strptime(
post['date'], '%Y-%m-%d %H:%M:%S %Z')
if date.year != 2025:
del og_post_map[post_key]
for post_key in un_og_post_map.copy():
post = un_og_post_map[post_key]
date: datetime = datetime.strptime(
post['date'], '%Y-%m-%d %H:%M:%S %Z')
if date.year != 2025:
del un_og_post_map[post_key]
else:
# Get the og_post_map (original posts) and un_og_post_map (not original posts).
og_post_map, un_og_post_map = build_post_maps(args=args, client=client)
# Pick a stats model, which will determine output.
stats_model: StatsModel