Added functionality for --after, more code cleanup

2026-01-01 02:18:39 -05:00 · 2026-01-01 02:18:39 -05:00 · 590277d7ee
commit 590277d7ee
parent 5e1ff1c245
2 changed files with 61 additions and 18 deletions
--- a/README.md
+++ b/README.md
@ -1,7 +1,7 @@
 # tumblr-stats
 ## Usage
 ```
-usage: tumblr_stats.py [-h] -b BLOG [-t TAGS [TAGS ...]] OPERATION [OPERATION ...]
+usage: tumblr_stats.py [-h] -b BLOG [-t TAGS [TAGS ...]] [-i INPUT] [--after AFTER] OPERATION [OPERATION ...]
 Use pytumblr to calculate stats after setting these environment variables: $TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET
@ -13,6 +13,8 @@ options:
  -b, --blog BLOG       blog name for which to calculate stats
  -t, --tags TAGS [TAGS ...]
                        tag(s) to focus on in status (if applicable)
  -i, --input INPUT     Don't make API calls, just use a JSON input file
  --after AFTER         only gather posts after YYYY-MM-DD
 — Be gay and do crime
 ```
--- a/tumblr_stats.py
+++ b/tumblr_stats.py
@ -2,10 +2,12 @@
 import argparse
 import csv
 from dataclasses import asdict
 from datetime import datetime
 import json
 import os
 from pathlib import Path
 import sys
-from typing import Any, Dict, List, Tuple
+from typing import Any, Callable, Dict, List, Tuple
 import pytumblr
@ -16,7 +18,7 @@ from stats_model import StatsModel
 def get_args() -> Dict[str, Any]:
    """Pull arguments from command line, turn them into a dictionary of <arg, value>"""
-    parser = argparse.ArgumentParser(
+    parser: argparse.ArgumentParser = argparse.ArgumentParser(
        prog='tumblr_stats.py',
        description='Use pytumblr to calculate stats after setting these enviroment variables: '
        + '$TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET',
@ -28,12 +30,13 @@ def get_args() -> Dict[str, Any]:
                        help='blog name for which to calculate stats')
    parser.add_argument('-t', '--tags', type=str, nargs='+',
                        help='tag(s) to focus on in status (if applicable)')
-    # TODO: Make 'before' work, but it actually depends on https://github.com/tumblr/pytumblr/issues/174.
+    parser.add_argument('-i', '--input', type=str,
                        help='Don\'t make API calls, just use a JSON input file')
    # TODO: Make 'before' work, but it depends on https://github.com/tumblr/pytumblr/issues/174.
    # parser.add_argument('--before', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
    #                     help='only gather posts before YYYY-MM-DD')
-    # TODO: Make 'after' work if they add it to pytumblr.
+    parser.add_argument('--after', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
-    # parser.add_argument('--after', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
+                        help='only gather posts after YYYY-MM-DD')
    #                     help='only gather posts after YYYY-MM-DD')
    return vars(parser.parse_args())
@ -61,6 +64,18 @@ def init_client() -> pytumblr.TumblrRestClient:
    )
 def filter_posts_for_after(post_list: List[Dict[str, Any]],
                            after: datetime) -> List[Dict[str, Any]]:
    """Return the posts in post_list dated strictly after the cutoff.

    Each post is expected to carry a 'date' string in the form
    '%Y-%m-%d %H:%M:%S %Z' (e.g. '2025-01-02 00:00:00 GMT'); it is parsed
    with datetime.strptime and compared against `after`.

    Args:
        post_list: Post dictionaries to filter. May be empty or None.
        after: Cutoff; only posts whose date is greater than this are kept.
            A falsy value (e.g. None) means "no cutoff".

    Returns:
        A new list with the matching posts in their original order. An
        empty/None post_list yields []; a missing cutoff yields a copy of
        every post.

    Raises:
        KeyError: If a post has no 'date' key.
        ValueError: If a post's 'date' does not match the expected format.
    """
    # Nothing to filter.
    if not post_list:
        return []
    # No cutoff supplied: keep every post rather than discarding the batch.
    if not after:
        return list(post_list)
    date_format = '%Y-%m-%d %H:%M:%S %Z'
    # Strict comparison: a post dated exactly at the cutoff is excluded.
    return [post for post in post_list
            if datetime.strptime(post['date'], date_format) > after]
 def build_post_maps(client: pytumblr.TumblrRestClient,
                    args: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    og_post_map: Dict[str, Any] = {}
@ -75,10 +90,6 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
    # if args['before']:
    #     before: datetime = args['before']
    #     params.update({'before': int(before.timestamp())})
    # TODO: Make 'after' work.
    # if args['after']:
    #     after: datetime = args['after']
    #     params.update({'after': str(int(after.timestamp()))})
    total: int = 0
    offset: int = 0
@ -94,7 +105,8 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
                            **params)
        # Stop the presses if we found no posts.
-        if not data['posts']:
+        curr_posts: List[Dict[str, Any]] = data['posts']
        if not curr_posts or len(curr_posts) < 1:
            print('Stopping, as no posts were found.')
            break
@ -105,8 +117,15 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
            print(f"I'm working with {total_posts} total posts...")
            total = total_posts
        # Behavior for 'after'.
        if args['after']:
            after: datetime = args['after']
            curr_posts = filter_posts_for_after(curr_posts, after)
            if not curr_posts:
                print(f"All posts after {after.year}-{after.month} processed.")
                return (og_post_map, un_og_post_map)
        # This block populates the local post_maps from the raw response data.
        curr_posts: List[Dict[str, Any]] = data['posts']
        local_og_post_map: Dict[str, Any] = {
            item['id_string']: item for item in curr_posts if 'parent_post_url' not in item
        }
@ -120,7 +139,9 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
        # The increment and status printing. Should always end the loop!
        offset += limit
-        print(f"Processed batch {offset // limit} of {(total // 20) + 1}...")
+        if not args['after']:
            print(
                f"Processed batch {offset // limit} of {(total // 20) + 1}...")
        # End LOOP
    # Return (og_posts, not_og_posts).
@ -128,11 +149,31 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
 def main() -> None:
-    args = get_args()
+    args: Dict[str, Any] = get_args()
-    client = init_client()
+    client: pytumblr.TumblrRestClient = init_client()
-    # Get the og_post_map (original posts) and un_og_post_map (not original posts).
+    # Handle JSON input (if you don't want to make API calls.)
-    og_post_map, un_og_post_map = build_post_maps(args=args, client=client)
+    if 'input' in args and args['input']:
        input_path = Path(args['input'])
        with open(input_path, "r") as f:
            data = json.load(f)
            og_post_map = data['original_post_map']
            un_og_post_map = data['unoriginal_post_map']
        for post_key in og_post_map.copy():
            post = og_post_map[post_key]
            date: datetime = datetime.strptime(
                post['date'], '%Y-%m-%d %H:%M:%S %Z')
            if date.year != 2025:
                del og_post_map[post_key]
        for post_key in un_og_post_map.copy():
            post = un_og_post_map[post_key]
            date: datetime = datetime.strptime(
                post['date'], '%Y-%m-%d %H:%M:%S %Z')
            if date.year != 2025:
                del un_og_post_map[post_key]
    else:
        # Get the og_post_map (original posts) and un_og_post_map (not original posts).
        og_post_map, un_og_post_map = build_post_maps(args=args, client=client)
    # Pick a stats model, which will determine output.
    stats_model: StatsModel