Added functionality for --after, more code cleanup
This commit is contained in:
parent
5e1ff1c245
commit
590277d7ee
@ -1,7 +1,7 @@
|
|||||||
# tumblr-stats
|
# tumblr-stats
|
||||||
## Usage
|
## Usage
|
||||||
```
|
```
|
||||||
usage: tumblr_stats.py [-h] -b BLOG [-t TAGS [TAGS ...]] OPERATION [OPERATION ...]
|
usage: tumblr_stats.py [-h] -b BLOG [-t TAGS [TAGS ...]] [-i INPUT] [--after AFTER] OPERATION [OPERATION ...]
|
||||||
|
|
||||||
Use pytumblr to calculate stats after setting these environment variables: $TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET
|
Use pytumblr to calculate stats after setting these environment variables: $TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET
|
||||||
|
|
||||||
@ -13,6 +13,8 @@ options:
|
|||||||
-b, --blog BLOG blog name for which to calculate stats
|
-b, --blog BLOG blog name for which to calculate stats
|
||||||
-t, --tags TAGS [TAGS ...]
|
-t, --tags TAGS [TAGS ...]
|
||||||
tag(s) to focus on in status (if applicable)
|
tag(s) to focus on in status (if applicable)
|
||||||
|
-i, --input INPUT Don't make API calls, just use a JSON input file
|
||||||
|
--after AFTER only gather posts after YYYY-MM-DD
|
||||||
|
|
||||||
— Be gay and do crime
|
— Be gay and do crime
|
||||||
```
|
```
|
||||||
@ -2,10 +2,12 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import csv
|
import csv
|
||||||
from dataclasses import asdict
|
from dataclasses import asdict
|
||||||
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
import sys
|
import sys
|
||||||
from typing import Any, Dict, List, Tuple
|
from typing import Any, Callable, Dict, List, Tuple
|
||||||
|
|
||||||
import pytumblr
|
import pytumblr
|
||||||
|
|
||||||
@ -16,7 +18,7 @@ from stats_model import StatsModel
|
|||||||
|
|
||||||
def get_args() -> Dict[str, Any]:
|
def get_args() -> Dict[str, Any]:
|
||||||
"""Pull arguments from command line, turn them into a dictionary of <arg, value>"""
|
"""Pull arguments from command line, turn them into a dictionary of <arg, value>"""
|
||||||
parser = argparse.ArgumentParser(
|
parser: argparse.ArgumentParser = argparse.ArgumentParser(
|
||||||
prog='tumblr_stats.py',
|
prog='tumblr_stats.py',
|
||||||
description='Use pytumblr to calculate stats after setting these enviroment variables: '
|
description='Use pytumblr to calculate stats after setting these enviroment variables: '
|
||||||
+ '$TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET',
|
+ '$TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET',
|
||||||
@ -28,12 +30,13 @@ def get_args() -> Dict[str, Any]:
|
|||||||
help='blog name for which to calculate stats')
|
help='blog name for which to calculate stats')
|
||||||
parser.add_argument('-t', '--tags', type=str, nargs='+',
|
parser.add_argument('-t', '--tags', type=str, nargs='+',
|
||||||
help='tag(s) to focus on in status (if applicable)')
|
help='tag(s) to focus on in status (if applicable)')
|
||||||
# TODO: Make 'before' work, but it actually depends on https://github.com/tumblr/pytumblr/issues/174.
|
parser.add_argument('-i', '--input', type=str,
|
||||||
|
help='Don\'t make API calls, just use a JSON input file')
|
||||||
|
# TODO: Make 'before' work, but it depends on https://github.com/tumblr/pytumblr/issues/174.
|
||||||
# parser.add_argument('--before', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
|
# parser.add_argument('--before', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
|
||||||
# help='only gather posts before YYYY-MM-DD')
|
# help='only gather posts before YYYY-MM-DD')
|
||||||
# TODO: Make 'after' work if they add it to pytumblr.
|
parser.add_argument('--after', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
|
||||||
# parser.add_argument('--after', type=lambda s: datetime.strptime(s, '%Y-%m-%d'),
|
help='only gather posts after YYYY-MM-DD')
|
||||||
# help='only gather posts after YYYY-MM-DD')
|
|
||||||
return vars(parser.parse_args())
|
return vars(parser.parse_args())
|
||||||
|
|
||||||
|
|
||||||
@ -61,6 +64,18 @@ def init_client() -> pytumblr.TumblrRestClient:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def filter_posts_for_after(post_list: List[Dict[str, Any]],
|
||||||
|
after: datetime) -> List[Dict[str, Any]]:
|
||||||
|
# Quick short circuit check.
|
||||||
|
if not post_list or not after:
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Handle 'after'.
|
||||||
|
after_check: Callable[[Dict[str, Any]], bool] = lambda x: datetime.strptime(
|
||||||
|
x['date'], '%Y-%m-%d %H:%M:%S %Z') > after
|
||||||
|
return [post for post in post_list if after_check(post)]
|
||||||
|
|
||||||
|
|
||||||
def build_post_maps(client: pytumblr.TumblrRestClient,
|
def build_post_maps(client: pytumblr.TumblrRestClient,
|
||||||
args: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
args: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||||
og_post_map: Dict[str, Any] = {}
|
og_post_map: Dict[str, Any] = {}
|
||||||
@ -75,10 +90,6 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
|
|||||||
# if args['before']:
|
# if args['before']:
|
||||||
# before: datetime = args['before']
|
# before: datetime = args['before']
|
||||||
# params.update({'before': int(before.timestamp())})
|
# params.update({'before': int(before.timestamp())})
|
||||||
# TODO: Make 'after' work.
|
|
||||||
# if args['after']:
|
|
||||||
# after: datetime = args['after']
|
|
||||||
# params.update({'after': str(int(after.timestamp()))})
|
|
||||||
|
|
||||||
total: int = 0
|
total: int = 0
|
||||||
offset: int = 0
|
offset: int = 0
|
||||||
@ -94,7 +105,8 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
|
|||||||
**params)
|
**params)
|
||||||
|
|
||||||
# Stop the presses if we found no posts.
|
# Stop the presses if we found no posts.
|
||||||
if not data['posts']:
|
curr_posts: List[Dict[str, Any]] = data['posts']
|
||||||
|
if not curr_posts or len(curr_posts) < 1:
|
||||||
print('Stopping, as no posts were found.')
|
print('Stopping, as no posts were found.')
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -105,8 +117,15 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
|
|||||||
print(f"I'm working with {total_posts} total posts...")
|
print(f"I'm working with {total_posts} total posts...")
|
||||||
total = total_posts
|
total = total_posts
|
||||||
|
|
||||||
|
# Behavior for 'after'.
|
||||||
|
if args['after']:
|
||||||
|
after: datetime = args['after']
|
||||||
|
curr_posts = filter_posts_for_after(curr_posts, after)
|
||||||
|
if not curr_posts:
|
||||||
|
print(f"All posts after {after.year}-{after.month} processed.")
|
||||||
|
return (og_post_map, un_og_post_map)
|
||||||
|
|
||||||
# This block populates the local post_maps from the raw response data.
|
# This block populates the local post_maps from the raw response data.
|
||||||
curr_posts: List[Dict[str, Any]] = data['posts']
|
|
||||||
local_og_post_map: Dict[str, Any] = {
|
local_og_post_map: Dict[str, Any] = {
|
||||||
item['id_string']: item for item in curr_posts if 'parent_post_url' not in item
|
item['id_string']: item for item in curr_posts if 'parent_post_url' not in item
|
||||||
}
|
}
|
||||||
@ -120,7 +139,9 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
|
|||||||
|
|
||||||
# The increment and status printing. Should always end the loop!
|
# The increment and status printing. Should always end the loop!
|
||||||
offset += limit
|
offset += limit
|
||||||
print(f"Processed batch {offset // limit} of {(total // 20) + 1}...")
|
if not args['after']:
|
||||||
|
print(
|
||||||
|
f"Processed batch {offset // limit} of {(total // 20) + 1}...")
|
||||||
# End LOOP
|
# End LOOP
|
||||||
|
|
||||||
# Return (og_posts, not_og_posts).
|
# Return (og_posts, not_og_posts).
|
||||||
@ -128,11 +149,31 @@ def build_post_maps(client: pytumblr.TumblrRestClient,
|
|||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
args = get_args()
|
args: Dict[str, Any] = get_args()
|
||||||
client = init_client()
|
client: pytumblr.TumblrRestClient = init_client()
|
||||||
|
|
||||||
# Get the og_post_map (original posts) and un_og_post_map (not original posts).
|
# Handle JSON input (if you don't want to make API calls.)
|
||||||
og_post_map, un_og_post_map = build_post_maps(args=args, client=client)
|
if 'input' in args and args['input']:
|
||||||
|
input_path = Path(args['input'])
|
||||||
|
with open(input_path, "r") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
og_post_map = data['original_post_map']
|
||||||
|
un_og_post_map = data['unoriginal_post_map']
|
||||||
|
for post_key in og_post_map.copy():
|
||||||
|
post = og_post_map[post_key]
|
||||||
|
date: datetime = datetime.strptime(
|
||||||
|
post['date'], '%Y-%m-%d %H:%M:%S %Z')
|
||||||
|
if date.year != 2025:
|
||||||
|
del og_post_map[post_key]
|
||||||
|
for post_key in un_og_post_map.copy():
|
||||||
|
post = un_og_post_map[post_key]
|
||||||
|
date: datetime = datetime.strptime(
|
||||||
|
post['date'], '%Y-%m-%d %H:%M:%S %Z')
|
||||||
|
if date.year != 2025:
|
||||||
|
del un_og_post_map[post_key]
|
||||||
|
else:
|
||||||
|
# Get the og_post_map (original posts) and un_og_post_map (not original posts).
|
||||||
|
og_post_map, un_og_post_map = build_post_maps(args=args, client=client)
|
||||||
|
|
||||||
# Pick a stats model, which will determine output.
|
# Pick a stats model, which will determine output.
|
||||||
stats_model: StatsModel
|
stats_model: StatsModel
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user