Added a more structured model, some basic stats, ranked post list for a tag
This commit is contained in:
parent
620f3d2975
commit
8a38cb510b
94
app.py
94
app.py
@ -1,11 +1,15 @@
|
|||||||
|
|
||||||
|
from dataclasses import asdict
|
||||||
import json
|
import json
|
||||||
|
from operator import itemgetter
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from typing import Any, Dict
|
from typing import Any, Dict, List, Tuple
|
||||||
|
|
||||||
import pytumblr
|
import pytumblr
|
||||||
|
|
||||||
|
from model import StatsModel
|
||||||
|
|
||||||
|
|
||||||
def init_client() -> pytumblr.TumblrRestClient:
|
def init_client() -> pytumblr.TumblrRestClient:
|
||||||
consumer_key = os.getenv('TUMBLR_CONSUMER_KEY')
|
consumer_key = os.getenv('TUMBLR_CONSUMER_KEY')
|
||||||
@ -18,33 +22,44 @@ def init_client() -> pytumblr.TumblrRestClient:
|
|||||||
('$TUMBLR_CONSUMER_SECRET', consumer_secret),
|
('$TUMBLR_CONSUMER_SECRET', consumer_secret),
|
||||||
('$TUMBLR_OAUTH_TOKEN', oauth_token),
|
('$TUMBLR_OAUTH_TOKEN', oauth_token),
|
||||||
('$TUMBLR_OAUTH_SECRET', oauth_secret)] if val is None]
|
('$TUMBLR_OAUTH_SECRET', oauth_secret)] if val is None]
|
||||||
|
|
||||||
if missing_vars:
|
if missing_vars:
|
||||||
print("Missing important environment variables:", missing_vars)
|
print("Missing important environment variables:", missing_vars)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
return pytumblr.TumblrRestClient(
|
return pytumblr.TumblrRestClient(
|
||||||
consumer_key=consumer_key, # type: ignore
|
consumer_key=consumer_key, # type: ignore
|
||||||
consumer_secret=consumer_secret, # type: ignore
|
consumer_secret=consumer_secret, # type: ignore
|
||||||
oauth_token=oauth_token, # type: ignore
|
oauth_token=oauth_token, # type: ignore
|
||||||
oauth_secret=oauth_secret, # type: ignore
|
oauth_secret=oauth_secret, # type: ignore
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def calculate_total_notes(post_map: Dict[str, Any]) -> int:
    """Return the combined ``note_count`` of every post in *post_map*.

    Args:
        post_map: Mapping of post key to post record. Each record must
            expose a numeric ``'note_count'`` entry.

    Returns:
        The sum of all ``'note_count'`` values, or 0 for an empty map.

    Raises:
        KeyError: If any post record lacks a ``'note_count'`` entry.
    """
    # Only the records are needed, so iterate the values directly instead of
    # looking each one up again by key.
    return sum(post['note_count'] for post in post_map.values())
|
||||||
|
|
||||||
|
|
||||||
|
def determine_top_post_urls(post_map: Dict[str, Any]) -> List[str]:
    """Return every post URL in *post_map*, ordered by descending note count.

    Args:
        post_map: Mapping of post key to post record. Each record must
            expose ``'note_count'`` and ``'post_url'`` entries.

    Returns:
        The ``'post_url'`` of each post, most-noted first. Posts with equal
        note counts keep their relative order from *post_map* (``sorted`` is
        stable, also with ``reverse=True``).

    Raises:
        KeyError: If a post record lacks ``'note_count'`` or ``'post_url'``.
    """
    # sorted() accepts any iterable, so the values view needs no list() copy.
    ranked_posts = sorted(
        post_map.values(),
        key=itemgetter('note_count'),
        reverse=True,
    )
    return [post['post_url'] for post in ranked_posts]
|
||||||
|
|
||||||
|
|
||||||
|
def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||||
post_map: Dict[str, Any] = {}
|
post_map: Dict[str, Any] = {}
|
||||||
dumpster: Dict[str, Any] = {}
|
dumpster: Dict[str, Any] = {}
|
||||||
total = 0
|
total = 0
|
||||||
offset = 0
|
offset = 0
|
||||||
limit = 20
|
limit = 20
|
||||||
while offset <= total:
|
while offset <= total:
|
||||||
# Begin LOOP
|
### Begin LOOP
|
||||||
# Get me some posts! 😈🍪🍪🍪
|
# Get me some posts! 😈🍪🍪🍪
|
||||||
data = client.posts('panda-pal.tumblr.com', **{'tag': 'inuyasha'}, \
|
data = client.posts(f"{blog_name}.tumblr.com", **{'tag': tag},
|
||||||
offset = offset, \
|
offset=offset,
|
||||||
limit = limit)
|
limit=limit)
|
||||||
|
|
||||||
# Total check for the first iteration, but always checked for sanity.
|
# Total check for the first iteration, but always checked for sanity.
|
||||||
if total == 0:
|
if total == 0:
|
||||||
# Let's see what's in there,
|
# Let's see what's in there,
|
||||||
@ -54,11 +69,11 @@ def main() -> None:
|
|||||||
if not total_posts:
|
if not total_posts:
|
||||||
print("Couldn't get total posts. We're outta here!")
|
print("Couldn't get total posts. We're outta here!")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Something was there, so we're good.
|
# Something was there, so we're good.
|
||||||
print(f"I'm working with {total_posts} total posts...")
|
print(f"I'm working with {total_posts} total posts...")
|
||||||
total = total_posts
|
total = total_posts
|
||||||
|
|
||||||
curr_posts = data['posts']
|
curr_posts = data['posts']
|
||||||
local_post_map: Dict[str, Any] = {}
|
local_post_map: Dict[str, Any] = {}
|
||||||
for curr_post in curr_posts:
|
for curr_post in curr_posts:
|
||||||
@ -78,28 +93,43 @@ def main() -> None:
|
|||||||
if any(post not in post_map for post in filtered_local_post_map):
|
if any(post not in post_map for post in filtered_local_post_map):
|
||||||
post_map.update(filtered_local_post_map)
|
post_map.update(filtered_local_post_map)
|
||||||
|
|
||||||
# The increment. Should always end the loop!
|
# The increment and status printing. Should always end the loop!
|
||||||
offset += limit
|
offset += limit
|
||||||
if offset <= total:
|
if offset < total:
|
||||||
print(f"Processed batch {(offset // limit) - 1} of {total // 20}...")
|
print(f"Processed batch {offset // limit} of {total // 20}...")
|
||||||
else:
|
else:
|
||||||
print(f"Processed final batch of {total} posts")
|
print(f"Processed all {total} posts")
|
||||||
# End LOOP
|
### End LOOP
|
||||||
|
|
||||||
if not post_map and not dumpster:
|
return (post_map, dumpster)
|
||||||
print('We found nothing, so we end with nothing. 🤷')
|
|
||||||
sys.exit(1)
|
|
||||||
else:
|
|
||||||
with open("./tumblr_data.json", "w") as f:
|
|
||||||
json.dump(post_map, f, indent=2, sort_keys=True)
|
|
||||||
with open("./tumblr_dumpster.json", "w") as f:
|
|
||||||
json.dump(dumpster, f, indent=2, sort_keys=True)
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
def retrieve_all_posts(tag: str):
|
def build_tag_stats_model(client: pytumblr.TumblrRestClient, blog_name: str, tag: str) -> StatsModel:
    """Collect the posts of *blog_name* tagged *tag* and summarize them.

    Args:
        client: Tumblr REST client, passed through to
            ``build_post_map_and_dumpster``.
        blog_name: Blog to query.
        tag: Tag whose posts are collected; recorded in the model's ``tags``.

    Returns:
        A ``StatsModel`` holding the post map and dumpster returned by
        ``build_post_map_and_dumpster`` plus the statistics derived from
        them: overall post count, original post count, total notes on the
        original posts, and the original post URLs ranked by note count.
    """
    post_map, dumpster = build_post_map_and_dumpster(client, blog_name, tag)

    return StatsModel(
        operation='build_tag_stats',
        blog_name=blog_name,
        # Record the queried tag; previously this field was left empty even
        # though the tag was known here.
        tags=[tag],
        post_map=post_map,
        dumpster=dumpster,
        # Everything handled is the two maps taken together.
        total_posts=len(post_map) + len(dumpster),
        total_original_posts=len(post_map),
        total_original_post_notes=calculate_total_notes(post_map),
        ranked_post_urls=determine_top_post_urls(post_map),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Build the tag statistics for a fixed blog/tag pair and save them.

    Creates the API client, builds the stats model for the 'inuyasha' tag on
    the 'panda-pal' blog, and writes that model, converted to a plain dict,
    to ``./tumblr_data.json`` as key-sorted JSON with two-space indentation.
    """
    tumblr_client = init_client()
    model = build_tag_stats_model(tumblr_client, 'panda-pal', 'inuyasha')

    with open("./tumblr_data.json", "w") as out_file:
        # Dataclass instances are not JSON-serializable; flatten to a dict.
        serializable = asdict(model)
        json.dump(serializable, out_file, indent=2, sort_keys=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|||||||
33
model.py
Normal file
33
model.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class StatsModel:
    """Model of the output produced by the Tumblr stats script.

    Every field has a default, so an instance can be created empty and
    filled in afterwards, or built in one go with keyword arguments.
    Immutable defaults are given as plain values; ``default_factory`` is
    used only for the list and dict fields, so that each instance gets its
    own fresh container.
    """

    # Operation used to output stats.
    operation: str = ""

    # Blog in question.
    blog_name: str = ""

    # Tags used.
    tags: List[str] = field(default_factory=list)

    # Original posts.
    post_map: Dict[str, Any] = field(default_factory=dict)

    # Posts that are not original.
    dumpster: Dict[str, Any] = field(default_factory=dict)

    # Total posts handled.
    total_posts: int = 0

    # Total original posts (per blog_name) handled.
    total_original_posts: int = 0

    # Total original post (per blog_name) notes handled.
    total_original_post_notes: int = 0

    # Posts ranked from most popular to least popular.
    ranked_post_urls: List[str] = field(default_factory=list)
|
||||||
Loading…
x
Reference in New Issue
Block a user