Greatly cleaned up and simplified core logic
This commit is contained in:
parent
6e32e3ec54
commit
5e1ff1c245
@ -22,14 +22,16 @@ class BuildTagStatsModel(StatsModel):
|
|||||||
for post_key in self.original_post_map:
|
for post_key in self.original_post_map:
|
||||||
post = self.original_post_map[post_key]
|
post = self.original_post_map[post_key]
|
||||||
post_list.append({
|
post_list.append({
|
||||||
'id_string': post['id_string'],
|
|
||||||
'post_url': post['post_url'],
|
'post_url': post['post_url'],
|
||||||
'tags': post['tags'],
|
'tags': post['tags'],
|
||||||
'note_count': post['note_count']
|
'note_count': post['note_count']
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# https://stackoverflow.com/a/73050
|
||||||
sorted_list = sorted(post_list, key=itemgetter('note_count'),
|
sorted_list = sorted(post_list, key=itemgetter('note_count'),
|
||||||
reverse=True)
|
reverse=True)
|
||||||
|
|
||||||
|
# https://stackoverflow.com/a/522578
|
||||||
for i, post in enumerate(sorted_list):
|
for i, post in enumerate(sorted_list):
|
||||||
post['rank'] = i + 1
|
post['rank'] = i + 1
|
||||||
|
|
||||||
|
|||||||
@ -59,13 +59,16 @@ class StatsModel:
|
|||||||
return total
|
return total
|
||||||
|
|
||||||
def calculate_total_original_post_notes_by_month_and_year(self) -> Dict[str, int]:
|
def calculate_total_original_post_notes_by_month_and_year(self) -> Dict[str, int]:
|
||||||
|
# https://docs.python.org/3/library/collections.html#defaultdict-objects
|
||||||
date_map: Dict[str, Any] = {}
|
date_map: Dict[str, Any] = {}
|
||||||
date_map = defaultdict(lambda: {'note_count': 0,
|
date_map = defaultdict(lambda: {'note_count': 0,
|
||||||
'post_count': 0},
|
'post_count': 0},
|
||||||
date_map)
|
date_map)
|
||||||
|
|
||||||
|
# Gathering the results.
|
||||||
for post_key in self.original_post_map:
|
for post_key in self.original_post_map:
|
||||||
post = self.original_post_map[post_key]
|
post = self.original_post_map[post_key]
|
||||||
# Format is like 2025-12-28 20:00:34 GMT
|
# Format is like '2025-12-28 20:00:34 GMT'
|
||||||
post_date: datetime = datetime.strptime(
|
post_date: datetime = datetime.strptime(
|
||||||
post['date'], '%Y-%m-%d %H:%M:%S %Z')
|
post['date'], '%Y-%m-%d %H:%M:%S %Z')
|
||||||
post_date_key = f"{post_date.year}-{post_date.month:02}"
|
post_date_key = f"{post_date.year}-{post_date.month:02}"
|
||||||
@ -74,6 +77,7 @@ class StatsModel:
|
|||||||
sts['post_count'] += 1
|
sts['post_count'] += 1
|
||||||
sts['note_count'] += post['note_count']
|
sts['note_count'] += post['note_count']
|
||||||
|
|
||||||
|
# Results postprocessing.
|
||||||
for date in date_map:
|
for date in date_map:
|
||||||
sts = date_map[date]
|
sts = date_map[date]
|
||||||
post_count = sts['post_count']
|
post_count = sts['post_count']
|
||||||
@ -83,10 +87,13 @@ class StatsModel:
|
|||||||
return date_map
|
return date_map
|
||||||
|
|
||||||
def determine_most_popular_tags(self) -> List[Dict[str, Any]]:
|
def determine_most_popular_tags(self) -> List[Dict[str, Any]]:
|
||||||
|
# https://docs.python.org/3/library/collections.html#defaultdict-objects
|
||||||
tag_dict: Dict[str, Any] = {}
|
tag_dict: Dict[str, Any] = {}
|
||||||
tag_dict = defaultdict(lambda: {'note_count': 0,
|
tag_dict = defaultdict(lambda: {'note_count': 0,
|
||||||
'post_count': 0},
|
'post_count': 0},
|
||||||
tag_dict)
|
tag_dict)
|
||||||
|
|
||||||
|
# Gathering the results.
|
||||||
for post_key in self.original_post_map:
|
for post_key in self.original_post_map:
|
||||||
post = self.original_post_map[post_key]
|
post = self.original_post_map[post_key]
|
||||||
tags = post['tags']
|
tags = post['tags']
|
||||||
@ -96,12 +103,13 @@ class StatsModel:
|
|||||||
sts['post_count'] += 1
|
sts['post_count'] += 1
|
||||||
sts['note_count'] += post['note_count']
|
sts['note_count'] += post['note_count']
|
||||||
|
|
||||||
|
# Results postprocessing.
|
||||||
for tag in tag_dict:
|
for tag in tag_dict:
|
||||||
sts = tag_dict[tag]
|
sts = tag_dict[tag]
|
||||||
post_count = sts['post_count']
|
post_count = sts['post_count']
|
||||||
note_count = sts['note_count']
|
note_count = sts['note_count']
|
||||||
sts['notes_to_posts_ratio'] = note_count / post_count
|
sts['notes_to_posts_ratio'] = note_count / post_count
|
||||||
|
|
||||||
tag_list = sorted(list(tag_dict.values()), key=itemgetter('note_count'),
|
# https://stackoverflow.com/a/73050
|
||||||
reverse=True)
|
return sorted(list(tag_dict.values()), key=itemgetter('note_count'),
|
||||||
return tag_list
|
reverse=True)
|
||||||
|
|||||||
108
tumblr_stats.py
108
tumblr_stats.py
@ -21,7 +21,7 @@ def get_args() -> Dict[str, Any]:
|
|||||||
description='Use pytumblr to calculate stats after setting these enviroment variables: '
|
description='Use pytumblr to calculate stats after setting these enviroment variables: '
|
||||||
+ '$TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET',
|
+ '$TUMBLR_CONSUMER_KEY, $TUMBLR_CONSUMER_SECRET, $TUMBLR_OAUTH_TOKEN, and $TUMBLR_OAUTH_SECRET',
|
||||||
epilog='— Be gay and do crime')
|
epilog='— Be gay and do crime')
|
||||||
parser.add_argument('operation', type=str, nargs = '+',
|
parser.add_argument('operation', type=str, nargs='+',
|
||||||
metavar='OPERATION', choices=['build_tag_stats'],
|
metavar='OPERATION', choices=['build_tag_stats'],
|
||||||
help="operation used to calculate stats")
|
help="operation used to calculate stats")
|
||||||
parser.add_argument('-b', '--blog', type=str, required=True,
|
parser.add_argument('-b', '--blog', type=str, required=True,
|
||||||
@ -43,11 +43,11 @@ def init_client() -> pytumblr.TumblrRestClient:
|
|||||||
oauth_token = os.getenv('TUMBLR_OAUTH_TOKEN')
|
oauth_token = os.getenv('TUMBLR_OAUTH_TOKEN')
|
||||||
oauth_secret = os.getenv('TUMBLR_OAUTH_SECRET')
|
oauth_secret = os.getenv('TUMBLR_OAUTH_SECRET')
|
||||||
|
|
||||||
missing_vars = [name for name,
|
missing_vars: List[str] = [name for name,
|
||||||
val in [('$TUMBLR_CONSUMER_KEY', consumer_key),
|
val in [('$TUMBLR_CONSUMER_KEY', consumer_key),
|
||||||
('$TUMBLR_CONSUMER_SECRET', consumer_secret),
|
('$TUMBLR_CONSUMER_SECRET', consumer_secret),
|
||||||
('$TUMBLR_OAUTH_TOKEN', oauth_token),
|
('$TUMBLR_OAUTH_TOKEN', oauth_token),
|
||||||
('$TUMBLR_OAUTH_SECRET', oauth_secret)] if val is None]
|
('$TUMBLR_OAUTH_SECRET', oauth_secret)] if val is None]
|
||||||
|
|
||||||
if missing_vars:
|
if missing_vars:
|
||||||
print("Missing important environment variables:", missing_vars)
|
print("Missing important environment variables:", missing_vars)
|
||||||
@ -61,10 +61,11 @@ def init_client() -> pytumblr.TumblrRestClient:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, args: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
def build_post_maps(client: pytumblr.TumblrRestClient,
|
||||||
post_map: Dict[str, Any] = {}
|
args: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||||
dumpster: Dict[str, Any] = {}
|
og_post_map: Dict[str, Any] = {}
|
||||||
blog_name = args['blog']
|
un_og_post_map: Dict[str, Any] = {}
|
||||||
|
blog_name: str = args['blog']
|
||||||
|
|
||||||
# We populate params, starting with any tags for filtering.
|
# We populate params, starting with any tags for filtering.
|
||||||
params = {}
|
params = {}
|
||||||
@ -79,9 +80,9 @@ def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, args: Dict[st
|
|||||||
# after: datetime = args['after']
|
# after: datetime = args['after']
|
||||||
# params.update({'after': str(int(after.timestamp()))})
|
# params.update({'after': str(int(after.timestamp()))})
|
||||||
|
|
||||||
total = 0
|
total: int = 0
|
||||||
offset = 0
|
offset: int = 0
|
||||||
limit = 20
|
limit: int = 20
|
||||||
|
|
||||||
# The request loop that pulls all data from the APIs.
|
# The request loop that pulls all data from the APIs.
|
||||||
while offset <= total:
|
while offset <= total:
|
||||||
@ -98,41 +99,24 @@ def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, args: Dict[st
|
|||||||
break
|
break
|
||||||
|
|
||||||
# Total init check for the first iteration, but always checked for sanity.
|
# Total init check for the first iteration, but always checked for sanity.
|
||||||
if total == 0:
|
if not total:
|
||||||
# Let's see what's in there,
|
# Let's see what's in there.
|
||||||
total_posts = data['total_posts']
|
total_posts = data['total_posts']
|
||||||
|
|
||||||
# Something was there, so we're good.
|
|
||||||
print(f"I'm working with {total_posts} total posts...")
|
print(f"I'm working with {total_posts} total posts...")
|
||||||
total = total_posts
|
total = total_posts
|
||||||
|
|
||||||
# This block populates the local post_map from the raw response data.
|
# This block populates the local post_maps from the raw response data.
|
||||||
curr_posts = data['posts']
|
curr_posts: List[Dict[str, Any]] = data['posts']
|
||||||
local_post_map: Dict[str, Any] = {}
|
local_og_post_map: Dict[str, Any] = {
|
||||||
for curr_post in curr_posts:
|
item['id_string']: item for item in curr_posts if 'parent_post_url' not in item
|
||||||
curr_key = curr_post['id_string']
|
}
|
||||||
if curr_key not in local_post_map:
|
local_un_og_post_map: Dict[str, Any] = {
|
||||||
local_post_map[curr_key] = curr_post
|
item['id_string']: item for item in curr_posts if 'parent_post_url' in item
|
||||||
|
}
|
||||||
|
|
||||||
# This block populates the local dumpster from the raw response data.
|
# Update the maps with what we found.
|
||||||
local_dumpster = {}
|
og_post_map.update(local_og_post_map)
|
||||||
filtered_local_post_map = {}
|
un_og_post_map.update(local_un_og_post_map)
|
||||||
for local_key in local_post_map:
|
|
||||||
local_post = local_post_map[local_key]
|
|
||||||
# Determines whether this is an OG post.
|
|
||||||
if 'parent_post_url' not in local_post:
|
|
||||||
filtered_local_post_map[local_key] = local_post
|
|
||||||
else: # If it's not an OG post, into the local dumpster.
|
|
||||||
local_dumpster[local_key] = local_post
|
|
||||||
|
|
||||||
# The sacred "should we add, and if we should, DO ADD" conditional statements.
|
|
||||||
has_og_posts = any(
|
|
||||||
post not in post_map for post in filtered_local_post_map)
|
|
||||||
has_not_og_posts = any(post not in dumpster for post in local_dumpster)
|
|
||||||
if has_og_posts:
|
|
||||||
post_map.update(filtered_local_post_map)
|
|
||||||
if has_not_og_posts:
|
|
||||||
dumpster.update(local_dumpster)
|
|
||||||
|
|
||||||
# The increment and status printing. Should always end the loop!
|
# The increment and status printing. Should always end the loop!
|
||||||
offset += limit
|
offset += limit
|
||||||
@ -140,54 +124,40 @@ def build_post_map_and_dumpster(client: pytumblr.TumblrRestClient, args: Dict[st
|
|||||||
# End LOOP
|
# End LOOP
|
||||||
|
|
||||||
# Return (og_posts, not_og_posts).
|
# Return (og_posts, not_og_posts).
|
||||||
return (post_map, dumpster)
|
return (og_post_map, un_og_post_map)
|
||||||
|
|
||||||
|
|
||||||
def build_tag_stats_model(post_map: Dict[str, Any],
|
|
||||||
dumpster: Dict[str, Any],
|
|
||||||
args: Dict[str, Any]) -> BuildTagStatsModel:
|
|
||||||
stats_model: BuildTagStatsModel = BuildTagStatsModel(blog_name=args['blog'],
|
|
||||||
original_post_map=post_map,
|
|
||||||
unoriginal_post_map=dumpster)
|
|
||||||
stats_model.tags = args['tags']
|
|
||||||
return stats_model
|
|
||||||
|
|
||||||
|
|
||||||
def build_total_stats_model(post_map: Dict[str, Any],
|
|
||||||
dumpster: Dict[str, Any],
|
|
||||||
args: Dict[str, Any]) -> BuildTotalStatsModel:
|
|
||||||
stats_model: BuildTotalStatsModel = BuildTotalStatsModel(blog_name=args['blog'],
|
|
||||||
original_post_map=post_map,
|
|
||||||
unoriginal_post_map=dumpster)
|
|
||||||
return stats_model
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
args = get_args()
|
args = get_args()
|
||||||
client = init_client()
|
client = init_client()
|
||||||
|
|
||||||
# Get the post_map (original posts) and dumpster (not original posts).
|
# Get the og_post_map (original posts) and un_og_post_map (not original posts).
|
||||||
post_map, dumpster = build_post_map_and_dumpster(args=args, client=client)
|
og_post_map, un_og_post_map = build_post_maps(args=args, client=client)
|
||||||
|
|
||||||
# Pick a stats model, which will determine output.
|
# Pick a stats model, which will determine output.
|
||||||
stats_model: StatsModel
|
stats_model: StatsModel
|
||||||
if 'build_tag_stats' in args['operation']:
|
if 'build_tag_stats' in args['operation']:
|
||||||
stats_model = build_tag_stats_model(post_map, dumpster, args)
|
stats_model = BuildTagStatsModel(blog_name=args['blog'],
|
||||||
|
original_post_map=og_post_map,
|
||||||
|
unoriginal_post_map=un_og_post_map)
|
||||||
|
stats_model.tags = args['tags']
|
||||||
if 'build_total_stats' in args['operation']:
|
if 'build_total_stats' in args['operation']:
|
||||||
if 'before' not in args: # or 'after' not in args:
|
if 'before' not in args: # or 'after' not in args:
|
||||||
print('You must specify a time range for build_total stats. ' +
|
print('You must specify a time range for build_total stats. ' +
|
||||||
'You\'ll otherwise request TOO MUCH DATA!')
|
'You\'ll otherwise request TOO MUCH DATA!')
|
||||||
sys.exit()
|
sys.exit(1)
|
||||||
stats_model = build_total_stats_model(post_map, dumpster, args)
|
stats_model = BuildTotalStatsModel(blog_name=args['blog'],
|
||||||
|
original_post_map=og_post_map,
|
||||||
|
unoriginal_post_map=un_og_post_map)
|
||||||
|
|
||||||
# Write the chosen model as JSON output.
|
# Write the chosen model as JSON output.
|
||||||
with open('./tumblr_stats.json', 'w') as f:
|
with open('./tumblr_stats.json', 'w') as f:
|
||||||
json.dump(asdict(stats_model), f, indent=1, default=str)
|
json.dump(asdict(stats_model), f, indent=1, default=str)
|
||||||
|
|
||||||
# If there were original posts, create a CSV for them.
|
# If there were original posts, create a CSV for them.
|
||||||
if post_map:
|
if og_post_map:
|
||||||
with open('./tumblr_original_posts.csv', 'w', newline='') as f:
|
with open('./tumblr_original_posts.csv', 'w', newline='') as f:
|
||||||
post_list: List[Dict[str, Any]] = list(post_map.values())
|
post_list: List[Dict[str, Any]] = list(og_post_map.values())
|
||||||
wr = csv.DictWriter(f, quoting=csv.QUOTE_ALL, extrasaction='ignore',
|
wr = csv.DictWriter(f, quoting=csv.QUOTE_ALL, extrasaction='ignore',
|
||||||
fieldnames=post_list[0].keys())
|
fieldnames=post_list[0].keys())
|
||||||
wr.writeheader()
|
wr.writeheader()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user