moodle-scraper/scrape.py

91 lines
3.6 KiB
Python
Raw Normal View History

2022-04-08 04:33:37 +00:00
#!/usr/bin/env python
import argparse
import csv
import json
import requests
def get_args() -> dict[str, str]:
    """Parse the command-line options and return them as a plain dict.

    The base URL is echoed to stdout once parsing succeeds.

    Returns:
        A dict with the keys ``'username'``, ``'password'`` and ``'url'``.
        Trailing slashes are removed from the URL so callers can append
        endpoint paths that start with ``/`` without producing ``//``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing
            or is given without a value.
    """
    parser = argparse.ArgumentParser(description='Rake emails from Moodle')
    # Deliberately no nargs='?': combined with required=True it accepted a
    # bare "--username" (no value) and silently produced None.
    parser.add_argument('--username', '-u', metavar='username', type=str,
                        required=True, help='Username for Moodle site')
    parser.add_argument('--password', '-p', metavar='password', type=str,
                        required=True, help='Password for Moodle site')
    parser.add_argument('--url', metavar='url', type=str, required=True,
                        help='Base URL for Moodle site (e.g. https://moodle.pucrs.br)')
    args = parser.parse_args()

    base_url = args.url.rstrip('/')
    print(base_url)
    return {'username': args.username, 'password': args.password, 'url': base_url}
def get_auth(config: dict[str, str]) -> str:
    """Log in to the Moodle site and return a web-service token.

    Sends a GET request to ``<url>/login/token.php`` for the
    ``moodle_mobile_app`` service using the credentials in *config*.

    Args:
        config: Dict providing the ``'url'``, ``'username'`` and
            ``'password'`` keys.

    Returns:
        The value of the ``token`` field of the JSON response.

    Raises:
        SystemExit: With exit code 1 when the HTTP status is not 200, the
            body is not a JSON object, or it has no ``token`` field.
    """
    # Let requests build the query string: credentials containing "&", "#",
    # "+" or "%" would otherwise corrupt a hand-assembled URL.
    response = requests.get(
        f"{config['url']}/login/token.php",
        params={
            'username': config['username'],
            'password': config['password'],
            'service': 'moodle_mobile_app',
        },
    )

    # Parse defensively: an HTML error page must reach the failure message
    # below instead of raising a JSON decode error first.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if response.status_code != 200 or not isinstance(payload, dict) or 'token' not in payload:
        print(f"Failed to retrieve token with status {response.status_code}:")
        print(response.text if payload is None else payload)
        raise SystemExit(1)
    return payload['token']
def get_debug(config: dict[str, str], token: str):
    """Fetch the site information exposed by ``core_webservice_get_site_info``.

    Args:
        config: Dict providing the ``'url'`` key (base URL of the site).
        token: Web-service token sent as ``wstoken``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        SystemExit: With exit code 1 when the HTTP status is not 200.
    """
    # Query parameters go through ``params`` so requests encodes them.
    response = requests.get(
        f"{config['url']}/webservice/rest/server.php",
        params={
            'wstoken': token,
            'wsfunction': 'core_webservice_get_site_info',
            'moodlewsrestformat': 'json',
        },
    )
    if response.status_code != 200:
        print(f"Failed to retrieve debug info with status {response.status_code}:")
        # Error pages are not guaranteed to be JSON; fall back to raw text
        # so the diagnostic itself cannot crash.
        try:
            print(response.json())
        except ValueError:
            print(response.text)
        raise SystemExit(1)
    return response.json()
def get_user_ids(config: dict[str, str], token: str, course_id: int = 65031) -> list[int]:
    """Return the ids of the users found by ``core_enrol_search_users``.

    Requests the first page (up to 9999 entries) of an unfiltered search
    of the given course.

    Args:
        config: Dict providing the ``'url'`` key (base URL of the site).
        token: Web-service token sent as ``wstoken``.
        course_id: Course to search. Defaults to 65031, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The ``id`` field of every entry in the returned list.

    Raises:
        SystemExit: With exit code 1 when the HTTP status is not 200 or
            the body is not a JSON list.
    """
    response = requests.get(
        f"{config['url']}/webservice/rest/server.php",
        params={
            'wstoken': token,
            'wsfunction': 'core_enrol_search_users',
            'courseid': course_id,
            'search': '',
            'searchanywhere': 1,
            'page': 0,
            'perpage': 9999,
            'moodlewsrestformat': 'json',
        },
    )

    try:
        payload = response.json()
    except ValueError:
        payload = None

    # Anything other than a list (for instance a JSON error object) cannot
    # be treated as a user list; report it instead of failing later with an
    # opaque TypeError while indexing.
    if response.status_code != 200 or not isinstance(payload, list):
        print(f"Failed to retrieve users with status {response.status_code}:")
        print(response.text if payload is None else payload)
        raise SystemExit(1)

    return [user['id'] for user in payload]
def get_user_detail(config: dict[str, str], token: str, id: int) -> dict[str, str]:
    """Fetch one user's record via ``core_user_get_users_by_field``.

    Args:
        config: Dict providing the ``'url'`` key (base URL of the site).
        token: Web-service token sent as ``wstoken``.
        id: User id to look up. The name shadows the builtin but is kept
            because it is part of the function's keyword interface.

    Returns:
        The first record of the returned list, or an empty dict when the
        list is empty (previously an ``IndexError``).

    Raises:
        SystemExit: With exit code 1 when the HTTP status is not 200 or
            the body is not a JSON list.
    """
    response = requests.get(
        f"{config['url']}/webservice/rest/server.php",
        params={
            'wstoken': token,
            'wsfunction': 'core_user_get_users_by_field',
            'field': 'id',
            'values[0]': id,
            'moodlewsrestformat': 'json',
        },
    )

    try:
        payload = response.json()
    except ValueError:
        payload = None

    # A non-list body (e.g. a JSON error object) used to surface as a bare
    # "KeyError: 0"; report it the same way as an HTTP failure.
    if response.status_code != 200 or not isinstance(payload, list):
        print(f"Failed to retrieve users with status {response.status_code}:")
        print(response.text if payload is None else payload)
        raise SystemExit(1)

    # An empty result yields an empty record; any required key is then
    # simply missing for the caller.
    return payload[0] if payload else {}
def write_user_email_csv(config: dict[str, str], token: str, user_ids: list[int],
                         output_path: str = 'output.csv') -> None:
    """Look up every user id and write id, full name and email to a CSV file.

    Each id is resolved with ``get_user_detail``. Records lacking any of
    the ``id``, ``fullname`` or ``email`` keys are reported on stdout and
    left out of the file.

    Args:
        config: Passed through to ``get_user_detail``.
        token: Passed through to ``get_user_detail``.
        user_ids: Ids to look up, in output order.
        output_path: Destination file. Defaults to ``'output.csv'`` in the
            working directory, the previously hard-coded location.
    """
    rows = []
    # "user_id" rather than "id" so the builtin is not shadowed.
    for user_id in user_ids:
        user_detail = get_user_detail(config, token, user_id)
        try:
            rows.append([user_detail['id'], user_detail['fullname'], user_detail['email']])
        except KeyError:
            print(f"Discarding record with missing column: {user_detail}")

    # newline='' hands line-ending control to the csv module.
    with open(output_path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['ID', 'Name', 'Email'])
        writer.writerows(rows)
def main() -> None:
    """Run the scrape end to end.

    Parses the command line, obtains a token, collects the user ids and
    writes the CSV, printing a progress line before each of the last two
    steps and a closing message at the end.
    """
    settings = get_args()
    ws_token = get_auth(settings)

    print("Retrieving user IDs...")
    ids = get_user_ids(settings, ws_token)

    print("Getting user data and writing CSV...")
    write_user_email_csv(settings, ws_token, ids)

    print("All done! See: ./output.csv in the working directory.")


# Only run the scrape when executed as a script, not when imported.
if __name__ == "__main__":
    main()