From 27dc04942adbce144fdacaf4e4e9a21aebdc8352 Mon Sep 17 00:00:00 2001 From: Amber Date: Fri, 8 Apr 2022 00:33:37 -0400 Subject: [PATCH] Initial commit for Moodle scraper --- Pipfile | 12 +++++++ Pipfile.lock | 60 ++++++++++++++++++++++++++++++++++ scrape.py | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+) create mode 100644 Pipfile create mode 100644 Pipfile.lock create mode 100755 scrape.py diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..2b5a07a --- /dev/null +++ b/Pipfile @@ -0,0 +1,12 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +requests = "*" + +[dev-packages] + +[requires] +python_version = "3.10" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..b87c398 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,60 @@ +{ + "_meta": { + "hash": { + "sha256": "a416d48a2c30d4acf425cb96d7ac6672753db8e8f6c962a328848db5b9a290a1" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.10" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "charset-normalizer": { + "hashes": [ + "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", + "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" + ], + "markers": "python_version >= '3'", + "version": "==2.0.12" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3'", + "version": "==3.3" + }, + "requests": { + "hashes": [ + "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", + "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" + ], + "index": "pypi", + "version": "==2.27.1" + }, + "urllib3": { + "hashes": [ + "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", + "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.9" + } + }, + "develop": {} +} diff --git a/scrape.py b/scrape.py new file mode 100755 index 0000000..3ac0da5 --- /dev/null +++ b/scrape.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +import argparse +import csv +import json +import requests + +def get_args() -> dict[str, str]: + parser = argparse.ArgumentParser(description='Rake emails from Moodle') + parser.add_argument('--username','-u', metavar='username', type=str, nargs='?', + required=True, help='Username for Moodle site') + parser.add_argument('--password','-p', metavar='password', type=str, nargs='?', + required=True, help='Password for Moodle site') + parser.add_argument('--url', metavar='url', type=str, nargs='?', + required=True, help='Base URL for Moodle site (e.g. https://moodle.pucrs.br)') + + args = parser.parse_args() + print(args.url) + + return { 'username' : args.username, 'password' : args.password, "url" : args.url } + +def get_auth(config: dict[str, str]) -> str: + uri = f"{config['url']}/login/token.php?username={config['username']}&password={config['password']}&service=moodle_mobile_app" + response = requests.get(uri) + response_dict = dict(response.json()) + if response.status_code != 200 or 'token' not in response_dict: + print(f"Failed to retrieve token with status {response.status_code}:") + print(response.json()) + exit(1) + return response_dict['token'] + +def get_debug(config: dict[str, str], token: str): + uri = f"{config['url']}/webservice/rest/server.php?wstoken={token}&wsfunction=core_webservice_get_site_info&moodlewsrestformat=json" + response = requests.get(uri) + if response.status_code != 200: + print(f"Failed to retrieve debug info with status {response.status_code}:") + print(response.json()) + exit(1) + return response.json() + +def get_user_ids(config: dict[str, str], token: str) -> list[int]: + uri = f"{config['url']}/webservice/rest/server.php?wstoken={token}&wsfunction=core_enrol_search_users&courseid=65031&search=&searchanywhere=1&page=0&perpage=9999&moodlewsrestformat=json" + response = requests.get(uri) + if response.status_code != 200: + print(f"Failed to retrieve users with status {response.status_code}:") + print(response.json()) + exit(1) + user_list = list(response.json()) + id_list = [x['id'] for x in user_list] + return id_list + +def get_user_detail(config: dict[str, str], token: str, id: int) -> dict[str, str]: + uri = f"{config['url']}/webservice/rest/server.php?wstoken={token}&wsfunction=core_user_get_users_by_field&field=id&values[0]={id}&moodlewsrestformat=json" + response = requests.get(uri) + if response.status_code != 200: + print(f"Failed to retrieve users with status {response.status_code}:") + print(response.json()) + exit(1) + return json.loads(response.text)[0] + +def write_user_email_csv(config: dict[str, str], token: str, user_ids: list[int]) -> None: + data = [] + for id in user_ids: + user_detail = get_user_detail(config, token, id) + try: + data.append([user_detail['id'], user_detail['fullname'], user_detail['email']]) + except KeyError: + print(f"Discarding record with missing column: {user_detail}") + + header = ['ID', 'Name', 'Email'] + with open('output.csv', 'w', encoding='UTF8', newline='') as f: + writer = csv.writer(f) + writer.writerow(header) + writer.writerows(data) + + return + +def main() -> None: + config = get_args() + token = get_auth(config) + + print("Retrieving user IDs...") + user_ids = get_user_ids(config, token) + + print("Getting user data and writing CSV...") + write_user_email_csv(config, token, user_ids) + + print("All done! See: ./output.csv in the working directory.") + return + +if __name__ == "__main__": + main() \ No newline at end of file