Initial commit for Moodle scraper

This commit is contained in:
Amber McCloughan 2022-04-08 00:33:37 -04:00
commit 27dc04942a
3 changed files with 163 additions and 0 deletions

12
Pipfile Normal file
View File

@ -0,0 +1,12 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
requests = "*"
[dev-packages]
[requires]
python_version = "3.10"

60
Pipfile.lock generated Normal file
View File

@ -0,0 +1,60 @@
{
"_meta": {
"hash": {
"sha256": "a416d48a2c30d4acf425cb96d7ac6672753db8e8f6c962a328848db5b9a290a1"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.10"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"certifi": {
"hashes": [
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
"sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
],
"version": "==2021.10.8"
},
"charset-normalizer": {
"hashes": [
"sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597",
"sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"
],
"markers": "python_version >= '3'",
"version": "==2.0.12"
},
"idna": {
"hashes": [
"sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
"sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
],
"markers": "python_version >= '3'",
"version": "==3.3"
},
"requests": {
"hashes": [
"sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61",
"sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"
],
"index": "pypi",
"version": "==2.27.1"
},
"urllib3": {
"hashes": [
"sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14",
"sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.9"
}
},
"develop": {}
}

91
scrape.py Executable file
View File

@ -0,0 +1,91 @@
#!/usr/bin/env python
import argparse
import csv
import json
import requests
def get_args() -> dict[str, str]:
    """Parse the command-line options for the Moodle scraper.

    Reads ``--username``/``-u``, ``--password``/``-p`` and ``--url`` from
    ``sys.argv``. All three options are required and each must be followed
    by a value; argparse exits with a usage error otherwise.

    Returns:
        A dict with the keys ``username``, ``password`` and ``url``. Trailing
        slashes are stripped from ``url`` so request paths can be appended
        to it without producing ``//``.
    """
    parser = argparse.ArgumentParser(description='Rake emails from Moodle')
    # Deliberately no ``nargs='?'``: with it a bare ``--username`` was accepted
    # and produced None, which later reached the request as the text "None".
    parser.add_argument('--username', '-u', metavar='username', type=str,
                        required=True, help='Username for Moodle site')
    parser.add_argument('--password', '-p', metavar='password', type=str,
                        required=True, help='Password for Moodle site')
    parser.add_argument('--url', metavar='url', type=str,
                        required=True, help='Base URL for Moodle site (e.g. https://moodle.pucrs.br)')
    args = parser.parse_args()
    return {
        'username': args.username,
        'password': args.password,
        'url': args.url.rstrip('/'),
    }
def get_auth(config: dict[str, str]) -> str:
    """Log in to the Moodle site and return a web-service token.

    Sends a GET request to ``/login/token.php`` under ``config['url']`` for
    the ``moodle_mobile_app`` service.

    Args:
        config: Mapping with the keys ``url``, ``username`` and ``password``.

    Returns:
        The value of the ``token`` field of the JSON response.

    Raises:
        SystemExit: With status 1 when the response is not HTTP 200 or does
            not contain a ``token`` field; the failure is printed first.
    """
    # Let requests build the query string so characters such as '&', '#' or
    # '+' in the credentials are percent-encoded instead of corrupting the URL.
    response = requests.get(
        f"{config['url']}/login/token.php",
        params={
            'username': config['username'],
            'password': config['password'],
            'service': 'moodle_mobile_app',
        },
    )
    try:
        payload = response.json()
    except ValueError:
        # Body is not JSON (e.g. an HTML error page); report the raw text below.
        payload = None
    if (response.status_code != 200
            or not isinstance(payload, dict)
            or 'token' not in payload):
        print(f"Failed to retrieve token with status {response.status_code}:")
        print(payload if payload is not None else response.text)
        raise SystemExit(1)
    return payload['token']
def get_debug(config: dict[str, str], token: str):
    """Fetch site information through ``core_webservice_get_site_info``.

    Args:
        config: Mapping that provides the base site address under ``url``.
        token: Web-service token sent as ``wstoken``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        SystemExit: With status 1 when the response is not HTTP 200; the
            response body is printed first.
    """
    response = requests.get(
        f"{config['url']}/webservice/rest/server.php",
        params={
            'wstoken': token,
            'wsfunction': 'core_webservice_get_site_info',
            'moodlewsrestformat': 'json',
        },
    )
    if response.status_code != 200:
        print(f"Failed to retrieve debug info with status {response.status_code}:")
        try:
            print(response.json())
        except ValueError:
            # Error pages are not necessarily JSON; show the raw body instead
            # of failing while reporting the failure.
            print(response.text)
        raise SystemExit(1)
    return response.json()
def get_user_ids(config: dict[str, str], token: str, course_id: int = 65031) -> list[int]:
    """Return the ids of the users found by ``core_enrol_search_users``.

    Requests a single page of up to 9999 results with an empty search string.

    Args:
        config: Mapping that provides the base site address under ``url``.
        token: Web-service token sent as ``wstoken``.
        course_id: Course to search in, sent as ``courseid``. Defaults to
            65031, the value that used to be hard-coded in the request.

    Returns:
        The ``id`` field of every entry in the response, in response order.

    Raises:
        SystemExit: With status 1 when the response is not HTTP 200 or its
            body is not a JSON list; the failure is printed first.
    """
    response = requests.get(
        f"{config['url']}/webservice/rest/server.php",
        params={
            'wstoken': token,
            'wsfunction': 'core_enrol_search_users',
            'courseid': course_id,
            'search': '',
            'searchanywhere': 1,
            'page': 0,
            'perpage': 9999,
            'moodlewsrestformat': 'json',
        },
    )
    try:
        payload = response.json()
    except ValueError:
        # Body is not JSON (e.g. an HTML error page); report the raw text below.
        payload = None
    # A JSON object instead of a list (e.g. an error payload) must not be
    # iterated: its string keys would make ``user['id']`` raise TypeError.
    if response.status_code != 200 or not isinstance(payload, list):
        print(f"Failed to retrieve users with status {response.status_code}:")
        print(payload if payload is not None else response.text)
        raise SystemExit(1)
    return [user['id'] for user in payload]
def get_user_detail(config: dict[str, str], token: str, id: int) -> dict[str, str]:
    """Look up one user through ``core_user_get_users_by_field``.

    Args:
        config: Mapping that provides the base site address under ``url``.
        token: Web-service token sent as ``wstoken``.
        id: User id to look up (sent as ``values[0]`` with ``field=id``).

    Returns:
        The first entry of the response list, or an empty dict when the
        list is empty, so callers that index the result get a ``KeyError``
        for the missing columns instead of an ``IndexError`` raised here.

    Raises:
        SystemExit: With status 1 when the response is not HTTP 200 or its
            body is not a JSON list; the failure is printed first.
    """
    response = requests.get(
        f"{config['url']}/webservice/rest/server.php",
        params={
            'wstoken': token,
            'wsfunction': 'core_user_get_users_by_field',
            'field': 'id',
            'values[0]': id,
            'moodlewsrestformat': 'json',
        },
    )
    try:
        payload = response.json()
    except ValueError:
        # Body is not JSON (e.g. an HTML error page); report the raw text below.
        payload = None
    # A JSON object instead of a list (e.g. an error payload) cannot be
    # indexed with [0]; treat it as a failed lookup.
    if response.status_code != 200 or not isinstance(payload, list):
        print(f"Failed to retrieve users with status {response.status_code}:")
        print(payload if payload is not None else response.text)
        raise SystemExit(1)
    if not payload:
        return {}
    return payload[0]
def write_user_email_csv(config: dict[str, str], token: str, user_ids: list[int]) -> None:
    """Fetch the details of each user and write them to ``output.csv``.

    One ``get_user_detail`` call is made per entry of ``user_ids``. A record
    lacking ``id``, ``fullname`` or ``email`` is reported on stdout and left
    out. The file is written to the working directory with the header row
    ``ID, Name, Email`` followed by one row per kept record.

    Args:
        config: Settings passed through to ``get_user_detail``.
        token: Web-service token passed through to ``get_user_detail``.
        user_ids: Ids of the users to export, in output order.
    """
    columns = ('id', 'fullname', 'email')
    rows = []
    for user_id in user_ids:
        user_detail = get_user_detail(config, token, user_id)
        try:
            row = [user_detail[column] for column in columns]
        except KeyError:
            print(f"Discarding record with missing column: {user_detail}")
        else:
            rows.append(row)

    with open('output.csv', 'w', encoding='UTF8', newline='') as out_file:
        csv.writer(out_file).writerows([['ID', 'Name', 'Email'], *rows])
def main() -> None:
    """Run the scraper: parse arguments, log in, list users, write the CSV."""
    settings = get_args()
    ws_token = get_auth(settings)

    print("Retrieving user IDs...")
    ids = get_user_ids(settings, ws_token)

    print("Getting user data and writing CSV...")
    write_user_email_csv(settings, ws_token, ids)

    print("All done! See: ./output.csv in the working directory.")


# Only run when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()