Skip to content

lms_public.services.scrapers

get_course_messages(course, cookie) async

Get the course messages from LMS. Sends request to course page. Parses the text using bs4.

Parameters:

Name Type Description Default
course LMSCourse

An LMSCourse instance

required
cookie LMSCookie

An LMSCookie instance

required

Returns:

Type Description
list[PublicMessage]

List of course messages

Source code in src/lms_public/services/scrapers.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
async def get_course_messages(
    course: LMSCourse, cookie: LMSCookie
) -> list[PublicMessage]:
    """Fetch a course page from LMS and extract its messages.

    The page is requested through ``get_page_text`` using the course's
    ``suffix_url`` and parsed with bs4. Every element carrying the
    ``wall-action-item`` class is handed to ``parse_public_message``.

    Args:
        course (LMSCourse): An LMSCourse instance
        cookie (LMSCookie): An LMSCookie instance

    Returns:
        (list[PublicMessage]): List of course messages
    """
    html = await get_page_text(suffix_url=course.suffix_url, cookie=cookie)
    document = BeautifulSoup(html, "html.parser")

    # One parsed message per "wall-action-item" container found on the page.
    return [
        parse_public_message(message_container=container, course=course)
        for container in document.find_all(class_="wall-action-item")
    ]

get_courses_suffix_urls(cookie) async

Get the course suffix URLs and names from LMS. Sends request to home page. Parses the text using bs4.

Args: cookie (LMSCookie): An LMSCookie instance

Returns:

Type Description
list[tuple[str, str]]

list[tuple[str, str]]: List of (course_suffix_url, course_name) tuples

Source code in src/lms_public/services/scrapers.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
async def get_courses_suffix_urls(
    cookie: LMSCookie,
) -> list[tuple[str, str]]:
    """Get the course suffix URLs and names from LMS.

    Sends request to home page. Parses the text using bs4.

    Args:
        cookie (LMSCookie): An LMSCookie instance

    Returns:
        list[tuple[str, str]]: List of (course_suffix_url, course_name)
            tuples. The list is empty when the home page contains no
            element with id ``profile_groups``.
    """
    page_text = await get_page_text(
        suffix_url=constants.HOME_SUFFIX_URL, cookie=cookie
    )
    soup = BeautifulSoup(page_text, "html.parser")

    # ``find`` returns None when the element is absent (for instance when
    # the fetched page text is empty); bail out instead of raising
    # AttributeError on the chained ``find_all`` call.
    groups = soup.find(id="profile_groups")
    if groups is None:
        return []

    courses_info = []
    for li_tag in groups.find_all("li"):
        course_suffix_url = li_tag.find("a").get("href")
        # The name is the second tab-separated field of the second <div>
        # inside the list item.
        course_name = li_tag.find_all("div")[1].text.split("\t")[1].strip()
        courses_info.append((course_suffix_url, course_name))
    return courses_info

get_page_text(suffix_url, cookie) async

Get the page text from LMS. Sends request to URL and returns the text. Encoding is set to response.charset (UTF-8). However, some messages have characters which cannot be decoded, so errors="replace" is used.

Parameters:

Name Type Description Default
suffix_url str

URL to send request to

required
cookie LMSCookie

An LMSCookie instance

required

Returns:

Type Description
str

Page text

Source code in src/lms_public/services/scrapers.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
async def get_page_text(suffix_url: str, cookie: LMSCookie) -> str:
    """Request a page from LMS and return its body as text.

    The body is decoded with the charset reported by the response.
    Some messages contain characters which cannot be decoded, so
    `errors="replace"` is used instead of failing on them.

    Args:
        suffix_url (str): URL to send request to
        cookie (LMSCookie): An LMSCookie instance

    Returns:
        (str): Page text, or an empty string when the response status
            is not 200
    """
    async with aiohttp.ClientSession(
        base_url=constants.BASE_URL, cookies=cookie.as_dict
    ) as http:
        async with http.get(url=suffix_url) as reply:
            # Anything other than a plain 200 is reported as "no content".
            if reply.status != 200:
                return ""
            body = await reply.text(encoding=reply.charset, errors="replace")
            return body