From 3b1426b0480b36b0544f123bc879ffbceadf324e Mon Sep 17 00:00:00 2001
From: Stijn De Clercq <declercq.stijn@outlook.com>
Date: Wed, 11 Aug 2021 20:43:45 +0200
Subject: [PATCH] Ufora Notifications: fix timestamp (I think), raise
 description length limit to 4096, FULL html-to-markdown support (e.g.
 hyperlinks, file download links, ...), clean up some ugly stuff

---
 data/embeds/ufora.py | 58 +++++++++++---------------------------------
 requirements.txt     |  3 ++-
 2 files changed, 16 insertions(+), 45 deletions(-)
diff --git a/data/embeds/ufora.py b/data/embeds/ufora.py
index 2f2dbe3..c445e1b 100644
--- a/data/embeds/ufora.py
+++ b/data/embeds/ufora.py
@@ -1,17 +1,17 @@
-from datetime import datetime, timedelta
+from datetime import datetime
 from discord import Embed, Colour
 from functions.stringFormatters import leadingZero as lz
 from functions.timeFormatters import intToWeekday
-from html import unescape
+from markdownify import markdownify as md
 import pytz
-import re
 
 
 class UforaNotification:
     def __init__(self, content: dict, course, notif_id, course_id):
         self._content: dict = content
         self._course = course
-        self._notif_id, self._course_id = notif_id, course_id
+        self._notif_id = notif_id
+        self._course_id = course_id
         self._view_url = self._create_url()
         self._title = self._clean_content(self._content["title"])
         self._description = self._get_description()
@@ -40,54 +40,24 @@ class UforaNotification:
     def _get_description(self):
         desc = self._clean_content(self._content["summary"])
 
-        if len(desc) > 500:
-            return desc[:497] + "..."
+        if len(desc) > 4096:
+            return desc[:4093] + "..."
 
         return desc
 
     def _clean_content(self, text: str):
-        # Dict with HTML & markdown tags to replace
-        html_table = {
-            # CHARACTERS:
-            "&amp;": '&',
-            "&quot;": '"',
-            "apos;": "'",
-            "&gt;": ">",
-            "&lt;": "<",
-            # MARKDOWN SUPPORT:
-            "<b>": "**",
-            "</b>": "**",
-            "<strong>": "**",
-            "</strong>": "**",
-            "<i>": "*",
-            "</i>": "*",
-            "<em>": "*",
-            "</em>": "*",
-            "<del>": "~~",
-            "</del>": "~~",
-            "<ins>": "__",
-            "</ins>": "__",
-            # Represent paragraphs with newlines
-            "</p>": "\n",
-            "<br>": "\n",
-            "<br/>": "\n",
-            "<br />": "\n"
-        }
-
-        # Unescape HTML
-        for key, value in html_table.items():
-            text = text.replace(key, value)
-
-        # Remove HTML tags
-        return unescape(re.sub(r"<[^>]*>", "", text))
+        return md(text)
 
     def _get_published(self):
-        time_string = "%a, %d %b %Y %H:%M:%S %Z"
-        dt = datetime.strptime(self._content["published"], time_string)\
+        # strptime cannot reliably parse the trailing timezone name (%Z),
+        # so we cut it off and assume the timestamp is always in UTC+0
+        published = self._content["published"].rsplit(" ", 1)[0]
+        time_string = "%a, %d %b %Y %H:%M:%S"
+        dt = datetime.strptime(published, time_string)\
             .astimezone(pytz.timezone("Europe/Brussels"))
 
-        # Apply timezone offset
-        dt = dt + timedelta(hours=dt.utcoffset().seconds//3600)
+        # Apply timezone offset in a hacky way
+        dt = dt + dt.utcoffset()
 
         return "{} {}/{}/{} om {}:{}:{}".format(
             intToWeekday(dt.weekday()),
diff --git a/requirements.txt b/requirements.txt
index dc9dcba..036bf7e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,4 +17,5 @@ quart==0.15.1
 Quart-CORS==0.5.0
 attrs~=21.2.0
 dacite~=1.6.0
-pytest==6.2.4
\ No newline at end of file
+pytest==6.2.4
+markdownify==0.9.2
\ No newline at end of file