diff --git a/htmlparser.py b/htmlparser.py
new file mode 100644
index 0000000..94bb068
--- /dev/null
+++ b/htmlparser.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# System packages
+import pathlib
+from datetime import datetime
+from html.parser import HTMLParser
+from typing import List, Tuple, Optional, Set
+from enum import IntEnum
+
+# Root directory of the project.
+# NOTE(review): this is a hard-coded absolute path under one user's home
+# directory; it is not derived from the location of this file. Confirm how
+# callers use it before running on another machine.
+path_project = pathlib.Path("/home/gmartin/Workspace/DiscordPrettyPrinter")
+
+
+class DiscordRollTok(IntEnum):
+ """
+ Enumerate the successive times the Discord tokens have been modified.
+
+ Each member is used as a key of the module-level token tables
+ (PSEUDO_TAG, REACTCOUNT_TAG, DIVIDER_TAG).
+ """
+ ED_20230709 = 1 # Exports made on 2023-07-09
+ ED_20240605 = 2 # Exports made on 2024-06-05
+
+# Tokens used to parse pseudos (user names), one per DiscordRollTok revision.
+# The first value is the example given in check_tagattr's docstring, i.e. a
+# substring looked for inside attribute values.
+PSEUDO_TAG: dict[DiscordRollTok, str] = {
+ DiscordRollTok.ED_20230709: "username-h_Y3Us",
+ DiscordRollTok.ED_20240605: "username__0b0e7",
+}
+# Tokens used to parse reaction counts, one per DiscordRollTok revision.
+# NOTE(review): presumably matched against attribute values like PSEUDO_TAG;
+# the code that reads this table is not visible here - confirm.
+REACTCOUNT_TAG: dict[DiscordRollTok, str] = {
+ DiscordRollTok.ED_20230709: "reactionCount-26U4As",
+ DiscordRollTok.ED_20240605: "reactionCount__2c34d",
+}
+# Tokens used to parse the divider and the date, one per DiscordRollTok revision.
+# NOTE(review): presumably matched against attribute values like PSEUDO_TAG;
+# the code that reads this table is not visible here - confirm.
+DIVIDER_TAG: dict[DiscordRollTok, str] = {
+ DiscordRollTok.ED_20230709: "divider-IqmEqJ",
+ DiscordRollTok.ED_20240605: "divider__01aed",
+}
+
+
+def check_tagattr(attrs: List[Tuple[str, Optional[str]]], tag: str) -> bool:
+    """Check whether ``tag`` occurs inside any attribute value.
+
+    Args:
+        attrs: ``(name, value)`` pairs as handed to
+            ``HTMLParser.handle_starttag``. The value is ``None`` for an
+            attribute written without a value.
+        tag: Substring to look for, e.g. ``'username-h_Y3Us'``.
+
+    Returns:
+        ``True`` if at least one attribute value contains ``tag``; ``False``
+        otherwise, including when ``attrs`` is empty.
+    """
+    # Valueless attributes carry ``None``: skip them rather than letting
+    # ``tag in None`` raise a TypeError in the middle of parsing.
+    return any(value is not None and tag in value for _, value in attrs)
+
+
+class DiscordHTMLParser(HTMLParser):
+
+ def __init__(self, keep_time: bool, convert_charrefs=True):
+     """Create a parser with an empty export and all scan flags cleared.
+
+     Args:
+         keep_time: Stored as ``self.keep_time``; ``handle_data`` tests it
+             before writing a message timestamp.
+         convert_charrefs: Forwarded unchanged to ``HTMLParser.__init__``.
+     """
+     super().__init__(convert_charrefs=convert_charrefs)
+     self.keep_time = keep_time
+
+     # Identifiers already exported, used to detect repeated messages and
+     # repeated date separators.
+     self.set_idmessage: Set[str] = set()
+     self.set_dateseparator: Set[str] = set()
+
+     # Datetime of the time element currently being parsed, kept so that it
+     # can be printed in the message; None when there is none pending.
+     self.current_dt: Optional[datetime] = None
+
+     # Scan flags: which kind of element the parser is currently inside.
+     self.is_scan_datedivider: bool = False
+     self.is_scan_reactioncount: bool = False
+     self.is_scan_time: bool = False
+     self.is_scan_pseudo: bool = False
+     self.is_scan_message: bool = False
+
+     # Accumulated output and counters.
+     self.nb_duplicate: int = 0
+     self.nb_messages: int = 0
+     self.export_txt: str = ""
+
+ def handle_starttag(self, tag, attrs):
+ # Dispatch on the tag name to update the scan flags and the exported text.
+ # NOTE(review): this method looks damaged in this copy of the file: the
+ # indentation is flattened and every literal appended below is empty or
+ # reduced to "\n". Confirm against the original file before editing.
+
+ # Add to the export the tags used to format the text.
+ match tag:
+ case "em":
+ if self.is_scan_message:
+ self.export_txt += ""
+ case "strong":
+ if self.is_scan_message:
+ self.export_txt += ""
+ case "li":
+ if check_tagattr(attrs, "chat-messages"):
+
+ # Retrieve the id of the message.
+ # NOTE(review): `value` is None for a valueless attribute, which would
+ # make the `in` test raise TypeError; and the [1] index raises
+ # IndexError when a value contains "chat-messages" without the
+ # trailing "-". Confirm whether such input can occur.
+ id_message: str = ""
+ for key, value in attrs:
+ if "chat-messages" in value:
+ id_message = value.split("chat-messages-")[1]
+
+ # If the message has already been seen, don't keep it.
+ if id_message in self.set_idmessage:
+ self.nb_duplicate += 1
+ else:
+ self.set_idmessage.add(id_message)
+
+ self.nb_messages += 1
+ self.is_scan_message = True
+ self.export_txt += f"\n"
+ # NOTE(review): "strong" and "li" are already matched by earlier arms, and a
+ # match statement runs only the first arm that matches, so as written the
+ # next two arms can never execute. Together with the span/div/time arms,
+ # which only clear scan state, this tail looks like the body of a separate
+ # end-tag handler whose `def` line is missing - confirm.
+ case "strong":
+ if self.is_scan_message:
+ self.export_txt += ""
+ case "li":
+ if self.is_scan_message:
+ self.is_scan_message = False
+ self.export_txt += ""
+ case "span":
+ self.is_scan_pseudo = False
+ case "div":
+ self.is_scan_reactioncount = False
+ self.is_scan_datedivider = False
+ case "time":
+ self.is_scan_time = False
+ self.current_dt = None
+
+ def handle_data(self, data):
+
+ if self.is_scan_message:
+ if self.is_scan_reactioncount:
+ return
+ elif self.is_scan_time and self.keep_time:
+ if self.current_dt is not None:
+ self.export_txt += f"[{self.current_dt.strftime('%Y-%m-%d %H:%M')}] "
+ self.current_dt = None
+ elif self.is_scan_pseudo:
+ pseudo_ansi = ''.join(i for i in data.lower() if ord(i) < 128)
+ self.export_txt += f""
+ self.export_txt += data
+ self.export_txt += ""
+ elif not (data in [""]):
+ data = data.replace("\n", "\n
")
+ self.export_txt += data
+ elif self.is_scan_datedivider:
+ self.export_txt += f"