From 015e46134e50bbba4ddcddf21fa08ef40f6297e1 Mon Sep 17 00:00:00 2001 From: Matthew Welch Date: Fri, 6 Dec 2024 11:31:09 -0800 Subject: [PATCH] add support for file scheme split url parsing into separate function --- browser.py | 166 +++++++++++++++++++++++++++++++++++++++------- default.html | 10 +++ tests/url_test.py | 113 +++++++++++++++++++++++++++++-- 3 files changed, 260 insertions(+), 29 deletions(-) create mode 100644 default.html diff --git a/browser.py b/browser.py index 0ad9f2a..99a8a4c 100644 --- a/browser.py +++ b/browser.py @@ -1,31 +1,61 @@ +import os.path import socket import ssl +import sys + + +SUPPORTED_SCHEMES = [ + "http", + "https", + "file", +] + +DEFAULT_FILE = "default.html" class URL: - def __init__(self, url: str): - self.scheme, url = url.split("://", 1) - assert self.scheme in ["http", "https"] - if "/" not in url: - url = url + "/" - self.host, url = url.split("/", 1) - self.path = "/" + url + def __init__(self, url_string: str | None = None): + self.scheme: str = "" + self.host: str = "" + self.port: int = -1 + self.path: str = "" + self.query: str = "" + self.fragment: str = "" + self.default_port = False + if url_string is not None: + parse_url(url_string, self) - if self.scheme == "http": - self.port = 80 - elif self.scheme == "https": - self.port = 443 + def to_string(self): + url_string = self.scheme + ":" + if self.host != "": + url_string += "//" + self.host + if self.port != -1 and not self.default_port: + url_string += f":{self.port}" + url_string += self.path + if self.query != "": + url_string += "?" + self.query + if self.fragment != "": + url_string += "#" + self.fragment + return url_string - if ":" in self.host: - self.host, port = self.host.split(":", 1) - self.port = int(port) + def __str__(self): + return self.to_string() + + def __repr__(self): + return f"" + + +class Request: + def __init__(self, url: URL, method: str = "GET"): + self.url = url + self.method = method self.request_string = "" self.http_version = "HTTP/1.1" - self.headers = {"Host": self.host} + self.headers = {"Host": self.url.host} def add_request_line(self, method): - self.request_string += f"{method} {self.path} {self.http_version}\r\n" + self.request_string += f"{method} {self.url.path} {self.http_version}\r\n" def add_header(self, key, value): self.headers[key] = value @@ -44,7 +74,13 @@ class URL: self.request_string += f"{key}: {value}\r\n" self.request_string += "\r\n" - def request(self, method: str = "GET", headers: dict = None) -> str: + def send_request(self, *args, **kwargs): + if self.url.scheme in ["http", "https"]: + return self.http_request(*args, **kwargs) + elif self.url.scheme == "file": + return self.file_request() + + def http_request(self, method: str = "GET", headers: dict = None) -> str: if headers is not None: self.add_headers(headers) s = socket.socket( @@ -52,10 +88,10 @@ class URL: type=socket.SOCK_STREAM, proto=socket.IPPROTO_TCP, ) - s.connect((self.host, self.port)) - if self.scheme == "https": + s.connect((self.url.host, self.url.port)) + if self.url.scheme == "https": context = ssl.SSLContext(ssl.PROTOCOL_TLS) - s = context.wrap_socket(s, server_hostname=self.host) + s = context.wrap_socket(s, server_hostname=self.url.host) self.add_request_line(method) self.add_default_headers() @@ -81,6 +117,82 @@ class URL: s.close() return content + def file_request(self): + with open(self.url.path) as f: + return f.read() + + +def parse_url(url_string: str, url: URL | None = None) -> tuple[URL, bool]: + has_authority = False + if url is None: + url = URL() + try: + url.scheme, url_string = url_string.split(":", 1) + assert url.scheme in SUPPORTED_SCHEMES + + if url_string.startswith("//"): + has_authority = True + url_string = url_string[2:] + i = 0 + for char in url_string: + if char in ["/", "?", "#"]: + break + i += 1 + url.host += char + url_string = url_string[i:] + i = 0 + for char in url_string: + if char in ["?", "#"]: + break + url.path += char + url_string = url_string[i:] + + if has_authority and url.host == "" and url.path == "": + return url, False + elif not has_authority and url.path == "": + return url, False + + if url_string.startswith("?"): + url_string = url_string[1:] + i = 0 + for char in url_string: + if char == "#": + break + i += 1 + url.query += char + url_string = url_string[i:] + if url_string.startswith("#"): + url.fragment = url_string[1:] + + if url.scheme == "http": + url.port = 80 + url.default_port = True + elif url.scheme == "https": + url.port = 443 + url.default_port = False + if url.scheme in ["http", "https"]: + if url.path == "" or url.path is None: + url.path = "/" + if url.scheme == "file": + print(f"{url.scheme=}") + print(f"{url.host=}") + print(f"{url.port=}") + print(f"{url.path=}") + print(f"{url.query=}") + print(f"{url.fragment=}") + if sys.platform == "win32" and url.path.startswith("/") and ":" in url.path: + url.path = url.path[1:] + if url.path == "" or url.path == "/": + return url, False + + if ":" in url.host: + url.host, port = url.host.split(":", 1) + url.port = int(port) + return url, True + except Exception as e: + print(e) + return url, False + def show(body: str) -> None: in_tag = False @@ -93,11 +205,17 @@ def show(body: str) -> None: print(char, end="") -def load(url: URL): - body = url.request() +def load(url_string: str): + url, success = parse_url(url_string) + if not success: + default = os.path.abspath(DEFAULT_FILE) + if sys.platform == "win32": + default = "/" + default + url, _ = parse_url(f"file://{default}") + + body = Request(url).send_request() show(body) if __name__ == '__main__': - import sys - load(URL(sys.argv[1])) \ No newline at end of file + load(sys.argv[1]) \ No newline at end of file diff --git a/default.html b/default.html new file mode 100644 index 0000000..6769c9f --- /dev/null +++ b/default.html @@ -0,0 +1,10 @@ + + + + + Title + + +

Test

+ + \ No newline at end of file diff --git a/tests/url_test.py b/tests/url_test.py index fa88025..dd7a508 100644 --- a/tests/url_test.py +++ b/tests/url_test.py @@ -1,9 +1,11 @@ import pytest from browser import URL +from browser import parse_url +from browser import Request @pytest.mark.parametrize( - "url_string,scheme,host,port,path", + "url_string,scheme,host,port,path,query,fragment,parse_success", [ ( "http://example.com", @@ -11,6 +13,9 @@ from browser import URL "example.com", 80, "/", + "", + "", + True, ), ( "http://example.com/", @@ -18,6 +23,9 @@ from browser import URL "example.com", 80, "/", + "", + "", + True, ), ( "https://example.com/", @@ -25,6 +33,9 @@ from browser import URL "example.com", 443, "/", + "", + "", + True, ), ( "http://example.com:5000/", @@ -32,6 +43,9 @@ from browser import URL "example.com", 5000, "/", + "", + "", + True, ), ( "http://example.com:5000/test/example", @@ -39,6 +53,9 @@ from browser import URL "example.com", 5000, "/test/example", + "", + "", + True, ), ( "https://example.com:5000/test/example", @@ -46,15 +63,101 @@ from browser import URL "example.com", 5000, "/test/example", + "", + "", + True, + ), + ( + "file:///test.html", + "file", + "", + -1, + "/test.html", + "", + "", + True, + ), + ( + "file://file_host/test.html", + "file", + "file_host", + -1, + "/test.html", + "", + "", + True, + ), + ( + "file:///c:/test.txt", + "file", + "", + -1, + "c:/test.txt", + "", + "", + True, + ), + ( + r"file:///c:\test.txt", + "file", + "", + -1, + r"c:\test.txt", + "", + "", + True, + ), + ( + "file://file_host/", + "file", + "file_host", + -1, + "/", + "", + "", + False, + ), + ( + "htp://example.com/", + "htp", + "", + -1, + "", + "", + "", + False, + ), + ( + "file:test.txt", + "file", + "", + -1, + "test.txt", + "", + "", + True, + ), + ( + "file:/test.txt", + "file", + "", + -1, + "/test.txt", + "", + "", + True, ), ], ) -def test_url_parsing(url_string, scheme, host, port, path): - url = URL(url_string) +def test_url_parsing(url_string, scheme, host, port, path, query, fragment, parse_success): + url, success = parse_url(url_string) assert url.scheme == scheme assert url.host == host assert url.port == port assert url.path == path + assert url.query == query + assert url.fragment == fragment + assert success == parse_success @pytest.mark.parametrize( @@ -67,7 +170,7 @@ def test_url_parsing(url_string, scheme, host, port, path): ) def test_http_request(http_server, url_string): url = URL(url_string) - assert url.request() == "test" + assert Request(url).send_request() == "test" @pytest.mark.parametrize( @@ -80,4 +183,4 @@ def test_http_request(http_server, url_string): ) def test_https_request(https_server, url_string): url = URL(url_string) - assert url.request() == "test" + assert Request(url).send_request() == "test"