add support for file scheme
split url parsing into separate function
This commit is contained in:
parent
57e1fae538
commit
015e46134e
166
browser.py
166
browser.py
@ -1,31 +1,61 @@
|
||||
import os.path
|
||||
import socket
|
||||
import ssl
|
||||
import sys
|
||||
|
||||
|
||||
SUPPORTED_SCHEMES = [
|
||||
"http",
|
||||
"https",
|
||||
"file",
|
||||
]
|
||||
|
||||
DEFAULT_FILE = "default.html"
|
||||
|
||||
|
||||
class URL:
|
||||
def __init__(self, url: str):
|
||||
self.scheme, url = url.split("://", 1)
|
||||
assert self.scheme in ["http", "https"]
|
||||
if "/" not in url:
|
||||
url = url + "/"
|
||||
self.host, url = url.split("/", 1)
|
||||
self.path = "/" + url
|
||||
def __init__(self, url_string: str | None = None):
|
||||
self.scheme: str = ""
|
||||
self.host: str = ""
|
||||
self.port: int = -1
|
||||
self.path: str = ""
|
||||
self.query: str = ""
|
||||
self.fragment: str = ""
|
||||
self.default_port = False
|
||||
if url_string is not None:
|
||||
parse_url(url_string, self)
|
||||
|
||||
if self.scheme == "http":
|
||||
self.port = 80
|
||||
elif self.scheme == "https":
|
||||
self.port = 443
|
||||
def to_string(self):
|
||||
url_string = self.scheme + ":"
|
||||
if self.host != "":
|
||||
url_string += "//" + self.host
|
||||
if self.port != -1 and not self.default_port:
|
||||
url_string += f":{self.port}"
|
||||
url_string += self.path
|
||||
if self.query != "":
|
||||
url_string += "?" + self.query
|
||||
if self.fragment != "":
|
||||
url_string += "#" + self.fragment
|
||||
return url_string
|
||||
|
||||
if ":" in self.host:
|
||||
self.host, port = self.host.split(":", 1)
|
||||
self.port = int(port)
|
||||
def __str__(self):
|
||||
return self.to_string()
|
||||
|
||||
def __repr__(self):
|
||||
return f"<URL {self.to_string()}>"
|
||||
|
||||
|
||||
class Request:
|
||||
def __init__(self, url: URL, method: str = "GET"):
    """Prepare a request for ``url``.

    Args:
        url: Target of the request; its ``host`` seeds the ``Host`` header.
        method: Request method name, stored on the instance.
    """
    self.url = url
    self.method = method

    # Serialized request text, built up by the add_* helpers.
    self.request_string = ""
    self.http_version = "HTTP/1.1"
    # The host lives on the URL object; Request has no ``host`` attribute.
    self.headers = {"Host": self.url.host}
|
||||
|
||||
def add_request_line(self, method):
    """Append the request line ``<method> <path> <version>`` plus CRLF.

    Args:
        method: Request method name to place at the start of the line.
    """
    # The path is read from the URL object; Request has no ``path`` attribute.
    self.request_string += f"{method} {self.url.path} {self.http_version}\r\n"
|
||||
|
||||
def add_header(self, key, value):
|
||||
self.headers[key] = value
|
||||
@ -44,7 +74,13 @@ class URL:
|
||||
self.request_string += f"{key}: {value}\r\n"
|
||||
self.request_string += "\r\n"
|
||||
|
||||
def request(self, method: str = "GET", headers: dict = None) -> str:
|
||||
def send_request(self, *args, **kwargs):
    """Dispatch the request according to the URL scheme.

    Args:
        *args: Forwarded to ``http_request`` for http/https URLs.
        **kwargs: Forwarded to ``http_request`` for http/https URLs.

    Returns:
        Whatever the scheme-specific handler returns.

    Raises:
        ValueError: If the URL scheme has no handler. Previously this case
            fell through and returned ``None`` silently.
    """
    scheme = self.url.scheme
    if scheme in ("http", "https"):
        return self.http_request(*args, **kwargs)
    if scheme == "file":
        # Local reads take no method or headers, so the arguments are dropped.
        return self.file_request()
    raise ValueError(f"unsupported URL scheme: {scheme!r}")
|
||||
|
||||
def http_request(self, method: str = "GET", headers: dict = None) -> str:
|
||||
if headers is not None:
|
||||
self.add_headers(headers)
|
||||
s = socket.socket(
|
||||
@ -52,10 +88,10 @@ class URL:
|
||||
type=socket.SOCK_STREAM,
|
||||
proto=socket.IPPROTO_TCP,
|
||||
)
|
||||
s.connect((self.host, self.port))
|
||||
if self.scheme == "https":
|
||||
s.connect((self.url.host, self.url.port))
|
||||
if self.url.scheme == "https":
|
||||
context = ssl.SSLContext(ssl.PROTOCOL_TLS)
|
||||
s = context.wrap_socket(s, server_hostname=self.host)
|
||||
s = context.wrap_socket(s, server_hostname=self.url.host)
|
||||
|
||||
self.add_request_line(method)
|
||||
self.add_default_headers()
|
||||
@ -81,6 +117,82 @@ class URL:
|
||||
s.close()
|
||||
return content
|
||||
|
||||
def file_request(self):
    """Return the text of the local file named by the URL path.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the platform's default encoding. The URL's host, query and fragment
    are not consulted.

    Returns:
        The full file contents as a string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(self.url.path, encoding="utf-8") as source:
        return source.read()
|
||||
|
||||
|
||||
def parse_url(url_string: str, url: URL | None = None) -> tuple[URL, bool]:
    """Split ``url_string`` into its components.

    Components are written onto ``url`` as they are recognized, so on failure
    the object holds whatever was parsed up to that point (for example the
    scheme of an unsupported URL).

    Args:
        url_string: Text of the form
            ``scheme:[//host[:port]]path[?query][#fragment]``.
        url: Object to fill in; a fresh ``URL`` is created when ``None``.

    Returns:
        ``(url, success)``. ``success`` is ``False`` when:
        - the scheme separator is missing or the scheme is not in
          ``SUPPORTED_SCHEMES``;
        - there is neither a host nor a path;
        - an http/https URL has no host;
        - a file URL has no usable path;
        - the port is not a decimal number in ``0..65535``.
    """
    if url is None:
        url = URL()

    scheme, colon, rest = url_string.partition(":")
    if not colon:
        return url, False
    url.scheme = scheme
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if scheme not in SUPPORTED_SCHEMES:
        return url, False

    has_authority = rest.startswith("//")
    if has_authority:
        url.host, rest = _split_before(rest[2:], "/?#")
    # ``rest`` must advance past the path, otherwise a following query or
    # fragment is never seen.
    url.path, rest = _split_before(rest, "?#")

    if has_authority and url.host == "" and url.path == "":
        return url, False
    if not has_authority and url.path == "":
        return url, False

    if rest.startswith("?"):
        url.query, rest = _split_before(rest[1:], "#")
    if rest.startswith("#"):
        url.fragment = rest[1:]

    default_ports = {"http": 80, "https": 443}
    if scheme in default_ports:
        url.port = default_ports[scheme]
        url.default_port = True
        if url.path == "":
            url.path = "/"

    if scheme == "file":
        # "/c:/dir/file" carries a drive letter; drop the leading slash so
        # the path can be opened directly on Windows.
        if sys.platform == "win32" and url.path.startswith("/") and ":" in url.path:
            url.path = url.path[1:]
        if url.path == "" or url.path == "/":
            return url, False

    host, port_colon, port_text = url.host.partition(":")
    if port_colon:
        if not (port_text.isascii() and port_text.isdigit()):
            return url, False
        port = int(port_text)
        if port > 65535:
            return url, False
        url.host = host
        url.port = port
        # Only hide the port on serialization when it really is the default.
        url.default_port = port == default_ports.get(scheme)

    # A network request cannot be made without a host name.
    if scheme in default_ports and url.host == "":
        return url, False

    return url, True


def _split_before(text: str, delimiters: str) -> tuple[str, str]:
    """Split ``text`` just before the first character found in ``delimiters``.

    Returns ``(head, tail)`` where ``tail`` starts with the delimiter, or
    ``(text, "")`` when no delimiter occurs.
    """
    for index, char in enumerate(text):
        if char in delimiters:
            return text[:index], text[index:]
    return text, ""
|
||||
|
||||
|
||||
def show(body: str) -> None:
|
||||
in_tag = False
|
||||
@ -93,11 +205,17 @@ def show(body: str) -> None:
|
||||
print(char, end="")
|
||||
|
||||
|
||||
def load(url_string: str) -> None:
    """Fetch the resource named by ``url_string`` and print it via ``show``.

    When ``url_string`` cannot be parsed, the bundled ``DEFAULT_FILE`` is
    shown instead. The default file is looked up next to this module, so the
    fallback works regardless of the current working directory.

    Args:
        url_string: URL text as typed by the user.
    """
    url, success = parse_url(url_string)
    if not success:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        default = os.path.join(module_dir, DEFAULT_FILE)
        if sys.platform == "win32":
            # Drive-letter paths need a leading slash to form "file:///c:...".
            default = "/" + default
        url, _ = parse_url(f"file://{default}")

    body = Request(url).send_request()
    show(body)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # load() takes the raw URL text. With no argument an empty string is
    # passed, which fails to parse and makes load() show the default page
    # instead of crashing with IndexError.
    load(sys.argv[1] if len(sys.argv) > 1 else "")
|
10
default.html
Normal file
10
default.html
Normal file
@ -0,0 +1,10 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Title</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>Test</p>
|
||||
</body>
|
||||
</html>
|
@ -1,9 +1,11 @@
|
||||
import pytest
|
||||
|
||||
from browser import URL
|
||||
from browser import parse_url
|
||||
from browser import Request
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"url_string,scheme,host,port,path",
|
||||
"url_string,scheme,host,port,path,query,fragment,parse_success",
|
||||
[
|
||||
(
|
||||
"http://example.com",
|
||||
@ -11,6 +13,9 @@ from browser import URL
|
||||
"example.com",
|
||||
80,
|
||||
"/",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"http://example.com/",
|
||||
@ -18,6 +23,9 @@ from browser import URL
|
||||
"example.com",
|
||||
80,
|
||||
"/",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"https://example.com/",
|
||||
@ -25,6 +33,9 @@ from browser import URL
|
||||
"example.com",
|
||||
443,
|
||||
"/",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"http://example.com:5000/",
|
||||
@ -32,6 +43,9 @@ from browser import URL
|
||||
"example.com",
|
||||
5000,
|
||||
"/",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"http://example.com:5000/test/example",
|
||||
@ -39,6 +53,9 @@ from browser import URL
|
||||
"example.com",
|
||||
5000,
|
||||
"/test/example",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"https://example.com:5000/test/example",
|
||||
@ -46,15 +63,101 @@ from browser import URL
|
||||
"example.com",
|
||||
5000,
|
||||
"/test/example",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"file:///test.html",
|
||||
"file",
|
||||
"",
|
||||
-1,
|
||||
"/test.html",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"file://file_host/test.html",
|
||||
"file",
|
||||
"file_host",
|
||||
-1,
|
||||
"/test.html",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"file:///c:/test.txt",
|
||||
"file",
|
||||
"",
|
||||
-1,
|
||||
"c:/test.txt",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
r"file:///c:\test.txt",
|
||||
"file",
|
||||
"",
|
||||
-1,
|
||||
r"c:\test.txt",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"file://file_host/",
|
||||
"file",
|
||||
"file_host",
|
||||
-1,
|
||||
"/",
|
||||
"",
|
||||
"",
|
||||
False,
|
||||
),
|
||||
(
|
||||
"htp://example.com/",
|
||||
"htp",
|
||||
"",
|
||||
-1,
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
False,
|
||||
),
|
||||
(
|
||||
"file:test.txt",
|
||||
"file",
|
||||
"",
|
||||
-1,
|
||||
"test.txt",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
(
|
||||
"file:/test.txt",
|
||||
"file",
|
||||
"",
|
||||
-1,
|
||||
"/test.txt",
|
||||
"",
|
||||
"",
|
||||
True,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_url_parsing(url_string, scheme, host, port, path):
|
||||
url = URL(url_string)
|
||||
def test_url_parsing(url_string, scheme, host, port, path, query, fragment, parse_success):
|
||||
url, success = parse_url(url_string)
|
||||
assert url.scheme == scheme
|
||||
assert url.host == host
|
||||
assert url.port == port
|
||||
assert url.path == path
|
||||
assert url.query == query
|
||||
assert url.fragment == fragment
|
||||
assert success == parse_success
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@ -67,7 +170,7 @@ def test_url_parsing(url_string, scheme, host, port, path):
|
||||
)
|
||||
def test_http_request(http_server, url_string):
|
||||
url = URL(url_string)
|
||||
assert url.request() == "test"
|
||||
assert Request(url).send_request() == "test"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@ -80,4 +183,4 @@ def test_http_request(http_server, url_string):
|
||||
)
|
||||
def test_https_request(https_server, url_string):
|
||||
url = URL(url_string)
|
||||
assert url.request() == "test"
|
||||
assert Request(url).send_request() == "test"
|
||||
|
Loading…
Reference in New Issue
Block a user