add support for file scheme
split url parsing into separate function
This commit is contained in:
parent
57e1fae538
commit
015e46134e
166
browser.py
166
browser.py
@ -1,31 +1,61 @@
|
|||||||
|
import os.path
|
||||||
import socket
|
import socket
|
||||||
import ssl
|
import ssl
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
SUPPORTED_SCHEMES = [
|
||||||
|
"http",
|
||||||
|
"https",
|
||||||
|
"file",
|
||||||
|
]
|
||||||
|
|
||||||
|
DEFAULT_FILE = "default.html"
|
||||||
|
|
||||||
|
|
||||||
class URL:
|
class URL:
|
||||||
def __init__(self, url: str):
|
def __init__(self, url_string: str | None = None):
|
||||||
self.scheme, url = url.split("://", 1)
|
self.scheme: str = ""
|
||||||
assert self.scheme in ["http", "https"]
|
self.host: str = ""
|
||||||
if "/" not in url:
|
self.port: int = -1
|
||||||
url = url + "/"
|
self.path: str = ""
|
||||||
self.host, url = url.split("/", 1)
|
self.query: str = ""
|
||||||
self.path = "/" + url
|
self.fragment: str = ""
|
||||||
|
self.default_port = False
|
||||||
|
if url_string is not None:
|
||||||
|
parse_url(url_string, self)
|
||||||
|
|
||||||
if self.scheme == "http":
|
def to_string(self):
|
||||||
self.port = 80
|
url_string = self.scheme + ":"
|
||||||
elif self.scheme == "https":
|
if self.host != "":
|
||||||
self.port = 443
|
url_string += "//" + self.host
|
||||||
|
if self.port != -1 and not self.default_port:
|
||||||
|
url_string += f":{self.port}"
|
||||||
|
url_string += self.path
|
||||||
|
if self.query != "":
|
||||||
|
url_string += "?" + self.query
|
||||||
|
if self.fragment != "":
|
||||||
|
url_string += "#" + self.fragment
|
||||||
|
return url_string
|
||||||
|
|
||||||
if ":" in self.host:
|
def __str__(self):
|
||||||
self.host, port = self.host.split(":", 1)
|
return self.to_string()
|
||||||
self.port = int(port)
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<URL {self.to_string()}>"
|
||||||
|
|
||||||
|
|
||||||
|
class Request:
|
||||||
|
def __init__(self, url: URL, method: str = "GET"):
|
||||||
|
self.url = url
|
||||||
|
self.method = method
|
||||||
|
|
||||||
self.request_string = ""
|
self.request_string = ""
|
||||||
self.http_version = "HTTP/1.1"
|
self.http_version = "HTTP/1.1"
|
||||||
self.headers = {"Host": self.host}
|
self.headers = {"Host": self.url.host}
|
||||||
|
|
||||||
def add_request_line(self, method):
|
def add_request_line(self, method):
|
||||||
self.request_string += f"{method} {self.path} {self.http_version}\r\n"
|
self.request_string += f"{method} {self.url.path} {self.http_version}\r\n"
|
||||||
|
|
||||||
def add_header(self, key, value):
|
def add_header(self, key, value):
|
||||||
self.headers[key] = value
|
self.headers[key] = value
|
||||||
@ -44,7 +74,13 @@ class URL:
|
|||||||
self.request_string += f"{key}: {value}\r\n"
|
self.request_string += f"{key}: {value}\r\n"
|
||||||
self.request_string += "\r\n"
|
self.request_string += "\r\n"
|
||||||
|
|
||||||
def request(self, method: str = "GET", headers: dict = None) -> str:
|
def send_request(self, *args, **kwargs):
    """Fetch ``self.url`` using the handler that matches its scheme.

    ``http`` and ``https`` URLs are passed to ``http_request`` together
    with any positional/keyword arguments; ``file`` URLs are passed to
    ``file_request``, which takes no arguments.

    Returns:
        Whatever the selected handler returns.

    Raises:
        ValueError: if the URL's scheme has no handler.  Previously this
            case fell off the end and returned ``None``.
    """
    scheme = self.url.scheme
    if scheme in ("http", "https"):
        return self.http_request(*args, **kwargs)
    if scheme == "file":
        return self.file_request()
    raise ValueError(f"unsupported URL scheme: {scheme!r}")
|
||||||
|
|
||||||
|
def http_request(self, method: str = "GET", headers: dict = None) -> str:
|
||||||
if headers is not None:
|
if headers is not None:
|
||||||
self.add_headers(headers)
|
self.add_headers(headers)
|
||||||
s = socket.socket(
|
s = socket.socket(
|
||||||
@ -52,10 +88,10 @@ class URL:
|
|||||||
type=socket.SOCK_STREAM,
|
type=socket.SOCK_STREAM,
|
||||||
proto=socket.IPPROTO_TCP,
|
proto=socket.IPPROTO_TCP,
|
||||||
)
|
)
|
||||||
s.connect((self.host, self.port))
|
s.connect((self.url.host, self.url.port))
|
||||||
if self.scheme == "https":
|
if self.url.scheme == "https":
|
||||||
context = ssl.SSLContext(ssl.PROTOCOL_TLS)
|
context = ssl.SSLContext(ssl.PROTOCOL_TLS)
|
||||||
s = context.wrap_socket(s, server_hostname=self.host)
|
s = context.wrap_socket(s, server_hostname=self.url.host)
|
||||||
|
|
||||||
self.add_request_line(method)
|
self.add_request_line(method)
|
||||||
self.add_default_headers()
|
self.add_default_headers()
|
||||||
@ -81,6 +117,82 @@ class URL:
|
|||||||
s.close()
|
s.close()
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
def file_request(self):
    """Return the text content of the local file at ``self.url.path``.

    The file is decoded as UTF-8 instead of the platform's locale
    encoding, so the same file reads the same on every system; bytes
    that are not valid UTF-8 are replaced rather than aborting the load.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(self.url.path, encoding="utf-8", errors="replace") as f:
        return f.read()
|
||||||
|
|
||||||
|
|
||||||
|
_DEFAULT_PORTS = {"http": 80, "https": 443}


def _split_before(text: str, delimiters: str) -> tuple[str, str]:
    """Split *text* just before its first character found in *delimiters*."""
    for index, char in enumerate(text):
        if char in delimiters:
            return text[:index], text[index:]
    return text, ""


def parse_url(url_string: str, url: URL | None = None) -> tuple[URL, bool]:
    """Split ``url_string`` into its components and store them on ``url``.

    The string is read as ``scheme:[//host[:port]]path[?query][#fragment]``.

    Args:
        url_string: The text to parse.
        url: Object to populate; a new ``URL`` is created when omitted.
            Its host, path, query and fragment are overwritten with what
            is found in ``url_string``.

    Returns:
        ``(url, success)``.  ``success`` is ``False`` (and ``url`` may be
        only partially filled in) when:

        * there is no ``:`` after the scheme,
        * the scheme is not in ``SUPPORTED_SCHEMES`` (the scheme is still
          stored on ``url``),
        * both host and path are empty,
        * a ``file`` URL has an empty or ``/`` path,
        * the port is not an integer.

        Malformed input is reported through ``success`` only; nothing is
        printed and no exception is raised for it.
    """
    if url is None:
        url = URL()
    if not isinstance(url_string, str):
        return url, False

    scheme, colon, rest = url_string.partition(":")
    if not colon:
        return url, False
    url.scheme = scheme
    if scheme not in SUPPORTED_SCHEMES:
        return url, False

    # Authority (host[:port]) is present only after a leading "//".
    host = ""
    if rest.startswith("//"):
        host, rest = _split_before(rest[2:], "/?#")
    path, rest = _split_before(rest, "?#")
    url.host = host
    url.path = path
    if host == "" and path == "":
        return url, False

    # After the path, the remainder is "", "?query[#fragment]" or "#fragment".
    query = ""
    if rest.startswith("?"):
        query, rest = _split_before(rest[1:], "#")
    url.query = query
    url.fragment = rest[1:] if rest.startswith("#") else ""

    default_port = _DEFAULT_PORTS.get(scheme)
    if default_port is not None:
        url.port = default_port
        url.default_port = True
        if url.path == "":
            url.path = "/"

    if scheme == "file":
        # "file:///c:/x" parses to "/c:/x"; on Windows drop the leading
        # slash so the path starts at the drive letter.
        if sys.platform == "win32" and url.path.startswith("/") and ":" in url.path:
            url.path = url.path[1:]
        if url.path in ("", "/"):
            return url, False

    if ":" in url.host:
        url.host, port_text = url.host.split(":", 1)
        try:
            url.port = int(port_text)
        except ValueError:
            return url, False
        # Only an explicit port equal to the scheme default may be
        # omitted again when the URL is turned back into a string.
        url.default_port = url.port == default_port

    return url, True
|
||||||
|
|
||||||
|
|
||||||
def show(body: str) -> None:
|
def show(body: str) -> None:
|
||||||
in_tag = False
|
in_tag = False
|
||||||
@ -93,11 +205,17 @@ def show(body: str) -> None:
|
|||||||
print(char, end="")
|
print(char, end="")
|
||||||
|
|
||||||
|
|
||||||
def load(url_string: str):
    """Fetch the resource named by ``url_string`` and pass its body to ``show``.

    When ``url_string`` cannot be parsed, ``DEFAULT_FILE`` (resolved
    against the current working directory) is loaded instead.
    """
    url, success = parse_url(url_string)
    if not success:
        # Build the fallback URL directly rather than formatting a
        # "file://..." string and parsing it again: an absolute path that
        # contains "?" or "#" would be cut short by the parser.
        url = URL()
        url.scheme = "file"
        url.path = os.path.abspath(DEFAULT_FILE)

    body = Request(url).send_request()
    show(body)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Without a command-line argument, pass an empty string: load() treats
    # an unparsable URL as a request for the default page, which avoids an
    # IndexError traceback.
    load(sys.argv[1] if len(sys.argv) > 1 else "")
|
|
10
default.html
Normal file
10
default.html
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Title</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>Test</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
@ -1,9 +1,11 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from browser import URL
|
from browser import URL
|
||||||
|
from browser import parse_url
|
||||||
|
from browser import Request
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"url_string,scheme,host,port,path",
|
"url_string,scheme,host,port,path,query,fragment,parse_success",
|
||||||
[
|
[
|
||||||
(
|
(
|
||||||
"http://example.com",
|
"http://example.com",
|
||||||
@ -11,6 +13,9 @@ from browser import URL
|
|||||||
"example.com",
|
"example.com",
|
||||||
80,
|
80,
|
||||||
"/",
|
"/",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"http://example.com/",
|
"http://example.com/",
|
||||||
@ -18,6 +23,9 @@ from browser import URL
|
|||||||
"example.com",
|
"example.com",
|
||||||
80,
|
80,
|
||||||
"/",
|
"/",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"https://example.com/",
|
"https://example.com/",
|
||||||
@ -25,6 +33,9 @@ from browser import URL
|
|||||||
"example.com",
|
"example.com",
|
||||||
443,
|
443,
|
||||||
"/",
|
"/",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"http://example.com:5000/",
|
"http://example.com:5000/",
|
||||||
@ -32,6 +43,9 @@ from browser import URL
|
|||||||
"example.com",
|
"example.com",
|
||||||
5000,
|
5000,
|
||||||
"/",
|
"/",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"http://example.com:5000/test/example",
|
"http://example.com:5000/test/example",
|
||||||
@ -39,6 +53,9 @@ from browser import URL
|
|||||||
"example.com",
|
"example.com",
|
||||||
5000,
|
5000,
|
||||||
"/test/example",
|
"/test/example",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"https://example.com:5000/test/example",
|
"https://example.com:5000/test/example",
|
||||||
@ -46,15 +63,101 @@ from browser import URL
|
|||||||
"example.com",
|
"example.com",
|
||||||
5000,
|
5000,
|
||||||
"/test/example",
|
"/test/example",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file:///test.html",
|
||||||
|
"file",
|
||||||
|
"",
|
||||||
|
-1,
|
||||||
|
"/test.html",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file://file_host/test.html",
|
||||||
|
"file",
|
||||||
|
"file_host",
|
||||||
|
-1,
|
||||||
|
"/test.html",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file:///c:/test.txt",
|
||||||
|
"file",
|
||||||
|
"",
|
||||||
|
-1,
|
||||||
|
"c:/test.txt",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
r"file:///c:\test.txt",
|
||||||
|
"file",
|
||||||
|
"",
|
||||||
|
-1,
|
||||||
|
r"c:\test.txt",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file://file_host/",
|
||||||
|
"file",
|
||||||
|
"file_host",
|
||||||
|
-1,
|
||||||
|
"/",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
False,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"htp://example.com/",
|
||||||
|
"htp",
|
||||||
|
"",
|
||||||
|
-1,
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
False,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file:test.txt",
|
||||||
|
"file",
|
||||||
|
"",
|
||||||
|
-1,
|
||||||
|
"test.txt",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"file:/test.txt",
|
||||||
|
"file",
|
||||||
|
"",
|
||||||
|
-1,
|
||||||
|
"/test.txt",
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
True,
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_url_parsing(url_string, scheme, host, port, path):
|
def test_url_parsing(url_string, scheme, host, port, path, query, fragment, parse_success):
|
||||||
url = URL(url_string)
|
url, success = parse_url(url_string)
|
||||||
assert url.scheme == scheme
|
assert url.scheme == scheme
|
||||||
assert url.host == host
|
assert url.host == host
|
||||||
assert url.port == port
|
assert url.port == port
|
||||||
assert url.path == path
|
assert url.path == path
|
||||||
|
assert url.query == query
|
||||||
|
assert url.fragment == fragment
|
||||||
|
assert success == parse_success
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@ -67,7 +170,7 @@ def test_url_parsing(url_string, scheme, host, port, path):
|
|||||||
)
|
)
|
||||||
def test_http_request(http_server, url_string):
|
def test_http_request(http_server, url_string):
|
||||||
url = URL(url_string)
|
url = URL(url_string)
|
||||||
assert url.request() == "test"
|
assert Request(url).send_request() == "test"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@ -80,4 +183,4 @@ def test_http_request(http_server, url_string):
|
|||||||
)
|
)
|
||||||
def test_https_request(https_server, url_string):
|
def test_https_request(https_server, url_string):
|
||||||
url = URL(url_string)
|
url = URL(url_string)
|
||||||
assert url.request() == "test"
|
assert Request(url).send_request() == "test"
|
||||||
|
Loading…
Reference in New Issue
Block a user