PyWebBrowser/browser.py

77 lines
2.0 KiB
Python

import socket
import ssl
class URL:
    """A parsed ``http``/``https`` URL that can download its own resource.

    Attributes set by the constructor:
        scheme: ``"http"`` or ``"https"``.
        host: Host name with any ``:port`` suffix removed.
        port: Port given in the URL, otherwise 80 for http and 443 for https.
        path: Everything after the host, always starting with ``/``.
    """

    # Well-known port for each supported scheme.
    _DEFAULT_PORTS = {"http": 80, "https": 443}

    def __init__(self, url: str):
        """Split *url* into scheme, host, port and path.

        Raises:
            ValueError: if *url* has no ``://`` separator or the port is not
                an integer.
            AssertionError: if the scheme is neither ``http`` nor ``https``.
        """
        self.scheme, url = url.split("://", 1)
        assert self.scheme in ["http", "https"]
        # A bare host such as "example.org" means the root path.
        if "/" not in url:
            url = url + "/"
        self.host, url = url.split("/", 1)
        self.path = "/" + url
        self.port = self._DEFAULT_PORTS[self.scheme]
        # An explicit "host:port" overrides the scheme's default port.
        if ":" in self.host:
            self.host, port = self.host.split(":", 1)
            self.port = int(port)

    def _build_request(self) -> str:
        """Return the HTTP/1.0 GET request text for this URL."""
        # The Host header must carry the port whenever it is not the
        # scheme's default, otherwise servers on custom ports may
        # route the request to the wrong virtual host.
        host_header = self.host
        if self.port != self._DEFAULT_PORTS.get(self.scheme):
            host_header = f"{self.host}:{self.port}"
        return (
            f"GET {self.path} HTTP/1.0\r\n"
            f"Host: {host_header}\r\n"
            "\r\n"
        )

    def request(self) -> str:
        """Fetch the resource and return the response body as text.

        Opens a TCP connection (wrapped in TLS for ``https``), sends a
        single HTTP/1.0 GET request, parses the status line and headers,
        and returns everything that follows, decoded as UTF-8.

        Raises:
            OSError: on connection failures; ``ssl.SSLError`` (a subclass)
                on TLS handshake or certificate verification failures.
            ValueError: if the status line or a header line is malformed.
            AssertionError: if the response declares a transfer or content
                encoding, which this client cannot decode.
        """
        sock = socket.socket(
            family=socket.AF_INET,
            type=socket.SOCK_STREAM,
            proto=socket.IPPROTO_TCP,
        )
        try:
            sock.connect((self.host, self.port))
            if self.scheme == "https":
                # The default context verifies the certificate chain and
                # the host name; a bare SSLContext would verify neither.
                context = ssl.create_default_context()
                sock = context.wrap_socket(sock, server_hostname=self.host)
            # sendall() keeps writing until the whole request is sent;
            # send() may stop after a partial write.
            sock.sendall(self._build_request().encode("utf8"))
            with sock.makefile("r", encoding="utf8", newline="\r\n") as response:
                status_line = response.readline()
                # The unpacking doubles as a format check of the status line.
                version, status, explanation = status_line.split(" ", 2)
                response_headers = {}
                while True:
                    line = response.readline()
                    if line == "\r\n":
                        break
                    header, value = line.split(":", 1)
                    # Header names are case-insensitive; normalise them.
                    response_headers[header.casefold()] = value.strip()
                # The body is returned verbatim, so encoded bodies are
                # rejected rather than returned as garbage.
                assert "transfer-encoding" not in response_headers
                assert "content-encoding" not in response_headers
                return response.read()
        finally:
            # Runs on success and on every error path, so the socket
            # never leaks.
            sock.close()
def show(body: str) -> None:
    """Print the text of *body* with everything between ``<`` and ``>`` removed.

    The angle brackets themselves are never printed, and no trailing
    newline is added. The visible characters are collected first and
    written with a single ``print`` call rather than one call per
    character.
    """
    visible = []
    in_tag = False
    for char in body:
        if char == "<":
            in_tag = True
        elif char == ">":
            in_tag = False
        elif not in_tag:
            visible.append(char)
    print("".join(visible), end="")
def load(url: URL):
    """Download the resource behind *url* and print its text without tags."""
    show(url.request())
if __name__ == '__main__':
    import sys

    # Without a URL argument the script used to die with a bare IndexError;
    # exit with a usage message on stderr instead. Extra arguments are
    # still ignored.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} URL")
    load(URL(sys.argv[1]))