PyWebBrowser/browser.py
Matthew Welch 57e1fae538 add easy way to add headers
add Connection header to add support for http 1.1
2024-11-28 16:40:02 -08:00

103 lines
2.8 KiB
Python

import socket
import ssl
class URL:
    """An ``http``/``https`` URL plus the state used to fetch it.

    The constructor parses the URL string; :meth:`request` opens a
    connection, sends one HTTP request and returns the response body.

    Attributes set by the constructor:
        scheme, host, port, path: components parsed from the URL string.
        http_version: protocol token written on the request line.
        headers: request headers to send; starts out with only ``Host``.
        request_string: text of the request being built / last sent.
    """

    def __init__(self, url: str):
        """Split *url* into scheme, host, port and path.

        Raises:
            ValueError: if *url* has no ``://`` separator, uses a scheme
                other than ``http``/``https``, or has a non-numeric port.
        """
        self.scheme, separator, rest = url.partition("://")
        # Raise rather than assert: asserts are stripped under ``python -O``,
        # which would let an unsupported scheme through with no port set.
        if not separator or self.scheme not in ("http", "https"):
            raise ValueError(
                f"unsupported URL (expected http:// or https://): {url!r}"
            )
        # A URL without any "/" after the host gets the root path.
        self.host, _, path = rest.partition("/")
        self.path = "/" + path
        default_port = 80 if self.scheme == "http" else 443
        self.port = default_port
        if ":" in self.host:
            self.host, port = self.host.split(":", 1)
            self.port = int(port)
        self.request_string = ""
        self.http_version = "HTTP/1.1"
        # The Host header must carry the port when it is not the scheme default.
        if self.port == default_port:
            host_header = self.host
        else:
            host_header = f"{self.host}:{self.port}"
        self.headers = {"Host": host_header}

    def add_request_line(self, method):
        """Append the ``METHOD path HTTP-version`` line to the request text."""
        self.request_string += f"{method} {self.path} {self.http_version}\r\n"

    def add_header(self, key, value):
        """Set a single request header, replacing any value stored under *key*."""
        self.headers[key] = value

    def add_headers(self, headers: dict[str, str]):
        """Merge *headers* into the request headers; given values win."""
        self.headers.update(headers)

    def add_default_headers(self):
        """Add ``Connection: close`` and ``User-Agent`` unless already set.

        Headers that were set explicitly are left untouched (compared
        case-insensitively), so a caller-supplied ``User-Agent`` is not
        overwritten by the default.
        """
        present = {name.casefold() for name in self.headers}
        defaults = (("Connection", "close"), ("User-Agent", "PyWebBrowser"))
        for name, value in defaults:
            if name.casefold() not in present:
                self.headers[name] = value

    def end_headers(self):
        """Append every stored header and the blank line that ends the head."""
        for key, value in self.headers.items():
            self.request_string += f"{key}: {value}\r\n"
        self.request_string += "\r\n"

    def _build_request(self, method: str) -> str:
        """Rebuild ``request_string`` from scratch for *method* and return it."""
        # Start empty so a repeated request() does not append to the text of
        # the previous one (which would send two request lines).
        self.request_string = ""
        self.add_request_line(method)
        self.add_default_headers()
        self.end_headers()
        return self.request_string

    @staticmethod
    def _read_response(response) -> str:
        """Parse an HTTP response from a text stream and return its body.

        *response* is a text file object whose lines end in ``\\r\\n``.

        Raises:
            ValueError: on a malformed status line or header, on a stream
                that ends before the blank line closing the headers, or
                when the response uses ``Transfer-Encoding`` or
                ``Content-Encoding`` (neither is decoded here).
        """
        status_line = response.readline()
        # Expect at least "HTTP-version SP status-code".
        if len(status_line.split(" ", 2)) < 2:
            raise ValueError(f"malformed HTTP status line: {status_line!r}")
        response_headers = {}
        while True:
            line = response.readline()
            if line == "\r\n":
                break
            if line == "":
                raise ValueError("connection closed before end of headers")
            header, colon, value = line.partition(":")
            if not colon:
                raise ValueError(f"malformed HTTP header line: {line!r}")
            # Header names are case-insensitive; normalise for lookup.
            response_headers[header.casefold()] = value.strip()
        for unsupported in ("transfer-encoding", "content-encoding"):
            if unsupported in response_headers:
                raise ValueError(f"unsupported response header: {unsupported}")
        return response.read()

    def request(self, method: str = "GET",
                headers: "dict[str, str] | None" = None) -> str:
        """Send one request for this URL and return the response body.

        Args:
            method: HTTP method written on the request line.
            headers: extra request headers, merged into ``self.headers``
                before the defaults are filled in.

        Returns:
            The response body decoded as UTF-8. The body is read until the
            server closes the connection, which is why ``Connection: close``
            is sent by default.

        Raises:
            ValueError: see :meth:`_read_response`.
            OSError: on connection or TLS failures (``ssl.SSLError`` is a
                subclass), including certificate verification errors.
        """
        if headers is not None:
            self.add_headers(headers)
        payload = self._build_request(method).encode("utf8")
        sock = socket.socket(
            family=socket.AF_INET,
            type=socket.SOCK_STREAM,
            proto=socket.IPPROTO_TCP,
        )
        try:
            sock.connect((self.host, self.port))
            if self.scheme == "https":
                # The default context verifies the certificate chain and the
                # hostname; a bare SSLContext(PROTOCOL_TLS) checks neither.
                context = ssl.create_default_context()
                sock = context.wrap_socket(sock, server_hostname=self.host)
            # sendall keeps writing until the whole request is out;
            # send may stop after a partial write.
            sock.sendall(payload)
            with sock.makefile("r", encoding="utf8", newline="\r\n") as response:
                return self._read_response(response)
        finally:
            # Runs on every path so the socket is not leaked on errors.
            sock.close()
def show(body: str) -> None:
    """Print the text of *body* with everything between ``<`` and ``>`` removed.

    The angle brackets themselves are dropped as well. No trailing newline
    is added, and character entities are not decoded.
    """
    visible = []
    in_tag = False
    for char in body:
        if char == "<":
            in_tag = True
        elif char == ">":
            in_tag = False
        elif not in_tag:
            visible.append(char)
    # One write for the whole text instead of one print call per character.
    print("".join(visible), end="")
def load(url: URL):
    """Fetch *url* with its ``request`` method and pass the body to ``show``."""
    show(url.request())
if __name__ == '__main__':
    import sys

    # Script entry point: the first command-line argument is the URL to load.
    # Exit with a usage message when it is missing, rather than letting
    # sys.argv[1] fail with a bare IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} URL")
    load(URL(sys.argv[1]))