import socket
import ssl
import sys
from typing import Optional


class URL:
    """A parsed http/https URL that can fetch itself with a raw HTTP/1.1 request.

    Attributes set by the constructor:
        scheme: "http" or "https".
        host: host name with any ":port" suffix removed.
        port: explicit port from the URL, else 80 (http) or 443 (https).
        path: request target, always starting with "/".
        headers: request headers to send; starts with just "Host".
        request_string: the most recently assembled request text.
        http_version: protocol token placed in the request line.
    """

    def __init__(self, url: str):
        """Split *url* into scheme, host, port and path.

        Raises:
            ValueError: if *url* has no "://" separator or a non-numeric port.
            AssertionError: if the scheme is neither "http" nor "https".
        """
        self.scheme, url = url.split("://", 1)
        # Raised explicitly (rather than via `assert`) so the check still runs
        # under `python -O`; the exception type is kept for existing callers.
        if self.scheme not in ("http", "https"):
            raise AssertionError(f"unsupported URL scheme: {self.scheme!r}")

        if "/" not in url:
            url = url + "/"
        self.host, url = url.split("/", 1)
        self.path = "/" + url

        self.port = 80 if self.scheme == "http" else 443
        if ":" in self.host:
            self.host, port = self.host.split(":", 1)
            self.port = int(port)

        self.request_string = ""
        self.http_version = "HTTP/1.1"
        self.headers = {"Host": self.host}

    def add_request_line(self, method: str) -> None:
        """Append the request line ("METHOD path version") to the request text."""
        self.request_string += f"{method} {self.path} {self.http_version}\r\n"

    def add_header(self, key: str, value: str) -> None:
        """Set a single request header, replacing any value stored under *key*."""
        self.headers[key] = value

    def add_headers(self, headers: dict[str, str]) -> None:
        """Merge *headers* into the stored request headers."""
        self.headers.update(headers)

    def add_default_headers(self) -> None:
        """Fill in the headers every request needs.

        "Connection: close" is always enforced because `request` reads the
        body until the server closes the connection. "User-Agent" is only a
        fallback: a value supplied by the caller (in any letter case) is kept.
        """
        # Header names are case-insensitive; drop any spelling of Connection
        # so that exactly one "Connection: close" is sent.
        for name in [n for n in self.headers if n.casefold() == "connection"]:
            del self.headers[name]
        self.headers["Connection"] = "close"

        if not any(n.casefold() == "user-agent" for n in self.headers):
            self.headers["User-Agent"] = "PyWebBrowser"

    def end_headers(self) -> None:
        """Append all stored headers and the blank line ending the header block."""
        header_lines = [f"{key}: {value}\r\n" for key, value in self.headers.items()]
        self.request_string += "".join(header_lines) + "\r\n"

    def _build_request(self, method: str) -> str:
        """Assemble a fresh request for *method* and return its text."""
        # Start from scratch so repeated requests on one URL object do not
        # accumulate earlier request lines and headers.
        self.request_string = ""
        self.add_request_line(method)
        self.add_default_headers()
        self.end_headers()
        return self.request_string

    def request(self, method: str = "GET", headers: Optional[dict[str, str]] = None) -> str:
        """Send the request and return the response body as text.

        Args:
            method: HTTP method placed in the request line.
            headers: extra request headers; merged into `self.headers`.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            AssertionError: if the response uses transfer-encoding or
                content-encoding, which this client cannot decode.
            ValueError: if the status line or a header line is malformed.
            OSError: on connection or TLS failure (including certificate
                verification errors for https).
        """
        if headers is not None:
            self.add_headers(headers)
        payload = self._build_request(method).encode("utf8")

        sock = socket.socket(
            family=socket.AF_INET,
            type=socket.SOCK_STREAM,
            proto=socket.IPPROTO_TCP,
        )
        try:
            sock.connect((self.host, self.port))
            if self.scheme == "https":
                # The default context verifies the certificate chain and host
                # name; a bare SSLContext(PROTOCOL_TLS) verifies neither.
                context = ssl.create_default_context()
                sock = context.wrap_socket(sock, server_hostname=self.host)

            # sendall retries until the whole request has been written.
            sock.sendall(payload)

            with sock.makefile("r", encoding="utf8", newline="\r\n") as response:
                status_line = response.readline()
                version, status, explanation = status_line.split(" ", 2)

                response_headers = {}
                while True:
                    line = response.readline()
                    if line == "\r\n":
                        break
                    header, value = line.split(":", 1)
                    response_headers[header.casefold()] = value.strip()

                # Explicit raises so the checks survive `python -O`.
                if "transfer-encoding" in response_headers:
                    raise AssertionError("transfer-encoding is not supported")
                if "content-encoding" in response_headers:
                    raise AssertionError("content-encoding is not supported")

                return response.read()
        finally:
            # Runs on every path, so the socket is released even on errors.
            sock.close()


def show(body: str) -> None:
    """Print *body* with everything between "<" and ">" removed.

    No trailing newline is added.
    """
    visible = []
    in_tag = False
    for char in body:
        if char == "<":
            in_tag = True
        elif char == ">":
            in_tag = False
        elif not in_tag:
            visible.append(char)
    print("".join(visible), end="")


def load(url: URL) -> None:
    """Fetch *url* and print its text content with tags stripped."""
    body = url.request()
    show(body)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} URL")
    load(URL(sys.argv[1]))