GPT from Scratch
gpt from scratch notes
1115394
!$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
65
# Tokenize the text character by character: this is a character-level
# language model, so every character is mapped to its own integer token.
def encode(text: str) -> List[int]:
    """Convert a string into a list of integer token ids, one per character.

    Each character is looked up in a table built from the module-level
    ``chars`` sequence; ids follow the order of ``chars`` and start at 1.

    Args:
        text: The string to tokenize.

    Returns:
        The token ids for ``text``, in the same order as its characters.
        An empty string yields an empty list.

    Raises:
        KeyError: If ``text`` contains a character that is not in ``chars``.
    """
    # NOTE(review): ids are 1-based here (start=1), matching decode(). Confirm
    # that downstream consumers of these ids do not assume 0-based indexing.
    lookup_table = {letter: idx for idx, letter in enumerate(chars, start=1)}
    return [lookup_table[char] for char in text]
def decode(token: List[int]) -> str:
    """Convert a list of integer token ids back into a string.

    This is the inverse of ``encode``: each id is looked up in a table built
    from the module-level ``chars`` sequence, with ids starting at 1.

    Args:
        token: The token ids to convert, in order.

    Returns:
        The string formed by concatenating the character for each id.
        An empty list yields an empty string.

    Raises:
        KeyError: If ``token`` contains an id that has no entry in the table.
    """
    # NOTE(review): ids are 1-based here (start=1), matching encode(). Confirm
    # that downstream producers of these ids do not assume 0-based indexing.
    lookup_table = {idx: letter for idx, letter in enumerate(chars, start=1)}
    # join builds the result in one pass instead of re-copying the growing
    # string on every iteration.
    return "".join(lookup_table[num] for num in token)
# Round-trip sanity check: decoding the encoded text must reproduce the input.
original_txt = "hi there"
# Encode the same variable that is compared below, so the check cannot drift
# out of sync if the sample text is edited.
encoded_token = encode(original_txt)
decoded_txt = decode(encoded_token)
assert original_txt == decoded_txt, "Error in Encoder - Decoder"