Towards transformers

ML
Author

Nipun Batra

Published

December 21, 2023

Basic Imports

import tiktoken
# Load the tokenizer registered under the name "cl100k_base".
encoding = tiktoken.get_encoding("cl100k_base")
# Encode a sample sentence; the result is the list of integer token ids shown just below.
encoding.encode("Hello World! This is a simple notebook")
[9906, 4435, 0, 1115, 374, 264, 4382, 38266]
# Decode the first four token ids of the sample sentence back into text.
encoding.decode([9906, 4435, 0, 1115])
'Hello World! This'
import pandas as pd

# Side length of the square grid: the first n * n token ids are displayed.
n = 20

# Decode every token id on its own, keyed by the id.
# NOTE(review): decoding a single token in isolation may be lossy when that
# token is not valid UTF-8 by itself — confirm against the tiktoken docs.
ser = {i: encoding.decode([i]) for i in range(n * n)}

# Lay the decoded strings out row by row, so the cell at (row r, column c)
# holds the text for token id r * n + c.
pd.DataFrame(
    pd.Series(ser).values.reshape(n, n)
)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
0 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
1 5 6 7 8 9 : ; < = > ? @ A B C D E F G H
2 I J K L M N O P Q R S T U V W X Y Z [ \
3 ] ^ _ ` a b c d e f g h i j k l m n o p
4 q r s t u v w x y z { | } ~
5
6
7
8
9         \t \n
10 \r              
11 
12 in t
13 er on a re at st en or th \n\n c le s it an ar al the
14 ;\n p f ou = is ing es w ion ed ic b d et m o \t\t ro
15 as el ct nd in h ent id n am to re -- { of om );\n im \r\n
16 ( il // and ur se l ex S ad " ch ut if ** } em ol th
17 )\n {\n g ig iv ,\n ce od v ate T ag ay * ot us C st I un
18 ul ue A ow ' ew < ation () for ab ort um ame is pe tr ck y
19 ist ---- .\n\n he e lo M be ers on con ap ub P ass int >\n ly urn