import tiktoken
---
author: Nipun Batra
badges: true
categories:
- ML
- transformers
- attention-mechanism
- deep-learning
- neural-networks
- nlp
- sequence-modeling
- pytorch
date: '2023-12-21'
title: Towards Transformers
toc: true
---
Basic Imports
encoding = tiktoken.get_encoding("cl100k_base")
encoding.encode("Hello World! This is a simple notebook")
[9906, 4435, 0, 1115, 374, 264, 4382, 38266]
encoding.decode([9906, 4435, 0, 1115])
'Hello World! This'
ser = {}
n =20
for i in range(n**2):
    ser[i] = encoding.decode([i])
import pandas as pd
pd.DataFrame(pd.Series(ser).values.reshape(n,n))

|  | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ! | " | # | $ | % | & | ' | ( | ) | * | + | , | - | . | / | 0 | 1 | 2 | 3 | 4 |
| 1 | 5 | 6 | 7 | 8 | 9 | : | ; | < | = | > | ? | @ | A | B | C | D | E | F | G | H |
| 2 | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z | [ | \ |
| 3 | ] | ^ | _ | ` | a | b | c | d | e | f | g | h | i | j | k | l | m | n | o | p |
| 4 | q | r | s | t | u | v | w | x | y | z | { | \| | } | ~ | � | � | � | � | � | � |
| 5 | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � |
| 6 | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � |
| 7 | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � |
| 8 | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � |
| 9 | � | � | � | � | � | � | � | � | \t | \n | ||||||||||
| 10 | \r | |||||||||||||||||||
| 11 | | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | |
| 12 | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | � | in | t | ||
| 13 | er | on | a | re | at | st | en | or | th | \n\n | c | le | s | it | an | ar | al | the | ||
| 14 | ;\n | p | f | ou | = | is | ing | es | w | ion | ed | ic | b | d | et | m | o | \t\t | ro | |
| 15 | as | el | ct | nd | in | h | ent | id | n | am | to | re | -- | { | of | om | );\n | im | \r\n | |
| 16 | ( | il | // | and | ur | se | l | ex | S | ad | " | ch | ut | if | ** | } | em | ol | th | |
| 17 | )\n | {\n | g | ig | iv | ,\n | ce | od | v | ate | T | ag | ay | * | ot | us | C | st | I | un |
| 18 | ul | ue | A | ow | ' | ew | < | ation | () | for | ab | ort | um | ame | is | pe | tr | ck | � | y |
| 19 | ist | ---- | .\n\n | he | e | lo | M | be | ers | on | con | ap | ub | P | ass | int | >\n | ly | urn |