32 changes: 32 additions & 0 deletions docs/_static/css/custom.css
@@ -209,3 +209,35 @@ html[data-theme="dark"] .button-primary:visited:hover {
  padding-inline: 50px;
  padding-bottom: 24px;
}

/* Inline inputs for editable commands */
.inline-input {
  background-color: rgba(255, 255, 255, 0.1);
  color: inherit;
  border: 1px solid #555;
  border-radius: 3px;
  padding: 2px 6px;
  font-family: inherit;
  font-size: inherit;
  display: inline-block;
  vertical-align: middle;
  margin: 0 2px;
  box-sizing: content-box;
}

.inline-input:focus {
  outline: none;
  border-color: #1A73E8;
  background-color: rgba(255, 255, 255, 0.2);
}

html[data-theme="light"] .inline-input {
  background-color: rgba(0, 0, 0, 0.05);
  border-color: #ccc;
  color: #333;
}

html[data-theme="light"] .inline-input:focus {
  background-color: rgba(0, 0, 0, 0.1);
  border-color: #1A73E8;
}
130 changes: 130 additions & 0 deletions docs/_static/js/editable_commands.js
@@ -0,0 +1,130 @@
/**
 * Handles inline editable commands in documentation.
 * Replaces placeholders in code blocks with inline input fields.
 */
document.addEventListener('DOMContentLoaded', () => {
  const codeBlocks = document.querySelectorAll('div.highlight-sh pre, div.highlight-bash pre, div.highlight-default pre');

  codeBlocks.forEach(block => {
    const originalHTML = block.innerHTML;

    const placeholders = [
      "<your virtual env name>",
      "<model name>",
      "<tokenizer path>",
      "<Hugging Face access token>",
      "<output directory to store run logs>",
      "<name for this run>",
      "<number of fine-tuning steps to run>",
      "<batch size per device>",
      "<Hugging Face dataset name>",
      "<data split for train>",
      "<data columns to train on>",
      "<gcs path for MaxText checkpoint>",
      "<Google Cloud Project ID>",
      "<Name of GKE Cluster>",
      "<GKE Cluster Zone>",
      "<Name of Workload>",
      "<TPU Type>",
      "<GCS Path for Output/Logs>",
      "<Fine-Tuning Steps>",
      "<Hugging Face Access Token>",
      "<Model Name>",
      "<Model Tokenizer>",
      "<Hugging Face Dataset Name>",
      "<Data Split for Train>",
      "<Data Columns to Train on>",
      "<cluster name>",
      "<GCP project ID>",
      "<zone name>",
      "<path/to/gcr.io>",
      "<number of slices>",
      "<Flag to use zarr3>",
      "<Flag to use ocdbt>",
      "<Hugging Face Model>",
      "<MaxText Model>",
      "<Tokenizer>",
      "<Name for this run>",
      "<Docker Image Name>"
    ];

    let newHTML = originalHTML;

    placeholders.forEach(placeholder => {
      // Build a regex for this placeholder that survives syntax highlighting:
      // escape regex metacharacters, then allow HTML tags between characters.
      const escapeRegex = (string) => string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

      const htmlEscapedKey = placeholder
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

      let pattern = '';
      for (let i = 0; i < htmlEscapedKey.length; i++) {
        const char = htmlEscapedKey[i];
        pattern += escapeRegex(char) + '(?:<[^>]+>)*';
      }
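      // For example, "<model name>" is HTML-escaped to "&lt;model name&gt;" and the
      // pattern permits tags between any two characters, so it still matches
      // highlighter output such as "&lt;<span class="n">model</span> name&gt;".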

      const regex = new RegExp(pattern, 'g');

      // Replace each match with an input element; the original placeholder
      // text becomes the input's placeholder attribute.
      const inputHTML = `<input class="inline-input" placeholder="${placeholder}" style="width: ${placeholder.length + 2}ch;" />`;

      newHTML = newHTML.replace(regex, inputHTML);
    });

    if (newHTML !== originalHTML) {
      block.innerHTML = newHTML;
    }
  });

  // Auto-resize the newly created inputs as the user types.
  document.querySelectorAll('.inline-input').forEach(input => {
    input.addEventListener('input', function () {
      this.style.width = Math.max(this.value.length, this.placeholder.length) + 2 + 'ch';
    });
  });

  /**
   * Intercept copy button clicks to include user input values.
   * Runs in the capture phase to precede sphinx-copybutton's listener.
   */
  document.addEventListener('click', (event) => {
    // Check whether the clicked element is a copy button or sits inside one.
    const button = event.target.closest('.copybtn');
    if (!button) return;

    // Find the associated code block; sphinx-copybutton usually places the
    // button inside the .highlight container.
    const highlightDiv = button.closest('.highlight');
    if (!highlightDiv) return;

    const inputs = highlightDiv.querySelectorAll('input.inline-input');
    if (inputs.length === 0) return;

    const swaps = [];
    inputs.forEach(input => {
      // Stand in a temporary span carrying the input's current value.
      const span = document.createElement('span');
      // If the value is empty, fall back to the placeholder so the copied
      // text matches what the block originally showed.
      const val = input.value;
      span.textContent = val ? val : input.placeholder;

      // Plain text is what gets copied; gray merely hints that the
      // placeholder was used.
      span.style.color = val ? 'inherit' : 'gray';

      input.replaceWith(span);
      swaps.push({ input, span });
    });

    // Revert immediately after the current event loop turn, once
    // sphinx-copybutton has read the DOM.
    setTimeout(() => {
      swaps.forEach(({ input, span }) => {
        span.replaceWith(input);
      });
    }, 0);
  }, true);
});
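Together with the `conf.py` registration below, no special markup is needed on the docs side: any placeholder string from the list above, written verbatim inside a `sh`/`bash`/`default` code block, is rewritten into an editable input at page load. A minimal illustrative snippet (not part of this diff):

```bash
# In any docs .md shell block, write a known placeholder verbatim:
export HF_TOKEN=<Hugging Face access token>
# Once editable_commands.js runs, the bracketed text renders as an inline
# input, and the value typed there is included by the copy button.
```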
1 change: 1 addition & 0 deletions docs/conf.py
@@ -48,6 +48,7 @@
html_theme = "sphinx_book_theme"
html_static_path = ["_static"]
html_css_files = ["css/custom.css"]
html_js_files = ["js/editable_commands.js"]
html_logo = "_static/maxtext.png"

# -- Options for myst ----------------------------------------------
6 changes: 3 additions & 3 deletions docs/tutorials/posttraining/rl.md
@@ -69,9 +69,9 @@ Set up the following environment variables before running GRPO/GSPO:

```bash
# -- Model configuration --
-export HF_MODEL='llama3.1-8b-Instruct'
-export MODEL='llama3.1-8b'
-export TOKENIZER='meta-llama/Llama-3.1-8B-Instruct'
+export HF_MODEL=<Hugging Face Model> # e.g. 'llama3.1-8b-Instruct'
+export MODEL=<MaxText Model> # e.g. 'llama3.1-8b'
+export TOKENIZER=<Tokenizer> # e.g. 'meta-llama/Llama-3.1-8B-Instruct'
export HF_TOKEN=<Hugging Face access token>

# -- MaxText configuration --
20 changes: 15 additions & 5 deletions docs/tutorials/posttraining/rl_on_multi_host.md
@@ -39,26 +39,36 @@ Set up the following environment variables:

```bash
# -- Model configuration --
-export HF_MODEL='llama3.1-70b-Instruct'
-export MODEL='llama3.1-70b'
-export TOKENIZER='meta-llama/Llama-3.1-70B-Instruct'
+export HF_MODEL=<Hugging Face Model> # e.g. 'llama3.1-70b-Instruct'
+export MODEL=<MaxText Model> # e.g. 'llama3.1-70b'
+export TOKENIZER=<Tokenizer> # e.g. 'meta-llama/Llama-3.1-70B-Instruct'
export HF_TOKEN=<Hugging Face access token>

# -- MaxText configuration --
export BASE_OUTPUT_DIRECTORY=<output directory to store run logs> # e.g., gs://my-bucket/my-output-directory
-export RUN_NAME=llama-3-70b-grpo
+export RUN_NAME=<Name for this run> # e.g., llama-3-70b-grpo
export MAXTEXT_CKPT_PATH=${BASE_OUTPUT_DIRECTORY}/${RUN_NAME}/0/items

# -- Workload configuration --
export WORKLOAD=${RUN_NAME}
-export TPU_TYPE='v5p-128'
+export TPU_TYPE=<TPU Type> # e.g., 'v5p-128'
export TPU_CLUSTER=<cluster name>
export PROJECT_ID=<GCP project ID>
export ZONE=<zone name>
```

## Get your model checkpoint

### Option 1: Using an existing MaxText checkpoint

If you already have a MaxText-compatible model checkpoint, simply set the following environment variable and move on to the next section.

```bash
export MAXTEXT_CKPT_PATH=<gcs path for MaxText checkpoint> # e.g., gs://my-bucket/my-model-checkpoint/0/items
```

### Option 2: Converting from a Hugging Face checkpoint

You can convert a Hugging Face checkpoint to MaxText format using the `src/MaxText/utils/ckpt_conversion/to_maxtext.py` script. This is useful if you have a pre-trained model from Hugging Face that you want to use with MaxText.

First, ensure you have the necessary dependencies installed. Then, run the conversion script on a CPU machine. For large models, it is recommended to use the `--lazy_load_tensors` flag to reduce memory usage during conversion. \
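As a rough, illustrative sketch only (the tutorial's full command continues below the fold; argument names other than `--lazy_load_tensors` are assumptions, not taken from this diff):

```bash
# Hypothetical sketch -- argument names besides --lazy_load_tensors are assumed,
# not copied from the tutorial; treat the doc's full command as authoritative.
python3 src/MaxText/utils/ckpt_conversion/to_maxtext.py \
  model_name=${MODEL} \
  hf_access_token=${HF_TOKEN} \
  base_output_directory=${BASE_OUTPUT_DIRECTORY} \
  --lazy_load_tensors
```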
1 change: 1 addition & 0 deletions docs/tutorials/posttraining/sft.md
@@ -24,6 +24,7 @@ We use [Tunix](https://github.com/google/tunix), a JAX-based library designed fo
In this tutorial we use a single host TPU VM such as `v6e-8/v5p-8`. Let's get started!

## Install dependencies

```sh
# 1. Clone the repository
git clone https://github.com/AI-Hypercomputer/maxtext.git
5 changes: 2 additions & 3 deletions docs/tutorials/posttraining/sft_on_multi_host.md
@@ -51,8 +51,7 @@ bash dependencies/scripts/docker_build_dependency_image.sh WORKFLOW=post-trainin
### 1.3. Upload the Docker image to Artifact Registry
> **Note:** You will need the [**Artifact Registry Writer**](https://docs.cloud.google.com/artifact-registry/docs/access-control#permissions) role to push Docker images to your project's Artifact Registry and to allow the cluster to pull them during workload execution. If you don't have this permission, contact your project administrator to grant you this role through "Google Cloud Console -> IAM -> Grant access".
```bash
-# Replace `$USER_runner` with your desired image name
-export DOCKER_IMAGE_NAME=${USER}_runner
+export DOCKER_IMAGE_NAME=<Docker Image Name>
bash dependencies/scripts/docker_upload_runner.sh CLOUD_IMAGE_NAME=$DOCKER_IMAGE_NAME
```
The `docker_upload_runner.sh` script uploads your Docker image to Artifact Registry.
@@ -73,7 +72,7 @@ export ZONE=<GKE Cluster Zone>
# -- Workload Configuration --
export WORKLOAD_NAME=<Name of Workload> # e.g., sft-$(date +%s)
export TPU_TYPE=<TPU Type> # e.g., v6e-256
-export TPU_SLICE=1
+export TPU_SLICE=<number of slices>
export DOCKER_IMAGE="gcr.io/${PROJECT}/${DOCKER_IMAGE_NAME}"

# -- MaxText Configuration --