<?xml version="1.0"?><feed xmlns:media="http://search.yahoo.com/mrss/" xmlns:gr="http://www.google.com/schemas/reader/atom/" xmlns:idx="urn:atom-extension:indexing" xmlns="http://www.w3.org/2005/Atom" idx:index="no" gr:dir="ltr"><!--
Content-type: Preventing XSRF in IE.

--><generator uri="https://bazqux.com">BazQux Reader</generator><id>tag:google.com,2005:reader/feed/http://007unlicensedtotest.blogspot.com/feeds/posts/default</id><title>blogs</title><subtitle type="html">blogs</subtitle><link rel="self" href="https://bazqux.com/feed/d45a6ead98c5f8f9f99f?no_branding"></link><gr:continuation>10011568767100</gr:continuation><updated>2026-06-17T18:21:30Z</updated><entry gr:crawl-timestamp-msec="1781706660000"><id gr:original-id="https://medium.com/p/e0fed9d9e61a">tag:google.com,2005:reader/item/0000091b0000007d</id><category term="technology"></category><category term="programming"></category><category term="software-testing"></category><category term="coding"></category><category term="automation"></category><title type="html">Working with Data Tables with Playwright Java</title><published>2026-06-17T14:31:00Z</published><updated>2026-06-17T14:31:00Z</updated><link rel="alternate" href="https://medium.com/javarevisited/working-with-data-tables-with-playwright-java-e0fed9d9e61a?source=rss-d56167afca7d------2" type="text/html"></link><summary type="html">&lt;div&gt;&lt;p&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://medium.com/javarevisited/working-with-data-tables-with-playwright-java-e0fed9d9e61a?source=rss-d56167afca7d------2&quot;&gt;&lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://cdn-images-1.medium.com/max/1920/1*ydZ4C7nvMrf9kfefF7U-GA.jpeg&quot; width=&quot;1920&quot;&gt;&lt;/a&gt;&lt;/p&gt;&lt;p&gt;Master Playwright Java table automation: locate HTML tables, rows, columns, and cells with assertions and best practices.&lt;/p&gt;&lt;p&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://medium.com/javarevisited/working-with-data-tables-with-playwright-java-e0fed9d9e61a?source=rss-d56167afca7d------2&quot;&gt;Continue reading on Javarevisited »&lt;/a&gt;&lt;/p&gt;&lt;/div&gt;</summary><author><name>Mohammad Faisal Khatri</name></author><source 
gr:stream-id="feed/https://medium.com/@iamfaisalkhatri/feed"><id>tag:google.com,2005:reader/feed/https://medium.com/@iamfaisalkhatri/feed</id><title type="html">Stories by Mohammad Faisal Khatri on Medium</title><link rel="alternate" href="https://medium.com/@iamfaisalkhatri?source=rss-d56167afca7d------2" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781706660000"><id gr:original-id="https://medium.com/p/e0fed9d9e61a">tag:google.com,2005:reader/item/00000a830000007c</id><category term="technology"></category><category term="programming"></category><category term="software-testing"></category><category term="coding"></category><category term="automation"></category><title type="html">Working with Data Tables with Playwright Java</title><published>2026-06-17T14:31:00Z</published><updated>2026-06-17T14:31:00Z</updated><link rel="alternate" href="https://medium.com/javarevisited/working-with-data-tables-with-playwright-java-e0fed9d9e61a?source=rss-d56167afca7d------2" type="text/html"></link><summary type="html">&lt;div&gt;&lt;p&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://medium.com/javarevisited/working-with-data-tables-with-playwright-java-e0fed9d9e61a?source=rss-d56167afca7d------2&quot;&gt;&lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://cdn-images-1.medium.com/max/1920/1*ydZ4C7nvMrf9kfefF7U-GA.jpeg&quot; width=&quot;1920&quot;&gt;&lt;/a&gt;&lt;/p&gt;&lt;p&gt;Master Playwright Java table automation: locate HTML tables, rows, columns, and cells with assertions and best practices.&lt;/p&gt;&lt;p&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://medium.com/javarevisited/working-with-data-tables-with-playwright-java-e0fed9d9e61a?source=rss-d56167afca7d------2&quot;&gt;Continue reading on Javarevisited »&lt;/a&gt;&lt;/p&gt;&lt;/div&gt;</summary><author><name>Mohammad Faisal Khatri</name></author><source 
gr:stream-id="feed/https://medium.com/feed/@iamfaisalkhatri"><id>tag:google.com,2005:reader/feed/https://medium.com/feed/@iamfaisalkhatri</id><title type="html">Stories by Mohammad Faisal Khatri on Medium</title><link rel="alternate" href="https://medium.com/@iamfaisalkhatri?source=rss-d56167afca7d------2" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781686800000"><id gr:original-id="https://karlosmid.com/2026/06/recap-on-fine-tuning-to-follow-instructions">tag:google.com,2005:reader/item/00000563000001db</id><category term="llm-from-scratch"></category><category term="gpt"></category><category term="tutorials"></category><category term="llm"></category><category term="workbook"></category><title type="html">Recap on fine-tuning to follow instructions</title><published>2026-06-17T09:00:00Z</published><updated>2026-06-17T09:00:00Z</updated><link rel="alternate" href="https://karlosmid.com/2026/06/recap-on-fine-tuning-to-follow-instructions/" type="text/html"></link><summary type="html">&lt;h2 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-tldr&quot;&gt;TL;DR&lt;/h2&gt;

&lt;p&gt;I am recapping the blog post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://karlosmid.com/fine-tuning-to-follow-instructions/&quot;&gt;Fine-tuning to follow instructions&lt;/a&gt; by reviewing the chapter 7 workbook questions from &lt;em&gt;Build an LLM from Scratch&lt;/em&gt; and reading Giles’ blog posts on chapter 7.&lt;/p&gt;

&lt;h2 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-giles-blog-posts&quot;&gt;Giles blog posts&lt;/h2&gt;

&lt;p&gt;Giles wrote three blog posts: &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://www.gilesthomas.com/2025/10/llm-from-scratch-24-the-transcript-hack&quot;&gt;part24&lt;/a&gt;, &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://www.gilesthomas.com/2025/10/llm-from-scratch-25-instruction-fine-tuning&quot;&gt;part25&lt;/a&gt;, and &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://www.gilesthomas.com/2025/11/llm-from-scratch-26-evaluating-the-fine-tuned-model&quot;&gt;part26&lt;/a&gt;.&lt;/p&gt;

&lt;p&gt;In the part24 post, another sidestep, Giles tries to feed instruction prompts into the GPT-2 models mentioned in the book. The result is that none of those models could give a meaningful answer, but the base Qwen 3 model with 600M parameters gave meaningful instruction results.&lt;/p&gt;

&lt;p&gt;In part25, Giles always thinks more deeply about topics presented in books. For example, in relation to the Alpaca prompt format, it was created because interaction with the model was one-shot, due to the limited size of the model context. Today’s models have context sizes in the hundreds of thousands of tokens, so they could accept lengthy conversation transcripts. Then he explains that padding tokens waste GPU cycles, so we have batches of different sizes. The question is why we do not put -100 instead of padding tokens in inputs, but only in targets. Giles explains that this is not valid token vocabulary, but targets are also vocabulary tokens, so I do not understand that reasoning.&lt;/p&gt;

&lt;p&gt;part26 is the shortest blog post! It is about evaluating our instruction fine-tuned model using an Ollama 3 model.&lt;/p&gt;

&lt;h2 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-workbook-questions&quot;&gt;Workbook questions&lt;/h2&gt;

&lt;h3 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-quick-questions&quot;&gt;Quick questions&lt;/h3&gt;

&lt;p&gt;1 What is the primary challenge that pretrained LLMs often face regarding instructions?&lt;/p&gt;

&lt;p&gt;A Inability to complete sentences&lt;br&gt;
B Difficulty in generating coherent text&lt;br&gt;
C Limited vocabulary size.&lt;br&gt;
D Struggling with specific instructions like grammar correction or voice conversion.&lt;/p&gt;

&lt;p&gt;My answer: D&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;p&gt;2 What is the key component in preparing a dataset for supervised instruction fine-tuning?&lt;/p&gt;

&lt;p&gt;A Pre-trained language model.&lt;br&gt;
B Optimization algorithm.&lt;br&gt;
C Instruction-response pairs.&lt;br&gt;
D Tokenization algorithm.&lt;/p&gt;

&lt;p&gt;My answer: C&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;p&gt;3 What data format is commonly used for instruction datasets due to its human and machine readability?&lt;/p&gt;

&lt;p&gt;A JSON (JavaScript Object Notation)&lt;br&gt;
B CSV (comma separated values)&lt;br&gt;
C YAML (YAML Ain’t Markup Language)&lt;br&gt;
D XML (Extensible Markup Language).&lt;/p&gt;

&lt;p&gt;My answer: A&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;p&gt;4 What is the purpose of a custom collate function in the context of instruction fine-tuning?&lt;/p&gt;

&lt;p&gt;A To optimize the model’s architecture.&lt;br&gt;
B To handle the specific formatting and requirements of the instruction fine-tuning dataset.&lt;br&gt;
C To pre-process the input data.&lt;br&gt;
D To evaluate the model’s performance&lt;/p&gt;

&lt;p&gt;My answer: B&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;p&gt;5 What is the purpose of using the ignore_index parameter (-100) in the custom collate function?&lt;/p&gt;

&lt;p&gt;A To mark the end of a sequence.&lt;br&gt;
B To indicate an unknown token.&lt;br&gt;
C To exclude padding tokens from the loss calculation.&lt;br&gt;
D To identify the start of a sequence.&lt;/p&gt;

&lt;p&gt;My answer: C&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;p&gt;6 What is the purpose of saving the fine-tuned model’s state dictionary?&lt;/p&gt;

&lt;p&gt;A To compress the model for efficient storage.&lt;br&gt;
B To visualize the model’s architecture.&lt;br&gt;
C To save the model’s parameters for later use or reuse in other projects.&lt;br&gt;
D To improve the model’s performance.&lt;/p&gt;

&lt;p&gt;My answer: C&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;h3 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-71-introduction-to-instruction-fine-tuning&quot;&gt;7.1 Introduction to instruction fine-tuning&lt;/h3&gt;

&lt;p&gt;1 What is the primary function of a pretrained LLM?&lt;/p&gt;

&lt;p&gt;My answer: The primary function of a pretrained LLM is to provide the model with knowledge of the language in which it was trained, including its grammar patterns.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;A pretrained LLM is primarily capable of text completion, meaning it can finish sentences or write text paragraphs given a fragment as input.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;It seems that I still do not know how to describe the primary task of a pre-trained LLM.&lt;/p&gt;

&lt;p&gt;2 What is the challenge that pretrained LLMs often face?&lt;/p&gt;

&lt;p&gt;My answer: The challenge is that pretrained LLMs are not good at resolving instruction tasks.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Pretrained LLMs often struggle with specific instructions, such as grammar correction or voice conversion, requiring further fine-tuning.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;This one is correct.&lt;/p&gt;

&lt;p&gt;3 What is the purpose of instruction fine-tuning?&lt;/p&gt;

&lt;p&gt;My answer: The purpose of instruction fine-tuning is to train model for instructions tasks solving using as fine-tuning data instruction set with response how to resolve this task.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Instruction fine-tuning aims to improve an LLM’s ability to follow specific instructions and generate desired responses based on those instructions.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;My answer is correct.&lt;/p&gt;

&lt;p&gt;4 What is a crucial aspect of instruction fine-tuning?&lt;/p&gt;

&lt;p&gt;My answer: A crucial aspect of instruction fine-tuning is that we make the LLM into a chatbot.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Preparing a suitable dataset is a key aspect of instruction fine-tuning, providing the model with examples of instructions and desired responses.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Wrong answer.&lt;/p&gt;

&lt;h3 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-72-preparing-a-dataset-for-supervised-instruction-fine-tuning&quot;&gt;7.2 Preparing a dataset for supervised instruction fine-tuning&lt;/h3&gt;

&lt;p&gt;1 What is the purpose of the instruction dataset used for fine-tuning a pretrained LLM?&lt;/p&gt;

&lt;p&gt;My answer: Purpose of the instruction dataset is to provide training, validation and testing datasets so we could fine tune LLM to resolve instruction tasks.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The instruction dataset consists of instruction-response pairs that are used to train the LLM to follow instructions and generate appropriate responses based on given inputs.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;My answer is wrong.&lt;/p&gt;

&lt;p&gt;2 Describe the format of the instruction dataset used in this section.&lt;/p&gt;

&lt;p&gt;My answer: Each record has three keys, instructions contains instruction task, input is empty and output contains result of instruction task.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The dataset is stored in a JSON file and contains entries that are Python dictionary objects. Each entry includes an ‘instruction’, ‘input’, and ‘output’ field, representing the task, input data, and desired response, respectively.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;I forgot to answer that format is JSON.&lt;/p&gt;

&lt;p&gt;3 What are the two prompt styles mentioned in the section, and how do they differ?&lt;/p&gt;

&lt;p&gt;My answer: We have alpaca format and phi3 format. In phi3 we are using user and assistant tags, and in alpaca we use instructions, input and output tags.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The two prompt styles are Alpaca and Phi-3. Alpaca uses a structured format with defined sections for instruction, input, and response, while Phi-3 employs a simpler format with designated &amp;lt;|user|&amp;gt; and &amp;lt;|assistant|&amp;gt; tokens&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;My answer is correct.&lt;/p&gt;

&lt;p&gt;4 What is the purpose of &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;format_input&lt;/code&gt; action and how does it work?&lt;/p&gt;

&lt;p&gt;My answer: The purpose of &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;format_input&lt;/code&gt; function is to format input dataset to alpaca or phi3 prompt format. It creates a chatbot prompt.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The format_input function converts the entries in the instruction dataset into the Alpaca-style input format. It constructs a formatted string that includes the instruction, input (if available), and a placeholder for the response.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;My answer is wrong.&lt;/p&gt;

&lt;p&gt;5 How is the instruction dataset divided into training, validation, and test sets?&lt;/p&gt;

&lt;p&gt;My answer: We randomly divide instruction dataset, 85% is training data, 10% is testing set, and rest is validation set.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The dataset is divided into training, validation, and test sets using a specific ratio. The training set is used to train the model, the validation set is used to evaluate the model’s performance during training, and the test set is used to evaluate the model’s final performance.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;My answer is correct.&lt;/p&gt;

&lt;p&gt;6 Match the terms on the left to their description on the right:&lt;/p&gt;

&lt;p&gt;&lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://karlosmid.com/assets/2026/06/instruction-prompt-styles-table.png&quot; alt=&quot;Instruction prompt styles table&quot;&gt;&lt;/p&gt;

&lt;p&gt;My answer: Instruction-response pairs =&amp;gt; 3, Prompt styles =&amp;gt; 1, Alpaca prompt style =&amp;gt; 4, Phi3 Prompt style =&amp;gt; 2&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;h3 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-73-organizing-data-into-training-batches&quot;&gt;7.3 Organizing data into training batches&lt;/h3&gt;

&lt;p&gt;1 What is the purpose of a custom collate function in instruction fine-tuning?&lt;/p&gt;

&lt;p&gt;My answer: Purpose of a custom collate function is to prepare prompts in alpaca or phi3 formats from input data. It also pads input data to the longest batch value, and sets in target data padding tokens to -100 value, except for the one next to last token.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;A custom collate function is used to handle the specific requirements and formatting of the instruction fine-tuning dataset. It ensures that training examples are padded to the same length within each batch, allowing for efficient processing by the model.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;My answer is correct.&lt;/p&gt;

&lt;p&gt;2 How does the custom collate function handle padding in instruction fine-tuning?&lt;/p&gt;

&lt;p&gt;My answer: It pads inputs to the value of longest input in the batch. Targets padding tokens are replaced with -100 instead of one next to last token.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The custom collate function pads training examples to the length of the longest example in each batch, using the &amp;lt;|endoftext|&amp;gt; token ID (50256). This minimizes unnecessary padding by only extending sequences to match the longest one in each batch.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;I forgot to answer value of padding token.&lt;/p&gt;

&lt;p&gt;3 Explain the role of target token IDs in instruction fine-tuning and how they are generated.&lt;/p&gt;

&lt;p&gt;My answer: Target token IDs are paired with the input so we know which token should come next. It is generated from input so that input is shifted by one token to the left.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Target token IDs represent the desired output sequence that the model should generate. They are created by shifting the input token IDs one position to the right, omitting the first token and appending an end-of-text token. This setup allows the model to learn how to predict the next token in a sequence.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;One token to the right and I did not answer that we are adding endoftext token when we reach end of input.&lt;/p&gt;

&lt;p&gt;4 Why are padding tokens replaced with the -100 placeholder value in the target token IDs?&lt;/p&gt;

&lt;p&gt;My answer: We are replacing padding tokens with -100 because -100 tokens are not considered in loss calculation.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Replacing padding tokens with -100 allows the cross entropy loss function to ignore them during training. This ensures that only meaningful data influences model learning and prevents padding tokens from contributing to the loss calculation&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;I did not answer that padding tokens should not contribute to loss function calculation.&lt;/p&gt;

&lt;p&gt;5 What is the purpose of retaining one end-of-text token in the target sequence?&lt;/p&gt;

&lt;p&gt;My answer: By keeping that token, we let the LLM know where the end of the input instruction is.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Retaining one end-of-text token in the target sequence helps the LLM learn to generate end-of-text tokens, which act as an indicator that the generated response is complete.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Wrong answer, we are keeping endoftext token in target sequence, not input. That was the question.&lt;/p&gt;

&lt;p&gt;6 Pieces of code have been removed from four places in this listing. Which of these terms have been removed and where should they go? Note that a term may appear more than once!&lt;/p&gt;

&lt;p&gt;A true B false C format D encode_text! E full_text&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;defp&lt;/span&gt; &lt;span&gt;encode_example&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;prompt_style&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;__1__&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;entry&lt;/span&gt;
    &lt;span&gt;# The model trains on the instruction/input prompt followed by the&lt;/span&gt;
    &lt;span&gt;# expected response target, matching the chapter 7 PyTorch dataset.&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;__3__&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;prompt_style&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;__4__&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;encode_example&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;prompt_style&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;__2__&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;token_ids&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;entry&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;__5__&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;prompt_style&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;__6__&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;prompt_length&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;entry&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;prompt_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;prompt_style&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;encode_text!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Kernel&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;%{&lt;/span&gt;&lt;span&gt;token_ids:&lt;/span&gt; &lt;span&gt;token_ids&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;prompt_length:&lt;/span&gt; &lt;span&gt;prompt_length&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;encode_example&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
         &lt;span&gt;_entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
         &lt;span&gt;_tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
         &lt;span&gt;_opts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
         &lt;span&gt;_prompt_style&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
         &lt;span&gt;mask_out_instructions_in_target&lt;/span&gt;
       &lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;raise&lt;/span&gt; &lt;span&gt;ArgumentError&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
          &lt;span&gt;&amp;quot;expected :mask_out_instructions_in_target to be a boolean, got: &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;inspect&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;mask_out_instructions_in_target&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;My answer: 1 =&amp;gt; B, 2 =&amp;gt; A, 3 =&amp;gt; E, 4 =&amp;gt; D, 5 =&amp;gt; E, 6 =&amp;gt; D&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;h3 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-74-creating-data-loaders-for-an-instruction-dataset&quot;&gt;7.4 Creating data loaders for an instruction dataset&lt;/h3&gt;

&lt;p&gt;1 What is the purpose of the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;custom_collate_fn&lt;/code&gt; function in the context of instruction fine-tuning?&lt;/p&gt;

&lt;p&gt;My answer: The purpose is to prepare input data into batches with input/target pairs, to pad data with endoftext tokens and skip loss function calculation token.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The custom_collate_fn function is used to batch the instruction dataset, ensuring that the input and target tensors are moved to the specified device (CPU, GPU, or MPS) before being fed into the LLM for fine-tuning.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;2 Explain the advantage of moving data to the target device within the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;custom_collate_fn&lt;/code&gt; instead of the main training loop.&lt;/p&gt;

&lt;p&gt;My answer: Advantage is faster calculation for training data.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;By performing the device transfer within the custom_collate_fn, the process becomes a background task, preventing it from blocking the GPU during model training and improving efficiency&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;3 How is the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;device&lt;/code&gt; setting determined and used in the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;custom_collate_fn&lt;/code&gt;?&lt;/p&gt;

&lt;p&gt;My answer: Did not answer as Elixir implementation is different.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The device setting is determined based on the availability of a GPU or MPS. The partial function from functools is used to create a new version of the custom_collate_fn with the device argument prefilled, ensuring that the function uses the correct device for data transfer.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;4 What is the purpose of the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;allowed_max_length&lt;/code&gt; parameter in the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;customized_collate_fn&lt;/code&gt;?&lt;/p&gt;

&lt;p&gt;My answer: We truncate inputs and targets to the value of this parameter. If value is nil, we keep original size of inputs/targets.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The allowed_max_length parameter is used to truncate the data to the maximum context length supported by the LLM model being fine-tuned, in this case, the GPT-2 model.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;5 Describe the process of creating data loaders for the training, validation, and test sets using the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;DataLoader&lt;/code&gt; class.&lt;/p&gt;

&lt;p&gt;My answer: We randomly split input data set to the percentage of total input data set: training is 85%, validation is 10% and rest is test set.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The DataLoader class is used to create data loaders for each set (training, validation, and test). The batch_size, collate_fn, shuffle, drop_last, and num_workers parameters are configured to control the batching process, shuffling, and data loading behavior&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;6 Pieces of the code have been removed from four places in the listing. Which of these terms have been removed and where should they go?&lt;/p&gt;

&lt;p&gt;A inputs B targets C maybe_truncate&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;def&lt;/span&gt; &lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;ignore_index&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;allowed_max_length&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_list&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;examples&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;map&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;normalize_example&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;# Find the longest sequence length after the one extra pad token that the&lt;/span&gt;
    &lt;span&gt;# Python collate function appends to every item.&lt;/span&gt;
    &lt;span&gt;batch_max_length&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;examples&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;map&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Kernel&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;amp;1&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;token_ids&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;+&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;))&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;max&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;fn&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt; &lt;span&gt;0&lt;/span&gt; &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;&lt;span&gt;inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;targets&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;map&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;examples&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;fn&lt;/span&gt; &lt;span&gt;%{&lt;/span&gt;&lt;span&gt;token_ids:&lt;/span&gt; &lt;span&gt;item&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;prompt_length:&lt;/span&gt; &lt;span&gt;prompt_length&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
        &lt;span&gt;# Elixir data is immutable, so this builds the equivalent of&lt;/span&gt;
        &lt;span&gt;# `new_item = item.copy(); new_item += [pad_token_id]`.&lt;/span&gt;
        &lt;span&gt;new_item&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;item&lt;/span&gt; &lt;span&gt;++&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;

        &lt;span&gt;# Pad the copied item to the longest sequence length in the batch.&lt;/span&gt;
        &lt;span&gt;padded&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;pad_to_length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;new_item&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;batch_max_length&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

        &lt;span&gt;# Match `padded[:-1]` for inputs and `padded[1:]` for next-token&lt;/span&gt;
        &lt;span&gt;# targets.&lt;/span&gt;
        &lt;span&gt;inputs&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;padded&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;batch_max_length&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

        &lt;span&gt;targets&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
          &lt;span&gt;padded&lt;/span&gt;
          &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;batch_max_length&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
          &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;mask_extra_padding_targets&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;ignore_index&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
          &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;mask_prompt_targets&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;prompt_length&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;ignore_index&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

        &lt;span&gt;# Optionally cap both rows to the model context length.&lt;/span&gt;
        &lt;span&gt;{&lt;/span&gt;
          &lt;span&gt;maybe_truncate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;allowed_max_length&lt;/span&gt;&lt;span&gt;),&lt;/span&gt;
          &lt;span&gt;__1__&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;targets&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;allowed_max_length&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
        &lt;span&gt;}&lt;/span&gt;
      &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;unzip&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;
      &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;__2__&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}),&lt;/span&gt;
      &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;targets&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;
    &lt;span&gt;}&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;My answer: 1 =&amp;gt; C, 2 =&amp;gt; A&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;p&gt;7 Match the terms on the left with their description on the right:&lt;/p&gt;

&lt;table&gt;
  &lt;thead&gt;
    &lt;tr&gt;
      &lt;th&gt;Term&lt;/th&gt;
      &lt;th&gt; &lt;/th&gt;
      &lt;th&gt;Description&lt;/th&gt;
    &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
    &lt;tr&gt;
      &lt;td&gt;Batching&lt;br&gt;Process&lt;/td&gt;
      &lt;td&gt; &lt;/td&gt;
      &lt;td&gt;A function that defines how to combine individual data samples into a batch, specifically tailored for the requirements of instruction fine-tuning.&lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td&gt;Custom&lt;br&gt;Collate&lt;br&gt;Function&lt;/td&gt;
      &lt;td&gt; &lt;/td&gt;
      &lt;td&gt;The process of organizing training data into groups of samples, called batches, to improve training efficiency.&lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td&gt;Padding&lt;br&gt;Tokens&lt;/td&gt;
      &lt;td&gt; &lt;/td&gt;
      &lt;td&gt;Special tokens added to the end of sequences to ensure all inputs in a batch have the same length, allowing for efficient processing by the model.&lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td&gt;Ignore&lt;br&gt;Index&lt;/td&gt;
      &lt;td&gt; &lt;/td&gt;
      &lt;td&gt;A special value, typically -100, used to indicate padding tokens in the target sequence, preventing them from contributing to the loss calculation during training.&lt;/td&gt;
    &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;

&lt;p&gt;My answer: 1 =&amp;gt; 2, 2 =&amp;gt; 1, 3 =&amp;gt; 3, 4 =&amp;gt; 4&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;

&lt;h3 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-75-loading-a-pretrained-llm&quot;&gt;7.5 Loading a pretrained LLM&lt;/h3&gt;

&lt;p&gt;1 Why is a larger pretrained model, like gpt2-medium, preferred for instruction fine-tuning compared to the smaller gpt2-small model?&lt;/p&gt;

&lt;p&gt;My answer: Because the pretrained medium model shows better performance for instruction fine-tuning.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The smaller model lacks the capacity to learn and retain the complex patterns and nuanced behaviors required for high-quality instruction-following tasks. The larger model has more parameters, allowing it to handle more intricate instructions and generate more accurate responses.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Did not explain why.&lt;/p&gt;

&lt;p&gt;2 What is the purpose of loading a pretrained LLM before starting instruction fine-tuning?&lt;/p&gt;

&lt;p&gt;My answer: Purpose is that we reuse step of training the LLM model.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Loading a pretrained LLM provides a foundation for the fine-tuning process. It allows the model to leverage existing knowledge and patterns learned during pretraining, enabling it to learn new tasks more efficiently and effectively.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Did not explain why.&lt;/p&gt;

&lt;p&gt;3 How does the code for loading a pretrained model differ from the code used for pretraining or classification fine-tuning?&lt;/p&gt;

&lt;p&gt;My answer: ?&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The code remains largely the same, but instead of specifying the ‘gpt2-small’ model, we now specify ‘gpt2-medium’ to load the larger model with 355 million parameters. This change reflects the choice of a more capable model for instruction fine-tuning.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Did not think of this simple answer.&lt;/p&gt;

&lt;p&gt;4 What is the purpose of evaluating the pretrained LLM’s performance on a validation task before fine-tuning?&lt;/p&gt;

&lt;p&gt;My answer: Purpose is to avoid overfitting the LLM model in the pretraining phase.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Evaluating the pretrained LLM’s performance provides a baseline understanding of its capabilities before fine-tuning. This allows us to assess the impact of fine-tuning on the model’s ability to follow instructions and generate accurate responses.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Wrong answer.&lt;/p&gt;

&lt;p&gt;5 How is the model’s generated response isolated from the input instruction in the provided code?&lt;/p&gt;

&lt;p&gt;My answer: It is isolated with dedicated tag: generated_response.&lt;/p&gt;

&lt;p&gt;Correct answer:  &lt;em&gt;The code subtracts the length of the input instruction from the start of the generated text, effectively removing the input text and leaving only the model’s generated response. The strip() function is then applied to remove any extra whitespace.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;I did not answer that we need to remove instructions part from the response.&lt;/p&gt;

&lt;h3 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-76-fine-tuning-the-llm-on-instruction-data&quot;&gt;7.6 Fine-tuning the LLM on instruction data&lt;/h3&gt;

&lt;p&gt;1 What is the purpose of fine-tuning an LLM on instruction data?&lt;/p&gt;

&lt;p&gt;My Answer: The purpose of fine-tuning an LLM on instruction data is to train the LLM to understand the patterns for resolving instruction tasks.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Fine-tuning an LLM on instruction data aims to improve its ability to understand and follow instructions, leading to more accurate and relevant responses to user prompts.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;2 Describe the process of fine-tuning an LLM on instruction data, highlighting the key steps involved.&lt;/p&gt;

&lt;p&gt;My answer: We first format the instruction input dataset in the form of a prompt. Then we split input data into batches where we pad each input to the longest input in that particular batch. Every batch could have a different max length. Padding token is |endoftext| token. Then we create input/target pairs where target is input shifted by one token to the right. If we reached last input token, then we add &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;endoftext&lt;/code&gt; token. We first pad targets then replace all padded tokens with -100, except for the first one that is closest to last token. -100 values are skipped while calculating the loss function. Then we train the model on that data using the same algorithm as for the pretraining step.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The process involves loading a pretrained LLM, preparing an instruction dataset, and training the model on this dataset using a suitable loss function and optimizer. The training process aims to minimize the loss, indicating improved performance in following instructions.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;My answer is wrong because I did not mention loading pre-trained model.&lt;/p&gt;

&lt;p&gt;3 What are some potential challenges encountered during fine-tuning an LLM on instruction data, and how can these challenges be addressed?&lt;/p&gt;

&lt;p&gt;My answer: ?&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Challenges include hardware limitations, such as memory constraints, which can be addressed by using a smaller model, reducing the batch size, or utilizing a GPU for faster training. Additionally, managing the length of input sequences can be crucial for efficient training.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;4 How is the effectiveness of fine-tuning evaluated during the training process?&lt;/p&gt;

&lt;p&gt;My answer: Input set is randomly split in training, validation and test sets. We are calculating entropy loss function for those steps.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The effectiveness is evaluated by monitoring the training and validation losses. A decrease in these losses indicates that the model is learning to follow instructions better. Additionally, inspecting the generated responses during training provides qualitative insights into the model’s progress.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;5 What is the significance of the Alpaca dataset in the context of fine-tuning LLMs?&lt;/p&gt;

&lt;p&gt;My answer: Alpaca dataset is in form of prompt, setting first the context for instruction that should be resolved.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The Alpaca dataset is a valuable resource for fine-tuning LLMs on instruction data. It provides a large collection of diverse instructions and corresponding responses, enabling the model to learn a wider range of task-specific behaviors.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;It’s a larger instruction training dataset.&lt;/p&gt;

&lt;h3 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-77-extracting-and-saving-responses&quot;&gt;7.7 Extracting and saving responses&lt;/h3&gt;

&lt;p&gt;1 Describe the process of evaluating the performance of an instruction-fine-tuned Large Language Model (LLM) after training.&lt;/p&gt;

&lt;p&gt;My answer: We have two options. First is that humans check the quality of resolving instruction tasks. This technique does not scale, so we have second option, which is to use another llm for instruction tasks resolving. We asked for example ollama3 model to score the instruction task model resolution.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Evaluation involves extracting model-generated responses from a held-out test set, manually analyzing them, and then quantifying the response quality using various methods such as benchmarks, human comparison, or automated metrics.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;2 What are the different methods for evaluating the performance of instruction-fine-tuned LLMs, and what are their relative strengths and weaknesses?&lt;/p&gt;

&lt;p&gt;My answer: Human evaluation and another llm score evaluation. Human evaluation does not scale well.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Methods include short-answer benchmarks (e.g., MMLU), human preference comparisons, and automated conversational benchmarks (e.g., AlpacaEval). Human evaluation provides valuable insights but is time-consuming, while automated methods are efficient but may lack the nuance of human judgment.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;3 What is meant by conversational performance in the context of LLMs, and why is it important?&lt;/p&gt;

&lt;p&gt;My answer: ?&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Conversational performance refers to an LLM’s ability to engage in human-like communication, understanding context, nuance, and intent.  It’s crucial for applications like chatbots where natural and coherent interaction is essential.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Understanding context, nuance and intent.&lt;/p&gt;

&lt;p&gt;4 How can the response generated by an LLM be automatically evaluated using another LLM, and what are the advantages of this approach?&lt;/p&gt;

&lt;p&gt;My answer: We construct a prompt that asks another llm to score the response of our model. We send to model instructions and correct response.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;An approach similar to AlpacaEval can be used, employing another LLM to evaluate the responses. This automated method is efficient, saving time and resources compared to manual human evaluation while still providing meaningful performance indicators.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;5 Explain the process of appending generated model responses to a test set and saving the updated data for later analysis.&lt;/p&gt;

&lt;p&gt;My answer: ?&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The generated model responses are added to a dictionary containing the test data. This updated data is then saved as a JSON file (e.g., ‘instruction-data-with-response.json’) for easy access and analysis in future sessions.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;Saving model responses to json file.&lt;/p&gt;

&lt;h3 id=&quot;article-M8kHfPNLYdOwwP7ZIl_m-6mpSVc-78-evaluating-the-fine-tuned-llm&quot;&gt;7.8 Evaluating the fine-tuned LLM&lt;/h3&gt;

&lt;p&gt;1 What is the purpose of using a larger LLM to evaluate the responses of a fine-tuned model?&lt;/p&gt;

&lt;p&gt;My answer: Purpose is to automate evaluation process of our fine-tuned llm.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;A larger LLM, like Llama 3 or GPT-4, can be used to automatically assess the quality of responses generated by a fine-tuned model. This provides a more objective and scalable method for evaluating model performance compared to manually reviewing a few examples.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;2 Describe the process of using Ollama to evaluate the responses of a fine-tuned model.&lt;/p&gt;

&lt;p&gt;My answer: We use testing instruction dataset. In json file we store instruction, model response, and correct response (output). Then we send to ollama3 input, output and model response to evaluate model response. For that we create a prompt where we are asking ollama3 to score (0-100) model response.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Ollama, an open-source application, allows you to run LLMs locally. You can use the query_model function to send prompts to the LLM, such as asking it to score a model’s response on a scale of 0 to 100. The LLM’s evaluation can then be used to assess the overall performance of the fine-tuned model.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;3 What are some alternative LLMs that can be used for evaluating model responses, besides the 8-billion-parameter Llama 3 model?&lt;/p&gt;

&lt;p&gt;My answer: We could use Qwen 8B model.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Other LLMs, such as the 3.8-billion-parameter phi3 model or the larger 70-billion-parameter Llama 3 model, can be used with Ollama. The choice of model depends on the available computational resources and the desired level of performance.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;4 How can the generate_model_scores function be used to assess the performance of a fine-tuned model?&lt;/p&gt;

&lt;p&gt;My answer: Each entry from test instructions dataset is sent to ollama3 evaluation in the form of prompt that request response score 1-100 based on input, expected output and output given by the model.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;The generate_model_scores function iterates through a set of test data, sending prompts to the LLM to evaluate each model response. It then calculates the average score across all responses, providing a quantitative measure of the model’s performance.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;5 What are some strategies for improving the performance of a fine-tuned model?&lt;/p&gt;

&lt;p&gt;My answer: We could implement Lora for model linear layers. Or we could do training on broader instructions data set, like alpaca data set.&lt;/p&gt;

&lt;p&gt;Correct answer: &lt;em&gt;Strategies for improving model performance include adjusting hyperparameters during fine-tuning, increasing the size or diversity of the training dataset, experimenting with different prompts or instruction formats, and using a larger pretrained model.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;6 Match the terms on the left with their description on the right:&lt;/p&gt;

&lt;table&gt;
  &lt;thead&gt;
    &lt;tr&gt;
      &lt;th&gt; &lt;/th&gt;
      &lt;th&gt; &lt;/th&gt;
      &lt;th&gt; &lt;/th&gt;
    &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
    &lt;tr&gt;
      &lt;td&gt;Test Set&lt;/td&gt;
      &lt;td&gt; &lt;/td&gt;
      &lt;td&gt;A method of evaluating the conversational performance of a language model using another language model to assess the quality of responses.&lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td&gt;Conversational&lt;br&gt;Performance&lt;/td&gt;
      &lt;td&gt; &lt;/td&gt;
      &lt;td&gt;The ability of a language model to engage in human-like communication, understanding context, nuance, and intent.&lt;/td&gt;
    &lt;/tr&gt;
    &lt;tr&gt;
      &lt;td&gt;Automated&lt;br&gt;Conversational&lt;br&gt;Benchmarks&lt;/td&gt;
      &lt;td&gt; &lt;/td&gt;
      &lt;td&gt;A portion of data that is held out from the training process and used to evaluate the performance of a trained model.&lt;/td&gt;
    &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;

&lt;p&gt;My answer: 1 =&amp;gt; 3, 2 =&amp;gt; 2, 3 =&amp;gt; 1&lt;/p&gt;

&lt;p&gt;Correct answer: +&lt;/p&gt;</summary><author><name></name></author><source gr:stream-id="feed/https://blog.tentamen.eu/feed/"><id>tag:google.com,2005:reader/feed/https://blog.tentamen.eu/feed/</id><title type="html">Tentamen Software Testing Blog</title><link rel="alternate" href="https://karlosmid.com/" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781685442000"><id gr:original-id="https://qualityremarks.com/?p=6931">tag:google.com,2005:reader/item/00000463000004c3</id><category term="Software Testing"></category><title type="html">Continuous Everything, Except Thinking</title><published>2026-06-17T08:37:22Z</published><updated>2026-06-17T08:37:22Z</updated><link rel="alternate" href="https://qualityremarks.com/continuous-everything-except-thinking/" type="text/html"></link><summary type="html">&lt;p&gt;My friend &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://www.linkedin.com/in/joepschuurkes/&quot; rel=&quot;noopener&quot; title&gt;Joep Schuurkes&lt;/a&gt; wrote a great &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://smallsheds.garden/blog/2025/not-continuous-everything/&quot; rel=&quot;noopener&quot; title&gt;response&lt;/a&gt; to my chapter in &lt;em&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://www.wiley.com/en-us/shop/general-introductory-computer-science/taking-testing-seriously-the-rapid-software-testing-approach-p-9781394253203&quot; rel=&quot;noopener&quot; title&gt;Taking Testing Seriously&lt;/a&gt; &lt;/em&gt;last year&lt;em&gt;, &lt;/em&gt;but thanks to LinkedIn’s fantastic algorithm, it didn’t come into my feed and I only saw it when someone sent it to me via email – about a week ago!&lt;/p&gt;



&lt;p&gt;His review focuses on the line where I said modern delivery seems obsessed with “continuous everything” except continuous thinking.&lt;/p&gt;



&lt;p&gt;&lt;em&gt;“So much today is about “continuous everything” – continuous deployment, continuous integration – and speed. The pace at which people are doing things is really, really disruptive to thinking deeply about problems. Then the goal gets displaced from getting good products and services to customers quickly, and turns into, “How do we back something out quickly when we inevitably screw up?” It’s continuous everything – except continuous thinking.” &lt;/em&gt;– &lt;strong&gt;Taking Testing Seriously&lt;/strong&gt;&lt;/p&gt;



&lt;div style=&quot;height: 20px&quot; aria-hidden=&quot;true&quot;&gt;&lt;/div&gt;



&lt;div&gt;
&lt;div&gt;
&lt;p&gt;Joep’s main point is that continuous integration, delivery, and deployment are not really about speed, they are about risk.&lt;/p&gt;



&lt;p&gt;Specifically, they are meant to address certain kinds of technical risk: integration risk, deployment risk, release risk. He also makes the distinction between being able to deploy and deciding to release.&lt;/p&gt;



&lt;p&gt;On that, we agree.&lt;/p&gt;
&lt;/div&gt;



&lt;div&gt;
&lt;figure&gt;&lt;img width=&quot;640&quot; height=&quot;430&quot; data-recalc-dims=&quot;1&quot; fetchpriority=&quot;high&quot; decoding=&quot;async&quot; data-attachment-id=&quot;6932&quot; data-permalink=&quot;https://qualityremarks.com/continuous-everything-except-thinking/image-71/&quot; data-orig-file=&quot;https://i0.wp.com/qualityremarks.com/wp-content/uploads/2026/06/image-1.png?fit=640%2C430&amp;amp;quality=20&amp;amp;ssl=1&quot; data-orig-size=&quot;640,430&quot; data-comments-opened=&quot;1&quot; data-image-meta=&quot;{&amp;quot;aperture&amp;quot;:&amp;quot;0&amp;quot;,&amp;quot;credit&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;camera&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;caption&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;created_timestamp&amp;quot;:&amp;quot;0&amp;quot;,&amp;quot;copyright&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;focal_length&amp;quot;:&amp;quot;0&amp;quot;,&amp;quot;iso&amp;quot;:&amp;quot;0&amp;quot;,&amp;quot;shutter_speed&amp;quot;:&amp;quot;0&amp;quot;,&amp;quot;title&amp;quot;:&amp;quot;&amp;quot;,&amp;quot;orientation&amp;quot;:&amp;quot;0&amp;quot;,&amp;quot;alt&amp;quot;:&amp;quot;&amp;quot;}&quot; data-image-title=&quot;image&quot; data-image-description data-image-caption data-large-file=&quot;https://i0.wp.com/qualityremarks.com/wp-content/uploads/2026/06/image-1.png?fit=640%2C430&amp;amp;quality=20&amp;amp;ssl=1&quot; alt data-orig-srcset=&quot;https://i0.wp.com/qualityremarks.com/wp-content/uploads/2026/06/image-1.png?w=640&amp;amp;quality=20&amp;amp;ssl=1 640w, https://i0.wp.com/qualityremarks.com/wp-content/uploads/2026/06/image-1.png?resize=300%2C202&amp;amp;quality=20&amp;amp;ssl=1 300w&quot; src=&quot;https://i0.wp.com/qualityremarks.com/wp-content/uploads/2026/06/image-1.png?w=640&amp;amp;quality=20&amp;amp;ssl=1&quot;&gt;&lt;/figure&gt;
&lt;/div&gt;
&lt;/div&gt;



&lt;p&gt;Being able to deploy as safely as possible should be a business decision, and being able to get that information quickly and respond is very valuable.&lt;/p&gt;



&lt;p&gt;And I also agree with Joep that risk appetite matters and changes/adapts to your project and business context. A startup changing website copy is not doing the same job as a bank changing their payments infrastructure.&lt;/p&gt;



&lt;p&gt;So yes, technical capability should not determine release cadence. Risk should.&lt;/p&gt;



&lt;p&gt;But here’s where I disagree with Joep about how continuous integration/delivery/deployment is positioned or delivered by the Agile industrial complex.&lt;/p&gt;



&lt;p&gt;The continuous integration and deployment folks do a little more than “imply speed”, and that has a direct result in how it’s implemented.&lt;/p&gt;



&lt;p&gt;In the purest textbook version, CI/CD might be about reducing delivery risk, but that’s not how it’s sold. It’s sold as “faster time to market,” “code releases happen faster,” “ship better quality code faster,” “run pipelines in minutes, not hours,” and “get code to production faster.”&lt;/p&gt;



&lt;p&gt;And in my experience, that is exactly how I’ve encountered it in the wild.&lt;/p&gt;



&lt;p&gt;In enterprise tech, that gets translated into velocity targets, throughput measurement, increased deployment frequency, dopamine dashboards, and pressure to ship if everything’s green.&lt;/p&gt;



&lt;p&gt;That’s what I’m talking about.&lt;/p&gt;



&lt;p&gt;As well, CI/CD evangelists just like XP bros or most agilistas, seem to love gaslighting people trying to use this stuff with the same “my process isn’t broken, you’re just doing it wrong”, which only makes cutting corners more likely.&lt;/p&gt;



&lt;p&gt;So the best version of “continuous anything” may be sensible, but the common version I see teams struggling to implement (or living with the consequences of test automation elevated to a quality strategy) has the business (and testers) losing the time and agency required to think – continually.&lt;/p&gt;



&lt;p&gt;Anyway, thanks for the feedback Joep!&lt;/p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://qualityremarks.com/continuous-everything-except-thinking/&quot;&gt;Continuous Everything, Except Thinking&lt;/a&gt; first appeared on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://qualityremarks.com&quot;&gt;Quality Remarks&lt;/a&gt;.</summary><author><name>keithklain</name></author><source gr:stream-id="feed/http://qualityremarks.com/feed/"><id>tag:google.com,2005:reader/feed/http://qualityremarks.com/feed/</id><title type="html">Quality Remarks</title><link rel="alternate" href="https://qualityremarks.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781677800000"><id gr:original-id="https://www.thequalityduck.co.uk/?p=1906">tag:google.com,2005:reader/item/0000074400000036</id><category term="AI"></category><category term="Development Practices"></category><category term="Engineering"></category><category term="Engineering Leadership"></category><category term="Ways of working"></category><title type="html">How to safely build trust in AI generated code</title><published>2026-06-17T06:30:00Z</published><updated>2026-06-17T06:30:00Z</updated><link rel="alternate" href="https://www.thequalityduck.co.uk/how-to-safely-build-trust-in-ai-generated-code/" type="text/html"></link><summary type="html">&lt;p&gt;You’re rolling out AI-assisted engineering wrong, and it&amp;apos;s going to slow you down! Learn to build trust safely and quickly.&lt;/p&gt;
&lt;p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://www.thequalityduck.co.uk/how-to-safely-build-trust-in-ai-generated-code/&quot;&gt;How to safely build trust in AI generated code&lt;/a&gt; first appeared on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://www.thequalityduck.co.uk&quot;&gt;The Quality Duck&lt;/a&gt;.&lt;/p&gt;</summary><author><name>Stuart</name></author><source gr:stream-id="feed/https://www.thequalityduck.co.uk/feed/"><id>tag:google.com,2005:reader/feed/https://www.thequalityduck.co.uk/feed/</id><title type="html">Stuart Thomas</title><link rel="alternate" href="https://www.thequalityduck.co.uk" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781667342000"><id gr:original-id="https://scrolltest.com/selenium-nightly-releases-qa-discipline/">tag:google.com,2005:reader/item/0000044400000238</id><category term="Selenium"></category><category term="Test Automation"></category><category term="Testing"></category><category term="agile test automation"></category><category term="AI SDET"></category><category term="Automation Testing Framework Using Selenium"></category><category term="Nightly Releases"></category><category term="Release Notes"></category><title type="html">Selenium Nightly Releases: A QA Discipline Habit</title><published>2026-06-17T03:35:42Z</published><updated>2026-06-17T03:35:42Z</updated><link rel="alternate" href="https://scrolltest.com/selenium-nightly-releases-qa-discipline/" type="text/html"></link><summary type="html">&lt;p&gt;Selenium nightly releases are not random noise. They are a cheap weekly drill for release-note reading, browser compatibility checks, and safer automation upgrades.&lt;/p&gt;
&lt;p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com/selenium-nightly-releases-qa-discipline/&quot;&gt;Selenium Nightly Releases: A QA Discipline Habit&lt;/a&gt; appeared first on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com&quot;&gt;Software Testing &amp;amp; Automation&lt;/a&gt;.&lt;/p&gt;</summary><author><name>Promode</name></author><source gr:stream-id="feed/https://scrolltest.com/feed/"><id>tag:google.com,2005:reader/feed/https://scrolltest.com/feed/</id><title type="html">Software Testing &amp; Automation</title><link rel="alternate" href="https://scrolltest.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781667269000"><id gr:original-id="https://scrolltest.com/playwright-visual-testing-day-9/">tag:google.com,2005:reader/item/0000044400000237</id><category term="Javascript"></category><category term="Test Automation"></category><category term="Testing"></category><category term="agile test automation"></category><category term="AI SDET"></category><category term="API Testing with Playwright"></category><category term="typescript"></category><category term="Visual Testing"></category><title type="html">Playwright Visual Testing: Day 9 Tutorial</title><published>2026-06-17T03:34:29Z</published><updated>2026-06-17T03:34:29Z</updated><link rel="alternate" href="https://scrolltest.com/playwright-visual-testing-day-9/" type="text/html"></link><summary type="html">&lt;p&gt;Learn Playwright visual testing with TypeScript: screenshots, baselines, masks, ARIA snapshots, CI diff review, and common pitfalls.&lt;/p&gt;
&lt;p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com/playwright-visual-testing-day-9/&quot;&gt;Playwright Visual Testing: Day 9 Tutorial&lt;/a&gt; appeared first on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com&quot;&gt;Software Testing &amp;amp; Automation&lt;/a&gt;.&lt;/p&gt;</summary><author><name>Promode</name></author><source gr:stream-id="feed/https://scrolltest.com/feed/"><id>tag:google.com,2005:reader/feed/https://scrolltest.com/feed/</id><title type="html">Software Testing &amp; Automation</title><link rel="alternate" href="https://scrolltest.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781647200000"><id gr:original-id="https://scrolltest.com/?p=7375">tag:google.com,2005:reader/item/0000044400000236</id><category term="AI Testing"></category><category term="Testing"></category><title type="html">AI-Augmented QA: The Complete Workflow for Using LLMs to 10x Your Testing Productivity</title><published>2026-06-16T22:00:00Z</published><updated>2026-06-16T22:00:00Z</updated><link rel="alternate" href="https://scrolltest.com/ai-augmented-qa-workflow-llm-10x-testing-productivity/" type="text/html"></link><summary type="html">&lt;p&gt;Before: 2 hours writing tests. After: 10 minutes with AI + 20 minutes refining. The 5-step AI QA workflow for test generation, debugging, risk analysis, and bug reports.&lt;/p&gt;
&lt;p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com/ai-augmented-qa-workflow-llm-10x-testing-productivity/&quot;&gt;AI-Augmented QA: The Complete Workflow for Using LLMs to 10x Your Testing Productivity&lt;/a&gt; appeared first on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com&quot;&gt;Software Testing &amp;amp; Automation&lt;/a&gt;.&lt;/p&gt;</summary><author><name>Pramod Dutta</name></author><source gr:stream-id="feed/https://scrolltest.com/feed/"><id>tag:google.com,2005:reader/feed/https://scrolltest.com/feed/</id><title type="html">Software Testing &amp; Automation</title><link rel="alternate" href="https://scrolltest.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781626287000"><id gr:original-id="https://agiletestingfellow.com/blog/post/a-holistic-approach-to-spec-driven-development">tag:google.com,2005:reader/item/0000094b00000030</id><title type="html">A holistic approach to &amp;quot;spec-driven development&amp;quot;</title><published>2026-06-16T16:11:27Z</published><updated>2026-06-16T16:11:27Z</updated><link rel="alternate" href="https://agiletestingfellow.com/blog/post/a-holistic-approach-to-spec-driven-development" type="text/html"></link><summary type="html">&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;A common phrase we hear in the context of AI-assisted development is &amp;quot;spec-driven development&amp;quot; or SDD. There are many definitions of SDD floating around. 
Here is a good one from &lt;/span&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://martinfowler.com/articles/exploring-gen-ai/sdd-3-tools.html&quot; style=&quot;color: #467886; text-decoration: underline&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;Birgitta Böckeler&lt;/span&gt;&lt;/a&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;: &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;blockquote&gt;
&lt;p style=&quot;margin-left: .5in; margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;i&gt;&lt;span style=&quot;background: white&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;&lt;span style=&quot;color: #303633&quot;&gt;Spec-driven development means writing a “spec” before writing code with AI (“documentation first”). The spec becomes the source of truth for the human and the AI.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/i&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p style=&quot;margin-left: .5in; margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;So what&amp;apos;s a &amp;quot;spec&amp;quot;? Birgitta offers this definition:&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;blockquote&gt;
&lt;p style=&quot;margin-left: .5in; margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;i&gt;&lt;span style=&quot;background: white&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;&lt;span style=&quot;color: #303633&quot;&gt;A spec is a structured, behavior-oriented artifact - or a set of related artifacts - written in natural language that expresses software functionality and serves as guidance to AI coding agents.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/i&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;There are many AI-based tools available for teams who want to practice SDD. They can build code exactly to the spec given to them. What we miss in all the talk about SDD is - where does the spec come from? How do we know it really specifies the capabilities that the customers want and need?&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;For those that missed the world of waterfall, spec stands for specification. The specs that were written then are not what we mean now. Let’s dive into that a bit more.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 16pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Aptos Display&amp;quot;,sans-serif&quot;&gt;&lt;span style=&quot;color: #0f4761&quot;&gt;&lt;span style=&quot;font-weight: normal&quot;&gt;We still need the good practices and principles&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;The way teams build production and test code is clearly changing. The principles and practices we have followed to create examples, executable tests, prototypes and other artifacts that show how each new feature behaves have not changed. It&amp;apos;s even more important that we build the shared understanding of each new software change. &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;&lt;span style=&quot;font-size: 12.0pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;When you go to learn about SDD, it sounds like a new waterfall process - create a big batch of specs, feed it to the agents, and generate a huge amount of code. We&amp;apos;re hearing more every day about &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;font-size: 12.0pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Aptos&amp;quot;,sans-serif&quot;&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://newsletter.getdx.com/p/cognitive-debt-the-hidden-risk-in&quot; style=&quot;color: #467886; text-decoration: underline&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;&amp;quot;cognitive debt&amp;quot;&lt;/span&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;font-size: 12.0pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;, also called &amp;quot;comprehension debt&amp;quot;, on &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;font-size: 12.0pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;the part of teams trying to review and understand these huge batches of generated code. It&amp;apos;s not humanly possible.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;&lt;span style=&quot;font-size: 16pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Aptos Display&amp;quot;,sans-serif&quot;&gt;&lt;span style=&quot;color: #0f4761&quot;&gt;&lt;span style=&quot;font-weight: normal&quot;&gt;Applying the Holistic Testing Model to SDD&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;Looking at the Holistic Testing Model, would we change anything when using AI to create code and tests from specs using AI agentic development? Whether or not we adopt AI-assisted development, working incrementally and iteratively, one small change at a time, is the road to success. &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://agiletestingfellow.com/uploads/ckeditor/pictures/86/content_Holistic_testing_with_attribution.png&quot;&gt;&lt;img width=&quot;800&quot; height=&quot;450&quot; alt=&quot;holistic testing model with stages and sample activities&quot; src=&quot;https://agiletestingfellow.com/uploads/ckeditor/pictures/86/content_Holistic_testing_with_attribution.png&quot;&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;We still need to test the business ideas and make sure they will solve customers&amp;apos; problems. We still need to analyze and manage risks. It&amp;apos;s still important to prioritize the top few quality attributes for each new change. Sure, AI can help us process analytics about customer needs, so maybe it can speed up some of these activities. &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;h3 style=&quot;margin: 8pt 0in 4pt&quot;&gt;&lt;span style=&quot;font-size: 14pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;color: #0f4761&quot;&gt;&lt;span style=&quot;font-weight: normal&quot;&gt;Build quality in throughout the SDLC&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/h3&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;Using examples and tests to guide development is even more critical if we&amp;apos;re going to trust AI tools to build the code for a new capability. Behavior-driven / acceptance test-driven development and Specification by Example (remember that?) help make sure we deliver what our customers want and need. And yes, AI can speed up building prototypes that help to ensure we know how a new change should behave. &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;Automating the tests based on examples that emerge from conversations among delivery and business team members goes more quickly with AI assistance. However, we still need those human conversations to ensure shared understanding of what to build.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;Some of the testing activities we plan based on the right-hand side of the model will also be quicker and easier if we employ AI tools wisely. They still need to happen because we don&amp;apos;t want to take our human brains out of the equation. &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 12pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;If you take nothing else away from this post, just remember, &amp;quot;work in small batches&amp;quot;. This principle for successful software development, happy teams and happy customers dates from the 1970s. It&amp;apos;s time more teams learned and followed it.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-size: 14pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: Aptos,sans-serif&quot;&gt;&lt;span style=&quot;color: #0f4761&quot;&gt;&lt;span style=&quot;font-weight: normal&quot;&gt;More learning&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt;&lt;span style=&quot;font-family: Calibri, sans-serif&quot;&gt;&lt;span style=&quot;font-size: 16px&quot;&gt;If you&amp;apos;d like more details about using a specification-by-example approach, making one tiny change at a time, we recommend Antony Marcano&amp;apos;s work on this topic. &lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;font-size: 12.0pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;His article, &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;font-size: 12.0pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Aptos&amp;quot;,sans-serif&quot;&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://www.linkedin.com/pulse/what-everyone-getting-wrong-spec-driven-development-ai-antony-marcano-ata6e/?trackingId=2vxmS6h0RvKMv5rLWTsMbQ%3D%3D&quot; style=&quot;color: #467886; text-decoration: underline&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;&amp;quot;What (Almost) Everyone Gets Wrong About Spec-Driven Development with AI&amp;quot;&lt;/span&gt;&lt;/a&gt;, is a great place to start. 
We also enjoyed &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;font-size: 12.0pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;the &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;font-size: 12.0pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Aptos&amp;quot;,sans-serif&quot;&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://www.linkedin.com/posts/antonymarcano_ive-been-playing-with-googles-notebooklm-activity-7452646689439510528-IvWa?utm_source=share&amp;amp;utm_medium=member_desktop&amp;amp;rcm=ACoAAAAH4GQBEY_oNGsDBtTQFPdHXlRKdCkFnFM&quot; style=&quot;color: #467886; text-decoration: underline&quot;&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt;video&lt;/span&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style=&quot;font-size: 12.0pt&quot;&gt;&lt;span&gt;&lt;span style=&quot;font-family: &amp;quot;Calibri&amp;quot;,sans-serif&quot;&gt; Antony generated from it using Google NotebookLM. &lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt; &lt;/p&gt;

&lt;p style=&quot;margin: 0in 0in 8pt&quot;&gt; &lt;/p&gt;

&lt;div&gt;
&lt;hr align=&quot;left&quot; size=&quot;1&quot; width=&quot;33%&quot;&gt;
&lt;div&gt;
&lt;div id=&quot;article-EY1M9lGqsBmqlqM0XEyEggliK20-_com_1&quot; language=&quot;JavaScript&quot;&gt; &lt;/div&gt;&lt;/div&gt;&lt;/div&gt;</summary><author><name></name></author><source gr:stream-id="feed/https://agiletestingfellow.com/blog.rss"><id>tag:google.com,2005:reader/feed/https://agiletestingfellow.com/blog.rss</id><title type="html">Agile Testing Fellow</title><link rel="alternate" href="https://agiletestingfellow.com/blog.rss" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781589600000"><id gr:original-id="https://testingil.com/?p=9405">tag:google.com,2005:reader/item/0000025700000112</id><category term="Uncategorized"></category><title type="html">Test the Feature, Not the Endpoint</title><published>2026-06-16T06:00:00Z</published><updated>2026-06-16T06:00:00Z</updated><link rel="alternate" href="https://testingil.com/2026/06/test-the-feature-not-the-endpoint.html" type="text/html"></link><summary type="html">&lt;p&gt;They say, an API is a window into a whole new world. Well, I said it. &lt;/p&gt;



&lt;p&gt;But really, it’s a peep hole. We need a bunch of them to really get a good picture.&lt;/p&gt;



&lt;p&gt;An API might get you something, or do something for you. But it rarely comes alone. In order to be effective, it needs other friends.&lt;/p&gt;



&lt;p&gt;For example, we can create a user with a POST API. But unless we call a GET on it, that user is lost in the database.&lt;/p&gt;



&lt;p&gt;So when we’re testing, we should look at the feature, not just the API.&lt;/p&gt;



&lt;p&gt;Which kind of conflicts with how we “test APIs”. Because they are testable on their own. They are a piece of functionality, that fits into a workflow, that fits into the feature.&lt;/p&gt;



&lt;p&gt;But in this context, testing an API is like a unit test.&lt;/p&gt;



&lt;p&gt;Lay down the pitchfork, I’m not done. What does a unit test do? It proves that piece of code does what we expect from it.&lt;/p&gt;



&lt;p&gt;What does a test for that POST do? THE SAME.&lt;/p&gt;



&lt;p&gt;But – I hear you say through the pitchfork – but that API is a real functional piece, that can live on its own, with a possible applicative outcome.&lt;/p&gt;



&lt;p&gt;Yes – I say, backing away – so is a function, or a class.&lt;/p&gt;



&lt;p&gt;But – you lower the pitchfork, with a confused look on your face – but a unit is something small. An API is really code in different places – server, logic, database. It can be big.&lt;/p&gt;



&lt;p&gt;Ok, you got me there. There’s a bunch of differences between API tests and unit tests. But I want to get back to the important one for our discussion today.&lt;/p&gt;



&lt;p&gt;Like a passing unit test doesn’t tell you if the whole system works, a single API test passing doesn’t tell you if the feature is working.&lt;/p&gt;



&lt;p&gt;In the case of a unit test, you need more integrated tests, maybe API or an end-to-end test to create the confidence it works. An API test? The same.&lt;/p&gt;



&lt;p&gt;So, the next time somebody throws you an API spec, and tells you to test it, sure, do that. But if you really want to know the whole feature is working? Add a couple of workflow, multi-API, end-to-end tests.&lt;/p&gt;



&lt;p&gt;You’ll sleep better.&lt;/p&gt;




&lt;hr&gt;



&lt;p&gt;Want to go deeper into feature-level test design and multi-API workflows?&lt;/p&gt;



&lt;p&gt;→ &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://testingil.com/mc-api-testing-tactician&quot; style=&quot;color: #4CAF50&quot;&gt;&lt;strong&gt;The Tactician&lt;/strong&gt;&lt;/a&gt; — hands-on test design, assertions, debugging, and AI-powered testing. Coming soon.&lt;/p&gt;



&lt;p&gt;→ &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://testingil.com/mc-api-testing-strategist&quot; style=&quot;color: #4CAF50&quot;&gt;&lt;strong&gt;The Strategist&lt;/strong&gt;&lt;/a&gt; — planning, prioritization, and deciding what to test before you open Postman.&lt;/p&gt;




&lt;p&gt;&lt;/p&gt;
&lt;p&gt;&lt;/p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://testingil.com/2026/06/test-the-feature-not-the-endpoint.html&quot;&gt;Test the Feature, Not the Endpoint&lt;/a&gt; first appeared on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://testingil.com&quot;&gt;TestinGil&lt;/a&gt;.</summary><author><name>Gil Zilberfeld</name></author><source gr:stream-id="feed/http://www.everydayunittesting.com/feed"><id>tag:google.com,2005:reader/feed/http://www.everydayunittesting.com/feed</id><title type="html">Everyday Unit Testing</title><link rel="alternate" href="https://testingil.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781584517000"><id gr:original-id="https://scrolltest.com/ai-qa-agents-runnable-checks/">tag:google.com,2005:reader/item/0000044400000235</id><category term="AI Testing"></category><category term="Testing"></category><category term="agile test automation"></category><category term="AI QA Agents"></category><category term="AI testing"></category><category term="API Testing with Playwright"></category><category term="promptfoo"></category><title type="html">AI QA Agents: From Prompts to Runnable Checks</title><published>2026-06-16T04:35:17Z</published><updated>2026-06-16T04:35:17Z</updated><link rel="alternate" href="https://scrolltest.com/ai-qa-agents-runnable-checks/" type="text/html"></link><summary type="html">&lt;p&gt;AI QA agents should not stop at test-case text. Day 8 shows a practical workflow that turns prompts into runnable checks, evidence, and eval gates.&lt;/p&gt;
&lt;p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com/ai-qa-agents-runnable-checks/&quot;&gt;AI QA Agents: From Prompts to Runnable Checks&lt;/a&gt; appeared first on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com&quot;&gt;Software Testing &amp;amp; Automation&lt;/a&gt;.&lt;/p&gt;</summary><author><name>Promode</name></author><source gr:stream-id="feed/https://scrolltest.com/feed/"><id>tag:google.com,2005:reader/feed/https://scrolltest.com/feed/</id><title type="html">Software Testing &amp; Automation</title><link rel="alternate" href="https://scrolltest.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781580958000"><id gr:original-id="https://scrolltest.com/playwright-api-testing-day-8/">tag:google.com,2005:reader/item/0000044400000234</id><category term="API Testing"></category><category term="Javascript"></category><category term="Test Automation"></category><category term="Testing"></category><category term="agile test automation"></category><category term="api testing"></category><category term="API Testing with Playwright"></category><category term="arc tool for rest api testing"></category><category term="typescript"></category><title type="html">Playwright API Testing: Day 8 Tutorial</title><published>2026-06-16T03:35:58Z</published><updated>2026-06-16T03:35:58Z</updated><link rel="alternate" href="https://scrolltest.com/playwright-api-testing-day-8/" type="text/html"></link><summary type="html">&lt;p&gt;Playwright API Testing tutorial for Day 8: write REST checks, seed UI state, mock network responses, and debug API failures in TypeScript.&lt;/p&gt;
&lt;p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com/playwright-api-testing-day-8/&quot;&gt;Playwright API Testing: Day 8 Tutorial&lt;/a&gt; appeared first on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com&quot;&gt;Software Testing &amp;amp; Automation&lt;/a&gt;.&lt;/p&gt;</summary><author><name>Promode</name></author><source gr:stream-id="feed/https://scrolltest.com/feed/"><id>tag:google.com,2005:reader/feed/https://scrolltest.com/feed/</id><title type="html">Software Testing &amp; Automation</title><link rel="alternate" href="https://scrolltest.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781580925000"><id gr:original-id="https://scrolltest.com/playwright-mcp-vs-traditional-test-scripts/">tag:google.com,2005:reader/item/0000044400000233</id><category term="Test Automation"></category><category term="Testing"></category><category term="agile test automation"></category><category term="AI testing"></category><category term="API Testing with Playwright"></category><category term="CI Testing"></category><category term="Playwright MCP"></category><title type="html">Playwright MCP vs Traditional Test Scripts</title><published>2026-06-16T03:35:25Z</published><updated>2026-06-16T03:35:25Z</updated><link rel="alternate" href="https://scrolltest.com/playwright-mcp-vs-traditional-test-scripts/" type="text/html"></link><summary type="html">&lt;p&gt;Playwright MCP vs traditional test scripts is not a replacement debate. Compare repeatability, observability, CI fit, and debugging cost before agents touch your release gate.&lt;/p&gt;
&lt;p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com/playwright-mcp-vs-traditional-test-scripts/&quot;&gt;Playwright MCP vs Traditional Test Scripts&lt;/a&gt; appeared first on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com&quot;&gt;Software Testing &amp;amp; Automation&lt;/a&gt;.&lt;/p&gt;</summary><author><name>Promode</name></author><source gr:stream-id="feed/https://scrolltest.com/feed/"><id>tag:google.com,2005:reader/feed/https://scrolltest.com/feed/</id><title type="html">Software Testing &amp; Automation</title><link rel="alternate" href="https://scrolltest.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781568000000"><id gr:original-id="https://testengineeringnotes.com/posts/2026-06-16-test-craftsman-review/">tag:google.com,2005:reader/item/0000105d000000a2</id><category term="book"></category><category term="testing"></category><category term="Reviews"></category><title type="html">Book Review: Software Testing: A Craftsman`s Approach</title><published>2026-06-16T00:00:00Z</published><updated>2026-06-16T00:00:00Z</updated><link rel="alternate" href="https://testengineeringnotes.com/posts/2026-06-16-test-craftsman-review/" type="text/html"></link><summary type="html">My insights from &amp;quot;Software Testing: A Craftsman`s Approach&amp;quot; by Paul C Jorgensen</summary><author><name></name></author><source gr:stream-id="feed/https://testengineeringnotes.com/rss.xml"><id>tag:google.com,2005:reader/feed/https://testengineeringnotes.com/rss.xml</id><title type="html">Test Engineering Notes</title><link rel="alternate" href="https://testengineeringnotes.com/" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781560800000"><id gr:original-id="https://scrolltest.com/?p=7374">tag:google.com,2005:reader/item/0000044400000232</id><category term="Career 
Guide"></category><category term="Testing"></category><title type="html">How to Communicate QA Value to Non-Technical Stakeholders: The Language That Gets Budget</title><published>2026-06-15T22:00:00Z</published><updated>2026-06-15T22:00:00Z</updated><link rel="alternate" href="https://scrolltest.com/communicating-qa-value-non-technical-stakeholders-budget/" type="text/html"></link><summary type="html">&lt;p&gt;Zero incidents, clean releases — and nobody noticed. How to translate invisible QA work into language executives understand: money saved, time gained, risk reduced.&lt;/p&gt;
&lt;p&gt;The post &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com/communicating-qa-value-non-technical-stakeholders-budget/&quot;&gt;How to Communicate QA Value to Non-Technical Stakeholders: The Language That Gets Budget&lt;/a&gt; appeared first on &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; rel=&quot;nofollow&quot; href=&quot;https://scrolltest.com&quot;&gt;Software Testing &amp;amp; Automation&lt;/a&gt;.&lt;/p&gt;</summary><author><name>Pramod Dutta</name></author><source gr:stream-id="feed/https://scrolltest.com/feed/"><id>tag:google.com,2005:reader/feed/https://scrolltest.com/feed/</id><title type="html">Software Testing &amp; Automation</title><link rel="alternate" href="https://scrolltest.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781542800000"><id gr:original-id="https://www.bartvanherck.com/posts/2026/20260615/">tag:google.com,2005:reader/item/00000d9a00000012</id><title type="html">Your test isn’t flaky: Page 2 just stole data from page 1</title><published>2026-06-15T17:00:00Z</published><updated>2026-06-15T17:00:00Z</updated><link rel="alternate" href="https://www.bartvanherck.com/posts/2026/20260615/" type="text/html"></link><summary type="html">&lt;h2 id=&quot;article-CjwWeJ5aCkJjhddAynWXfpSEPuo-the-problem-nobody-spots-right-away&quot;&gt;The problem nobody spots right away&lt;/h2&gt;
&lt;p&gt;&lt;/p&gt;&lt;figure&gt;

 
 &lt;div style=&quot;width: 200px&quot;&gt;
 &lt;svg xmlns=&quot;http://www.w3.org/2000/svg&quot; viewbox=&quot;0 0 176.688 185.938&quot;&gt;
 &lt;path d=&quot;m135.28 10 .156 12.594-4.156-3.469-1.406 8.594-.531-13.031 3.875 3.188zm-16.938.219 3.781 12-5-2.094 1.188 8.625L114 16.438l4.625 1.937-.282-8.156zm-20.344 9.719 9.625 8.156-5.344.875 5.563 6.656-10.188-8.094 4.937-.844zm-32.469 2.5c2.68 0 4.813 3.263 4.813 7.28v41.939c0 4.017-2.134 7.281-4.813 7.281h-5.281c-2.68 0-4.844-3.264-4.844-7.281V29.719c0-4.018 2.164-7.281 4.844-7.281zm62.693 11.187c6.523.005 12.731 3.604 15.469 9.593 3.264 7.143.49 15.283-6.156 19.438 4.053.624 7.125 3.715 7.125 7.47V132c0 .265-.037.524-.063.78h7.594v-20.655c-1.02-.716-1.656-1.737-1.656-2.875V81.03c0-2.175 2.334-3.968 5.219-3.968h5.718v.031c2.885 0 5.22 1.762 5.22 3.938v28.219c0 1.382-.953 2.613-2.376 3.312v59.25c0 2.174-1.739 3.938-3.906 3.938h-4.312c-2.168 0-3.907-1.764-3.907-3.938v-26.875h-29.344v26.625c0 2.31-1.77 4.188-3.937 4.188h-4.313c-2.167 0-3.906-1.878-3.906-4.188v-33.719h-10.094l-12.094 12.22v21.061c0 2.68-2.738 4.813-6.125 4.813h-4.375c-3.387 0-6.125-2.133-6.125-4.813v-4.25l-4.656 4.72c-3.46 3.5-8.146 4.426-10.531 2.062l-5.031-5c-.09-.09-.17-.185-.25-.282v2.75c0 2.68-2.738 4.813-6.125 4.813H40.91c-3.388 0-6.094-2.133-6.094-4.813v-58.719H16.785c-3.759 0-6.781-2.706-6.781-6.093v-4.407c0-3.387 3.022-6.093 6.78-6.093h28.939V54c0-2.144 1.776-3.875 4-3.875s4.031 1.73 4.031 3.875v41.812h4.781c-.371-.774-.562-1.682-.562-2.625 0-3.037 2.163-5.47 4.875-5.47h17.469c1.033-1.905 3.236-3.694 6.094-4.687l32.62-11.326c2.008-.697 3.989-.898 5.688-.656v-.906c0-1.857.753-3.528 2-4.844-5.96-.503-11.438-3.994-13.969-9.531-3.673-8.039.272-17.355 8.813-20.812l6.656 14.53-6.5-14.593a17.6 17.6 0 0 1 6.5-1.25zm-3.5 52.156c-.229.093-.45.198-.688.28l-21.5 7.5c-.047.807-.208 1.578-.53 2.25h7.25c3.758 0 6.78 2.707 6.78 6.095v4.406c0 3.387-3.022 6.094-6.78 6.094h-20.75v10.344l.53-.532c2.378-2.406 5.323-3.576 7.72-3.312a3.6 3.6 0 0 1 1.718-.438h26.25zm-52.844 26.625H51.409v48.656a12.5 12.5 0 0 1 2.187-3l18.281-18.5z&quot;&gt;&lt;/path&gt;
&lt;/svg&gt;

 &lt;/div&gt;
 

&lt;/figure&gt;

You run your end-to-end tests. All green. You run them again. One of them fails, in a spot where you changed nothing. Run them once more and that test passes again, but a different one starts complaining.&lt;p&gt;&lt;/p&gt;
&lt;p&gt;Frustrating? Yes, it is! And your first reflex is logical. You blame the test, or the test framework that must be “flaky”. It looks like pure bad luck after all. Red today, green tomorrow, without a single change to the code.&lt;/p&gt;
&lt;p&gt;Yet that picture is wrong again. The test is exposing something real. There is a small bug in the production code. The sorting tells the database which field to sort on, but not what to do when two records share the same value.&lt;/p&gt;
&lt;p&gt;The database then fills that gap itself. And not always in the same way. The result is that the order of records is not fixed. In production you rarely notice it. Your tests put their finger right on it.&lt;/p&gt;
&lt;h2 id=&quot;article-CjwWeJ5aCkJjhddAynWXfpSEPuo-where-it-really-goes-wrong-the-paged-table&quot;&gt;Where it really goes wrong: the paged table&lt;/h2&gt;
&lt;p&gt;We rarely show a list all at once. Displaying thousands of orders on a single screen is unreadable and very slow. So we cut them up into pages. Page 1 shows the first ten records, page 2 the next ten, and so on.&lt;/p&gt;
&lt;p&gt;It is important to realise that every page is a separate question to the database. The database does not remember what it gave you on page 1. Behind each page sits the very same query again. The only difference is an &lt;code&gt;offset&lt;/code&gt; that says “skip the first so many records” and a &lt;code&gt;limit&lt;/code&gt; that says “then give me ten”.&lt;/p&gt;
&lt;p&gt;Picture a screen with orders, sorted by creation time. Per page the database receives something like this:&lt;/p&gt;
&lt;div&gt;&lt;pre tabindex=&quot;-1&quot; style=&quot;color: #f8f8f2; background-color: #272822; -moz-tab-size: 4; -o-tab-size: 4; tab-size: 4&quot;&gt;&lt;code class=&quot;language-sql&quot; data-lang=&quot;sql&quot;&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;-- page 1
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;&lt;/span&gt;&lt;span style=&quot;color: #66d9ef&quot;&gt;SELECT&lt;/span&gt; &lt;span style=&quot;color: #f92672&quot;&gt;*&lt;/span&gt; &lt;span style=&quot;color: #66d9ef&quot;&gt;FROM&lt;/span&gt; orders &lt;span style=&quot;color: #66d9ef&quot;&gt;ORDER&lt;/span&gt; &lt;span style=&quot;color: #66d9ef&quot;&gt;BY&lt;/span&gt; created_at &lt;span style=&quot;color: #66d9ef&quot;&gt;LIMIT&lt;/span&gt; &lt;span style=&quot;color: #ae81ff&quot;&gt;10&lt;/span&gt; &lt;span style=&quot;color: #66d9ef&quot;&gt;OFFSET&lt;/span&gt; &lt;span style=&quot;color: #ae81ff&quot;&gt;0&lt;/span&gt;;
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;-- page 2
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;&lt;/span&gt;&lt;span style=&quot;color: #66d9ef&quot;&gt;SELECT&lt;/span&gt; &lt;span style=&quot;color: #f92672&quot;&gt;*&lt;/span&gt; &lt;span style=&quot;color: #66d9ef&quot;&gt;FROM&lt;/span&gt; orders &lt;span style=&quot;color: #66d9ef&quot;&gt;ORDER&lt;/span&gt; &lt;span style=&quot;color: #66d9ef&quot;&gt;BY&lt;/span&gt; created_at &lt;span style=&quot;color: #66d9ef&quot;&gt;LIMIT&lt;/span&gt; &lt;span style=&quot;color: #ae81ff&quot;&gt;10&lt;/span&gt; &lt;span style=&quot;color: #66d9ef&quot;&gt;OFFSET&lt;/span&gt; &lt;span style=&quot;color: #ae81ff&quot;&gt;10&lt;/span&gt;;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;p&gt;As long as every record has a unique timestamp, this goes fine. In our database, we always sort on date; even when another sort parameter is used, date is applied as the second sort field. The order is fixed, so “the first ten” on page 1 really are the first ten. The database counts off ten, and page 2 starts neatly at the eleventh. No overlap, no gaps.&lt;/p&gt;
&lt;p&gt;But what if several records share the same timestamp? Then the database has no idea which of them is “first”. To it, those records are completely equal. And because each page is a separate query, it may put those equal records in one order on page 1 and in a different order on page 2.&lt;/p&gt;
&lt;p&gt;The dangerous consequence is easy to guess. A record you just saw on page 1 can suddenly show up again in the query for page 2. And another record then falls exactly between the two pages and disappears from view entirely. Same data, same query, and yet a different result every time.&lt;/p&gt;
&lt;p&gt;For a test that checks page 1 first and then page 2, that is fatal. The content shifts under your feet, without anything in the data having changed.&lt;/p&gt;
&lt;h2 id=&quot;article-CjwWeJ5aCkJjhddAynWXfpSEPuo-why-test-data-triggers-this-so-quickly&quot;&gt;Why test data triggers this so quickly&lt;/h2&gt;
&lt;p&gt;In production this bug rarely occurs. Most of the time people work at a human pace: one record in the morning, the next half an hour later. Timestamps stay unique, the sort order holds, and nobody notices anything is off. But it is not impossible. When a team is working at full speed and multiple colleagues save records at nearly the same moment, the sort order can become unstable. The bug is there, it just does not get triggered often enough to raise an alarm.&lt;/p&gt;
&lt;p&gt;The automated tests told a different story and they did so before a single user ever touched the application. Because tests create records at machine speed, a dozen records land in the database within the same second. They all share the same sort value and the instability shows up immediately. The tests go red. Not every run, not reliably, just now and then. And that irregular pattern is exactly what makes you think of flaky tests first, rather than a real defect in the production code.&lt;/p&gt;
&lt;p&gt;In a test everything runs at top speed. No waiting user, no thinking time. In a loop you create ten orders, and they all sit within the same second:&lt;/p&gt;
&lt;div&gt;&lt;pre tabindex=&quot;-1&quot; style=&quot;color: #f8f8f2; background-color: #272822; -moz-tab-size: 4; -o-tab-size: 4; tab-size: 4&quot;&gt;&lt;code class=&quot;language-typescript&quot; data-lang=&quot;typescript&quot;&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;// Cypress: quickly set up ten records
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;&lt;/span&gt;&lt;span style=&quot;color: #66d9ef&quot;&gt;for&lt;/span&gt; (&lt;span style=&quot;color: #66d9ef&quot;&gt;let&lt;/span&gt; &lt;span style=&quot;color: #a6e22e&quot;&gt;i&lt;/span&gt; &lt;span style=&quot;color: #f92672&quot;&gt;=&lt;/span&gt; &lt;span style=&quot;color: #ae81ff&quot;&gt;0&lt;/span&gt;; &lt;span style=&quot;color: #a6e22e&quot;&gt;i&lt;/span&gt; &lt;span style=&quot;color: #f92672&quot;&gt;&amp;lt;&lt;/span&gt; &lt;span style=&quot;color: #ae81ff&quot;&gt;10&lt;/span&gt;; &lt;span style=&quot;color: #a6e22e&quot;&gt;i&lt;/span&gt;&lt;span style=&quot;color: #f92672&quot;&gt;++&lt;/span&gt;) {
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt; &lt;span style=&quot;color: #a6e22e&quot;&gt;cy&lt;/span&gt;.&lt;span style=&quot;color: #a6e22e&quot;&gt;request&lt;/span&gt;(&lt;span style=&quot;color: #e6db74&quot;&gt;&amp;apos;POST&amp;apos;&lt;/span&gt;, &lt;span style=&quot;color: #e6db74&quot;&gt;&amp;apos;/api/orders&amp;apos;&lt;/span&gt;, { &lt;span style=&quot;color: #a6e22e&quot;&gt;customer&lt;/span&gt;&lt;span style=&quot;color: #f92672&quot;&gt;:&lt;/span&gt; &lt;span style=&quot;color: #e6db74&quot;&gt;`customer-&lt;/span&gt;&lt;span style=&quot;color: #e6db74&quot;&gt;${&lt;/span&gt;&lt;span style=&quot;color: #a6e22e&quot;&gt;i&lt;/span&gt;&lt;span style=&quot;color: #e6db74&quot;&gt;}&lt;/span&gt;&lt;span style=&quot;color: #e6db74&quot;&gt;`&lt;/span&gt; });
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;}
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;p&gt;In MariaDB a &lt;code&gt;DATETIME&lt;/code&gt; column without fractional seconds stores time accurate to the second. If ten records are created within the same second, they all get the exact same value. Ten records, one sort value. The database has to guess who comes first, and that guess may turn out differently per page.&lt;/p&gt;
&lt;p&gt;But there is a scenario that is even worse: a &lt;code&gt;DATE&lt;/code&gt; column. MariaDB’s &lt;code&gt;DATE&lt;/code&gt; type stores only the year, month, and day, without any hours, minutes, or seconds. Every record created on the same calendar day gets the exact same sort value. In a test that creates ten records in a loop, they all land on today’s date. And this time the problem is not limited to your test environment. In production too, dozens of records can share the same date. The instability is then always there, not just when the test runs fast.&lt;/p&gt;
&lt;p&gt;That is why this is such a nasty bug. By hand you almost never click it loose, because you are too slow. But your automated test is lightning fast and provokes the problem on purpose. So your test is not the cause, it is the first thing fast enough to see the bug.&lt;/p&gt;
&lt;h2 id=&quot;article-CjwWeJ5aCkJjhddAynWXfpSEPuo-the-solution-a-tie-breaker&quot;&gt;The solution: a tie-breaker&lt;/h2&gt;
&lt;p&gt;&lt;/p&gt;&lt;figure&gt;

 
 &lt;div style=&quot;width: 200px&quot;&gt;
 &lt;svg xmlns=&quot;http://www.w3.org/2000/svg&quot; xml:space=&quot;preserve&quot; viewbox=&quot;0 0 1553.274 1667.336&quot;&gt;
 &lt;path d=&quot;M1517.641 1035.917c-32.116-47.078-253.464-539.345-256.705-568.486 70.139-10.601 114.865-68.304 115.125-128.165.207-47.633-45.524-62.622-65.021-41.503-12.995 14.072-4.814 37.253 8.364 38.248.083 22.068-11.088 28.351-27.779 44.672-16.184 15.826-72.378-4.123-93.164-12.907-32.879-13.941-62.867-32.93-102.314-56.879l.029.063c-46.478-27.522-67.176-35.704-89.923-43.522l-.003-.073c-85.179-24.578-122.322.526-147.645 14.817l.526.257-.786.099c-19.339 12.124-28.014 19.614-45.214 23.726l-7.049-2.934c-5.114-2.129-7.449-36.53-.522-51.676 12.737-27.849 55.114-32.165 63.065-81.473 6.526-39.108-15.422-67.146-43.947-93.632l.406-.202c-.429.213-9.952-9.362-10.87-10.254-4.857-4.723-8.251-10.733-12.046-16.28-14.108-20.622-28.67-15.518-26.334-15.729-25.487 2.302-29.411 25.915-38.951 34.412l-9.841 9.216.366.139c-12.823 11.975-34.196 30.473-41.405 61.973-8.44 35.793 6.238 61.675 34.322 84.128 19.361 15.479 36.403 23.899 30.822 67.251-2.389 18.555-7.595 17.137-18.495 12.969-54.042-20.67-88.358-103.158-286.719 13.454l.169.151c-7.367 4.458-7.723 4.434-16.371 10.121l-.178.753c-33.614 18.262-67.222 40.621-112.081 52.085-13.063 3.338-27.267 8.729-40.849 5.717-23.97-5.315-44.278-33.198-42.45-50.321-.029.269 7.657-3.749 8.368-4.434 15.937-15.367 2.089-49.576-30.515-42.32-70.264 15.644-29.805 167.681 80.686 177.059-1.796 15.462-147.7 364.238-255.237 564.562-3.387 8.857-8.082 12.048-5.226 20.634 3.878 11.658 21.855 22.181 33.312 26.404 1.722.051 1.684.048 1.684.048 17.702 68.837 69.831 131.404 115.903 162.096 63.917 42.578 158.721 42.549 199.904 35.323 76.89-13.073 183.395-79.768 213.216-168.979 4.029-12.053 4.142-12.169 9.119-25.497 19.816.12 28.934-22.508 31.732-26.432 1.027-13.324-3.908-22.195-17.348-26.343-38.727-86.528-47.08-66.453-131.195-278.653-95.908-242.801-68.079-216.707-109.232-294.783l9.979-5.638c-.408.306-.54 1.251-.532 1.704 15.497-8.43 41.985-38.183 83.893-61.665l-.089-.768c22.169-8.593 25.135-9.574 31.014-12.669 28.953-4.189 112.568-24.375 183.806 
90.456l-.095.987c6.046 12.079 24.675 59.169 27.582 70.357-30.541 3.454-31.726 32.504-33.528 61.798l.81-.412c.074 8.592 1.331 16.444 3.187 24.608l-.939.189c2.198 4.366 1.563 2.556 2.786 7.939l1.042-.337c5.417 10.757 13.152 21.39 21.304 30.879l2.464 8.385c5.114 17.403-16.212 28.449-18.958 59.928-2.842 32.586 12.327 45.786 11.039 63.013-1.725 23.072-11.376 29.673-2.032 56.606l-.204.371c2.618 5.215 4.48 10.851 6.617 16.279 2.57 6.528 6.947 10.103 6.988 17.941.032 6.119-2.57 11.968-4.309 17.75-25.589 85.07 37.502 114.076-77.834 327.768l-.155-.191c-22.704 43.108-35.935 59.301-17.643 118.531 16.517 53.486-2.239 53.065-55.099 98.918l.539.362c-3.347 3.293-87.872 97.469-90.494 100.362-3.337 3.681 14.785 18.736 17.381 20.398 15.981 10.234 25.463 9.65 74.336 27.196 171.514 61.561 356.491 43.707 498.076-22.945 13.274-6.545 22.82-14.827 22.001-29.318-.005-.087-11.454-9.345-12.624-10.573-.452-.475-38.733-43.585-38.733-44.152-39.377-45.554-34.41-37.849-51.966-53.875l-.266.241c-61.234-48.599-51.405-42.447-32.867-116.12l-.903.432c4.975-32.617-3.745-47.721-16.038-74.714l-.488.491c-7.453-14.039-10.055-17.299-18.065-34.565l-.196-.011c-39.322-83.117-49.211-111.914-60.024-159.141 0 0-1.261-11.367-1.766-17.284l-.178.016c-1.155-19.91-.825-29.227.231-46.461l-.268-.098c1.286-31.904 1.901-48.923-3.059-74.851l.071-.023c-.333-1.229-.087-.061-1.379-6.811l-.417.185c-2.81-10.368-5.924-19.217-4.032-27.859 1.327-6.064 4.824-17.663 10.178-21.443 7.004-16.292 7.467-31.823 2.911-48.752-10.039-37.307 27.111-56.981-4.45-113.042-9.198-16.337-7.721-16.403-5.323-32.928 2.171-1.689 3.61-2.705 5.187-5.324l.02.335 13.83-19.408c11.8-16.562 21.818-92.578-22.274-100.338l-1.062 1.104-.286-.238c4.159-12.942 21.715-57.299 23.463-61.487l-.221-.391c36.087-69.395 89.512-101.139 137.879-104.455 12.172-.834 13.07-1.203 38.155 1.698l-.055-.69.458.914c27.191 6.581 35.719 11.18 60.132 24.698l-.041.628c13.987 9.178 11.01 7.099 60.564 47.253l-1-2.035a135 135 0 0 0 17.289 10.085c-41.944 71.11-20.855 85.222-124.988 
332.351-82.988 197.97-74.833 150.244-116.074 241.324-13.428 4.224-18.287 12.95-17.273 25.551 2.42 4.073 11.286 27.087 31.814 27.127 21.119 57.143 9.38 39.414 29.079 68.282 39.059 57.241 99.617 100.04 160.679 118.49 74.952 22.647 169.891 9.023 214.813-17.345 61.822-36.288 113.207-96.66 133.76-172.367 9.08-1.247 33.738-15.549 35.331-29.12.37-3.165-.067-7.951-4.164-12.986M425.461 673.37c40.255 127.121 137.149 337.789 148.384 353.331-23.283 1.607-24.157 2.314-79.699.481-183.506-6.043-53.831 8.751-406.499-2.943 13.112-44.28 40.572-79.266 60.513-121.469 24.94-52.585 46.7-106.979 70.615-159.685 43.346-96.576 80.339-174.406 103.67-274.025 8.058-.172 15.796-.576 25.981-2.805 16.429 19.986 68.985 182.395 77.035 207.115m86.214-335.558c.911-1.105 1.633-2.261 2.132-3.472.032 1.015-.766 2.497-2.132 3.472m144.88 57.41a3.5 3.5 0 0 1-.313-.751c.184-.069.473-.112.789-.085a2.4 2.4 0 0 1-.476.836m86.248 33.905a2.5 2.5 0 0 1-.349-.847c.338-.091.433-.027.528.047-.014.26-.079.543-.179.8m172.685-50.426a2.1 2.1 0 0 1 .018-.618 2.8 2.8 0 0 1 1.065.489zm408.993 649.753c-72.427 3.992-145.272-3.182-192.079-2.755-65.254-.452-98.333 4.92-152.648.906 2.322-2.237.741-.898 3.67-3.331 4.56-11.776 3.385-14.999 17.477-41.286 21.256-37.447 90.841-201.128 118.549-282.5 76.122-223.835 68.395-211.672 85.934-234.088 8.379 1.913 16.952 3.076 25.594 3.51 25.87 109.88 60.247 174.716 148.973 377.786 54.433 122.893 71.078 126.669 85.972 178.125 0 0-52.889-.211-141.442 3.633&quot;&gt;&lt;/path&gt;
&lt;/svg&gt;

 &lt;/div&gt;
 

&lt;/figure&gt;

&lt;p&gt;A tie-breaker is a second sort field. It steps in the moment the main field has equal values. That way the database always keeps a fixed way to cut the knot.&lt;/p&gt;
&lt;p&gt;The best tie-breaker field is the &lt;code&gt;id&lt;/code&gt;. Every record has a unique id, so two records with the same timestamp still get a fixed order.&lt;/p&gt;
&lt;div&gt;&lt;pre tabindex=&quot;-1&quot; style=&quot;color: #f8f8f2; background-color: #272822; -moz-tab-size: 4; -o-tab-size: 4; tab-size: 4&quot;&gt;&lt;code class=&quot;language-sql&quot; data-lang=&quot;sql&quot;&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;-- without tie-breaker: order uncertain on equal timestamp
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;&lt;/span&gt;&lt;span style=&quot;color: #66d9ef&quot;&gt;ORDER&lt;/span&gt; &lt;span style=&quot;color: #66d9ef&quot;&gt;BY&lt;/span&gt; created_at
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;-- with tie-breaker: always the same order
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;&lt;/span&gt;&lt;span style=&quot;color: #66d9ef&quot;&gt;ORDER&lt;/span&gt; &lt;span style=&quot;color: #66d9ef&quot;&gt;BY&lt;/span&gt; created_at, id
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;p&gt;For the Java developers on your team: in Spring Data this is a small addition. The code was already sorting on &lt;code&gt;createdAt&lt;/code&gt;, but missed the second field. You just stick &lt;code&gt;id&lt;/code&gt; on the end:&lt;/p&gt;
&lt;div&gt;&lt;pre tabindex=&quot;-1&quot; style=&quot;color: #f8f8f2; background-color: #272822; -moz-tab-size: 4; -o-tab-size: 4; tab-size: 4&quot;&gt;&lt;code class=&quot;language-java&quot; data-lang=&quot;java&quot;&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;// Sort by createdAt (ASC), with id as tie-breaker&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;Sort sort &lt;span style=&quot;color: #f92672&quot;&gt;=&lt;/span&gt; Sort.&lt;span style=&quot;color: #a6e22e&quot;&gt;by&lt;/span&gt;(Sort.&lt;span style=&quot;color: #a6e22e&quot;&gt;Direction&lt;/span&gt;.&lt;span style=&quot;color: #a6e22e&quot;&gt;ASC&lt;/span&gt;, &lt;span style=&quot;color: #e6db74&quot;&gt;&amp;quot;createdAt&amp;quot;&lt;/span&gt;)
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt; .&lt;span style=&quot;color: #a6e22e&quot;&gt;and&lt;/span&gt;(Sort.&lt;span style=&quot;color: #a6e22e&quot;&gt;by&lt;/span&gt;(Sort.&lt;span style=&quot;color: #a6e22e&quot;&gt;Direction&lt;/span&gt;.&lt;span style=&quot;color: #a6e22e&quot;&gt;ASC&lt;/span&gt;, &lt;span style=&quot;color: #e6db74&quot;&gt;&amp;quot;id&amp;quot;&lt;/span&gt;));
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;List&lt;span style=&quot;color: #f92672&quot;&gt;&amp;lt;&lt;/span&gt;Order&lt;span style=&quot;color: #f92672&quot;&gt;&amp;gt;&lt;/span&gt; orders &lt;span style=&quot;color: #f92672&quot;&gt;=&lt;/span&gt; orderRepository.&lt;span style=&quot;color: #a6e22e&quot;&gt;findAll&lt;/span&gt;(sort);
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;p&gt;Working with pagination? Then you put that same sorting into a &lt;code&gt;Pageable&lt;/code&gt;. The tie-breaker then carries neatly across all pages:&lt;/p&gt;
&lt;div&gt;&lt;pre tabindex=&quot;-1&quot; style=&quot;color: #f8f8f2; background-color: #272822; -moz-tab-size: 4; -o-tab-size: 4; tab-size: 4&quot;&gt;&lt;code class=&quot;language-java&quot; data-lang=&quot;java&quot;&gt;&lt;span&gt;&lt;span&gt;Pageable pageable &lt;span style=&quot;color: #f92672&quot;&gt;=&lt;/span&gt; PageRequest.&lt;span style=&quot;color: #a6e22e&quot;&gt;of&lt;/span&gt;(
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt; 0, &lt;span style=&quot;color: #75715e&quot;&gt;// page number (0-based)&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt; 20, &lt;span style=&quot;color: #75715e&quot;&gt;// page size&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt; Sort.&lt;span style=&quot;color: #a6e22e&quot;&gt;by&lt;/span&gt;(&lt;span style=&quot;color: #e6db74&quot;&gt;&amp;quot;createdAt&amp;quot;&lt;/span&gt;).&lt;span style=&quot;color: #a6e22e&quot;&gt;ascending&lt;/span&gt;()
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt; .&lt;span style=&quot;color: #a6e22e&quot;&gt;and&lt;/span&gt;(Sort.&lt;span style=&quot;color: #a6e22e&quot;&gt;by&lt;/span&gt;(&lt;span style=&quot;color: #e6db74&quot;&gt;&amp;quot;id&amp;quot;&lt;/span&gt;).&lt;span style=&quot;color: #a6e22e&quot;&gt;ascending&lt;/span&gt;())
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;);
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;Page&lt;span style=&quot;color: #f92672&quot;&gt;&amp;lt;&lt;/span&gt;Order&lt;span style=&quot;color: #f92672&quot;&gt;&amp;gt;&lt;/span&gt; page &lt;span style=&quot;color: #f92672&quot;&gt;=&lt;/span&gt; orderRepository.&lt;span style=&quot;color: #a6e22e&quot;&gt;findAll&lt;/span&gt;(pageable);
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;p&gt;With that second field added the rule is: equal timestamp? Then the lowest id wins. Always the same record, in every query, on every page, in every test run.&lt;/p&gt;
&lt;h2 id=&quot;article-CjwWeJ5aCkJjhddAynWXfpSEPuo-what-you-can-do-as-a-tester&quot;&gt;What you can do as a tester&lt;/h2&gt;
&lt;p&gt;Do you see unstable tests around the order or the content of a list? These are strong questions for your development team:&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;“Do we also sort on id as a second field?”&lt;/strong&gt;
The key question. Is the answer no, and does the sorting happen on a timestamp in seconds? Then you have probably found your cause.&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;“Is the column a &lt;code&gt;DATE&lt;/code&gt;, a &lt;code&gt;DATETIME&lt;/code&gt;, or a &lt;code&gt;TIMESTAMP&lt;/code&gt;?”&lt;/strong&gt;
A &lt;code&gt;DATE&lt;/code&gt; column in MariaDB stores only the calendar day, so every record created on the same day is completely equal for sorting purposes. The instability then shows up in production too, not just in tests. A &lt;code&gt;DATETIME&lt;/code&gt; or &lt;code&gt;TIMESTAMP&lt;/code&gt; in seconds is already much better, but milliseconds give you the most breathing room.&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;“Can we create the test data with a fixed order?”&lt;/strong&gt;
Sometimes it helps to deliberately keep control over the order in your test, instead of relying on the timestamp.&lt;/p&gt;
&lt;h2 id=&quot;article-CjwWeJ5aCkJjhddAynWXfpSEPuo-why-you-should-not-fix-your-tests-if-you-think-it-it-flaky&quot;&gt;Why you should not fix your tests if you think they are flaky&lt;/h2&gt;
&lt;p&gt;When a test goes red now and then, the temptation is great to loosen it until it goes green again. For instance by no longer checking the exact order, but only whether a record sits somewhere on the page:&lt;/p&gt;
&lt;div&gt;&lt;pre tabindex=&quot;-1&quot; style=&quot;color: #f8f8f2; background-color: #272822; -moz-tab-size: 4; -o-tab-size: 4; tab-size: 4&quot;&gt;&lt;code class=&quot;language-typescript&quot; data-lang=&quot;typescript&quot;&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;// strict: relies on the exact order
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;&lt;/span&gt;&lt;span style=&quot;color: #a6e22e&quot;&gt;cy&lt;/span&gt;.&lt;span style=&quot;color: #66d9ef&quot;&gt;get&lt;/span&gt;(&lt;span style=&quot;color: #e6db74&quot;&gt;&amp;apos;[data-test=row]&amp;apos;&lt;/span&gt;).&lt;span style=&quot;color: #a6e22e&quot;&gt;first&lt;/span&gt;().&lt;span style=&quot;color: #a6e22e&quot;&gt;should&lt;/span&gt;(&lt;span style=&quot;color: #e6db74&quot;&gt;&amp;apos;contain&amp;apos;&lt;/span&gt;, &lt;span style=&quot;color: #e6db74&quot;&gt;&amp;apos;customer-0&amp;apos;&lt;/span&gt;);
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;// looser: only checks the record is present somewhere
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span&gt;&lt;span&gt;&lt;span style=&quot;color: #75715e&quot;&gt;&lt;/span&gt;&lt;span style=&quot;color: #a6e22e&quot;&gt;cy&lt;/span&gt;.&lt;span style=&quot;color: #66d9ef&quot;&gt;get&lt;/span&gt;(&lt;span style=&quot;color: #e6db74&quot;&gt;&amp;apos;[data-test=row]&amp;apos;&lt;/span&gt;).&lt;span style=&quot;color: #a6e22e&quot;&gt;should&lt;/span&gt;(&lt;span style=&quot;color: #e6db74&quot;&gt;&amp;apos;contain&amp;apos;&lt;/span&gt;, &lt;span style=&quot;color: #e6db74&quot;&gt;&amp;apos;customer-0&amp;apos;&lt;/span&gt;);
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;p&gt;It looks like a quick fix, but think through what is really happening here. Suppose the bug strikes while your test is on page 2. A record that actually belongs on page 1 now shows up again on page 2. And a record that did belong on page 2 has disappeared entirely. So your test sees the wrong content, and it rightly fails.&lt;/p&gt;
&lt;p&gt;That is not noise. That is exactly what a real user gets in front of them too. They click to the next page and see an order they just saw, while another order shows up nowhere. The data seems to jump around.&lt;/p&gt;
&lt;p&gt;In other words: your test is not unstable, it is doing its job perfectly. It catches a real bug. If you loosen your assertions to get rid of the red, you hide a problem that genuinely affects your users. The bug is not in your test, and so it should not be fixed there.&lt;/p&gt;
&lt;p&gt;The right reflex is therefore not “how do I get my test green”, but “how do I prove this is a real bug”. Keep your strict check. Try to reproduce the pattern deliberately by creating records in quick succession and then paging through them. And pass it on to your team, with the missing tie-breaker as the concrete solution.&lt;/p&gt;
&lt;p&gt;A small change in the code, with a big impact on the stability of your test suite.&lt;/p&gt;</summary><author><name></name></author><source gr:stream-id="feed/https://feeds.feedburner.com/bartvanherck"><id>tag:google.com,2005:reader/feed/https://feeds.feedburner.com/bartvanherck</id><title type="html">The Testing Pirate</title><link rel="alternate" href="https://www.bartvanherck.com/" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781539338000"><id gr:original-id="https://angryweasel.substack.com/p/reading-the-buzz-getting-past-a-quiet">tag:google.com,2005:reader/item/00000ad700000067</id><title type="html">Reading the Buzz: Getting Past a Quiet No</title><published>2026-06-15T16:02:18Z</published><updated>2026-06-15T16:02:18Z</updated><link rel="alternate" href="https://angryweasel.substack.com/p/reading-the-buzz-getting-past-a-quiet" type="text/html"></link><summary type="html">&lt;p&gt;&lt;em&gt;This week’s post is authored by Colette Molteni. Colette and I connected after I wrote about what &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://angryweasel.substack.com/p/zen-spy&quot;&gt;six months on the PCT taught me about presence and clarity&lt;/a&gt;. Colette writes Empathy Elevated, a publication about the skills that matter more as technical work accelerates - judgment, emotional intelligence, and clear communication. &lt;/em&gt;&lt;/p&gt;&lt;p&gt;&lt;em&gt;This piece is about something I’ve watched happen in hundreds of meetings: the silent ‘no’ that everyone in the room can feel and nobody names. 
I think you’ll find it as useful as I did.&lt;/em&gt;&lt;/p&gt;&lt;p&gt;&lt;em&gt;Subscribe to Empathy Elevated &lt;strong&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://empathyelevated.substack.com/&quot;&gt;here&lt;/a&gt;&lt;/strong&gt;.&lt;/em&gt;&lt;/p&gt;&lt;div&gt;&lt;hr&gt;&lt;/div&gt;&lt;p&gt;You are sitting in sprint 2, and this Agile project is beginning to look like a monstrosity; instead of a nice, orderly project you can close out in JIRA at the end of the quarter.&lt;/p&gt;&lt;p&gt;The consultant has their screen up, with the user stories. The stories should incrementally radically change the system they have been using for a decade. It’s not a repeat of what we have been doing manually in an online tool, but rather a rethink of how it is approached. It is a reimagining of the total build-out for this phase 1 deployment.&lt;/p&gt;&lt;p&gt;One of your team members in the upper-right corner of your Zoom panel is clearly squirming, even though their face is expressionless.&lt;/p&gt;&lt;p&gt;Another in the lower center of your screen has wandering eyes. On camera, but not present.&lt;/p&gt;&lt;p&gt;You don’t let it distract you. The consultant clearly does not either. They end the review lightning-fast, then abruptly ask, “Any questions?”&lt;/p&gt;&lt;p&gt;There is a long 5-second silence, followed by some light head nods and one “yes” from one of the senior leaders, said so softly it sounds like it’s at the end of a tunnel.&lt;/p&gt;&lt;p&gt;Your manager, a senior leader who is more technically oriented, remarks, “I will need to follow up with you later.”&lt;/p&gt;&lt;p&gt;The consultant is perky and exclaims, “Okay, great!”&lt;/p&gt;&lt;h3&gt;​Team Buzz Is the Room Talking Without Talking&lt;/h3&gt;&lt;p&gt;The consultant heard a “yes,” but it was really a “no” camouflaged as one. They bulldozed through the presentation of user stories without a pause. 
Your side failed to stop the charge forward.&lt;/p&gt;&lt;p&gt;The metrics were fine. The stories on the screen were written cleanly, and nothing was technically wrong. You could see how it would mostly flow right.&lt;/p&gt;&lt;p&gt;But most of the room was not technically minded.&lt;/p&gt;&lt;p&gt;Most of the room had their fight-or-flight response activated at the prospect of a change they did not fully understand.&lt;/p&gt;&lt;p&gt;Without the option to fight, the fight became a whimper with soft-spoken words, and an almost out-of-body experience of trying to squirm away or divert focus elsewhere. It was a charade of pretense, mistaken for honest agreement.&lt;/p&gt;&lt;p&gt;The room said no, just not out loud.&lt;/p&gt;&lt;p&gt;The room was buzzing, with not much to be audible.&lt;/p&gt;&lt;p&gt;I have lived this, as the technically minded stakeholder observing the room, but missing the signal.&lt;/p&gt;&lt;p&gt;I could have put a pause. I could have set the time to actually follow up and avoid a repeat. But no, I missed it, so there was nothing to act on.&lt;/p&gt;&lt;p&gt;I want you to spot the signal and have the moves to get past it.&lt;/p&gt;&lt;h3&gt;​The Hesitant Yes Costs More Than an Honest No&lt;/h3&gt;&lt;p&gt;The hesitant yes is costly, stumbling, and easy to miss&lt;/p&gt;&lt;p&gt;It looks like eyes wandering or another excuse to be off camera. It can be the silence after “any questions?” that carries on, not out of shyness but rather a recalibration of the minds of the observers. 
The “I’ll get back to you” with no timing or deadlines intact appears like progress, but it’s a dead end.&lt;/p&gt;&lt;p&gt;At its face, an innovation that is better than the current system, more efficient, user-friendly, and easily distributed, as was the case of the end product discussed by the consultant, should be welcomed.&lt;/p&gt;&lt;p&gt;But the mind does not latch on to facts; it often gravitates towards feelings, what is familiar.&lt;/p&gt;&lt;p&gt;If buy-in is skipped, it does not disappear.&lt;/p&gt;&lt;p&gt;It festers.&lt;/p&gt;&lt;p&gt;It resurfaces downstream, louder, like thunder that breaks the silence on a summer evening.&lt;/p&gt;&lt;p&gt;You cannot ignore it then. It is directly in your earshot and in your line of vision.&lt;/p&gt;&lt;p&gt;It is faster now to resolve than to postpone.&lt;/p&gt;&lt;p&gt;Later, it is reworked, and the follow-up continues in perpetuity.&lt;/p&gt;&lt;p&gt;But it can be mitigated early.&lt;/p&gt;&lt;p&gt;The hesitant yes is a pass, and the honest no is the way.&lt;/p&gt;&lt;h3&gt;​Unpacking the No: A Sequence That Works&lt;/h3&gt;&lt;p&gt;You know it, that the soft Yes that’s really a No is not ideal. But how do you get past it?&lt;/p&gt;&lt;p&gt;You can make the moves when the rest of the room remains quiet and accepts the status quo, even though the buzz is contrary. You can dig into why that is, get to the roots, and pull them out.&lt;/p&gt;&lt;p&gt;That’s the only way a project like this one flourishes. Letting honest no’s stay silent is comfortable now, but costly later. Keeps the garden, drops the rhetorical question stacked on top of it.&lt;/p&gt;&lt;p&gt;Four moves - where we go:&lt;/p&gt;&lt;p&gt;Pause - do not read silence as consent. It is easy to do this, but it is a trap. Stop the charge forward if this happens. It is usually the first signal that the room’s buzz is not in consensus, and blockers will pop up like whack-a-mole later. Name it. Yes, out loud. 
Ask plainly whether there are questions or feedback, and mean it as a real question. Don’t try to fill the silence yourself, as that puts your words in the mouth of someone else. The discipline here is not filling the silence with a yes you conjure up and force.&lt;/p&gt;&lt;p&gt;Ask - it’s not about asking, “Why are you hesitant?” That demands a defense, even for the most emotionally regulated person. A gentler way, as usual, can be as simple as “Can you unpack that a little for me?” It is neutral and asking for a description, not naming a negatively associated emotion. Hand over the floor. Let the other person or people collect their thoughts.&lt;/p&gt;&lt;p&gt;Listen &amp;amp; make them feel heard - is about real listening, not waiting to respond. Lean in your posture a bit. Keep your eyes directed towards them. Don’t worry, you do not need to stare into their eyes like you are reading their soul. No, just make sure you are not distracted, keeping your eyes on your Slack notification on your left monitor. Listen to learn, and let them know, at a deep subconscious level, that they are being heard.&lt;/p&gt;&lt;p&gt;Reflect it back and decide - it’s about naming the real objection out loud. They hear you got it right. After listening intently, you recap for them what you think you heard. It helps you, but it is also a simple acknowledgment. Now that you have it out in the open, you do not have to solve it in the next 59 seconds before your next meeting at the top of the hour. Actually, allowing a little bit of breathing room, even if it is later that day, is better. It settles the nerves. Then set dedicated time to solve it together with them, not at them. This makes move 4 actually deliver the collaboration promise it’s named for.&lt;/p&gt;&lt;h3&gt;A Hesitant Yes Is Just a Slower No&lt;/h3&gt;&lt;p&gt;A hesitant yes feels like progress. It lets the meeting end on time and the deck advance to the next slide. But it doesn’t close anything — it defers it. 
The objection you didn’t surface in sprint 2 doesn’t disappear. It waits. And it comes back later as rework, as a stalled rollout, as a follow-up thread that never resolves.&lt;/p&gt;&lt;p&gt;The honest no is uncomfortable in the moment. The hesitant yes is uncomfortable for a quarter.&lt;/p&gt;&lt;p&gt;Pause when the room goes quiet. Ask in a way that invites a description, not a defense. Listen to learn, not to reply. Reflect the real objection back, and set dedicated time to resolve it together. None of these moves requires you to solve the problem in the room. They only require you to name it before it goes underground.&lt;/p&gt;&lt;p&gt;The buzz is the room talking. Your job is to give it permission to be heard.&lt;/p&gt;&lt;p style=&quot;clear: both&quot;&gt;&lt;/p&gt;&lt;p data-bqr-info=&quot;attachment&quot;&gt;&lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://substack-post-media.s3.amazonaws.com/public/images/6307b755-e6e0-43d7-b888-6963ace8b799_1254x1254.png&quot;&gt;&lt;/p&gt;</summary><author><name>Colette Molteni</name></author><source gr:stream-id="feed/https://angryweasel.substack.com/feed"><id>tag:google.com,2005:reader/feed/https://angryweasel.substack.com/feed</id><title type="html">The Weasel Speaks</title><link rel="alternate" href="https://angryweasel.substack.com" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781532497105"><id gr:original-id="https://wickstrom.tech/2021-03-01-first-winter-clearing-weeds-planting-trees.html">tag:google.com,2005:reader/item/0000065300000045</id><title type="html">The First Winter: Clearing Weeds and Planting Trees</title><published>2026-06-15T14:08:17Z</published><updated>2026-06-15T14:08:17Z</updated><link rel="alternate" href="https://wickstrom.tech/2021-03-01-first-winter-clearing-weeds-planting-trees.html" type="text/html"></link><summary type="html">&lt;p&gt;In October 2020, we got our new house. Built 2013, it’s not only new to us, and this is a major upside. 
While I definitely can appreciate many aspects of old houses, the comfort of a modern house fits us first-time parents very well.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/house.jpg&quot; alt=&quot;I carelessly snapped this picture while driving up to the house for the first time after we got the keys.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;I carelessly snapped this picture while driving up to the house for the first time after we got the keys.&lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;The site is pretty large for a regular house, at about 10 500 m². Eight years ago when the house was built, all the existing old trees were cut down. The weeds took over, as they often do after deforestation if nothing else is planted. The two dominant species were blackberries (I’m not sure which kind) and “Cytisus scoparius.” Birch trees also managed to cover some ground.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/fall-above.jpg&quot; alt=&quot;From upstairs, you could get a decent overview of the chaos. Birch trees in lovely yellow and green, racing with the weeds for sunlight.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;From upstairs, you could get a decent overview of the chaos. Birch trees in lovely yellow and green, racing with the weeds for sunlight.&lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;It didn’t take long before we made other plans for this place. Specifically, I decided to try restoring the forest, in a way that makes sense in this area. That means a majority of oak and beech, some sections of birch, and a sprinkle of cherry, pine, and spruce.&lt;/p&gt; &lt;p&gt;Of course, I’ll be long gone before the grand result of this project. Future generations can hopefully bask in the glory of its success. 
In the meantime, I’ll enjoy watching these trees grow up.&lt;/p&gt; &lt;p&gt;Please note that I’m a clumsy hobbyist. I do get solid advice from my sister who is a trained gardener, and from various other sources, but you shouldn’t trust anything I say or do.&lt;/p&gt; &lt;h2 id=&quot;article-dWgmnHtR_gdVfpuN2UVHcZN3ONQ-autumn-colors-all-around&quot;&gt;Autumn Colors, All Around&lt;/h2&gt; &lt;p&gt;When we arrived, nature was showing its autumn palette of yellow, red, and brown. The following photos are from walks in the area surrounding our house.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/fall-road.jpg&quot; alt=&quot;The long straight path, where the dark pines meet the majestic beech forest creating a dramatic contrast, is one of my favorite spots nearby.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;The long straight path, where the dark pines meet the majestic beech forest creating a dramatic contrast, is one of my favorite spots nearby.&lt;/figcaption&gt; &lt;/figure&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/fall-road-2.jpg&quot; alt=&quot;Down by the sea the oak trees grow tall. This area is a protected walking area, one of the gems around here.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;Down by the sea the oak trees grow tall. 
This area is a protected walking area, one of the gems around here.&lt;/figcaption&gt; &lt;/figure&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/fall-field.jpg&quot; alt=&quot;Cows graze here by the summer, leaving large plains of grass framed by old stone walls and gnarly trees.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;Cows graze here by the summer, leaving large plains of grass framed by old stone walls and gnarly trees.&lt;/figcaption&gt; &lt;/figure&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/fall-ocean.jpg&quot; alt=&quot;By the sea there’s a one-of-a-kind sand volcano. I usually go down here and watch “Stenshuvud” in the far distance.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;By the sea there’s a one-of-a-kind sand volcano. I usually go down here and watch “Stenshuvud” in the far distance.&lt;/figcaption&gt; &lt;/figure&gt; &lt;h2 id=&quot;article-dWgmnHtR_gdVfpuN2UVHcZN3ONQ-time-to-work&quot;&gt;Time To Work&lt;/h2&gt; &lt;p&gt;Alright, enough about the surrounding nature. Around mid November I started executing on our plan. At first, I had no power tools or protective gear. I think that lasted about a day, after which I came in covered in scars.&lt;/p&gt; &lt;p&gt;The blackberries were &lt;em&gt;everywhere&lt;/em&gt;. I mean it. They often grew up to a height of two meters by growing inside and across the Cytisus scoparius branches, creating what I can only describe as a jungle of barbed wire.&lt;/p&gt; &lt;p&gt;These two have become my mortal enemies, the main targets of this operation. Blackberries is weed that is notoriously hard to get rid of, and Cytisus scoparius is classified as invasive in some countries, though not in Sweden.&lt;/p&gt; &lt;p&gt;I cleared out a very small portion, including some very promising oaklings. 
While scarred, my motivation was now on top. I ordered a clearing saw and a full set of protective gear.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/fall-first-oak.jpg&quot; alt=&quot;One of the first young oak trees I dug out.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;One of the first young oak trees I dug out.&lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;While waiting for the delivery, I collected acorns down by the road and planted them in old planting boxes. Finally, I covered them in a thick layer of oak leaves, which sadly didn’t stop birds from enjoying the smörgåsbord. We’ll soon see how many of the 200-300 acorns make it. My hopes are low on this one.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/acorns-planted.jpg&quot; alt=&quot;Planting acorns during autumn, hoping for small oaklings to pop up later this spring.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;Planting acorns during autumn, hoping for small oaklings to pop up later this spring.&lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;When the saw arrived, I did a first pass on the area closest to the house. 
It took a while until I found a good technique and which blade to use.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/first-clearing-1.jpg&quot; alt=&quot;A few meters into the wilderness, west side of the house.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;A few meters into the wilderness, west side of the house.&lt;/figcaption&gt; &lt;/figure&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/first-clearing-2.jpg&quot; alt=&quot;The area around the sauna, north side of the house.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;The area around the sauna, north side of the house.&lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;I soon realized that a more efficient workflow would be to do it in two separate phases:&lt;/p&gt; &lt;ol type=&quot;1&quot;&gt; &lt;li&gt;Get rid of as much blackberry as possible&lt;/li&gt; &lt;li&gt;Cut down all Cytisus scoparius and other excess trees&lt;/li&gt; &lt;/ol&gt; &lt;p&gt;And so it went.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/second-clearing-panorama.jpg&quot; alt=&quot;Most of the blackberries cleared, up to the birch copses in the back.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;Most of the blackberries cleared, up to the birch copses in the back.&lt;/figcaption&gt; &lt;/figure&gt;  &lt;p&gt;Finally, the north side of the road was cleared. 
There remains a bunch of birch trees to be cut, but that can wait a bit.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/cleared-side.jpg&quot; alt=&quot;North side fully cleared, here in a surprisingly colorful winter sunset.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;North side fully cleared, here in a surprisingly colorful winter sunset.&lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;At this point, I had some space to work with. There’s a road construction planned at the corner of our lot, right through a little pond of self-seeded oaklings. Instead of them getting covered by gravel, I decided to move them to the newly cleared grounds. The first batch was around 25-30 trees, I think.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/planted-oak-2.jpg&quot; alt=&quot;One of many oaklings, with a new place to grow.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;One of many oaklings, with a new place to grow.&lt;/figcaption&gt; &lt;/figure&gt; &lt;h2 id=&quot;article-dWgmnHtR_gdVfpuN2UVHcZN3ONQ-the-other-side&quot;&gt;The Other Side&lt;/h2&gt; &lt;p&gt;Awkwardly, the south side of the road remained uncleared. There, the Cytisus scoparius grew even taller and more dense.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/clearing-other-side.jpg&quot; alt=&quot;A first round cleared some of the area behind the multi-sport court.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;A first round cleared some of the area behind the multi-sport court.&lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;Speaking of the multi-sport court – it’s perhaps not the most attractive part of this place, in my opinion. 
But I’m guessing our toddler might have different opinions on this matter, and so it could come in handy.&lt;/p&gt; &lt;p&gt;An audiobook and some sweat later, the south side was cleared.&lt;/p&gt; &lt;figure&gt; &lt;div&gt; &lt;p&gt;&lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/cleared-other-side-panorama.jpg&quot;&gt;&lt;/p&gt; &lt;/div&gt; &lt;figcaption&gt; A panorama taken from the middle of the scene, just as I was done clearing. &lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;We got help from relatives to pile up the remaining branches. There’s a little bit left to do but the main chunk of work is done.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/winter-piles.jpg&quot; alt=&quot;Two mountains of waste, soon ready to be picked up by truck. I dare not start huge bonfires here.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;Two mountains of waste, soon ready to be picked up by truck. I dare not start huge bonfires here.&lt;/figcaption&gt; &lt;/figure&gt; &lt;h2 id=&quot;article-dWgmnHtR_gdVfpuN2UVHcZN3ONQ-planting-oak-and-beech&quot;&gt;Planting Oak and Beech&lt;/h2&gt; &lt;p&gt;In a handful of spots there grew small oak and beech plants that needed moving. Either they grew too densely, below larger trees where they’d only get shade, or in areas that needed to be kept clear (sides of the road, for example.) I started moving them into newly cleared areas to get a nicer spread of trees across the entire area.&lt;/p&gt; &lt;p&gt;I’ve replanted these trees with marker sticks. This makes it easy to spot them. 
Also, I can support the plants by tying them to the stick, and use them to fix the net cages.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/winter-beech-tree.jpg&quot; alt=&quot;Beech trees have been harder to find on our grounds. I’ll see if I can get some from the neighbors, or else I’ll buy at a nursery.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;Beech trees have been harder to find on our grounds. I’ll see if I can get some from the neighbors, or else I’ll buy at a nursery.&lt;/figcaption&gt; &lt;/figure&gt; &lt;h2 id=&quot;article-dWgmnHtR_gdVfpuN2UVHcZN3ONQ-pruning&quot;&gt;Pruning&lt;/h2&gt; &lt;p&gt;Even with the fierce competition going on after the deforestation, a bunch of oak trees managed to break through. There are some growing 4-5 m tall, already. Unfortunately they’ve grown very close to other plants and got intertwined, so I’ve spent some time pruning.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/winter-messy-oak-1-before.jpg&quot;&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/winter-messy-oak-1-after.jpg&quot;&gt; &lt;figcaption&gt; Probably the largest of the post-deforestation oak trees, this one got liberated from a crowd of four. I’m very happy with this result! &lt;/figcaption&gt; &lt;/figure&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/winter-messy-oak-2-before.jpg&quot;&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/winter-messy-oak-2-after.jpg&quot;&gt; &lt;figcaption&gt; Even messier, this bunch counted almost ten separate trees. I picked out one and cut down the others. 
It’s shaped strangely, but it might find its way. &lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;Pruning trees like these is usually done between July and September, but I wanted to give these a full season of unhindered growth. Hopefully I won’t regret that decision.&lt;/p&gt; &lt;h2 id=&quot;article-dWgmnHtR_gdVfpuN2UVHcZN3ONQ-springtime&quot;&gt;Springtime!&lt;/h2&gt; &lt;p&gt;Last week, spring was declared in southern Sweden. Finally! I spent the weekend caging in small trees to (hopefully) protect them from wildlife, and to give them a more focused upwards growth. I’ve used cable ties to straighten and support the smaller trees, but I’m not sure if that’s a bad idea. Maybe I need to swap them for some softer rope.&lt;/p&gt; &lt;p&gt;Some of the larger trees had grown a bit sideways due to competition, so I bought heavy piles and used them for support.&lt;/p&gt; &lt;p&gt;And that’s where I am now, eagerly awaiting the warmth of spring and summer, and the blossoming of trees and flowers. We really don’t know what might show up!&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/spring-caging-trees.jpg&quot;&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/spring-supporting-oak.jpg&quot;&gt; &lt;figcaption&gt; On the left, a net cage fitted onto a marker stick. On the right, a larger tree now supported by a pile, hopefully making it grow straight and tall. &lt;/figcaption&gt; &lt;/figure&gt; &lt;p&gt;If you’ve read this far, I hope you have enjoyed the first part of our journey. If so, let me know, and I might post more pictures and words on this subject. 
You can also follow me on Twitter, where I nowadays torture my followers with both tech and gardening tweets.&lt;/p&gt; &lt;figure&gt; &lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://wickstrom.tech/assets/clearing-weeds-planting-trees/spring-house.jpg&quot; alt=&quot;Our beloved house, seen from the midst of newly planted trees on the slope.&quot;&gt; &lt;figcaption aria-hidden=&quot;true&quot;&gt;Our beloved house, seen from the midst of newly planted trees on the slope.&lt;/figcaption&gt;&lt;/figure&gt;</summary><author><name>Oskar Wickström</name></author><source gr:stream-id="feed/https://wickstrom.tech/feed.xml"><id>tag:google.com,2005:reader/feed/https://wickstrom.tech/feed.xml</id><title type="html">Oskar Wickström</title><link rel="alternate" href="https://wickstrom.tech/" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781514000000"><id gr:original-id="https://karlosmid.com/2026/06/fine-tuning-to-follow-instructions">tag:google.com,2005:reader/item/00000563000001da</id><category term="llm-from-scratch"></category><category term="gpt"></category><category term="tutorials"></category><category term="llm"></category><category term="workbook"></category><title type="html">Fine-tuning to follow instructions</title><published>2026-06-15T09:00:00Z</published><updated>2026-06-15T09:00:00Z</updated><link rel="alternate" href="https://karlosmid.com/2026/06/fine-tuning-to-follow-instructions/" type="text/html"></link><summary type="html">&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-tldr&quot;&gt;TL;DR&lt;/h2&gt;

&lt;p&gt;This post fine-tunes a pretrained GPT-2 355M model on instruction data in Elixir. It prepares an Alpaca-style dataset, builds custom batching and target masking, trains the model for two epochs, saves checkpoints and loss metrics, extracts test responses, and evaluates them with a local Ollama model. The fine-tuned Alpaca-style model reaches an average Ollama score of about 50, while the Phi-3 prompt style and instruction/input masking variants perform slightly worse in these experiments.&lt;/p&gt;

&lt;p&gt;This post follows &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://karlosmid.com/fine-tuning-for-classification/&quot;&gt;Fine-tuning for classification&lt;/a&gt; and uses the source code from my &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://github.com/karlosmid/llm-from-scratch&quot;&gt;GitHub repository&lt;/a&gt;.&lt;/p&gt;

&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-71-introduction-to-instruction-fine-tuning&quot;&gt;7.1 Introduction to instruction fine-tuning&lt;/h2&gt;

&lt;p&gt;What does instruction fine-tuning mean? Pretraining teaches an LLM how to continue text from the input by predicting the most probable next token. With that objective, an LLM can finish sentences or create paragraphs. After instruction fine-tuning, the LLM can learn to follow commands, such as translating text into Croatian or correcting English grammar in a given input. As with classification, we need to go through several steps: download and prepare the dataset, organize it into batches, create data loaders, load a pretrained model, instruction fine-tune that model, inspect the modeling loss, extract responses, perform qualitative evaluation, and score the responses.&lt;/p&gt;

&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-72-preparing-a-dataset-for-supervised-instruction-fine-tuning&quot;&gt;7.2 Preparing a dataset for supervised instruction fine-tuning&lt;/h2&gt;

&lt;p&gt;The first step is to get the training data and prepare it for instruction training with our pretrained OpenAI GPT-2 model. We are extending the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;FineTuneDataLoader&lt;/code&gt; module with a function that downloads and saves instruction training data in JSON format. The instruction training data was prepared by Sebastian, the book author:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;@instruction_data_url&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch07/01_main-chapter-code/instruction-data.json&quot;&gt;https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch07/01_main-chapter-code/instruction-data.json&lt;/a&gt;&amp;quot;&lt;/span&gt;

&lt;span&gt;@type&lt;/span&gt; &lt;span&gt;instruction_record&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;%{&lt;/span&gt;
          &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;()&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
        &lt;span&gt;}&lt;/span&gt;

  &lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Downloads and loads the instruction fine-tuning dataset.

  If `file_path` already exists, the local file is reused. Otherwise the JSON
  file is downloaded from `url`, saved to `file_path`, and then decoded.

  ## Input Parameters

    * `file_path` - local JSON cache path. Defaults to
      `&amp;quot;instruction-data.json&amp;quot;`.
    * `url` - URL of the instruction dataset JSON. Defaults to the chapter 7
      instruction-data file from the LLMs-from-scratch repository.

  ## Output

  Returns the decoded JSON as a list of maps with string keys. Each decoded
  instruction map contains:

    * `&amp;quot;instruction&amp;quot;` - the task description or question.
    * `&amp;quot;input&amp;quot;` - optional context for the task. This is an empty string when
      no extra input is provided.
    * `&amp;quot;output&amp;quot;` - the expected response used as the supervised fine-tuning
      target.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;download_and_load_instructions_file&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()]&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;download_and_load_instructions_file&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;file_path&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;@instruction_data_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;url&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;@instruction_data_url&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;unless&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
      &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;mkdir_p!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;dirname&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;))&lt;/span&gt;
      &lt;span&gt;download_file!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;url&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;

    &lt;span&gt;file_path&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;read!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Jason&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;decode!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The next function we need formats the training data into the Alpaca prompt format that we feed into our LLM as the training dataset:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;  &lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Formats an instruction dataset entry as an Alpaca-style model input.

  This mirrors the chapter 7 prompt template:

      Below is an instruction that describes a task. Write a response that
      appropriately completes the request.

      ### Instruction:
      ...

      ### Input:
      ...

  The input section is included only when the entry&amp;apos;s `&amp;quot;input&amp;quot;` field is not an
  empty string.

  ## Input Parameters

    * `entry` - map containing string keys `&amp;quot;instruction&amp;quot;` and `&amp;quot;input&amp;quot;`, such
      as records returned by `download_and_load_instructions_file/2`.

  ## Output

  Returns the formatted prompt string without the desired response.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(%{&lt;/span&gt;&lt;span&gt;&amp;quot;instruction&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;instruction&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;input&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;input&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;
      &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;instruction&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;input&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;instruction_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;&amp;quot;Below is an instruction that describes a task. &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
        &lt;span&gt;&amp;quot;Write a response that appropriately completes the request.&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
        &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Instruction:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;instruction&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;

    &lt;span&gt;input_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;if&lt;/span&gt; &lt;span&gt;input&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
        &lt;span&gt;&amp;quot;&amp;quot;&lt;/span&gt;
      &lt;span&gt;else&lt;/span&gt;
        &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Input:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;input&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;
      &lt;span&gt;end&lt;/span&gt;

    &lt;span&gt;instruction_text&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt; &lt;span&gt;input_text&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We split the input data into training, validation, and test sets for the same reason as in classification training. Here is the test that uses those two new functions:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;  &lt;span&gt;defmodule&lt;/span&gt; &lt;span&gt;LlmFromScratch7Test&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
  &lt;span&gt;use&lt;/span&gt; &lt;span&gt;ExUnit&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;Case&lt;/span&gt;

  &lt;span&gt;alias&lt;/span&gt; &lt;span&gt;LlmScratch&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;FineTuneDataLoader&lt;/span&gt;

  &lt;span&gt;@instruction_data_url&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch07/01_main-chapter-code/instruction-data.json&quot;&gt;https://raw.githubusercontent.com/rasbt/LLMs-from-scratch/main/ch07/01_main-chapter-code/instruction-data.json&lt;/a&gt;&amp;quot;&lt;/span&gt;

  &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:download&lt;/span&gt;
  &lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.1 downloads and loads instruction data&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;file_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tmp_dir!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;llm_scratch_instruction_data&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;instruction-data.json&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;download_and_load_instructions_file&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;@instruction_data_url&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;1_100&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;at&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;%{&lt;/span&gt;
             &lt;span&gt;&amp;quot;instruction&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;&amp;quot;Identify the correct spelling of the following word.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
             &lt;span&gt;&amp;quot;input&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;&amp;quot;Ocassion&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
             &lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;&amp;quot;The correct spelling is &amp;apos;Occasion.&amp;apos;&amp;quot;&lt;/span&gt;
           &lt;span&gt;}&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;at&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;999&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;%{&lt;/span&gt;
             &lt;span&gt;&amp;quot;instruction&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;&amp;quot;What is an antonym of &amp;apos;complicated&amp;apos;?&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
             &lt;span&gt;&amp;quot;input&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
             &lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;&amp;quot;An antonym of &amp;apos;complicated&amp;apos; is &amp;apos;simple&amp;apos;.&amp;quot;&lt;/span&gt;
           &lt;span&gt;}&lt;/span&gt;

    &lt;span&gt;entry_50&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;at&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;model_input_50&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry_50&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;desired_response_50&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Response:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;entry_50&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model_input_50&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt; &lt;span&gt;desired_response_50&lt;/span&gt; &lt;span&gt;==&lt;/span&gt;
             &lt;span&gt;&amp;quot;Below is an instruction that describes a task. &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
               &lt;span&gt;&amp;quot;Write a response that appropriately completes the request.&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
               &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Instruction:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Identify the correct spelling of the following word.&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
               &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Input:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Ocassion&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
               &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Response:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;The correct spelling is &amp;apos;Occasion.&amp;apos;&amp;quot;&lt;/span&gt;

    &lt;span&gt;entry_999&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;at&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;999&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;model_input_999&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry_999&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;desired_response_999&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Response:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;entry_999&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model_input_999&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt; &lt;span&gt;desired_response_999&lt;/span&gt; &lt;span&gt;==&lt;/span&gt;
             &lt;span&gt;&amp;quot;Below is an instruction that describes a task. &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
               &lt;span&gt;&amp;quot;Write a response that appropriately completes the request.&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
               &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Instruction:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;What is an antonym of &amp;apos;complicated&amp;apos;?&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
               &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Response:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;An antonym of &amp;apos;complicated&amp;apos; is &amp;apos;simple&amp;apos;.&amp;quot;&lt;/span&gt;

    &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.85&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;

    &lt;span&gt;train_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;+&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;val_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;935&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;55&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;110&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-73-organizing-data-into-training-batches&quot;&gt;7.3 Organizing data into training batches&lt;/h2&gt;

&lt;p&gt;The process of organizing instruction data into training batches is more complicated than organizing classification data or pretraining data into batches. The reason is that instruction data uses a prompt format. Sebastian breaks this process into five steps:&lt;/p&gt;

&lt;ul&gt;
  &lt;li&gt;use prompt template&lt;/li&gt;
  &lt;li&gt;tokenize prompt templates&lt;/li&gt;
  &lt;li&gt;pad inputs to the same length&lt;/li&gt;
  &lt;li&gt;create target token IDs&lt;/li&gt;
  &lt;li&gt;replace padding tokens with placeholders&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Here are the first two steps implemented in the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;InstructionDataset&lt;/code&gt; module, along with a test for this new functionality:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt; &lt;span&gt;defmodule&lt;/span&gt; &lt;span&gt;LlmScratch&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;InstructionDataset&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
  &lt;span&gt;@moduledoc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Pre-tokenized instruction fine-tuning dataset.

  This mirrors the PyTorch `InstructionDataset` example from chapter 7. A
  dataset is built from decoded instruction maps, formats each map as an
  Alpaca-style prompt plus response, and pre-tokenizes the full text during
  construction.

  ## Examples

      iex&amp;gt; data = [
      ...&amp;gt;   %{
      ...&amp;gt;     &amp;quot;instruction&amp;quot; =&amp;gt; &amp;quot;Name the capital of France.&amp;quot;,
      ...&amp;gt;     &amp;quot;input&amp;quot; =&amp;gt; &amp;quot;&amp;quot;,
      ...&amp;gt;     &amp;quot;output&amp;quot; =&amp;gt; &amp;quot;Paris.&amp;quot;
      ...&amp;gt;   }
      ...&amp;gt; ]
      iex&amp;gt; dataset = LlmScratch.InstructionDataset.new(data, &amp;quot;code-davinci-002&amp;quot;)
      iex&amp;gt; LlmScratch.InstructionDataset.length(dataset)
      1
      iex&amp;gt; LlmScratch.InstructionDataset.get(dataset, 0)
      [21106, 318, 281, 12064, 326, 8477, 257, 4876, 13, 19430, 257,
       2882, 326, 20431, 32543, 262, 2581, 13, 198, 198, 21017, 46486,
       25, 198, 5376, 262, 3139, 286, 4881, 13, 198, 198, 21017, 18261,
       25, 198, 40313, 13]
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;

  &lt;span&gt;@enforce_keys&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;:data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:encoded_texts&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
  &lt;span&gt;defstruct&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;:data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:encoded_texts&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;

  &lt;span&gt;@end_of_text&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;lt;|endoftext|&amp;gt;&amp;quot;&lt;/span&gt;

  &lt;span&gt;@type&lt;/span&gt; &lt;span&gt;instruction_record&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;LlmScratch&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;@type&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;@type&lt;/span&gt; &lt;span&gt;t&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;%&lt;/span&gt;&lt;span&gt;__MODULE__&lt;/span&gt;&lt;span&gt;{&lt;/span&gt;
          &lt;span&gt;data:&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()],&lt;/span&gt;
          &lt;span&gt;encoded_texts:&lt;/span&gt; &lt;span&gt;[[&lt;/span&gt;&lt;span&gt;integer&lt;/span&gt;&lt;span&gt;()]]&lt;/span&gt;
        &lt;span&gt;}&lt;/span&gt;

  &lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Creates an instruction dataset from decoded instruction records.

  Each record is formatted with `LlmScratch.FineTuneDataLoader.format_input/1`,
  then the target response is appended as:

      \\n\\n### Response:\\n...

  The resulting full text is encoded immediately and stored in
  `dataset.encoded_texts`.

  ## Input Parameters

    * `data` - list of decoded instruction maps. Each map must contain string
      keys `&amp;quot;instruction&amp;quot;`, `&amp;quot;input&amp;quot;`, and `&amp;quot;output&amp;quot;`.
    * `tokenizer` - Tiktoken model name accepted by `Tiktoken.encode/3`.
    * `opts` - optional keyword list controlling tokenization.

  ## Options

    * `:allowed_special` - special tokens allowed when `tokenizer` is a
      Tiktoken model name. Defaults to `[&amp;quot;&amp;lt;|endoftext|&amp;gt;&amp;quot;]`.

  ## Output

  Returns a `%LlmScratch.InstructionDataset{}`. The `:data` field contains the
  original records, and `:encoded_texts` contains one pre-tokenized token-id
  list per record.

  ## Examples

      iex&amp;gt; data = [
      ...&amp;gt;   %{
      ...&amp;gt;     &amp;quot;instruction&amp;quot; =&amp;gt; &amp;quot;Classify the sentiment.&amp;quot;,
      ...&amp;gt;     &amp;quot;input&amp;quot; =&amp;gt; &amp;quot;I loved it.&amp;quot;,
      ...&amp;gt;     &amp;quot;output&amp;quot; =&amp;gt; &amp;quot;Positive.&amp;quot;
      ...&amp;gt;   }
      ...&amp;gt; ]
      iex&amp;gt; dataset = LlmScratch.InstructionDataset.new(data, &amp;quot;code-davinci-002&amp;quot;)
      iex&amp;gt; dataset.data
      [
        %{
          &amp;quot;input&amp;quot; =&amp;gt; &amp;quot;I loved it.&amp;quot;,
          &amp;quot;instruction&amp;quot; =&amp;gt; &amp;quot;Classify the sentiment.&amp;quot;,
          &amp;quot;output&amp;quot; =&amp;gt; &amp;quot;Positive.&amp;quot;
        }
      ]
      iex&amp;gt; dataset.encoded_texts
      [
        [21106, 318, 281, 12064, 326, 8477, 257, 4876, 13, 19430, 257,
         2882, 326, 20431, 32543, 262, 2581, 13, 198, 198, 21017, 46486,
         25, 198, 9487, 1958, 262, 15598, 13, 198, 198, 21017, 23412, 25,
         198, 40, 6151, 340, 13, 198, 198, 21017, 18261, 25, 198, 21604,
         1800, 13]
      ]
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;new&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()],&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;keyword&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;t&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;[])&lt;/span&gt; &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_list&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;# Pre-tokenize each complete training example once so repeated dataset&lt;/span&gt;
    &lt;span&gt;# access does not rebuild the prompt or call the tokenizer again.&lt;/span&gt;
    &lt;span&gt;encoded_texts&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;map&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;fn&lt;/span&gt; &lt;span&gt;entry&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
        &lt;span&gt;entry&lt;/span&gt;
        &lt;span&gt;# The model trains on the instruction/input prompt followed by the&lt;/span&gt;
        &lt;span&gt;# expected response target, matching the chapter 7 PyTorch dataset.&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;full_text&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;encode_text!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;# Keep the decoded records for inspection while serving encoded examples&lt;/span&gt;
    &lt;span&gt;# from `encoded_texts`, which mirrors the PyTorch dataset fields.&lt;/span&gt;
    &lt;span&gt;%&lt;/span&gt;&lt;span&gt;__MODULE__&lt;/span&gt;&lt;span&gt;{&lt;/span&gt;
      &lt;span&gt;data:&lt;/span&gt; &lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;encoded_texts:&lt;/span&gt; &lt;span&gt;encoded_texts&lt;/span&gt;
    &lt;span&gt;}&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Returns one encoded instruction example by index.

  ## Input Parameters

    * `dataset` - `%LlmScratch.InstructionDataset{}` returned by `new/3`.
    * `index` - zero-based row index.

  ## Output

  Returns the pre-tokenized token-id list for the requested record.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;get&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;non_neg_integer&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;integer&lt;/span&gt;&lt;span&gt;()]&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;get&lt;/span&gt;&lt;span&gt;(%&lt;/span&gt;&lt;span&gt;__MODULE__&lt;/span&gt;&lt;span&gt;{}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;dataset&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;index&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_integer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;index&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;index&lt;/span&gt; &lt;span&gt;&amp;gt;=&lt;/span&gt; &lt;span&gt;0&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;fetch!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encoded_texts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;index&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Returns the number of records in the dataset.

  ## Input Parameters

    * `dataset` - `%LlmScratch.InstructionDataset{}` returned by `new/3`.

  ## Output

  Returns the number of instruction records as a non-negative integer.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;non_neg_integer&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(%&lt;/span&gt;&lt;span&gt;__MODULE__&lt;/span&gt;&lt;span&gt;{}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;dataset&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;Kernel&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;full_text&lt;/span&gt;&lt;span&gt;(%{&lt;/span&gt;&lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;output&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;entry&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;output&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;instruction_plus_input&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;LlmScratch&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;response_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Response:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;output&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;

    &lt;span&gt;instruction_plus_input&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt; &lt;span&gt;response_text&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;encode_text!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;allowed_special&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Keyword&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;get&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;opts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:allowed_special&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;@end_of_text&lt;/span&gt;&lt;span&gt;])&lt;/span&gt;
    &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:ok&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;token_ids&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Tiktoken&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encode&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;allowed_special&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;token_ids&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;The next step is to pad inputs to the same length. Note that each batch can have a different length. We pad to the length of the longest input in the batch.&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;@pad_token_id&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;
&lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Pads a batch of token-id sequences and stacks them into an input tensor.

  This mirrors the first draft of the chapter 7 Python collate function:

      inputs_1 = [0, 1, 2, 3, 4]
      inputs_2 = [5, 6]
      inputs_3 = [7, 8, 9]
      batch = (inputs_1, inputs_2, inputs_3)
      print(custom_collate_draft_1(batch))

  The function finds the longest sequence length in the batch after adding one
  extra pad token, pads every item to that length, then drops the final token
  from each row before stacking the rows. This draft returns only the input
  tensor; it does not build shifted targets yet.

  ## Input Parameters

    * `batch` - tuple or list of token-id lists.
    * `pad_token_id` - token id used for padding. Defaults to GPT-2&amp;apos;s
      end-of-text token id, `50256`.
    * `device` - included for parity with the Python example. Nx places the
      tensor on the active backend, so this argument is currently informational.

  ## Output

  Returns a signed 64-bit Nx tensor with shape
  `{batch_size, max_sequence_length}`.

  ## Examples

      iex&amp;gt; batch = {[0, 1, 2, 3, 4], [5, 6], [7, 8, 9]}
      iex&amp;gt; inputs = LlmScratch.InstructionDataset.custom_collate_draft_1(batch)
      iex&amp;gt; inputs
      #Nx.Tensor&amp;lt;
        s64[3][5]
        [
          [0, 1, 2, 3, 4],
          [5, 6, 50256, 50256, 50256],
          [7, 8, 9, 50256, 50256]
        ]
      &amp;gt;
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;custom_collate_draft_1&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tuple&lt;/span&gt;&lt;span&gt;()&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;[[&lt;/span&gt;&lt;span&gt;integer&lt;/span&gt;&lt;span&gt;()]],&lt;/span&gt; &lt;span&gt;integer&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt;
          &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;Tensor&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;custom_collate_draft_1&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;@pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;&amp;quot;cpu&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;custom_collate_draft_1&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_tuple&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;batch&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Tuple&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;to_list&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;custom_collate_draft_1&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;custom_collate_draft_1&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;_device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_list&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;# Find the longest sequence length after the one extra pad token that the&lt;/span&gt;
    &lt;span&gt;# Python draft appends to every item.&lt;/span&gt;
    &lt;span&gt;batch_max_length&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;batch&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;map&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Kernel&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;amp;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;+&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;))&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;max&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;fn&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt; &lt;span&gt;0&lt;/span&gt; &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;inputs&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;map&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;fn&lt;/span&gt; &lt;span&gt;item&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
        &lt;span&gt;# Elixir data is immutable, so this builds the equivalent of&lt;/span&gt;
        &lt;span&gt;# `new_item = item.copy(); new_item += [pad_token_id]`.&lt;/span&gt;
        &lt;span&gt;new_item&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;item&lt;/span&gt; &lt;span&gt;++&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;

        &lt;span&gt;# Pad the copied item to the longest sequence length in the batch.&lt;/span&gt;
        &lt;span&gt;padded&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;pad_to_length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;new_item&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;batch_max_length&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

        &lt;span&gt;# Match `torch.tensor(padded[:-1])` by dropping the final token before&lt;/span&gt;
        &lt;span&gt;# stacking all rows into a single tensor.&lt;/span&gt;
        &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;padded&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;batch_max_length&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
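      &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;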
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;pad_to_length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;token_ids&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;max_length&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;token_ids&lt;/span&gt; &lt;span&gt;++&lt;/span&gt; &lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;duplicate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;max_length&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;Kernel&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;token_ids&lt;/span&gt;&lt;span&gt;))&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is a test that uses this function on simple example data:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.2 custom collate draft pads batch inputs&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;inputs_1&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;4&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
    &lt;span&gt;inputs_2&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;6&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
    &lt;span&gt;inputs_3&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;7&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;9&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
    &lt;span&gt;batch&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;inputs_1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;inputs_2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;inputs_3&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;

    &lt;span&gt;inputs_tensor&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;custom_collate_draft_1&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;inputs_tensor&lt;/span&gt; &lt;span&gt;==&lt;/span&gt;
             &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
               &lt;span&gt;[&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;4&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;6&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;7&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;9&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
               &lt;span&gt;],&lt;/span&gt;
               &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
             &lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Each batch needs corresponding target tokens along with the inputs. The trick is the same as before: we get the targets by shifting the inputs one token to the right, so each target is the token that follows its input. Where an input has no following token, we add a padding token:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Pads a batch of token-id sequences and creates input/target tensors.

  This mirrors the second draft of the chapter 7 Python collate function:

      inputs_1 = [0, 1, 2, 3, 4]
      inputs_2 = [5, 6]
      inputs_3 = [7, 8, 9]
      batch = (inputs_1, inputs_2, inputs_3)
      inputs, targets = custom_collate(batch)
      print(inputs)
      print(targets)

  The function finds the longest sequence length in the batch after adding one
  extra pad token, pads every item to that length, then creates next-token
  prediction pairs. Inputs drop the final token from each padded row; targets
  drop the first token.

  ## Input Parameters

    * `batch` - tuple or list of token-id lists.
    * `pad_token_id` - token id used for padding. Defaults to GPT-2&amp;apos;s
      end-of-text token id, `50256`.
    * `device` - included for parity with the Python example. Nx places the
      tensor on the active backend, so this argument is currently informational.

  ## Output

  Returns `{inputs, targets}` where both values are signed 64-bit Nx tensors
  with shape `{batch_size, max_sequence_length}`.

  ## Examples

      iex&amp;gt; batch = {[0, 1, 2, 3, 4], [5, 6], [7, 8, 9]}
      iex&amp;gt; {inputs, targets} = LlmScratch.InstructionDataset.custom_collate(batch)
      iex&amp;gt; inputs
      #Nx.Tensor&amp;lt;
        s64[3][5]
        [
          [0, 1, 2, 3, 4],
          [5, 6, 50256, 50256, 50256],
          [7, 8, 9, 50256, 50256]
        ]
      &amp;gt;
      iex&amp;gt; targets
      #Nx.Tensor&amp;lt;
        s64[3][5]
        [
          [1, 2, 3, 4, 50256],
          [6, 50256, 50256, 50256, 50256],
          [8, 9, 50256, 50256, 50256]
        ]
      &amp;gt;
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tuple&lt;/span&gt;&lt;span&gt;()&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;[[&lt;/span&gt;&lt;span&gt;integer&lt;/span&gt;&lt;span&gt;()]],&lt;/span&gt; &lt;span&gt;integer&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt;
          &lt;span&gt;{&lt;/span&gt;&lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;Tensor&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;Tensor&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;()}&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;@pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;&amp;quot;cpu&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_tuple&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;batch&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Tuple&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;to_list&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;_device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_list&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;# Find the longest sequence length after the one extra pad token that the&lt;/span&gt;
    &lt;span&gt;# Python collate function appends to every item.&lt;/span&gt;
    &lt;span&gt;batch_max_length&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;batch&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;map&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Kernel&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;amp;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;+&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;))&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;max&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;fn&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt; &lt;span&gt;0&lt;/span&gt; &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;&lt;span&gt;inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;targets&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;map&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;fn&lt;/span&gt; &lt;span&gt;item&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
        &lt;span&gt;# Elixir data is immutable, so this builds the equivalent of&lt;/span&gt;
        &lt;span&gt;# `new_item = item.copy(); new_item += [pad_token_id]`.&lt;/span&gt;
        &lt;span&gt;new_item&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;item&lt;/span&gt; &lt;span&gt;++&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;

        &lt;span&gt;# Pad the copied item to the longest sequence length in the batch.&lt;/span&gt;
        &lt;span&gt;padded&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;pad_to_length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;new_item&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;batch_max_length&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pad_token_id&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

        &lt;span&gt;# Match `padded[:-1]` for inputs and `padded[1:]` for next-token&lt;/span&gt;
        &lt;span&gt;# targets before stacking all rows into tensors.&lt;/span&gt;
        &lt;span&gt;{&lt;/span&gt;
          &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;padded&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;batch_max_length&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;),&lt;/span&gt;
          &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;padded&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;batch_max_length&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
        &lt;span&gt;}&lt;/span&gt;
      &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;unzip&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;
      &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}),&lt;/span&gt;
      &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;targets&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;
    &lt;span&gt;}&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is a test that uses &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;custom_collate&lt;/code&gt;:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;  &lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.3 custom collate creates shifted targets&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;inputs_1&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;4&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
    &lt;span&gt;inputs_2&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;6&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
    &lt;span&gt;inputs_3&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;7&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;9&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
    &lt;span&gt;batch&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;inputs_1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;inputs_2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;inputs_3&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;&lt;span&gt;inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;targets&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;inputs&lt;/span&gt; &lt;span&gt;==&lt;/span&gt;
             &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
               &lt;span&gt;[&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;4&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;6&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;7&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;9&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
               &lt;span&gt;],&lt;/span&gt;
               &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
             &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;targets&lt;/span&gt; &lt;span&gt;==&lt;/span&gt;
             &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
               &lt;span&gt;[&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;4&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;6&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;9&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
               &lt;span&gt;],&lt;/span&gt;
               &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
             &lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Now we are at the final step. In the targets, we need to replace padding tokens with &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;-100&lt;/code&gt;, except for the first one in each row. We keep that first padding token (the end-of-text token) in the targets so the model can learn when to end the response. First, here is the implementation, followed by an explanation of why we do this:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;  diff --git a/lib/llm_scratch/instruction_dataset.ex b/lib/llm_scratch/instruction_dataset.ex
&lt;span&gt;index 4e91dfb..c161996 100644
&lt;/span&gt;&lt;span&gt;--- a/lib/llm_scratch/instruction_dataset.ex
&lt;/span&gt;&lt;span&gt;+++ b/lib/llm_scratch/instruction_dataset.ex
&lt;/span&gt;&lt;span&gt;@@ -152,26 +152,32 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
   @doc &amp;quot;&amp;quot;&amp;quot;
   Pads a batch of token-id sequences and creates input/target tensors.
 
&lt;span&gt;-  This mirrors the second draft of the chapter 7 Python collate function:
&lt;/span&gt;&lt;span&gt;+  This mirrors the chapter 7 Python collate function:
&lt;/span&gt; 
       inputs_1 = [0, 1, 2, 3, 4]
       inputs_2 = [5, 6]
       inputs_3 = [7, 8, 9]
       batch = (inputs_1, inputs_2, inputs_3)
&lt;span&gt;-      inputs, targets = custom_collate(batch)
&lt;/span&gt;&lt;span&gt;+      inputs, targets = custom_collate_fn(batch)
&lt;/span&gt;       print(inputs)
       print(targets)
 
   The function finds the longest sequence length in the batch after adding one
   extra pad token, pads every item to that length, then creates next-token
   prediction pairs. Inputs drop the final token from each padded row; targets
&lt;span&gt;-  drop the first token.
&lt;/span&gt;&lt;span&gt;+  drop the first token. In target rows, all padding tokens after the first one
+  are replaced with `ignore_index` so they do not contribute to the loss.
&lt;/span&gt; 
   ## Input Parameters
 
     * `batch` - tuple or list of token-id lists.
     * `pad_token_id` - token id used for padding. Defaults to GPT-2&amp;apos;s
       end-of-text token id, `50256`.
&lt;span&gt;+    * `ignore_index` - label value used for ignored target positions. Defaults
+      to `-100`.
+    * `allowed_max_length` - optional maximum row length after inputs and
+      targets are created. Defaults to `nil`, which keeps the full batch
+      length.
&lt;/span&gt;     * `device` - included for parity with the Python example. Nx places the
       tensor on the active backend, so this argument is currently informational.
 
&lt;span&gt;@@ -198,22 +204,36 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
         s64[3][5]
         [
           [1, 2, 3, 4, 50256],
&lt;span&gt;-          [6, 50256, 50256, 50256, 50256],
-          [8, 9, 50256, 50256, 50256]
&lt;/span&gt;&lt;span&gt;+          [6, 50256, -100, -100, -100],
+          [8, 9, 50256, -100, -100]
&lt;/span&gt;         ]
       &amp;gt;
   &amp;quot;&amp;quot;&amp;quot;
&lt;span&gt;-  @spec custom_collate(tuple() | [[integer()]], integer(), String.t()) ::
&lt;/span&gt;&lt;span&gt;+  @spec custom_collate(
+          tuple() | [[integer()]],
+          integer(),
+          integer(),
+          nil | pos_integer(),
+          String.t()
+        ) ::
&lt;/span&gt;           {Nx.Tensor.t(), Nx.Tensor.t()}
&lt;span&gt;-  def custom_collate(batch, pad_token_id \\ @pad_token_id, device \\ &amp;quot;cpu&amp;quot;)
-
-  def custom_collate(batch, pad_token_id, device) when is_tuple(batch) do
&lt;/span&gt;&lt;span&gt;+  def custom_collate(
+        batch,
+        pad_token_id \\ @pad_token_id,
+        ignore_index \\ -100,
+        allowed_max_length \\ nil,
+        device \\ &amp;quot;cpu&amp;quot;
+      )
+
+  def custom_collate(batch, pad_token_id, ignore_index, allowed_max_length, device)
+      when is_tuple(batch) do
&lt;/span&gt;     batch
     |&amp;gt; Tuple.to_list()
&lt;span&gt;-    |&amp;gt; custom_collate(pad_token_id, device)
&lt;/span&gt;&lt;span&gt;+    |&amp;gt; custom_collate(pad_token_id, ignore_index, allowed_max_length, device)
&lt;/span&gt;   end
 
&lt;span&gt;-  def custom_collate(batch, pad_token_id, _device) when is_list(batch) do
&lt;/span&gt;&lt;span&gt;+  def custom_collate(batch, pad_token_id, ignore_index, allowed_max_length, _device)
+      when is_list(batch) do
&lt;/span&gt;     # Find the longest sequence length after the one extra pad token that the
     # Python collate function appends to every item.
     batch_max_length =
&lt;span&gt;@@ -231,10 +251,18 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
         padded = pad_to_length(new_item, batch_max_length, pad_token_id)
 
         # Match `padded[:-1]` for inputs and `padded[1:]` for next-token
&lt;span&gt;-        # targets before stacking all rows into tensors.
&lt;/span&gt;&lt;span&gt;+        # targets.
+        inputs = Enum.slice(padded, 0, batch_max_length - 1)
+
+        targets =
+          padded
+          |&amp;gt; Enum.slice(1, batch_max_length - 1)
+          |&amp;gt; mask_extra_padding_targets(pad_token_id, ignore_index)
+
+        # Optionally cap both rows to the model context length.
&lt;/span&gt;         {
&lt;span&gt;-          Enum.slice(padded, 0, batch_max_length - 1),
-          Enum.slice(padded, 1, batch_max_length - 1)
&lt;/span&gt;&lt;span&gt;+          maybe_truncate(inputs, allowed_max_length),
+          maybe_truncate(targets, allowed_max_length)
&lt;/span&gt;         }
       end)
       |&amp;gt; Enum.unzip()
&lt;span&gt;@@ -261,4 +289,23 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
   defp pad_to_length(token_ids, max_length, pad_token_id) do
     token_ids ++ List.duplicate(pad_token_id, max_length - Kernel.length(token_ids))
   end
&lt;span&gt;+
+  defp mask_extra_padding_targets(targets, pad_token_id, ignore_index) do
+    {masked_targets, _seen_first_pad?} =
+      Enum.map_reduce(targets, false, fn
+        ^pad_token_id, false -&amp;gt;
+          {pad_token_id, true}
+
+        ^pad_token_id, true -&amp;gt;
+          {ignore_index, true}
+
+        token_id, seen_first_pad? -&amp;gt;
+          {token_id, seen_first_pad?}
+      end)
+
+    masked_targets
+  end
+
+  defp maybe_truncate(token_ids, nil), do: token_ids
+  defp maybe_truncate(token_ids, max_length), do: Enum.take(token_ids, max_length)
&lt;/span&gt; end
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is a test as an example:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt; &lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.3 custom collate creates shifted targets with ignored padding&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;inputs_1&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;4&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
    &lt;span&gt;inputs_2&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;6&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
    &lt;span&gt;inputs_3&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;7&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;9&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
    &lt;span&gt;batch&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;inputs_1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;inputs_2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;inputs_3&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;&lt;span&gt;inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;targets&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;inputs&lt;/span&gt; &lt;span&gt;==&lt;/span&gt;
             &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
               &lt;span&gt;[&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;4&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;6&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;7&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;9&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
               &lt;span&gt;],&lt;/span&gt;
               &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
             &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;targets&lt;/span&gt; &lt;span&gt;==&lt;/span&gt;
             &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
               &lt;span&gt;[&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;4&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;6&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;-&lt;/span&gt;&lt;span&gt;100&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;-&lt;/span&gt;&lt;span&gt;100&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;-&lt;/span&gt;&lt;span&gt;100&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
                 &lt;span&gt;[&lt;/span&gt;&lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;9&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;-&lt;/span&gt;&lt;span&gt;100&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;-&lt;/span&gt;&lt;span&gt;100&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
               &lt;span&gt;],&lt;/span&gt;
               &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
             &lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;What is so special about the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;-100&lt;/code&gt; value? PyTorch’s cross entropy loss function ignores target positions with that value by default. We could also mask out all instruction tokens from the target, because some researchers claim this can improve training. The last thing to implement is a refactor of our &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;cross_entropy_loss&lt;/code&gt; function so it ignores &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;-100&lt;/code&gt;:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;  diff --git a/lib/llm_scratch/loss_utils.ex b/lib/llm_scratch/loss_utils.ex
&lt;span&gt;index 59a9edc..70495f6 100644
&lt;/span&gt;&lt;span&gt;--- a/lib/llm_scratch/loss_utils.ex
&lt;/span&gt;&lt;span&gt;+++ b/lib/llm_scratch/loss_utils.ex
&lt;/span&gt;&lt;span&gt;@@ -46,6 +46,9 @@&lt;/span&gt; defmodule LlmScratch.LossUtils do
   `logits` should be shaped `{batch_size, seq_len, vocab_size}` and `targets`
   should be shaped `{batch_size, seq_len}`.
 
&lt;span&gt;+  Target positions equal to `ignore_index` are excluded from the mean loss.
+  The default `ignore_index` is `-100`, matching PyTorch&amp;apos;s cross entropy loss.
+
&lt;/span&gt;   ## Examples
 
       iex&amp;gt; logits = Nx.log(Nx.tensor([[[0.5, 0.25, 0.25], [0.125, 0.375, 0.5]]]))
&lt;span&gt;@@ -54,52 +57,69 @@&lt;/span&gt; defmodule LlmScratch.LossUtils do
       iex&amp;gt; Float.round(Nx.to_number(loss), 6)
       0.693147
   &amp;quot;&amp;quot;&amp;quot;
&lt;span&gt;-  @spec cross_entropy_loss(Nx.Tensor.t(), Nx.Tensor.t()) :: Nx.Tensor.t()
-  def cross_entropy_loss(%Nx.Tensor{} = logits, %Nx.Tensor{} = targets) do
-    cross_entropy_loss_defn(logits, targets)
&lt;/span&gt;&lt;span&gt;+  @spec cross_entropy_loss(Nx.Tensor.t(), Nx.Tensor.t(), integer()) :: Nx.Tensor.t()
+  def cross_entropy_loss(%Nx.Tensor{} = logits, %Nx.Tensor{} = targets, ignore_index \\ -100) do
+    cross_entropy_loss_defn(logits, targets, ignore_index)
&lt;/span&gt;   end
 
   @doc &amp;quot;&amp;quot;&amp;quot;
   Defn-compatible mean cross entropy loss from logits and target token ids.
   &amp;quot;&amp;quot;&amp;quot;
&lt;span&gt;-  defn cross_entropy_loss_defn(logits, targets) do
&lt;/span&gt;&lt;span&gt;+  defn cross_entropy_loss_defn(logits, targets, ignore_index \\ -100) do
&lt;/span&gt;     if Nx.rank(logits) == 3 do
       # Language-model flow:
       # logits shape is {batch_size, seq_len, vocab_size}
       # targets shape is {batch_size, seq_len}
       vocab_size = Nx.axis_size(logits, 2)
&lt;span&gt;+      valid_targets = Nx.not_equal(targets, ignore_index)
+      safe_targets = Nx.select(valid_targets, targets, 0)
&lt;/span&gt; 
       # step1: logits
&lt;span&gt;-      logits
-      # step2: probabilities
-      |&amp;gt; Axon.Activations.softmax(axis: -1)
-      # step3: target probabilities
-      |&amp;gt; Nx.reshape({:auto, vocab_size})
-      |&amp;gt; Nx.take_along_axis(Nx.reshape(targets, {:auto, 1}), axis: 1)
-      |&amp;gt; Nx.squeeze(axes: [1])
-      # step4: logarithmic probabilities
-      |&amp;gt; Nx.log()
-      # step6: negative average log probabilities
-      |&amp;gt; Nx.negate()
-      # step5: average logarithmic probabilities
-      |&amp;gt; Nx.mean()
&lt;/span&gt;&lt;span&gt;+      losses =
+        logits
+        # step2: probabilities
+        |&amp;gt; Axon.Activations.softmax(axis: -1)
+        # step3: target probabilities
+        |&amp;gt; Nx.reshape({:auto, vocab_size})
+        |&amp;gt; Nx.take_along_axis(Nx.reshape(safe_targets, {:auto, 1}), axis: 1)
+        |&amp;gt; Nx.squeeze(axes: [1])
+        # step4: logarithmic probabilities
+        |&amp;gt; Nx.log()
+        # step6: negative log probabilities
+        |&amp;gt; Nx.negate()
+
+      valid_targets = Nx.reshape(valid_targets, {:auto})
+
+      valid_targets
+      |&amp;gt; Nx.select(losses, 0.0)
+      |&amp;gt; Nx.sum()
+      # step5: average only over target positions that are not ignored
+      |&amp;gt; Nx.divide(valid_targets |&amp;gt; Nx.as_type({:f, 32}) |&amp;gt; Nx.sum())
&lt;/span&gt;     else
       # Classification flow:
       # logits shape is {batch_size, num_classes}
       # targets shape is {batch_size}
&lt;span&gt;+      valid_targets = Nx.not_equal(targets, ignore_index)
+      safe_targets = Nx.select(valid_targets, targets, 0)
+
&lt;/span&gt;       # step1: logits
&lt;span&gt;-      logits
-      # step2: probabilities
-      |&amp;gt; Axon.Activations.softmax(axis: -1)
-      # step3: target probabilities
-      |&amp;gt; Nx.take_along_axis(Nx.reshape(targets, {:auto, 1}), axis: 1)
-      |&amp;gt; Nx.squeeze(axes: [1])
-      # step4: logarithmic probabilities
-      |&amp;gt; Nx.log()
-      # step6: negative average log probabilities
-      |&amp;gt; Nx.negate()
-      # step5: average logarithmic probabilities
-      |&amp;gt; Nx.mean()
&lt;/span&gt;&lt;span&gt;+      losses =
+        logits
+        # step2: probabilities
+        |&amp;gt; Axon.Activations.softmax(axis: -1)
+        # step3: target probabilities
+        |&amp;gt; Nx.take_along_axis(Nx.reshape(safe_targets, {:auto, 1}), axis: 1)
+        |&amp;gt; Nx.squeeze(axes: [1])
+        # step4: logarithmic probabilities
+        |&amp;gt; Nx.log()
+        # step6: negative log probabilities
+        |&amp;gt; Nx.negate()
+
+      valid_targets
+      |&amp;gt; Nx.select(losses, 0.0)
+      |&amp;gt; Nx.sum()
+      # step5: average only over target positions that are not ignored
+      |&amp;gt; Nx.divide(valid_targets |&amp;gt; Nx.as_type({:f, 32}) |&amp;gt; Nx.sum())
&lt;/span&gt;     end
   end
 
&lt;span&gt;@@ -128,6 +148,8 @@&lt;/span&gt; defmodule LlmScratch.LossUtils do
       model output is sliced to `model(input_batch)[:, -1, :]` before computing
       cross entropy. Use `:first_token` to instead classify from
       `model(input_batch)[:, 0, :]`.
&lt;span&gt;+    * `:ignore_index` - target value excluded from the mean cross entropy.
+      Defaults to `-100`.
&lt;/span&gt;   &amp;quot;&amp;quot;&amp;quot;
   @spec calc_loss_batch(
           Nx.Tensor.t(),
&lt;span&gt;@@ -141,10 +163,12 @@&lt;/span&gt; defmodule LlmScratch.LossUtils do
     input_batch = maybe_transfer(input_batch, device)
     target_batch = maybe_transfer(target_batch, device)
 
&lt;span&gt;+    ignore_index = Keyword.get(opts, :ignore_index, -100)
+
&lt;/span&gt;     model
     |&amp;gt; forward_model(input_batch)
     |&amp;gt; select_loss_logits(opts)
&lt;span&gt;-    |&amp;gt; cross_entropy_loss(target_batch)
&lt;/span&gt;&lt;span&gt;+    |&amp;gt; cross_entropy_loss(target_batch, ignore_index)
&lt;/span&gt;   end
 
   @doc &amp;quot;&amp;quot;&amp;quot;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is a test from the book that checks whether the cross entropy loss really ignores targets set to that value:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.3 cross entropy ignores masked instruction targets&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;logits_1&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;
        &lt;span&gt;[&lt;/span&gt;&lt;span&gt;-&lt;/span&gt;&lt;span&gt;1.0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1.0&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
        &lt;span&gt;[&lt;/span&gt;&lt;span&gt;-&lt;/span&gt;&lt;span&gt;0.5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1.5&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
      &lt;span&gt;])&lt;/span&gt;

    &lt;span&gt;targets_1&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;&lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;],&lt;/span&gt; &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;
    &lt;span&gt;loss_1&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;LossUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cross_entropy_loss&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;logits_1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;targets_1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;logits_2&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;
        &lt;span&gt;[&lt;/span&gt;&lt;span&gt;-&lt;/span&gt;&lt;span&gt;1.0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1.0&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
        &lt;span&gt;[&lt;/span&gt;&lt;span&gt;-&lt;/span&gt;&lt;span&gt;0.5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1.5&lt;/span&gt;&lt;span&gt;],&lt;/span&gt;
        &lt;span&gt;[&lt;/span&gt;&lt;span&gt;-&lt;/span&gt;&lt;span&gt;0.5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1.5&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
      &lt;span&gt;])&lt;/span&gt;

    &lt;span&gt;targets_2&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;&lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;],&lt;/span&gt; &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;
    &lt;span&gt;loss_2&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;LossUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cross_entropy_loss&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;logits_2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;targets_2&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;targets_3&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;&lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;-&lt;/span&gt;&lt;span&gt;100&lt;/span&gt;&lt;span&gt;],&lt;/span&gt; &lt;span&gt;type:&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:s&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;
    &lt;span&gt;loss_3&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;LossUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cross_entropy_loss&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;logits_2&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;targets_3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert_in_delta&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;to_number&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;loss_1&lt;/span&gt;&lt;span&gt;),&lt;/span&gt; &lt;span&gt;1.1269&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1.0e-4&lt;/span&gt;
    &lt;span&gt;assert_in_delta&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;to_number&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;loss_2&lt;/span&gt;&lt;span&gt;),&lt;/span&gt; &lt;span&gt;0.7936&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1.0e-4&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;equal&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;loss_1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;loss_3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;to_number&lt;/span&gt;&lt;span&gt;()&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-74-creating-data-loaders-for-an-instruction-dataset&quot;&gt;7.4 Creating data loaders for an instruction dataset&lt;/h2&gt;

&lt;p&gt;We are ready to load our training data into memory using the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;DataLoader&lt;/code&gt; module. For that, we first need to refactor our &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;DataLoader&lt;/code&gt; module to support a custom collate function:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;diff --git a/lib/llm_scratch/data_loader.ex b/lib/llm_scratch/data_loader.ex
index 2e05c93..350c863 100644
&lt;/span&gt;&lt;span&gt;--- a/lib/llm_scratch/data_loader.ex
&lt;/span&gt;&lt;span&gt;+++ b/lib/llm_scratch/data_loader.ex
&lt;/span&gt;&lt;span&gt;@@ -10,24 +10,30 @@&lt;/span&gt; defmodule LlmScratch.DataLoader do
   - `:shuffle` - whether batches are shuffled
   - `:drop_last` - whether incomplete batches are dropped
   - `:num_workers` - concurrency used by `iterate/2`
&lt;span&gt;+  - `:collate_fn` - function used to transform each list of examples into a
+    batch. Defaults to keeping the list unchanged.
&lt;/span&gt; 
   ## Options
   - `:batch_size` - number of samples per batch (default: `32`)
   - `:shuffle` - shuffles dataset once before cycling (default: `true`)
   - `:drop_last` - drops batches smaller than `:batch_size` (default: `false`)
   - `:num_workers` - parallel workers for iteration (default: `0`)
&lt;span&gt;+  - `:collate_fn` - one-argument function called for each batch after
+    chunking and `:drop_last` filtering. Defaults to identity.
&lt;/span&gt;   &amp;quot;&amp;quot;&amp;quot;
   def new(dataset, opts \\ []) when is_list(dataset) do
     batch_size = Keyword.get(opts, :batch_size, 32)
     shuffle = Keyword.get(opts, :shuffle, true)
     drop_last = Keyword.get(opts, :drop_last, false)
     num_workers = Keyword.get(opts, :num_workers, 0)
&lt;span&gt;+    collate_fn = Keyword.get(opts, :collate_fn, &amp;amp;identity/1)
&lt;/span&gt; 
     batches =
       dataset
       |&amp;gt; prepare_dataset(shuffle)
       |&amp;gt; Stream.chunk_every(batch_size)
       |&amp;gt; filter_incomplete_batches(drop_last, batch_size)
&lt;span&gt;+      |&amp;gt; Stream.map(collate_fn)
&lt;/span&gt;       |&amp;gt; Enum.to_list()
 
     stream = Stream.cycle(batches)
&lt;span&gt;@@ -39,7 +45,8 @@&lt;/span&gt; defmodule LlmScratch.DataLoader do
       batch_size: batch_size,
       shuffle: shuffle,
       drop_last: drop_last,
&lt;span&gt;-      num_workers: num_workers
&lt;/span&gt;&lt;span&gt;+      num_workers: num_workers,
+      collate_fn: collate_fn
&lt;/span&gt;     }
   end
 
&lt;span&gt;@@ -76,4 +83,6 @@&lt;/span&gt; defmodule LlmScratch.DataLoader do
   defp filter_incomplete_batches(stream, false, _batch_size) do
     stream
   end
&lt;span&gt;+
+  defp identity(batch), do: batch
&lt;/span&gt; end
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is a test that builds the training, validation, and test data loaders for the instruction data using the custom collate function:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt; &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:download&lt;/span&gt;
  &lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.4 creates instruction data loaders with custom collate function&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;tokenizer&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;code-davinci-002&amp;quot;&lt;/span&gt;
    &lt;span&gt;batch_size&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;8&lt;/span&gt;
    &lt;span&gt;num_workers&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;
    &lt;span&gt;:rand&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;seed&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;:exsss&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;123&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;123&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;123&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;

    &lt;span&gt;file_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tmp_dir!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;llm_scratch_instruction_data&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;instruction-data.json&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;download_and_load_instructions_file&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;@instruction_data_url&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.85&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;

    &lt;span&gt;train_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;+&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;val_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_dataset&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_dataset&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_dataset&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_loader&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;DataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encoded_texts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;batch_size:&lt;/span&gt; &lt;span&gt;batch_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;collate_fn:&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;shuffle:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;drop_last:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_workers:&lt;/span&gt; &lt;span&gt;num_workers&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;val_loader&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;DataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encoded_texts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;batch_size:&lt;/span&gt; &lt;span&gt;batch_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;collate_fn:&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;shuffle:&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;drop_last:&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_workers:&lt;/span&gt; &lt;span&gt;num_workers&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;test_loader&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;DataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encoded_texts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;batch_size:&lt;/span&gt; &lt;span&gt;batch_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;collate_fn:&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;shuffle:&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;drop_last:&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_workers:&lt;/span&gt; &lt;span&gt;num_workers&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_dataset&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;935&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_dataset&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;55&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_dataset&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;110&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;train_loader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;116&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;val_loader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;7&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;test_loader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;14&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;train_loader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;num_workers&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;val_loader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;num_workers&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;test_loader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;num_workers&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;[{&lt;/span&gt;&lt;span&gt;train_inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_targets&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;_&lt;/span&gt;&lt;span&gt;]&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;train_loader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;batches&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;[{&lt;/span&gt;&lt;span&gt;val_inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;val_targets&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;_&lt;/span&gt;&lt;span&gt;]&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;val_loader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;batches&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;[{&lt;/span&gt;&lt;span&gt;test_inputs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;test_targets&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;_&lt;/span&gt;&lt;span&gt;]&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;test_loader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;batches&lt;/span&gt;


    &lt;span&gt;# As we randomly shuffle data before creating batches, we get a different result than in the book&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;shape&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_inputs&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;91&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;shape&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_targets&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;91&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;shape&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_inputs&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;74&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;shape&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_targets&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;74&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;shape&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_inputs&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;shape&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_targets&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;8&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;64&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-75-loading-a-pretrained-llm&quot;&gt;7.5 Loading a pretrained LLM&lt;/h2&gt;

&lt;p&gt;We are going to load the OpenAI GPT-2 model with 355M parameters, because we need a stronger model for instruction training. After we load the model, we will try the first instruction from the validation set to show that the 355M model does not understand instructions yet, because it has not been trained for that task:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:download&lt;/span&gt;
  &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;timeout:&lt;/span&gt; &lt;span&gt;3_600_000&lt;/span&gt;
  &lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.5 generates response for validation instruction with OpenAI GPT-2 355M&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;use_accelerated_backend&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;tokenizer&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;code-davinci-002&amp;quot;&lt;/span&gt;
    &lt;span&gt;model&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;GPT2OpenAI&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;load_model&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;355M&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;models_dir:&lt;/span&gt; &lt;span&gt;&amp;quot;gpt2&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;file_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tmp_dir!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;llm_scratch_instruction_data&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;instruction-data.json&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;download_and_load_instructions_file&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;@instruction_data_url&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.85&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;
    &lt;span&gt;val_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;+&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;val_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;input_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;val_data&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;first&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;token_ids&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;TextGeneration&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;generate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;text_to_token_ids&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;),&lt;/span&gt;
        &lt;span&gt;35&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;context_length&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;0.0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;nil&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;50_256&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;BinaryBackend&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;generated_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;token_ids_to_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;token_ids&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;response_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;generated_text&lt;/span&gt; &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;..-&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;//&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;trim&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;context_length&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;1024&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;emb_dim&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;1024&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;n_layers&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;24&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;n_heads&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;16&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;input_text&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;first&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_data&lt;/span&gt;&lt;span&gt;))&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;starts_with?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;generated_text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;response_text&lt;/span&gt; &lt;span&gt;==&lt;/span&gt;
             &lt;span&gt;&amp;quot;### Response:&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;The chef cooks the meal every day.&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Instruction:&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;Convert the active sentence to passive: &amp;apos;The chef cooks the&amp;quot;&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-76-fine-tuning-the-llm-on-instruction-data&quot;&gt;7.6 Fine-tuning the LLM on instruction data&lt;/h2&gt;

&lt;p&gt;It is time to fine-tune our GPT-2 355M model on the provided instruction set. We use the same training function as we used for model pretraining. First, let’s calculate the loss on a model that has not been fine-tuned on the instruction set. We can see that the loss is high, but after just two epochs the model starts to understand instructions. At the end, we save a model checkpoint and the data that will be used for plotting the loss values. Training took 687 seconds:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.5 and 7.6 evaluates and trains OpenAI GPT-2 355M on instruction data&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;device&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;use_accelerated_backend&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;tokenizer&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;code-davinci-002&amp;quot;&lt;/span&gt;
    &lt;span&gt;batch_size&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;8&lt;/span&gt;
    &lt;span&gt;num_workers&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;

    &lt;span&gt;model&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;&amp;quot;355M&amp;quot;&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;GPT2OpenAI&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;load_model&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;models_dir:&lt;/span&gt; &lt;span&gt;&amp;quot;gpt2&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;file_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tmp_dir!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;llm_scratch_instruction_data&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;instruction-data.json&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;download_and_load_instructions_file&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;@instruction_data_url&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.85&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;
    &lt;span&gt;train_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;+&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;val_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_dataset&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_dataset&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;:rand&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;seed&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;:exsss&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;123&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;123&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;123&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;

    &lt;span&gt;train_loader&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;DataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encoded_texts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;batch_size:&lt;/span&gt; &lt;span&gt;batch_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;collate_fn:&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;binary_instruction_collate&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;shuffle:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;drop_last:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_workers:&lt;/span&gt; &lt;span&gt;num_workers&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;val_loader&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;DataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encoded_texts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;batch_size:&lt;/span&gt; &lt;span&gt;batch_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;collate_fn:&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;binary_instruction_collate&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;shuffle:&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;drop_last:&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_workers:&lt;/span&gt; &lt;span&gt;num_workers&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_loss&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;LossUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;calc_loss_loader&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_loader&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;5&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_loss&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;LossUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;calc_loss_loader&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_loader&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;5&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;input_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;val_data&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;first&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;token_ids&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;TextGeneration&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;generate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;input_text&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;text_to_token_ids&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;device&lt;/span&gt;&lt;span&gt;),&lt;/span&gt;
        &lt;span&gt;35&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;context_length&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;0.0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;nil&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;50_256&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;BinaryBackend&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;generated_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;token_ids_to_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;token_ids&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;response_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;generated_text&lt;/span&gt; &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;..-&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;//&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;trim&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;context_length&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;1024&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;emb_dim&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;1024&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;n_layers&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;24&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;n_heads&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;16&lt;/span&gt;
    &lt;span&gt;assert_in_delta&lt;/span&gt; &lt;span&gt;train_loss&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3.7422078609466554&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1.0e-5&lt;/span&gt;
    &lt;span&gt;assert_in_delta&lt;/span&gt; &lt;span&gt;val_loss&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;3.7619348049163817&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;1.0e-5&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;input_text&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;first&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_data&lt;/span&gt;&lt;span&gt;))&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;starts_with?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;generated_text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;response_text&lt;/span&gt; &lt;span&gt;==&lt;/span&gt;
             &lt;span&gt;&amp;quot;### Response:&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;The chef cooks the meal every day.&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Instruction:&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;Convert the active sentence to passive: &amp;apos;The chef cooks the&amp;quot;&lt;/span&gt;

    &lt;span&gt;num_epochs&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;
    &lt;span&gt;optimizer&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Training&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;adamw&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;0.00005&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;weight_decay:&lt;/span&gt; &lt;span&gt;0.1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;&lt;span&gt;trained_model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;trained_optimizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;val_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;Training&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;train_model_simple&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;train_loader&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;val_loader&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;optimizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_epochs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;generate_samples:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;return_optimizer:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;checkpoint_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;ch7_instruction_finetuned_gpt2_355m_model_and_optimizer.nx&amp;quot;&lt;/span&gt;
    &lt;span&gt;ModelCheckpoint&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;save_training_state!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;trained_model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;trained_optimizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;metrics_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;ch7_instruction_finetuning_metrics.json&amp;quot;&lt;/span&gt;

    &lt;span&gt;write_instruction_training_metrics!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
      &lt;span&gt;metrics_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;num_epochs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;train_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;val_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;tokens_seen&lt;/span&gt;
    &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;trained_model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;__struct__&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;__struct__&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;%&lt;/span&gt;&lt;span&gt;Training&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;AdamW&lt;/span&gt;&lt;span&gt;{}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trained_optimizer&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;stat!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;size&lt;/span&gt; &lt;span&gt;&amp;gt;&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;metrics_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;stat!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;metrics_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;size&lt;/span&gt; &lt;span&gt;&amp;gt;&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_losses&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;47&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_losses&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;47&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;47&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;all?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_losses&lt;/span&gt; &lt;span&gt;++&lt;/span&gt; &lt;span&gt;val_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;is_float&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;all?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;is_integer&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;tokens_seen&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;sort&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;last&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;&amp;gt;&lt;/span&gt; &lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;first&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;binary_instruction_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;previous_backend&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;default_backend&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;try&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
      &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;default_backend&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;BinaryBackend&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;custom_collate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;batch&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;after&lt;/span&gt;
      &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;default_backend&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;previous_backend&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;write_instruction_training_metrics!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
         &lt;span&gt;path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
         &lt;span&gt;num_epochs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
         &lt;span&gt;train_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
         &lt;span&gt;val_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
         &lt;span&gt;tokens_seen&lt;/span&gt;
       &lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;metrics&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;%{&lt;/span&gt;
      &lt;span&gt;num_epochs:&lt;/span&gt; &lt;span&gt;num_epochs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;losses:&lt;/span&gt; &lt;span&gt;%{&lt;/span&gt;
        &lt;span&gt;tokens_seen:&lt;/span&gt; &lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;train_values:&lt;/span&gt; &lt;span&gt;train_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;val_values:&lt;/span&gt; &lt;span&gt;val_losses&lt;/span&gt;
      &lt;span&gt;}&lt;/span&gt;
    &lt;span&gt;}&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:ok&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;encoded_metrics&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Jason&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encode&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;metrics&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pretty:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;write!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;encoded_metrics&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
&lt;p&gt;Here is the size of the saved instruction fine-tuned model:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;  ls -alh *.nx
-rw-r--r--  1 karlosmid  staff   623M  5 lip  13:01 ch5_2_gpt_124m_emlx.nx
-rw-r--r--  1 karlosmid  staff   623M 10 lip  15:01 ch5_2_gpt_124m.nx
-rw-r--r--  1 karlosmid  staff   577M 10 lip  15:04 ch6_spam_classifier_model_and_optimizer.nx
-rw-r--r--  1 karlosmid  staff   4,6G 11 lip  13:37 ch7_instruction_finetuned_gpt2_355m_model_and_optimizer.nx
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is the training output across epoch steps:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;Ep 1 (Step 000000): Train loss 2.510, Val loss 2.540
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;

### Response:
The chef cooks the meal every day.

### Instruction:
Write a response that appropriately completes the request.

### Response:
The chef cooks the meal every day.

### Instruction:
Write
Ep 1 (Step 000005): Train loss 1.093, Val loss 1.079
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;

### Response:
The chef cooks the meal every day.&amp;lt;|endoftext|&amp;gt;The following is a list of items that make good gifts for Tia, who likes Modern, white items and dislikes Basic items.

Best Gifts for Tia:
Ep 1 (Step 000010): Train loss 0.960, Val loss 0.958
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;

### Response:
The chef cooks the meal every day.&amp;lt;|endoftext|&amp;gt;The following is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
What is the difference between a &amp;apos;good&amp;apos; and
Ep 1 (Step 000015): Train loss 0.913, Val loss 0.894
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;

### Response:
The chef cooks the meal every day.&amp;lt;|endoftext|&amp;gt;The following is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Generate a sentence using the word &amp;apos;excellent
Ep 1 (Step 000020): Train loss 0.878, Val loss 0.850
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;

### Response:
The chef cooks the meal every day.&amp;lt;|endoftext|&amp;gt;The following is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
What is the opposite of &amp;apos;happy&amp;apos;?

......

Ep 2 (Step 000220): Train loss 0.354, Val loss 0.645
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;

### Response:
The meal is prepared by the chef every day.&amp;lt;|endoftext|&amp;gt;The following is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Translate &amp;apos;I love you&amp;apos; into
Ep 2 (Step 000225): Train loss 0.343, Val loss 0.644
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;

### Response:
The meal is prepared every day by the chef.&amp;lt;|endoftext|&amp;gt;The following is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
What is the capital of the United Kingdom
Ep 2 (Step 000230): Train loss 0.339, Val loss 0.634
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;

### Response:
The meal is prepared every day by the chef.&amp;lt;|endoftext|&amp;gt;The following is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
What is the opposite of &amp;apos;cold&amp;apos;?
.
Finished in 687.2 seconds (0.00s async, 687.2s sync)
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is the plot of training and validation losses over two epochs:&lt;/p&gt;

&lt;p&gt;&lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://karlosmid.com/assets/2026/06/fine-tuning-training-validation-loss.png&quot; alt=&quot;Training and validation loss over two epochs&quot;&gt;&lt;/p&gt;

&lt;p&gt;&lt;em&gt;Training and validation loss over two epochs.&lt;/em&gt;&lt;/p&gt;

&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-77-extracting-and-saving-responses&quot;&gt;7.7 Extracting and saving responses&lt;/h2&gt;

&lt;p&gt;It is time to evaluate how good our instruction fine-tuned LLM is. We will first extract model responses for our test input set. The first approach is to print out the first three test data entries:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.7 evaluates fine-tuned instruction model on first test samples&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;checkpoint_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;ch7_instruction_finetuned_gpt2_355m_model_and_optimizer.nx&amp;quot;&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;device&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;use_accelerated_backend&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
    &lt;span&gt;tokenizer&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;code-davinci-002&amp;quot;&lt;/span&gt;
    &lt;span&gt;:rand&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;seed&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;:exsss&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;123&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;123&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;123&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;

    &lt;span&gt;%{&lt;/span&gt;&lt;span&gt;model_state_dict:&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;ModelCheckpoint&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;load_training_state!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;model&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;file_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tmp_dir!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;llm_scratch_instruction_data&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;instruction-data.json&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;download_and_load_instructions_file&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;@instruction_data_url&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.85&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;expected_samples&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;
      &lt;span&gt;%{&lt;/span&gt;
        &lt;span&gt;input_text:&lt;/span&gt;
          &lt;span&gt;&amp;quot;Below is an instruction that describes a task. &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
            &lt;span&gt;&amp;quot;Write a response that appropriately completes the request.&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
            &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Instruction:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Rewrite the sentence using a simile.&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
            &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Input:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;The car is very fast.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;correct_response:&lt;/span&gt; &lt;span&gt;&amp;quot;The car is as fast as lightning.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;model_response:&lt;/span&gt; &lt;span&gt;&amp;quot;The car is as fast as a cheetah.&amp;quot;&lt;/span&gt;
      &lt;span&gt;},&lt;/span&gt;
      &lt;span&gt;%{&lt;/span&gt;
        &lt;span&gt;input_text:&lt;/span&gt;
          &lt;span&gt;&amp;quot;Below is an instruction that describes a task. &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
            &lt;span&gt;&amp;quot;Write a response that appropriately completes the request.&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
            &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Instruction:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;What type of cloud is typically associated with thunderstorms?&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;correct_response:&lt;/span&gt;
          &lt;span&gt;&amp;quot;The type of cloud typically associated with thunderstorms is cumulonimbus.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;model_response:&lt;/span&gt;
          &lt;span&gt;&amp;quot;A thunderstorm is a type of cloud that typically forms when thunderstorms produce a dense, convective layer of air that is at least 10 miles thick.&amp;quot;&lt;/span&gt;
      &lt;span&gt;},&lt;/span&gt;
      &lt;span&gt;%{&lt;/span&gt;
        &lt;span&gt;input_text:&lt;/span&gt;
          &lt;span&gt;&amp;quot;Below is an instruction that describes a task. &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
            &lt;span&gt;&amp;quot;Write a response that appropriately completes the request.&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
            &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n\n&lt;/span&gt;&lt;span&gt;### Instruction:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Name the author of &amp;apos;Pride and Prejudice&amp;apos;.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;correct_response:&lt;/span&gt; &lt;span&gt;&amp;quot;Jane Austen.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;model_response:&lt;/span&gt; &lt;span&gt;&amp;quot;The author of &amp;apos;Pride and Prejudice&amp;apos; is Jane Austen.&amp;quot;&lt;/span&gt;
      &lt;span&gt;}&lt;/span&gt;
    &lt;span&gt;]&lt;/span&gt;

    &lt;span&gt;test_data&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;take&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;zip&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;expected_samples&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;each&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;fn&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;expected&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
      &lt;span&gt;input_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

      &lt;span&gt;token_ids&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
        &lt;span&gt;TextGeneration&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;generate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
          &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
          &lt;span&gt;input_text&lt;/span&gt;
          &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;text_to_token_ids&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
          &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;device&lt;/span&gt;&lt;span&gt;),&lt;/span&gt;
          &lt;span&gt;256&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
          &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;context_length&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
          &lt;span&gt;0.0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
          &lt;span&gt;nil&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
          &lt;span&gt;50_256&lt;/span&gt;
        &lt;span&gt;)&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;BinaryBackend&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

      &lt;span&gt;generated_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;token_ids_to_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;token_ids&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

      &lt;span&gt;response_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
        &lt;span&gt;generated_text&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;..-&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;//&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;replace&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;### Response:&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;trim&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;input_text&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;expected&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;input_text&lt;/span&gt;
      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;entry&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;expected&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;correct_response&lt;/span&gt;
      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;response_text&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;expected&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;model_response&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;By our own judgment, we can see that two responses are correct, while the cloud answer is wrong. We have three evaluation options:&lt;/p&gt;

&lt;ul&gt;
  &lt;li&gt;Short-answer and multiple-choice benchmarks, such as MMLU&lt;/li&gt;
  &lt;li&gt;Human preference comparisons with other LLMs, such as LMSYS&lt;/li&gt;
  &lt;li&gt;Automated conversational benchmarks, where another LLM (e.g. GPT-5) judges the responses&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Since this chapter focuses on conversational responses, we will not use the MMLU option. We will use the third option and let another LLM evaluate our instruction fine-tuned model. For that, we need a small helper module that generates a model response for each record in a given data set and saves the results to a JSON file:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;
&lt;span&gt;defmodule&lt;/span&gt; &lt;span&gt;LlmScratch&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;InstructionsEvaluation&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
  &lt;span&gt;@moduledoc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Helpers for evaluating an instruction fine-tuned model on instruction records.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;

  &lt;span&gt;alias&lt;/span&gt; &lt;span&gt;LlmScratch&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;{&lt;/span&gt;&lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;TextGeneration&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;

  &lt;span&gt;@default_output_path&lt;/span&gt; &lt;span&gt;&amp;quot;instruction-data-with-response.json&amp;quot;&lt;/span&gt;
  &lt;span&gt;@default_max_new_tokens&lt;/span&gt; &lt;span&gt;256&lt;/span&gt;
  &lt;span&gt;@default_eos_id&lt;/span&gt; &lt;span&gt;50_256&lt;/span&gt;

  &lt;span&gt;@type&lt;/span&gt; &lt;span&gt;instruction_record&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;@type&lt;/span&gt; &lt;span&gt;device&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;nil&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;:default&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;atom&lt;/span&gt;&lt;span&gt;()&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;tuple&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

  &lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Generates model responses for instruction records and writes them as JSON.

  Each input record is preserved and extended with a `&amp;quot;model_response&amp;quot;` key.
  The output mirrors the chapter 7 Python loop that writes
  `instruction-data-with-response.json`.

  ## Options

    * `:output_path` - JSON destination. Defaults to
      `&amp;quot;instruction-data-with-response.json&amp;quot;`.
    * `:max_new_tokens` - maximum generated tokens per record. Defaults to
      `256`.
    * `:context_size` - context length passed to `TextGeneration.generate/7`.
      Defaults to `model.cfg.context_length`.
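    * `:eos_id` - end-of-text token id. Defaults to `50256`.
    * `:progress` - when `true`, prints a `Generating response i/n` line for
      each record. Defaults to `false`.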

  Returns the enriched records after writing the file.

  ## Example

      test_data = [
        %{
          &amp;quot;instruction&amp;quot; =&amp;gt; &amp;quot;Say hi.&amp;quot;,
          &amp;quot;input&amp;quot; =&amp;gt; &amp;quot;&amp;quot;,
          &amp;quot;output&amp;quot; =&amp;gt; &amp;quot;Hi.&amp;quot;
        }
      ]

      LlmScratch.InstructionsEvaluation.write_responses!(
        test_data,
        model,
        &amp;quot;code-davinci-002&amp;quot;,
        device,
        output_path: &amp;quot;instruction-data-with-response.json&amp;quot;
      )

  Example output:

      [
        %{
          &amp;quot;input&amp;quot; =&amp;gt; &amp;quot;&amp;quot;,
          &amp;quot;instruction&amp;quot; =&amp;gt; &amp;quot;Say hi.&amp;quot;,
          &amp;quot;model_response&amp;quot; =&amp;gt; &amp;quot;Hi.&amp;quot;,
          &amp;quot;output&amp;quot; =&amp;gt; &amp;quot;Hi.&amp;quot;
        }
      ]
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;write_responses!&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()],&lt;/span&gt; &lt;span&gt;struct&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;keyword&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt;
          &lt;span&gt;[&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()]&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;write_responses!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;[])&lt;/span&gt;
      &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_list&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_struct&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;model&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_list&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;opts&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;output_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Keyword&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;get&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;opts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:output_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;@default_output_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;enriched_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;generate_responses&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;output_path&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;dirname&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;mkdir_p!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;&lt;span&gt;:ok&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;encoded&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Jason&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encode&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;enriched_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;pretty:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;write!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;output_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;encoded&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;enriched_data&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Generates model responses for instruction records.

  Returns the original records with an added `&amp;quot;model_response&amp;quot;` field.

  ## Example

      test_data = [
        %{
          &amp;quot;instruction&amp;quot; =&amp;gt; &amp;quot;Name one color.&amp;quot;,
          &amp;quot;input&amp;quot; =&amp;gt; &amp;quot;&amp;quot;,
          &amp;quot;output&amp;quot; =&amp;gt; &amp;quot;Blue.&amp;quot;
        }
      ]

      LlmScratch.InstructionsEvaluation.generate_responses(
        test_data,
        model,
        &amp;quot;code-davinci-002&amp;quot;,
        device
      )

  Example output:

      [
        %{
          &amp;quot;input&amp;quot; =&amp;gt; &amp;quot;&amp;quot;,
          &amp;quot;instruction&amp;quot; =&amp;gt; &amp;quot;Name one color.&amp;quot;,
          &amp;quot;model_response&amp;quot; =&amp;gt; &amp;quot;Blue.&amp;quot;,
          &amp;quot;output&amp;quot; =&amp;gt; &amp;quot;Blue.&amp;quot;
        }
      ]
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;generate_responses&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()],&lt;/span&gt; &lt;span&gt;struct&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;keyword&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt;
          &lt;span&gt;[&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()]&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;generate_responses&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;opts&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;[])&lt;/span&gt;
      &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_list&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_struct&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;model&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_list&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;opts&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;max_new_tokens&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Keyword&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;get&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;opts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:max_new_tokens&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;@default_max_new_tokens&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;context_size&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Keyword&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;get_lazy&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;opts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:context_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;fn&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;context_length&lt;/span&gt; &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;eos_id&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Keyword&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;get&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;opts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:eos_id&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;@default_eos_id&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;with_index&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;fn&lt;/span&gt; &lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;index&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
      &lt;span&gt;if&lt;/span&gt; &lt;span&gt;Keyword&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;get&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;opts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:progress&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
        &lt;span&gt;IO&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;puts&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;Generating response &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;index&lt;/span&gt; &lt;span&gt;+&lt;/span&gt; &lt;span&gt;1&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;end&lt;/span&gt;

      &lt;span&gt;Map&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;put&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;&amp;quot;model_response&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;generate_response&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;max_new_tokens&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;context_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;eos_id&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;generate_response&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;max_new_tokens&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;context_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;eos_id&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;input_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;token_ids&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;TextGeneration&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;generate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;input_text&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;text_to_token_ids&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;maybe_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;device&lt;/span&gt;&lt;span&gt;),&lt;/span&gt;
        &lt;span&gt;max_new_tokens&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;context_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;0.0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;nil&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;eos_id&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;BinaryBackend&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;token_ids&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;token_ids_to_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;..-&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;//&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;replace&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;### Response:&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;trim&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;maybe_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;nil&lt;/span&gt;&lt;span&gt;),&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;&lt;span&gt;:&lt;/span&gt; &lt;span&gt;tensor&lt;/span&gt;
  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;maybe_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:default&lt;/span&gt;&lt;span&gt;),&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;&lt;span&gt;:&lt;/span&gt; &lt;span&gt;tensor&lt;/span&gt;
  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;maybe_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;),&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;&lt;span&gt;:&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tensor&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
&lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-78-evaluating-the-fine-tuned-llm&quot;&gt;7.8 Evaluating the fine-tuned LLM&lt;/h2&gt;

&lt;p&gt;We will evaluate our model using &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://ollama.com/&quot;&gt;Ollama&lt;/a&gt;, so you need to install and start it before running the tests. Here is the Ollama helper that will be used in the tests:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;
&lt;span&gt;defmodule&lt;/span&gt; &lt;span&gt;LlmScratch&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;OllamaUtils&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
  &lt;span&gt;@moduledoc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Utilities for working with a local Ollama runtime.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;

  &lt;span&gt;@llama_server_process&lt;/span&gt; &lt;span&gt;&amp;quot;ollama&amp;quot;&lt;/span&gt;
  &lt;span&gt;@default_model&lt;/span&gt; &lt;span&gt;&amp;quot;llama3&amp;quot;&lt;/span&gt;
  &lt;span&gt;@default_chat_url&lt;/span&gt; &lt;span&gt;&amp;quot;http://localhost:11434/api/chat&amp;quot;&lt;/span&gt;

  &lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Returns whether the local `ollama` process is running.

  The check uses `pgrep -x ollama` when `pgrep` is available, and otherwise
  falls back to scanning process command names with `ps`.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;ollama_running?&lt;/span&gt;&lt;span&gt;()&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;boolean&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;ollama_running?&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;case&lt;/span&gt; &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;find_executable&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;pgrep&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
      &lt;span&gt;nil&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt; &lt;span&gt;ps_process_running?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;@llama_server_process&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;_pgrep&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt; &lt;span&gt;pgrep_process_running?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;@llama_server_process&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Sends a prompt to Ollama&amp;apos;s local chat API and returns the generated text.

  Ollama&amp;apos;s standard local chat endpoint is
  `http://localhost:11434/api/chat`. The response is streamed as
  newline-delimited JSON; this function concatenates each
  `&amp;quot;message&amp;quot;.&amp;quot;content&amp;quot;` chunk into one string.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;query_model&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;query_model&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;prompt&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;@default_model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;url&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;@default_chat_url&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;prompt&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;model&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;url&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;request_body&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;%{&lt;/span&gt;
      &lt;span&gt;model:&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;messages:&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;
        &lt;span&gt;%{&lt;/span&gt;&lt;span&gt;role:&lt;/span&gt; &lt;span&gt;&amp;quot;user&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;content:&lt;/span&gt; &lt;span&gt;prompt&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;
      &lt;span&gt;],&lt;/span&gt;
      &lt;span&gt;options:&lt;/span&gt; &lt;span&gt;%{&lt;/span&gt;
        &lt;span&gt;seed:&lt;/span&gt; &lt;span&gt;123&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;temperature:&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_ctx:&lt;/span&gt; &lt;span&gt;2048&lt;/span&gt;
      &lt;span&gt;}&lt;/span&gt;
    &lt;span&gt;}&lt;/span&gt;

    &lt;span&gt;response&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Req&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;post!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;url&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;json:&lt;/span&gt; &lt;span&gt;request_body&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;receive_timeout:&lt;/span&gt; &lt;span&gt;:infinity&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;case&lt;/span&gt; &lt;span&gt;response&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;status&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
      &lt;span&gt;200&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
        &lt;span&gt;parse_chat_response&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;response&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;body&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

      &lt;span&gt;status&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
        &lt;span&gt;raise&lt;/span&gt; &lt;span&gt;&amp;quot;Ollama chat request failed with status &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;status&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;: &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;inspect&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;response&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;body&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;pgrep_process_running?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;process_name&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;case&lt;/span&gt; &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cmd&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;pgrep&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;-x&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;process_name&lt;/span&gt;&lt;span&gt;],&lt;/span&gt; &lt;span&gt;stderr_to_stdout:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
      &lt;span&gt;{&lt;/span&gt;&lt;span&gt;_output&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;
      &lt;span&gt;{&lt;/span&gt;&lt;span&gt;_output&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;_status&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;ps_process_running?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;process_name&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;case&lt;/span&gt; &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cmd&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;ps&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;-axo&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;comm&amp;quot;&lt;/span&gt;&lt;span&gt;],&lt;/span&gt; &lt;span&gt;stderr_to_stdout:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
      &lt;span&gt;{&lt;/span&gt;&lt;span&gt;output&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
        &lt;span&gt;output&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;split&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;trim:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;any?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;basename&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;amp;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;process_name&lt;/span&gt;&lt;span&gt;))&lt;/span&gt;

      &lt;span&gt;{&lt;/span&gt;&lt;span&gt;_output&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;_status&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
        &lt;span&gt;false&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;parse_chat_response&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;body&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;body&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;body&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;split&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;trim:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;map&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;decode_chat_chunk!&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;

  &lt;span&gt;defp&lt;/span&gt; &lt;span&gt;decode_chat_chunk!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;line&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;line&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Jason&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;decode!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;get_in&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;&lt;span&gt;&amp;quot;message&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;content&amp;quot;&lt;/span&gt;&lt;span&gt;])&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Kernel&lt;/span&gt;&lt;span&gt;.||&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is a test that shows how we use Llama 3 through Ollama to evaluate our model for instruction tasks:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;
  &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:ollama&lt;/span&gt;
  &lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.8 queries local Ollama llama3 model for response scores&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;OllamaUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;ollama_running?&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;result&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;OllamaUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;query_model&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;What do Llamas eat?&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;llama3&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;contains?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;result&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;Llamas are herbivores&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;contains?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;result&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;Minerals&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;contains?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;result&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;Grasses&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;response_data_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;instruction-data-with-response.json&amp;quot;&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;response_data_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;test_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;response_data_path&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;read!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Jason&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;decode!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;take&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;expected_samples&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;
      &lt;span&gt;%{&lt;/span&gt;
        &lt;span&gt;output:&lt;/span&gt; &lt;span&gt;&amp;quot;The car is as fast as lightning.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;model_response:&lt;/span&gt; &lt;span&gt;&amp;quot;The car is as fast as a bullet.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;score:&lt;/span&gt; &lt;span&gt;85&lt;/span&gt;
      &lt;span&gt;},&lt;/span&gt;
      &lt;span&gt;%{&lt;/span&gt;
        &lt;span&gt;output:&lt;/span&gt; &lt;span&gt;&amp;quot;The type of cloud typically associated with thunderstorms is cumulonimbus.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;model_response:&lt;/span&gt; &lt;span&gt;&amp;quot;The type of cloud associated with thunderstorms is a cumulus cloud.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;score:&lt;/span&gt; &lt;span&gt;40&lt;/span&gt;
      &lt;span&gt;},&lt;/span&gt;
      &lt;span&gt;%{&lt;/span&gt;
        &lt;span&gt;output:&lt;/span&gt; &lt;span&gt;&amp;quot;Jane Austen.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;model_response:&lt;/span&gt; &lt;span&gt;&amp;quot;The author of &amp;apos;Pride and Prejudice&amp;apos; is Jane Austen.&amp;quot;&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;score:&lt;/span&gt; &lt;span&gt;95&lt;/span&gt;
      &lt;span&gt;}&lt;/span&gt;
    &lt;span&gt;]&lt;/span&gt;

    &lt;span&gt;test_data&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;take&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;zip&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;expected_samples&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;each&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;fn&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;expected&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
      &lt;span&gt;prompt&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
        &lt;span&gt;&amp;quot;Given the input `&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;` &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;and correct output `&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;`, &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;score the model response `&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;model_response&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;`&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot; on a scale from 0 to 100, where 100 is the best score. &amp;quot;&lt;/span&gt;

      &lt;span&gt;score_response&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;OllamaUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;query_model&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;prompt&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;llama3&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

      &lt;span&gt;report&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
        &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Dataset response:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;&amp;gt;&amp;gt; &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Model response:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;&amp;gt;&amp;gt; &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;model_response&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Score:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;&amp;gt;&amp;gt; &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;score_response&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;-------------------------&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;

      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;entry&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;expected&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;output&lt;/span&gt;
      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;entry&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;model_response&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;expected&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;model_response&lt;/span&gt;
      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;response_score&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;score_response&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;expected&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;score&lt;/span&gt;

      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;report&lt;/span&gt; &lt;span&gt;=~&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Dataset response:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;gt;&amp;gt; &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;expected&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;output&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;
      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;report&lt;/span&gt; &lt;span&gt;=~&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Model response:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;gt;&amp;gt; &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;expected&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;model_response&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;
      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;report&lt;/span&gt; &lt;span&gt;=~&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;Score:&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;gt;&amp;gt; &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;score_response&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;
      &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;ends_with?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;report&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;-------------------------&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;We added a new function in the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;InstructionsEvaluation&lt;/code&gt; module that evaluates our model’s instruction outputs with a score in the range &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;0..100&lt;/code&gt;:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Scores generated model responses with a local Ollama model.

  `json_key` identifies the generated response field in each instruction
  record, for example `&amp;quot;model_response&amp;quot;`. Entries whose score cannot be
  parsed as an integer are skipped, matching the chapter 7 evaluation loop.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;generate_model_scores&lt;/span&gt;&lt;span&gt;([&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;()],&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;())&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;[&lt;/span&gt;&lt;span&gt;integer&lt;/span&gt;&lt;span&gt;()]&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;generate_model_scores&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;json_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;json_key&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;&amp;quot;llama3&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_list&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;json_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;json_key&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;model&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;reduce&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;json_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;[],&lt;/span&gt; &lt;span&gt;fn&lt;/span&gt; &lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;scores&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
      &lt;span&gt;prompt&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
        &lt;span&gt;&amp;quot;Given the input `&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;` &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;and correct output `&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;&amp;quot;output&amp;quot;&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;`, &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;score the model response `&lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;[&lt;/span&gt;&lt;span&gt;json_key&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;`&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot; on a scale from 0 to 100, where 100 is the best score. &amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt;
          &lt;span&gt;&amp;quot;Respond with the integer number only.&amp;quot;&lt;/span&gt;

      &lt;span&gt;score&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;OllamaUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;query_model&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;prompt&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

      &lt;span&gt;try&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
        &lt;span&gt;[&lt;/span&gt;&lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;to_integer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;trim&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;score&lt;/span&gt;&lt;span&gt;))&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;scores&lt;/span&gt;&lt;span&gt;]&lt;/span&gt;
      &lt;span&gt;rescue&lt;/span&gt;
        &lt;span&gt;ArgumentError&lt;/span&gt; &lt;span&gt;-&amp;gt;&lt;/span&gt;
          &lt;span&gt;IO&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;puts&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;Could not convert score: &lt;/span&gt;&lt;span&gt;#{&lt;/span&gt;&lt;span&gt;score&lt;/span&gt;&lt;span&gt;}&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
          &lt;span&gt;scores&lt;/span&gt;
      &lt;span&gt;end&lt;/span&gt;
    &lt;span&gt;end&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;reverse&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is a test that uses this function. The average score for our instruction fine-tuned model is approximately 50:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:ollama&lt;/span&gt;
  &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;timeout:&lt;/span&gt; &lt;span&gt;900_000&lt;/span&gt;
  &lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;7.8 scores all saved instruction responses with Ollama&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;OllamaUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;ollama_running?&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;response_data_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;instruction-data-with-response.json&amp;quot;&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;response_data_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;test_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;response_data_path&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;read!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Jason&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;decode!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;scores&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionsEvaluation&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;generate_model_scores&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;model_response&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;average_score&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;sum&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;scores&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;/&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;scores&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;scores&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;110&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;scores&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;test_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert_in_delta&lt;/span&gt; &lt;span&gt;average_score&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;50.32&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0.4&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;h2 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-exercises&quot;&gt;Exercises&lt;/h2&gt;

&lt;h3 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-71-changing-prompt-styles&quot;&gt;7.1 Changing prompt styles&lt;/h3&gt;

&lt;p&gt;After fine-tuning the model with the Alpaca prompt style, try the Phi-3 prompt style shown in figure 7.4 and observe whether it affects the response quality of the model. We first need to add support for the Phi-3 prompt format:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;@doc&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&amp;quot;
  Formats an instruction dataset entry using the requested prompt style.

  `:alpaca` delegates to `format_input/1`. `:phi3` mirrors the Phi-3 chat
  prompt style shown in chapter 7:

      &amp;lt;|user|&amp;gt;
      ...
      &amp;lt;|end|&amp;gt;
      &amp;lt;|assistant|&amp;gt;

  The returned string contains only the model input, not the expected response.
  &amp;quot;&amp;quot;&amp;quot;&lt;/span&gt;
  &lt;span&gt;@spec&lt;/span&gt; &lt;span&gt;format_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;instruction_record&lt;/span&gt;&lt;span&gt;(),&lt;/span&gt; &lt;span&gt;:alpaca&lt;/span&gt; &lt;span&gt;|&lt;/span&gt; &lt;span&gt;:phi3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;::&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;t&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;format_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;style&lt;/span&gt; &lt;span&gt;\\&lt;/span&gt; &lt;span&gt;:alpaca&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;format_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;:alpaca&lt;/span&gt;&lt;span&gt;),&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;&lt;span&gt;:&lt;/span&gt; &lt;span&gt;format_input&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;entry&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

  &lt;span&gt;def&lt;/span&gt; &lt;span&gt;format_text&lt;/span&gt;&lt;span&gt;(%{&lt;/span&gt;&lt;span&gt;&amp;quot;instruction&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;instruction&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;input&amp;quot;&lt;/span&gt; &lt;span&gt;=&amp;gt;&lt;/span&gt; &lt;span&gt;input&lt;/span&gt;&lt;span&gt;},&lt;/span&gt; &lt;span&gt;:phi3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;when&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;instruction&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;and&lt;/span&gt; &lt;span&gt;is_binary&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;input&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;user_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;if&lt;/span&gt; &lt;span&gt;input&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
        &lt;span&gt;instruction&lt;/span&gt;
      &lt;span&gt;else&lt;/span&gt;
        &lt;span&gt;instruction&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt; &lt;span&gt;input&lt;/span&gt;
      &lt;span&gt;end&lt;/span&gt;

    &lt;span&gt;&amp;quot;&amp;lt;|user|&amp;gt;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt; &lt;span&gt;user_text&lt;/span&gt; &lt;span&gt;&amp;lt;&amp;gt;&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;lt;|end|&amp;gt;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;lt;|assistant|&amp;gt;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;diff --git a/lib/llm_scratch/instruction_dataset.ex b/lib/llm_scratch/instruction_dataset.ex
index a1658d8..c229439 100644
&lt;/span&gt;&lt;span&gt;--- a/lib/llm_scratch/instruction_dataset.ex
&lt;/span&gt;&lt;span&gt;+++ b/lib/llm_scratch/instruction_dataset.ex
&lt;/span&gt;&lt;span&gt;@@ -3,9 +3,10 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
   Pre-tokenized instruction fine-tuning dataset.
 
   This mirrors the PyTorch `InstructionDataset` example from chapter 7. A
&lt;span&gt;-  dataset is built from decoded instruction maps, formats each map as an
-  Alpaca-style prompt plus response, and pre-tokenizes the full text during
-  construction.
&lt;/span&gt;&lt;span&gt;+  dataset is built from decoded instruction maps, formats each map as a prompt
+  plus response, and pre-tokenizes the full text during construction. Alpaca
+  prompt formatting is used by default; Phi-3-style chat formatting can be
+  selected with `prompt_style: :phi3`.
&lt;/span&gt; 
   ## Examples
 
&lt;span&gt;@@ -26,27 +27,39 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
        25, 198, 40313, 13]
   &amp;quot;&amp;quot;&amp;quot;
 
&lt;span&gt;-  @enforce_keys [:data, :encoded_texts]
-  defstruct [:data, :encoded_texts]
&lt;/span&gt;&lt;span&gt;+  @enforce_keys [:data, :encoded_texts, :prompt_style]
+  defstruct [:data, :encoded_texts, :prompt_style]
&lt;/span&gt; 
   @end_of_text &amp;quot;&amp;lt;|endoftext|&amp;gt;&amp;quot;
   @pad_token_id 50_256
 
   @type instruction_record :: LlmScratch.FineTuneDataLoader.instruction_record()
   @type tokenizer :: String.t()
&lt;span&gt;+  @type prompt_style :: :alpaca | :phi3
&lt;/span&gt;   @type t :: %__MODULE__{
           data: [instruction_record()],
&lt;span&gt;-          encoded_texts: [[integer()]]
&lt;/span&gt;&lt;span&gt;+          encoded_texts: [[integer()]],
+          prompt_style: prompt_style()
&lt;/span&gt;         }
 
   @doc &amp;quot;&amp;quot;&amp;quot;
   Creates an instruction dataset from decoded instruction records.
 
&lt;span&gt;-  Each record is formatted with `LlmScratch.FineTuneDataLoader.format_input/1`,
-  then the target response is appended as:
&lt;/span&gt;&lt;span&gt;+  By default, each record is formatted with
+  `LlmScratch.FineTuneDataLoader.format_input/1`, then the target response is
+  appended as:
&lt;/span&gt; 
       \\n\\n### Response:\\n...
 
&lt;span&gt;+  With `prompt_style: :phi3`, each record is formatted as:
+
+      &amp;lt;|user|&amp;gt;
+      ...
+      &amp;lt;|end|&amp;gt;
+      &amp;lt;|assistant|&amp;gt;
+      ...
+      &amp;lt;|end|&amp;gt;
+
&lt;/span&gt;   The resulting full text is encoded immediately and stored in
   `dataset.encoded_texts`.
 
&lt;span&gt;@@ -61,6 +74,8 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
 
     * `:allowed_special` - special tokens allowed when `tokenizer` is a
       Tiktoken model name. Defaults to `[&amp;quot;&amp;lt;|endoftext|&amp;gt;&amp;quot;]`.
&lt;span&gt;+    * `:prompt_style` - prompt format for full training examples. Supported
+      values are `:alpaca` and `:phi3`. Defaults to `:alpaca`.
&lt;/span&gt; 
   ## Output
 
&lt;span&gt;@@ -97,6 +112,8 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
   &amp;quot;&amp;quot;&amp;quot;
   @spec new([instruction_record()], tokenizer(), keyword()) :: t()
   def new(data, tokenizer, opts \\ []) when is_list(data) do
&lt;span&gt;+    prompt_style = Keyword.get(opts, :prompt_style, :alpaca)
+
&lt;/span&gt;     # Pre-tokenize each complete training example once so repeated dataset
     # access does not rebuild the prompt or call the tokenizer again.
     encoded_texts =
&lt;span&gt;@@ -104,7 +121,7 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
         entry
         # The model trains on the instruction/input prompt followed by the
         # expected response target, matching the chapter 7 PyTorch dataset.
&lt;span&gt;-        |&amp;gt; full_text()
&lt;/span&gt;&lt;span&gt;+        |&amp;gt; full_text(prompt_style)
&lt;/span&gt;         |&amp;gt; encode_text!(tokenizer, opts)
       end)
 
&lt;span&gt;@@ -112,7 +129,8 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
     # from `encoded_texts`, which mirrors the PyTorch dataset fields.
     %__MODULE__{
       data: data,
&lt;span&gt;-      encoded_texts: encoded_texts
&lt;/span&gt;&lt;span&gt;+      encoded_texts: encoded_texts,
+      prompt_style: prompt_style
&lt;/span&gt;     }
   end
 
&lt;span&gt;@@ -269,13 +287,24 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
     }
   end
 
&lt;span&gt;-  defp full_text(%{&amp;quot;output&amp;quot; =&amp;gt; output} = entry) when is_binary(output) do
&lt;/span&gt;&lt;span&gt;+  defp full_text(%{&amp;quot;output&amp;quot; =&amp;gt; output} = entry, :alpaca) when is_binary(output) do
&lt;/span&gt;     instruction_plus_input = LlmScratch.FineTuneDataLoader.format_input(entry)
     response_text = &amp;quot;\n\n### Response:\n#{output}&amp;quot;
 
     instruction_plus_input &amp;lt;&amp;gt; response_text
   end
 
&lt;span&gt;+  defp full_text(%{&amp;quot;output&amp;quot; =&amp;gt; output} = entry, :phi3) when is_binary(output) do
+    entry
+    |&amp;gt; LlmScratch.FineTuneDataLoader.format_text(:phi3)
+    |&amp;gt; Kernel.&amp;lt;&amp;gt;(output)
+    |&amp;gt; Kernel.&amp;lt;&amp;gt;(&amp;quot;\n&amp;lt;|end|&amp;gt;&amp;quot;)
+  end
+
+  defp full_text(_entry, prompt_style) do
+    raise ArgumentError, &amp;quot;unsupported prompt style: #{inspect(prompt_style)}&amp;quot;
+  end
+
&lt;/span&gt;   defp encode_text!(text, tokenizer, opts) when is_binary(tokenizer) do
     allowed_special = Keyword.get(opts, :allowed_special, [@end_of_text])
     {:ok, token_ids} = Tiktoken.encode(tokenizer, text, allowed_special)
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Here is a test for instruction fine-tuning using the Phi-3 prompt format:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:download&lt;/span&gt;
  &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:train_long&lt;/span&gt;
  &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;timeout:&lt;/span&gt; &lt;span&gt;3_600_000&lt;/span&gt;
  &lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;exercise 7.1 trains instruction model with Phi-3 prompt style&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;device&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;use_accelerated_backend&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;tokenizer&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;code-davinci-002&amp;quot;&lt;/span&gt;
    &lt;span&gt;batch_size&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;8&lt;/span&gt;
    &lt;span&gt;num_workers&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;3&lt;/span&gt;

    &lt;span&gt;model&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;&amp;quot;355M&amp;quot;&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;GPT2OpenAI&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;load_model&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;models_dir:&lt;/span&gt; &lt;span&gt;&amp;quot;gpt2&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;file_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tmp_dir!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;llm_scratch_instruction_data&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;instruction-data.json&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;download_and_load_instructions_file&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;@instruction_data_url&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.85&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;-&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;
    &lt;span&gt;train_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;+&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;val_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_dataset&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;prompt_style:&lt;/span&gt; &lt;span&gt;:phi3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_dataset&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;prompt_style:&lt;/span&gt; &lt;span&gt;:phi3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;:rand&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;seed&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;:exsss&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;{&lt;/span&gt;&lt;span&gt;123&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;123&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;123&lt;/span&gt;&lt;span&gt;})&lt;/span&gt;

    &lt;span&gt;train_loader&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;DataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encoded_texts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;batch_size:&lt;/span&gt; &lt;span&gt;batch_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;collate_fn:&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;binary_instruction_collate&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;shuffle:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;drop_last:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_workers:&lt;/span&gt; &lt;span&gt;num_workers&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;val_loader&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;DataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;encoded_texts&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;batch_size:&lt;/span&gt; &lt;span&gt;batch_size&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;collate_fn:&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;binary_instruction_collate&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;shuffle:&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;drop_last:&lt;/span&gt; &lt;span&gt;false&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_workers:&lt;/span&gt; &lt;span&gt;num_workers&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_loss&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;LossUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;calc_loss_loader&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_loader&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;5&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;val_loss&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;LossUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;calc_loss_loader&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_loader&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;5&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;input_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;val_data&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;first&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;format_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;:phi3&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;token_ids&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;TextGeneration&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;generate&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;input_text&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;text_to_token_ids&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
        &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;device&lt;/span&gt;&lt;span&gt;),&lt;/span&gt;
        &lt;span&gt;35&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;cfg&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;context_length&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;0.0&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;nil&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;50_256&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;BinaryBackend&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;generated_text&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;TextUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;token_ids_to_text&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;token_ids&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;num_epochs&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;2&lt;/span&gt;
    &lt;span&gt;optimizer&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Training&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;adamw&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;0.00005&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;weight_decay:&lt;/span&gt; &lt;span&gt;0.1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;{&lt;/span&gt;&lt;span&gt;trained_model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;trained_optimizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;val_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;Training&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;train_model_simple&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;train_loader&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;val_loader&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;optimizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;num_epochs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;5&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;generate_samples:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;return_optimizer:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;checkpoint_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;ch7_instruction_finetuned_gpt2_355m_phi3_model_and_optimizer.nx&amp;quot;&lt;/span&gt;
    &lt;span&gt;ModelCheckpoint&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;save_training_state!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;trained_model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;trained_optimizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;metrics_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;ch7_instruction_finetuning_phi3_metrics.json&amp;quot;&lt;/span&gt;

    &lt;span&gt;write_instruction_training_metrics!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
      &lt;span&gt;metrics_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;num_epochs&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;train_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;val_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
      &lt;span&gt;tokens_seen&lt;/span&gt;
    &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;train_dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;prompt_style&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;:phi3&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;val_dataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;prompt_style&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;:phi3&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;starts_with?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;&amp;lt;|user|&amp;gt;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;ends_with?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;lt;|end|&amp;gt;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;lt;|assistant|&amp;gt;&lt;/span&gt;&lt;span&gt;\n&lt;/span&gt;&lt;span&gt;&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;String&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;starts_with?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;generated_text&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;input_text&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;is_float&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_loss&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;is_float&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_loss&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;trained_model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;__struct__&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;__struct__&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;%&lt;/span&gt;&lt;span&gt;Training&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;AdamW&lt;/span&gt;&lt;span&gt;{}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trained_optimizer&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;stat!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;size&lt;/span&gt; &lt;span&gt;&amp;gt;&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;metrics_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;stat!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;metrics_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;size&lt;/span&gt; &lt;span&gt;&amp;gt;&lt;/span&gt; &lt;span&gt;0&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_losses&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;47&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_losses&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;47&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;47&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;all?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_losses&lt;/span&gt; &lt;span&gt;++&lt;/span&gt; &lt;span&gt;val_losses&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;is_float&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;all?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;amp;&lt;/span&gt;&lt;span&gt;is_integer&lt;/span&gt;&lt;span&gt;/&lt;/span&gt;&lt;span&gt;1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;tokens_seen&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;sort&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;last&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;&amp;gt;&lt;/span&gt; &lt;span&gt;List&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;first&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;tokens_seen&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;And here is the training output:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;  Ep 1 (Step 000000): Train loss 2.150, Val loss 2.147
&amp;lt;|user|&amp;gt;
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
&amp;lt;|user|&amp;gt;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
&amp;lt;|end|&amp;gt;
Ep 1 (Step 000005): Train loss 1.200, Val loss 1.174
&amp;lt;|user|&amp;gt;
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
The chef cooks the meal every day.
&amp;lt;|end|&amp;gt;&amp;lt;|endoftext|&amp;gt;The following is a list of all the Pokémon in the Pokémon Trading Card Game.
For a list of all Pokémon in the Pokémon Trading Card Game, see Pokémon.
If
Ep 1 (Step 000010): Train loss 1.061, Val loss 1.044
&amp;lt;|user|&amp;gt;
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
The chef cooks the meal every day.
&amp;lt;|end|&amp;gt;&amp;lt;|endoftext|&amp;gt;The following is a guest post by Dr. David J. Karp, a professor of psychiatry at the University of California, San Francisco.
The term &amp;apos;psychosis&amp;apos;
Ep 1 (Step 000015): Train loss 1.005, Val loss 0.975
&amp;lt;|user|&amp;gt;
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
The chef cooks the meal every day.
&amp;lt;|end|&amp;gt;&amp;lt;|endoftext|&amp;gt;The following is a guest post by Dr. David B. Smith.
The following is a guest post by Dr. David B. Smith.
The following is a guest

...

Ep 2 (Step 000215): Train loss 0.411, Val loss 0.716
&amp;lt;|user|&amp;gt;
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
The meal is prepared by the chef every day.
&amp;lt;|end|&amp;gt;&amp;lt;|endoftext|&amp;gt;The following is a list of all the animals that are native to the United States.
1. Squirrel
2. Eagle
3. Bat
4. Eagle
Ep 2 (Step 000220): Train loss 0.385, Val loss 0.716
&amp;lt;|user|&amp;gt;
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
The meal is prepared every day by the chef.
&amp;lt;|end|&amp;gt;&amp;lt;|endoftext|&amp;gt;The following is a list of all the animals that are native to the United States.
1. Squirrel
2. Eagle
3. Bat
4. Eagle
Ep 2 (Step 000225): Train loss 0.372, Val loss 0.714
&amp;lt;|user|&amp;gt;
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
The meal is prepared every day by the chef.
&amp;lt;|end|&amp;gt;&amp;lt;|endoftext|&amp;gt;The following is a list of all the animals that are native to the United States.
1. Squirrel
2. Eagle
3. Bat
4. Cro
Ep 2 (Step 000230): Train loss 0.367, Val loss 0.701
&amp;lt;|user|&amp;gt;
Convert the active sentence to passive: &amp;apos;The chef cooks the meal every day.&amp;apos;
&amp;lt;|end|&amp;gt;
&amp;lt;|assistant|&amp;gt;
The meal is cooked every day by the chef.
&amp;lt;|end|&amp;gt;&amp;lt;|endoftext|&amp;gt;The following is a list of all the animals that are classified as birds.
Birds
Cetacea
Cetacea is a type of bird.

.
Finished in 669.8 seconds (0.00s async, 669.8s sync)
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;This test uses Llama 3, served through Ollama, to score the responses of the model fine-tuned with the Phi-3 prompt format. The score is worse than with the Alpaca prompt format:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:download&lt;/span&gt;
  &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:train&lt;/span&gt;
  &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;:ollama&lt;/span&gt;
  &lt;span&gt;@tag&lt;/span&gt; &lt;span&gt;timeout:&lt;/span&gt; &lt;span&gt;1_800_000&lt;/span&gt;
  &lt;span&gt;test&lt;/span&gt; &lt;span&gt;&amp;quot;exercise 7.1 scores Phi-3 prompt style instruction model with Ollama&amp;quot;&lt;/span&gt; &lt;span&gt;do&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;OllamaUtils&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;ollama_running?&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;

    &lt;span&gt;checkpoint_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;ch7_instruction_finetuned_gpt2_355m_phi3_model_and_optimizer.nx&amp;quot;&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;device&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;use_accelerated_backend&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
    &lt;span&gt;tokenizer&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;code-davinci-002&amp;quot;&lt;/span&gt;

    &lt;span&gt;%{&lt;/span&gt;&lt;span&gt;model_state_dict:&lt;/span&gt; &lt;span&gt;model&lt;/span&gt;&lt;span&gt;}&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;ModelCheckpoint&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;load_training_state!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;checkpoint_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;model&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Nx&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;backend_transfer&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;file_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;System&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;tmp_dir!&lt;/span&gt;&lt;span&gt;()&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;llm_scratch_instruction_data&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;Path&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;join&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;&amp;quot;instruction-data.json&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;FineTuneDataLoader&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;download_and_load_instructions_file&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;
        &lt;span&gt;file_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;@instruction_data_url&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;train_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.85&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_portion&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;trunc&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;*&lt;/span&gt; &lt;span&gt;0.1&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;test_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;slice&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;train_portion&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;test_portion&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;output_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;instruction-data-with-response-phi3.json&amp;quot;&lt;/span&gt;

    &lt;span&gt;enriched_data&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;test_data&lt;/span&gt;
      &lt;span&gt;|&amp;gt;&lt;/span&gt; &lt;span&gt;InstructionsEvaluation&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;write_responses!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;model&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;device&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;output_path:&lt;/span&gt; &lt;span&gt;output_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt;
        &lt;span&gt;prompt_style:&lt;/span&gt; &lt;span&gt;:phi3&lt;/span&gt;
      &lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;scores&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;InstructionsEvaluation&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;generate_model_scores&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;enriched_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;&amp;quot;model_response&amp;quot;&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;average_score&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;Enum&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;sum&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;scores&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;/&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;scores&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;metrics_path&lt;/span&gt; &lt;span&gt;=&lt;/span&gt; &lt;span&gt;&amp;quot;ch7_instruction_finetuning_phi3_ollama_scores.json&amp;quot;&lt;/span&gt;

    &lt;span&gt;write_ollama_score_metrics!&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;metrics_path&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;scores&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;average_score&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;output_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;File&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;exists?&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;metrics_path&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;enriched_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;110&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;scores&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;110&lt;/span&gt;
    &lt;span&gt;assert&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;scores&lt;/span&gt;&lt;span&gt;)&lt;/span&gt; &lt;span&gt;==&lt;/span&gt; &lt;span&gt;length&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;enriched_data&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
    &lt;span&gt;assert_in_delta&lt;/span&gt; &lt;span&gt;average_score&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;46.67&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;0.4&lt;/span&gt;
  &lt;span&gt;end&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;h3 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-exercise-72-instruction-and-input-masking&quot;&gt;Exercise 7.2 Instruction and input masking&lt;/h3&gt;

&lt;p&gt;After completing the chapter and fine-tuning the model with &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;InstructionDataset&lt;/code&gt;, replace the instruction and input tokens with the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;-100&lt;/code&gt; mask to use the instruction masking method illustrated in figure 7.13. Then evaluate whether this has a positive effect on model performance. First, we need to support masking input prompts in targets:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;diff --git a/lib/llm_scratch/instruction_dataset.ex b/lib/llm_scratch/instruction_dataset.ex
index c229439..342a03b 100644
&lt;/span&gt;&lt;span&gt;--- a/lib/llm_scratch/instruction_dataset.ex
&lt;/span&gt;&lt;span&gt;+++ b/lib/llm_scratch/instruction_dataset.ex
&lt;/span&gt;&lt;span&gt;@@ -27,19 +27,22 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
        25, 198, 40313, 13]
   &amp;quot;&amp;quot;&amp;quot;
 
&lt;span&gt;-  @enforce_keys [:data, :encoded_texts, :prompt_style]
-  defstruct [:data, :encoded_texts, :prompt_style]
&lt;/span&gt;&lt;span&gt;+  @enforce_keys [:data, :encoded_texts, :prompt_style, :mask_out_instructions_in_target]
+  defstruct [:data, :encoded_texts, :prompt_style, :mask_out_instructions_in_target]
&lt;/span&gt; 
   @end_of_text &amp;quot;&amp;lt;|endoftext|&amp;gt;&amp;quot;
   @pad_token_id 50_256
 
   @type instruction_record :: LlmScratch.FineTuneDataLoader.instruction_record()
&lt;span&gt;+  @type encoded_example ::
+          [integer()] | %{token_ids: [integer()], prompt_length: non_neg_integer()}
&lt;/span&gt;   @type tokenizer :: String.t()
   @type prompt_style :: :alpaca | :phi3
   @type t :: %__MODULE__{
           data: [instruction_record()],
&lt;span&gt;-          encoded_texts: [[integer()]],
-          prompt_style: prompt_style()
&lt;/span&gt;&lt;span&gt;+          encoded_texts: [encoded_example()],
+          prompt_style: prompt_style(),
+          mask_out_instructions_in_target: boolean()
&lt;/span&gt;         }
 
   @doc &amp;quot;&amp;quot;&amp;quot;
&lt;span&gt;@@ -76,12 +79,18 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
       Tiktoken model name. Defaults to `[&amp;quot;&amp;lt;|endoftext|&amp;gt;&amp;quot;]`.
     * `:prompt_style` - prompt format for full training examples. Supported
       values are `:alpaca` and `:phi3`. Defaults to `:alpaca`.
&lt;span&gt;+    * `:mask_out_instructions_in_target` - when `true`, instruction/input
+      prompt targets are masked with `-100` so the loss is computed only after
+      the prompt. Padding targets are still masked independently by
+      `custom_collate/4`. Defaults to `false`.
&lt;/span&gt; 
   ## Output
 
   Returns a `%LlmScratch.InstructionDataset{}`. The `:data` field contains the
&lt;span&gt;-  original records, and `:encoded_texts` contains one pre-tokenized token-id
-  list per record.
&lt;/span&gt;&lt;span&gt;+  original records, and `:encoded_texts` contains one pre-tokenized example per
+  record. With `mask_out_instructions_in_target: false`, each example is a
+  token-id list. With `mask_out_instructions_in_target: true`, each example
+  also carries prompt-length metadata used by `custom_collate/4`.
&lt;/span&gt; 
   ## Examples
 
&lt;span&gt;@@ -113,16 +122,13 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
   @spec new([instruction_record()], tokenizer(), keyword()) :: t()
   def new(data, tokenizer, opts \\ []) when is_list(data) do
     prompt_style = Keyword.get(opts, :prompt_style, :alpaca)
&lt;span&gt;+    mask_out_instructions_in_target = Keyword.get(opts, :mask_out_instructions_in_target, false)
&lt;/span&gt; 
     # Pre-tokenize each complete training example once so repeated dataset
     # access does not rebuild the prompt or call the tokenizer again.
     encoded_texts =
       Enum.map(data, fn entry -&amp;gt;
&lt;span&gt;-        entry
-        # The model trains on the instruction/input prompt followed by the
-        # expected response target, matching the chapter 7 PyTorch dataset.
-        |&amp;gt; full_text(prompt_style)
-        |&amp;gt; encode_text!(tokenizer, opts)
&lt;/span&gt;&lt;span&gt;+        encode_example(entry, tokenizer, opts, prompt_style, mask_out_instructions_in_target)
&lt;/span&gt;       end)
 
     # Keep the decoded records for inspection while serving encoded examples
&lt;span&gt;@@ -130,7 +136,8 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
     %__MODULE__{
       data: data,
       encoded_texts: encoded_texts,
&lt;span&gt;-      prompt_style: prompt_style
&lt;/span&gt;&lt;span&gt;+      prompt_style: prompt_style,
+      mask_out_instructions_in_target: mask_out_instructions_in_target
&lt;/span&gt;     }
   end
 
&lt;span&gt;@@ -226,7 +233,7 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
       &amp;gt;
   &amp;quot;&amp;quot;&amp;quot;
   @spec custom_collate(
&lt;span&gt;-          tuple() | [[integer()]],
&lt;/span&gt;&lt;span&gt;+          tuple() | [encoded_example()],
&lt;/span&gt;           integer(),
           integer(),
           nil | pos_integer()
&lt;span&gt;@@ -248,15 +255,17 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
 
   def custom_collate(batch, pad_token_id, ignore_index, allowed_max_length)
       when is_list(batch) do
&lt;span&gt;+    examples = Enum.map(batch, &amp;amp;normalize_example/1)
+
&lt;/span&gt;     # Find the longest sequence length after the one extra pad token that the
     # Python collate function appends to every item.
     batch_max_length =
&lt;span&gt;-      batch
-      |&amp;gt; Enum.map(&amp;amp;(Kernel.length(&amp;amp;1) + 1))
&lt;/span&gt;&lt;span&gt;+      examples
+      |&amp;gt; Enum.map(&amp;amp;(Kernel.length(&amp;amp;1.token_ids) + 1))
&lt;/span&gt;       |&amp;gt; Enum.max(fn -&amp;gt; 0 end)
 
     {inputs, targets} =
&lt;span&gt;-      Enum.map(batch, fn item -&amp;gt;
&lt;/span&gt;&lt;span&gt;+      Enum.map(examples, fn %{token_ids: item, prompt_length: prompt_length} -&amp;gt;
&lt;/span&gt;         # Elixir data is immutable, so this builds the equivalent of
         # `new_item = item.copy(); new_item += [pad_token_id]`.
         new_item = item ++ [pad_token_id]
&lt;span&gt;@@ -272,6 +281,7 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
           padded
           |&amp;gt; Enum.slice(1, batch_max_length - 1)
           |&amp;gt; mask_extra_padding_targets(pad_token_id, ignore_index)
&lt;span&gt;+          |&amp;gt; mask_prompt_targets(prompt_length, ignore_index)
&lt;/span&gt; 
         # Optionally cap both rows to the model context length.
         {
&lt;span&gt;@@ -287,8 +297,51 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
     }
   end
 
&lt;span&gt;+  defp encode_example(entry, tokenizer, opts, prompt_style, false) do
+    entry
+    # The model trains on the instruction/input prompt followed by the
+    # expected response target, matching the chapter 7 PyTorch dataset.
+    |&amp;gt; full_text(prompt_style)
+    |&amp;gt; encode_text!(tokenizer, opts)
+  end
+
+  defp encode_example(entry, tokenizer, opts, prompt_style, true) do
+    token_ids =
+      entry
+      |&amp;gt; full_text(prompt_style)
+      |&amp;gt; encode_text!(tokenizer, opts)
+
+    prompt_length =
+      entry
+      |&amp;gt; prompt_text(prompt_style)
+      |&amp;gt; encode_text!(tokenizer, opts)
+      |&amp;gt; Kernel.length()
+
+    %{token_ids: token_ids, prompt_length: prompt_length}
+  end
+
+  defp encode_example(
+         _entry,
+         _tokenizer,
+         _opts,
+         _prompt_style,
+         mask_out_instructions_in_target
+       ) do
+    raise ArgumentError,
+          &amp;quot;expected :mask_out_instructions_in_target to be a boolean, got: #{inspect(mask_out_instructions_in_target)}&amp;quot;
+  end
+
+  defp normalize_example(token_ids) when is_list(token_ids) do
+    %{token_ids: token_ids, prompt_length: nil}
+  end
+
+  defp normalize_example(%{token_ids: token_ids, prompt_length: prompt_length})
+       when is_list(token_ids) and is_integer(prompt_length) and prompt_length &amp;gt;= 0 do
+    %{token_ids: token_ids, prompt_length: prompt_length}
+  end
+
&lt;/span&gt;   defp full_text(%{&amp;quot;output&amp;quot; =&amp;gt; output} = entry, :alpaca) when is_binary(output) do
&lt;span&gt;-    instruction_plus_input = LlmScratch.FineTuneDataLoader.format_input(entry)
&lt;/span&gt;&lt;span&gt;+    instruction_plus_input = prompt_text(entry, :alpaca)
&lt;/span&gt;     response_text = &amp;quot;\n\n### Response:\n#{output}&amp;quot;
 
     instruction_plus_input &amp;lt;&amp;gt; response_text
&lt;span&gt;@@ -305,6 +358,10 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
     raise ArgumentError, &amp;quot;unsupported prompt style: #{inspect(prompt_style)}&amp;quot;
   end
 
&lt;span&gt;+  defp prompt_text(entry, prompt_style) do
+    LlmScratch.FineTuneDataLoader.format_text(entry, prompt_style)
+  end
+
&lt;/span&gt;   defp encode_text!(text, tokenizer, opts) when is_binary(tokenizer) do
     allowed_special = Keyword.get(opts, :allowed_special, [@end_of_text])
     {:ok, token_ids} = Tiktoken.encode(tokenizer, text, allowed_special)
&lt;span&gt;@@ -331,6 +388,19 @@&lt;/span&gt; defmodule LlmScratch.InstructionDataset do
     masked_targets
   end
 
&lt;span&gt;+  defp mask_prompt_targets(targets, nil, _ignore_index), do: targets
+
+  defp mask_prompt_targets(targets, prompt_length, ignore_index) do
+    mask_count = max(prompt_length - 1, 0)
+
+    targets
+    |&amp;gt; Enum.with_index()
+    |&amp;gt; Enum.map(fn
+      {_target, index} when index &amp;lt; mask_count -&amp;gt; ignore_index
+      {target, _index} -&amp;gt; target
+    end)
+  end
+
&lt;/span&gt;   defp maybe_truncate(token_ids, nil), do: token_ids
   defp maybe_truncate(token_ids, max_length), do: Enum.take(token_ids, max_length)
 end
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;Here is the only change that we need to make in the test:&lt;/p&gt;

&lt;div&gt;&lt;div&gt;&lt;pre&gt;&lt;code&gt;&lt;span&gt;train_dataset&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;train_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;mask_out_instructions_in_target:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;

    &lt;span&gt;val_dataset&lt;/span&gt; &lt;span&gt;=&lt;/span&gt;
      &lt;span&gt;InstructionDataset&lt;/span&gt;&lt;span&gt;.&lt;/span&gt;&lt;span&gt;new&lt;/span&gt;&lt;span&gt;(&lt;/span&gt;&lt;span&gt;val_data&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;tokenizer&lt;/span&gt;&lt;span&gt;,&lt;/span&gt; &lt;span&gt;mask_out_instructions_in_target:&lt;/span&gt; &lt;span&gt;true&lt;/span&gt;&lt;span&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;

&lt;p&gt;With instruction masking, the score is slightly worse: 49.78.&lt;/p&gt;

&lt;h3 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-exercise-73-fine-tuning-on-the-original-alpaca-dataset&quot;&gt;Exercise 7.3 Fine-tuning on the original Alpaca dataset&lt;/h3&gt;

&lt;p&gt;The Alpaca dataset, by researchers at Stanford, is one of the earliest and most popular openly shared instruction datasets, consisting of 52,002 entries. As an alternative to the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;instruction-data.json&lt;/code&gt; file we use here, consider fine-tuning an LLM on this dataset. The dataset is available at &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://mng.bz/NBnE&quot;&gt;https://mng.bz/NBnE&lt;/a&gt;. This dataset contains 52,002 entries, which is approximately 50 times more than the dataset we used here, and most entries are longer. Thus, I highly recommend using a GPU for the training, which will accelerate the fine-tuning process. If you encounter out-of-memory errors, consider reducing the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;batch_size&lt;/code&gt; from 8 to 4, 2, or even 1. Lowering the &lt;code class=&quot;language-plaintext language-highlighter-rouge&quot;&gt;allowed_max_length&lt;/code&gt; from 1,024 to 512 or 256 can also help manage memory problems.&lt;/p&gt;

&lt;p&gt;Per epoch, we have 5,525 steps, which is 20x more than with Sebastian’s training set. With one epoch, training took 24,000 seconds, and the average Llama 3 score through Ollama was 43.15. With two epochs, training took 41,060.8 seconds, and the average score improved to 50.54.&lt;/p&gt;

&lt;h3 id=&quot;article-1kUfJRkVZmJRI0FzNFkk5MGBCNA-exercise-74-parameter-efficient-fine-tuning-with-lora&quot;&gt;Exercise 7.4 Parameter-efficient fine-tuning with LoRA&lt;/h3&gt;

&lt;p&gt;To instruction fine-tune an LLM more efficiently, modify the code in this chapter to use the low-rank adaptation method (LoRA) from appendix E. Compare the training run time and model performance before and after the modification.&lt;/p&gt;

&lt;p&gt;With LoRA, training on Sebastian’s instruction set took 648 seconds, compared with 687 seconds without LoRA, with a slightly worse accuracy score of 49.9, compared with 50.32 without LoRA. Here are the commits that implemented the LoRA algorithm: &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://github.com/karlosmid/llm-from-scratch/commit/2c43a61b05c74d612302c89e03f01e79637ec347&quot;&gt;feat: 🎸 lora support&lt;/a&gt;, &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://github.com/karlosmid/llm-from-scratch/commit/852f5f51c60e007b9663a09752a0f640ab4363f4&quot;&gt;feat: 🎸 lora support in model inspect&lt;/a&gt;, and &lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://github.com/karlosmid/llm-from-scratch/commit/a231974b42c9c7a786d6a029cf6bc1e2f2ed97d8&quot;&gt;test: 💍 lora test&lt;/a&gt;.&lt;/p&gt;</summary><author><name></name></author><source gr:stream-id="feed/https://blog.tentamen.eu/feed/"><id>tag:google.com,2005:reader/feed/https://blog.tentamen.eu/feed/</id><title type="html">Tentamen Software Testing Blog</title><link rel="alternate" href="https://karlosmid.com/" type="text/html"></link></source></entry><entry gr:crawl-timestamp-msec="1781509416000"><id gr:original-id="https://medium.com/p/f24b0c8aab4d">tag:google.com,2005:reader/item/0000091b0000007c</id><category term="psychology"></category><category term="short-story"></category><category term="self-improvement"></category><category term="writing"></category><category term="humor"></category><title type="html">The Price of Kindness</title><published>2026-06-15T07:43:36Z</published><updated>2026-06-15T07:43:36Z</updated><link rel="alternate" href="https://medium.com/illumination/the-price-of-kindness-f24b0c8aab4d?source=rss-d56167afca7d------2" type="text/html"></link><summary type="html">&lt;div&gt;&lt;p&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; 
href=&quot;https://medium.com/illumination/the-price-of-kindness-f24b0c8aab4d?source=rss-d56167afca7d------2&quot;&gt;&lt;img class=&quot;bqrUnknownImgSize&quot; src=&quot;https://cdn-images-1.medium.com/max/2600/0*mw3WxQJP9UGr16Cm&quot; width=&quot;5568&quot;&gt;&lt;/a&gt;&lt;/p&gt;&lt;p&gt;A humorous and emotional story of friendship, betrayal, family, faith, and life’s lessons.&lt;/p&gt;&lt;p&gt;&lt;a target=&quot;_blank&quot; rel=&quot;noopener&quot; href=&quot;https://medium.com/illumination/the-price-of-kindness-f24b0c8aab4d?source=rss-d56167afca7d------2&quot;&gt;Continue reading on ILLUMINATION »&lt;/a&gt;&lt;/p&gt;&lt;/div&gt;</summary><author><name>Mohammad Faisal Khatri</name></author><source gr:stream-id="feed/https://medium.com/@iamfaisalkhatri/feed"><id>tag:google.com,2005:reader/feed/https://medium.com/@iamfaisalkhatri/feed</id><title type="html">Stories by Mohammad Faisal Khatri on Medium</title><link rel="alternate" href="https://medium.com/@iamfaisalkhatri?source=rss-d56167afca7d------2" type="text/html"></link></source></entry></feed>