Course: reinforcement-fine-tuning-llms-grpo
Lesson: introduction
Slug: sjbja
[Dev Only] Loading...
Slug: sjbja
[Dev Only] Loading...
[Dev Only] Debug Info
{
"courseVersionId": null
}{
"courseId": 1058,
"name": "Reinforcement Fine-Tuning LLMs With GRPO",
"slug": "reinforcement-fine-tuning-llms-grpo",
"type": "short_course",
"progress": -1,
"maintenanceMode": false,
"releasedAt": "2025-05-21T08:00:00+00:00",
"nextCourseSlug": null,
"wpData": {
"courseName": "Reinforcement Fine-Tuning LLMs with GRPO",
"courseDescription": "Improve LLM reasoning with reinforcement fine-tuning and reward functions.",
"coursePartner": [
{
"title": "Predibase",
"logo": "https://home-wordpress.deeplearning.ai/wp-content/uploads/2025/05/QUvd03Ad_400x400.png"
}
],
"courseTopic": [
"Evaluation and Monitoring",
"Fine-Tuning",
"GenAI Applications",
"LLMOps",
"LLM Serving",
"Machine Learning",
"Prompt Engineering",
"Supervised Learning",
"Transformers"
],
"courseLevel": "Intermediate",
"courseDuration": null,
"marketingSlug": "reinforcement-fine-tuning-llms-grpo",
"videoThumbnail": "https://home-wordpress.deeplearning.ai/wp-content/uploads/2025/05/1058_reinforcement-fine-tuning-llms-grpo.jpg"
},
"certProgress": 0,
"accomplishmentProgress": 0,
"lessons": {
"sjbja": {
"index": 1,
"slug": "sjbja",
"name": "Introduction",
"type": "video",
"videoId": 873,
"time": 236,
"programId": null,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"eklye": {
"index": 2,
"slug": "eklye",
"name": "Introduction to reinforcement learning",
"type": "video",
"videoId": 874,
"time": 467,
"programId": null,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"r3p2x": {
"index": 3,
"slug": "r3p2x",
"name": "Benefits of reinforcement finetuning",
"type": "video",
"videoId": 875,
"time": 252,
"programId": null,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"bxwwv": {
"index": 4,
"slug": "bxwwv",
"name": "Can a large language model master Wordle",
"type": "video_notebook",
"videoId": 876,
"time": 658,
"programId": 24008,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"e5esw": {
"index": 5,
"slug": "e5esw",
"name": "Reward functions",
"type": "video_notebook",
"videoId": 877,
"time": 604,
"programId": 24009,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"ub5r8": {
"index": 6,
"slug": "ub5r8",
"name": "Reward functions with LLM as a judge",
"type": "video_notebook",
"videoId": 878,
"time": 756,
"programId": 24010,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"xxwlq": {
"index": 7,
"slug": "xxwlq",
"name": "Reward hacking",
"type": "video_notebook",
"videoId": 879,
"time": 420,
"programId": 24011,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"aifo6": {
"index": 8,
"slug": "aifo6",
"name": "Calculating loss in GRPO",
"type": "video_notebook",
"videoId": 880,
"time": 1087,
"programId": 24012,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"xfrzl": {
"index": 9,
"slug": "xfrzl",
"name": "Putting it all together: Training Wordle",
"type": "video_notebook",
"videoId": 881,
"time": 485,
"programId": 24013,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"lo7ww": {
"index": 10,
"slug": "lo7ww",
"name": "Conclusion",
"type": "video",
"videoId": 872,
"time": 52,
"programId": null,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": false
}
},
"n4uo6": {
"index": 11,
"slug": "n4uo6",
"name": "Quiz",
"type": "quiz",
"videoId": null,
"time": 0,
"programId": null,
"chatbotId": null,
"iframeUrl": null,
"quizId": "2632991FEEB8452D82EB5C7B31",
"progress": 0,
"readingMaterialId": null,
"accessControl": "locked",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": false,
"saveNotebookCustomjsEnabled": false
}
},
"ymyet": {
"index": 12,
"slug": "ymyet",
"name": "Appendix – Tips, Help, and Download",
"type": "notebook",
"videoId": null,
"time": 0,
"programId": 24014,
"chatbotId": null,
"iframeUrl": null,
"quizId": null,
"progress": 0,
"readingMaterialId": null,
"accessControl": "full",
"requiredUserTier": "pro",
"features": {
"saveNotebookToolbar": true,
"saveNotebookCustomjsEnabled": true
}
}
},
"subtopics": {},
"listing": [
{
"timeFrame": null,
"moduleLabel": "Module 1",
"name": null,
"content": [
{
"key": "sjbja",
"type": "lesson"
},
{
"key": "eklye",
"type": "lesson"
},
{
"key": "r3p2x",
"type": "lesson"
},
{
"key": "bxwwv",
"type": "lesson"
},
{
"key": "e5esw",
"type": "lesson"
},
{
"key": "ub5r8",
"type": "lesson"
},
{
"key": "xxwlq",
"type": "lesson"
},
{
"key": "aifo6",
"type": "lesson"
},
{
"key": "xfrzl",
"type": "lesson"
},
{
"key": "lo7ww",
"type": "lesson"
},
{
"key": "n4uo6",
"type": "lesson"
},
{
"key": "ymyet",
"type": "lesson"
}
],
"progress": 0
}
],
"totalDurationSeconds": 5017,
"lastAccessTime": null,
"reviewsCnt": 0,
"activeVersionIds": []
}