<!DOCTYPE html>
    <html lang="vi" xmlns="http://www.w3.org/1999/xhtml" prefix="og: http://ogp.me/ns#">
    <head>
<!-- Encoding declaration comes first so it is parsed before any non-ASCII text in the title and meta values below. -->
<meta charset="utf-8">
<title>Data Engineering trong kỷ nguyên LLM: Những điều cần biết</title>
<!-- Page description and site attribution. -->
<meta name="description" content="Data Engineering trong kỷ nguyên LLM: Những điều cần biết - Savefile - Tin Tức -...">
<meta name="author" content=".: Nguoicodonvn2008.info - Cõi lòng người cô đơn :.">
<meta name="copyright" content=".: Nguoicodonvn2008.info - Cõi lòng người cô đơn :. [admin@nguoicodonvn2008.info]">
<!-- Crawler directives. -->
<meta name="robots" content="index, archive, follow, noodp">
<meta name="googlebot" content="index,archive,follow,noodp">
<meta name="msnbot" content="all,index,follow">
<meta name="generator" content="NukeViet v4.5">
<!-- Open Graph metadata (the og: prefix is declared on the <html> element). -->
<meta property="og:title" content="Data Engineering trong kỷ nguyên LLM: Những điều cần biết">
<meta property="og:type" content="website">
<meta property="og:description" content="Savefile - Tin Tức - https://www.nguoicodonvn2008.info/vi/news/savefile/kien-thuc-may-tinh/data-engineering-trong-ky-nguyen-llm-nhung-dieu-can-biet-12590.html">
<meta property="og:site_name" content=".: Nguoicodonvn2008.info - Cõi lòng người cô đơn :.">
<meta property="og:url" content="https://www.nguoicodonvn2008.info/vi/news/savefile/kien-thuc-may-tinh/data-engineering-trong-ky-nguyen-llm-nhung-dieu-can-biet-12590.html">
<!-- Favicon and canonical URL. -->
<link rel="shortcut icon" href="https://www.nguoicodonvn2008.info/favicon.ico">
<link rel="canonical" href="https://www.nguoicodonvn2008.info/vi/news/savefile/kien-thuc-may-tinh/data-engineering-trong-ky-nguyen-llm-nhung-dieu-can-biet-12590.html">
<!-- RSS feeds: the whole news module first, then one feed per category. -->
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/" title="Tin Tức" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/karaoke-dual/" title="Tin Tức - Karaoke Dual" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/nhac-tre/" title="Tin Tức - Nhạc trẻ" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/tru-tinh/" title="Tin Tức - Trữ tình" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/nuoc-ngoai/" title="Tin Tức - Nước ngoài" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/remix/" title="Tin Tức - Remix" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/tam-su-tinh-yeu/" title="Tin Tức - Tâm sự tình yêu" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/tho-suu-tam/" title="Tin Tức - Thơ sưu tầm" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/cuoc-song/" title="Tin Tức - Cuộc sống" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/phan-mem/" title="Tin Tức - Phần mềm" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/kien-thuc-may-tinh/" title="Tin Tức - Kiến thức máy tính" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/hoc-tap/" title="Tin Tức - Học tập" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/tai-lieu/" title="Tin Tức - Tài liệu" type="application/rss+xml">
<link rel="alternate" href="https://www.nguoicodonvn2008.info/vi/news/rss/de-thi/" title="Tin Tức - Đề thi" type="application/rss+xml">
<!-- Preload hints. The stylesheets are linked directly below; the preloaded scripts are not referenced anywhere inside this <head>. -->
<link rel="preload" as="style" href="https://www.nguoicodonvn2008.info/assets/css/font-awesome.min.css" type="text/css">
<link rel="preload" as="style" href="https://www.nguoicodonvn2008.info/themes/default/css/bootstrap.non-responsive.css" type="text/css">
<link rel="preload" as="style" href="https://www.nguoicodonvn2008.info/themes/default/css/style.css" type="text/css">
<link rel="preload" as="style" href="https://www.nguoicodonvn2008.info/themes/default/css/style.non-responsive.css" type="text/css">
<link rel="preload" as="style" href="https://www.nguoicodonvn2008.info/themes/default/css/news.css" type="text/css">
<link rel="preload" as="style" href="https://www.nguoicodonvn2008.info/themes/default/css/custom.css" type="text/css">
<link rel="preload" as="script" href="https://www.nguoicodonvn2008.info/assets/js/jquery/jquery.min.js" type="text/javascript">
<link rel="preload" as="script" href="https://www.nguoicodonvn2008.info/assets/js/language/vi.js" type="text/javascript">
<link rel="preload" as="script" href="https://www.nguoicodonvn2008.info/assets/js/DOMPurify/purify3.js" type="text/javascript">
<link rel="preload" as="script" href="https://www.nguoicodonvn2008.info/assets/js/global.js" type="text/javascript">
<link rel="preload" as="script" href="https://www.nguoicodonvn2008.info/assets/js/site.js" type="text/javascript">
<link rel="preload" as="script" href="https://www.nguoicodonvn2008.info/themes/default/js/news.js" type="text/javascript">
<link rel="preload" as="script" href="https://www.nguoicodonvn2008.info/themes/default/js/main.js" type="text/javascript">
<link rel="preload" as="script" href="https://www.nguoicodonvn2008.info/themes/default/js/custom.js" type="text/javascript">
<link rel="preload" as="script" href="https://www.nguoicodonvn2008.info/themes/default/js/bootstrap.min.js" type="text/javascript">
<!-- Stylesheets, in cascade order: icon font, Bootstrap, theme, news module, site overrides. -->
<link rel="stylesheet" href="https://www.nguoicodonvn2008.info/assets/css/font-awesome.min.css">
<link rel="stylesheet" href="https://www.nguoicodonvn2008.info/themes/default/css/bootstrap.non-responsive.css">
<link rel="stylesheet" href="https://www.nguoicodonvn2008.info/themes/default/css/style.css">
<link rel="stylesheet" href="https://www.nguoicodonvn2008.info/themes/default/css/style.non-responsive.css">
<link rel="stylesheet" href="https://www.nguoicodonvn2008.info/themes/default/css/news.css">
<link rel="stylesheet" href="https://www.nguoicodonvn2008.info/themes/default/css/custom.css">
<!-- Page-local override: white page background. -->
<style>
	body{background: #fff;}
</style>
    </head>
    <body>
<div id="print">
	<!-- Page header: site name (pull-left) and a link to the site home page (pull-right). -->
	<div id="hd_print">
		<h2 class="pull-left">.: Nguoicodonvn2008.info - Cõi lòng người cô đơn :.</h2>
		<p class="pull-right"><a href="https://www.nguoicodonvn2008.info/" title=".: Nguoicodonvn2008.info - Cõi lòng người cô đơn :.">https://www.nguoicodonvn2008.info</a></p>
	</div>
	<!-- Presumably clears the pull-left/pull-right floats above; the .clear rule lives in the theme CSS. -->
	<div class="clear"></div>
	<hr>
	<div id="content">
		<!-- Article title. -->
		<h1>Data Engineering trong kỷ nguyên LLM: Những điều cần biết</h1>
		<!-- Article timestamp followed by two screen-only actions (class hidden-print). -->
		<ul class="list-inline">
			<!-- Machine-readable form of the dd/mm/yyyy date shown to readers; no UTC offset is given because the page does not state one. -->
			<li><time datetime="2026-04-10T22:53">Thứ sáu - 10/04/2026 22:53</time></li>
			<!-- NOTE(review): both actions are javascript: pseudo-links with inline handlers. <button type="button"> is the semantic element, but the swap needs matching theme CSS, so the anchors are kept. -->
			<!-- The icon elements are decorative (the link text carries the label), so they are hidden from assistive technology. -->
			<li class="hidden-print txtrequired"><em class="fa fa-print" aria-hidden="true">&nbsp;</em><a title="In ra" href="javascript:;" onclick="window.print()">In ra</a></li>
			<li class="hidden-print txtrequired"><em class="fa fa-power-off" aria-hidden="true">&nbsp;</em><a title="Đóng cửa sổ này" href="javascript:;" onclick="window.close()">Đóng cửa sổ này</a></li>
		</ul>
		<div class="clear"></div>
		<!-- Empty #hometext container; presumably the article summary slot (confirm against the template). -->
		<div id="hometext">
		</div>
		<!-- Article image, rendered 460px wide with Bootstrap's thumbnail frame. -->
		<div class="imghome">
			<img class="img-thumbnail" src="https://st.quantrimang.com/photos/image/2026/04/07/data-engineering-trong-ky-nguyen-llm1.jpg" alt="Data Engineering trong kỷ nguyên LLM: Những điều cần biết" width="460">
		</div>
		<div class="clear"></div>
		<div id="bodytext" class="clearfix">
			<figure class="nv-media"><audio controls="" src="https://st.quantrimang.com/photos/media/2026/04/10/214459-20264101221.mp3"></audio></figure><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Sự bùng nổ của các mô hình ngôn ngữ lớn (LLM) như GPT-4, Llama hay Claude đang thay đổi toàn bộ thế giới trí tuệ nhân tạo. 
Những mô hình này có thể viết code, trả lời câu hỏi, tóm tắt tài liệu với độ chính xác đáng kinh ngạc.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Với các nhà khoa học dữ liệu, đây là thời kỳ cực kỳ thú vị, nhưng đồng thời cũng đặt ra một thách thức lớn: <strong style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;line-height:inherit;margin:0px;padding:0px;">hiệu suất của các mô hình AI phụ thuộc trực tiếp vào chất lượng dữ liệu</strong>.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, 
sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Trong khi phần lớn sự chú ý tập trung vào mô hình, mạng nơ-ron hay cơ chế attention, thì <strong style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;line-height:inherit;margin:0px;padding:0px;">data engineering mới chính là yếu tố cốt lõi của kỷ nguyên LLM</strong>. 
Những nguyên tắc quản lý dữ liệu truyền thống không bị thay thế, mà đang được nâng cấp.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Trong bài viết này, chúng ta sẽ tìm hiểu cách vai trò của dữ liệu đang thay đổi, các pipeline quan trọng trong quá trình huấn luyện và suy luận, cũng như những kiến trúc mới như RAG đang định hình cách xây dựng ứng dụng AI hiện đại.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 
0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><img class="lazy lightbox loaded" style="border-radius:3px;border:0px solid rgb(204, 204, 204);box-shadow:rgb(238, 238, 238) 0px 0px 1px;font:inherit;margin:0px auto;max-width:100%;padding:0px;" src="https://st.quantrimang.com/photos/image/2026/04/07/data-engineering-trong-ky-nguyen-llm1.jpg" alt="" width="640" height="226" data-src="https://st.quantrimang.com/photos/image/2026/04/07/data-engineering-trong-ky-nguyen-llm1.jpg" data-i="0" data-was-processed="true" /></p><h2 style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:20px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;letter-spacing:normal;line-height:32px;margin:10px 0px;orphans:2;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><strong>Từ BI truyền thống sang dữ liệu sẵn sàng cho AI</strong></h2><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, 
sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Trước đây, data engineering chủ yếu phục vụ Business Intelligence (BI). Mục tiêu là đưa dữ liệu từ hệ thống vận hành vào data warehouse để trả lời những câu hỏi như: “Doanh số quý trước là bao nhiêu?”</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Dữ liệu trong BI thường có cấu trúc rõ ràng, được sắp xếp theo hàng và cột.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 
255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Tuy nhiên, kỷ nguyên LLM yêu cầu một cách tiếp cận khác. Thay vì chỉ xử lý dữ liệu có cấu trúc, giờ đây chúng ta phải xử lý dữ liệu phi cấu trúc như:</p><ul style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;clear:both;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:inherit;list-style:disc;margin-bottom:0px;margin-right:0px;margin-top:0px;orphans:2;padding:0px;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><li 
style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e852a63b4d8a4aae326fcf26ae1289a90"><p style="margin-left:0px;text-align:justify;">Văn bản trong PDF</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e3f70a921639ebd3d9964067ea0fffcf5"><p style="margin-left:0px;text-align:justify;">Bản ghi cuộc gọi khách hàng</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="eb98ff82495a1b9caf4ada5f035376e02"><p style="margin-left:0px;text-align:justify;">Email nội bộ</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e8b3e0362d92ea9ff23220c09b420d3ba"><p style="margin-left:0px;text-align:justify;">Code trong GitHub</p></li><li 
style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e97e430e6521fefda3a6f781f19ed58f8"><p style="margin-left:0px;text-align:justify;">Tài liệu nội bộ doanh nghiệp</p></li></ul><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Mục tiêu không còn chỉ là lưu trữ dữ liệu, mà là <strong style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;line-height:inherit;margin:0px;padding:0px;">chuyển đổi dữ liệu để AI có thể hiểu và suy luận</strong>.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, 
sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Điều này dẫn đến nhu cầu xây dựng pipeline dữ liệu mới phục vụ ba giai đoạn chính:</p><ul style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;clear:both;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:inherit;list-style:disc;margin-bottom:0px;margin-right:0px;margin-top:0px;orphans:2;padding:0px;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><li 
style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="eac0fdc59b5e95a12afdab9b09add3c00"><p style="margin-left:0px;text-align:justify;">Huấn luyện và fine-tuning</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e62189585e16238471c812135fd9732ac"><p style="margin-left:0px;text-align:justify;">Suy luận và truy xuất thông tin</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e1d8ec621368a3f9adbd4f95b130f3b0a"><p style="margin-left:0px;text-align:justify;">Đánh giá và giám sát</p></li></ul><h2 style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, 
sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:20px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;letter-spacing:normal;line-height:32px;margin:10px 0px;orphans:2;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><strong>Giai đoạn 1: Xây dựng dữ liệu để huấn luyện LLM</strong></h2><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Trước khi một mô hình AI có thể hoạt động hiệu quả, nó phải được huấn luyện trên lượng dữ liệu khổng lồ. 
Đây là nơi data engineering đóng vai trò cực kỳ quan trọng.</p><h2 style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:20px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;letter-spacing:normal;line-height:32px;margin:10px 0px;orphans:2;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><strong>Ba yếu tố cốt lõi của dữ liệu huấn luyện</strong></h2><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Thứ nhất là quy mô dữ liệu. LLM học bằng cách nhận diện mẫu thống kê trong dữ liệu. 
Để hiểu ngữ pháp, logic và ngữ cảnh, mô hình cần tiếp xúc với hàng nghìn tỷ token. Điều này đòi hỏi xử lý dữ liệu ở quy mô petabyte từ các nguồn như Common Crawl, GitHub hay tài liệu khoa học.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Thứ hai là độ đa dạng của dữ liệu. Một mô hình chỉ được huấn luyện bằng tài liệu pháp lý sẽ không thể viết thơ tốt. 
Vì vậy, dữ liệu cần được lấy từ nhiều lĩnh vực khác nhau để đảm bảo khả năng tổng quát hóa.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Thứ ba là chất lượng dữ liệu. Internet chứa rất nhiều nội dung spam, thông tin sai lệch hoặc dữ liệu trùng lặp. 
Vì vậy, pipeline dữ liệu cần:</p><ul style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;clear:both;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:inherit;list-style:disc;margin-bottom:0px;margin-right:0px;margin-top:0px;orphans:2;padding:0px;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e11cc3e65726243c7f84dbb1a7eff12a2"><p style="margin-left:0px;text-align:justify;">Loại bỏ nội dung trùng lặp</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e75a508288d6dd8f375b1606177858024"><p style="margin-left:0px;text-align:justify;">Lọc ngôn ngữ không mong muốn</p></li><li 
style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e4d1cf7f787fed693704d85d21d2d95f0"><p style="margin-left:0px;text-align:justify;">Loại bỏ nội dung độc hại</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e2c5c75c7321132411069b57b79b31428"><p style="margin-left:0px;text-align:justify;">Theo dõi nguồn dữ liệu (data lineage)</p></li></ul><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Nguyên tắc quan trọng nhất là: <strong 
style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;line-height:inherit;margin:0px;padding:0px;">Mô hình AI chỉ tốt khi dữ liệu huấn luyện đủ tốt</strong>.</p><h2 style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:20px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;letter-spacing:normal;line-height:32px;margin:10px 0px;orphans:2;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><strong>Giai đoạn 2: Kiến trúc RAG đang trở thành tiêu chuẩn</strong></h2><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 
0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Phần lớn doanh nghiệp không tự huấn luyện mô hình từ đầu. Thay vào đó, họ kết nối mô hình có sẵn với dữ liệu riêng. Đây chính là lúc kiến trúc RAG (Retrieval-Augmented Generation) phát huy vai trò.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">RAG giúp giải quyết vấn đề lớn của LLM: dữ liệu bị “đóng băng” tại thời điểm huấn luyện. 
Nếu hỏi về sự kiện mới, mô hình sẽ không biết.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Với RAG, mô hình có thể truy xuất dữ liệu theo thời gian thực.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Quy trình RAG thường diễn ra như sau:</p><ol 
style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;clear:both;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:inherit;list-style:decimal;margin-bottom:0px;margin-right:0px;margin-top:0px;orphans:2;padding:0px;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><li style="border-width:0px;font:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e744091f026ede6a7121186c69b02f952"><p style="margin-left:0px;text-align:justify;">Trước tiên, dữ liệu nội bộ như PDF, tin nhắn Slack hoặc các tài liệu khác được đưa vào pipeline. Sau đó, dữ liệu được chia thành các đoạn nhỏ để phù hợp với giới hạn context window.</p></li><li style="border-width:0px;font:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e8872345e845d7acba976f7fd2bcc2e87"><p style="margin-left:0px;text-align:justify;">Mỗi đoạn dữ liệu được chuyển thành vector thông qua embedding model. 
Các vector này được lưu trong vector database.</p></li><li style="border-width:0px;font:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e89d4bd31cd9823e59d6c3f330b45e16a"><p style="margin-left:0px;text-align:justify;">Khi người dùng đặt câu hỏi, hệ thống chuyển câu hỏi thành vector, tìm kiếm dữ liệu tương tự và gửi dữ liệu liên quan cho LLM để tạo câu trả lời.</p></li></ol><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Hiệu quả của RAG phụ thuộc trực tiếp vào pipeline dữ liệu. 
Nếu dữ liệu bị chia sai hoặc embedding không phù hợp, kết quả sẽ không chính xác.</p><h2 style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:20px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;letter-spacing:normal;line-height:32px;margin:10px 0px;orphans:2;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><strong>Giai đoạn 3: Modern Data Stack cho LLM</strong></h2><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Kỷ nguyên LLM cũng kéo theo sự thay đổi trong hệ sinh thái công nghệ dữ liệu.</p><p 
style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Vector database trở thành thành phần cốt lõi. Khác với database truyền thống tìm kiếm theo từ khóa, vector database tìm kiếm theo ngữ nghĩa.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Một số vector database phổ biến gồm:</p><ul 
style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;clear:both;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:inherit;list-style:disc;margin-bottom:0px;margin-right:0px;margin-top:0px;orphans:2;padding:0px;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="ee29ee35ee06182bb6c3f80e162a51542"><p style="margin-left:0px;text-align:justify;">Pinecone</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="ee7da17815d2489409e009da99c569a86"><p style="margin-left:0px;text-align:justify;">Weaviate</p></li><li 
style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e29493ed0e141c1b187e67fa815a4dd7f"><p style="margin-left:0px;text-align:justify;">Milvus</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e9b3b99fed8d9eff92004fe96bf3f5c74"><p style="margin-left:0px;text-align:justify;">PostgreSQL với pgvector</p></li></ul><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Ngoài ra, các framework orchestration giúp kết nối pipeline và LLM cũng ngày càng phổ biến. 
Ví dụ như:</p><ul style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;clear:both;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:inherit;list-style:disc;margin-bottom:0px;margin-right:0px;margin-top:0px;orphans:2;padding:0px;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="ed46a5cbb96e5fde97e71eefe1edafacf"><p style="margin-left:0px;text-align:justify;">LangChain</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="eb76c3fb1cee10973e6de4734907e3acf"><p style="margin-left:0px;text-align:justify;">LlamaIndex</p></li></ul><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 
255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Các công cụ ETL truyền thống như Spark vẫn đóng vai trò quan trọng trong xử lý dữ liệu lớn.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Điểm quan trọng là stack mới không thay thế stack cũ, mà mở rộng thêm khả năng AI.</p><h2 style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 
0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:20px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;letter-spacing:normal;line-height:32px;margin:10px 0px;orphans:2;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><strong>Giai đoạn 4: Đánh giá và giám sát LLM</strong></h2><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Khác với machine learning truyền thống, việc đánh giá LLM phức tạp hơn. 
Nếu mô hình tạo ra đoạn văn, làm sao biết nó đúng hay sai?</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Đây là lúc observability trở nên quan trọng. Data engineer cần theo dõi toàn bộ pipeline để xác định lỗi.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Một hệ thống RAG trả lời sai có thể do:</p><ul 
style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;clear:both;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:inherit;list-style:disc;margin-bottom:0px;margin-right:0px;margin-top:0px;orphans:2;padding:0px;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e6fe6ba1ca90b28363c1d4324ac431250"><p style="margin-left:0px;text-align:justify;">Thiếu dữ liệu</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e641cb5a0c0eba663d8eb26729fe1c7c8"><p style="margin-left:0px;text-align:justify;">Lỗi truy xuất</p></li><li 
style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e447a68cb7930f963389b61bc29781f88"><p style="margin-left:0px;text-align:justify;">LLM tạo nội dung sai</p></li></ul><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Để giải quyết, hệ thống cần ghi lại:</p><ul style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;clear:both;color:rgba(0, 0, 0, 0.87);font-family:Arial, 
sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:inherit;list-style:disc;margin-bottom:0px;margin-right:0px;margin-top:0px;orphans:2;padding:0px;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="ec17c08060fe0ce3a330dfb38918e1f38"><p style="margin-left:0px;text-align:justify;">Câu hỏi người dùng</p></li><li style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="e34e2ba41c98edd2cb7b53c0d333a5440"><p style="margin-left:0px;text-align:justify;">Dữ liệu truy xuất</p></li><li 
style="border-width:0px;font-family:inherit;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:inherit;font-stretch:inherit;font-style:inherit;font-variant:inherit;font-variation-settings:inherit;font-weight:400;line-height:inherit;margin:0px 0px 0px 30px;padding:0px;" data-list-item-id="ef782c1fc0a304c720d90e0a793f89c6c"><p style="margin-left:0px;text-align:justify;">Câu trả lời cuối cùng</p></li></ul><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Thông qua phân tích dữ liệu này, hệ thống có thể cải thiện liên tục.</p><h2 style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, 
sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:20px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;letter-spacing:normal;line-height:32px;margin:10px 0px;orphans:2;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;"><strong>Data Engineering trở thành nền tảng của AI</strong></h2><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Chúng ta đang bước vào thời kỳ AI trở thành giao diện chính để tương tác với dữ liệu. 
Điều này khiến data engineering trở nên quan trọng hơn bao giờ hết.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Các kỹ năng xử lý dữ liệu, làm sạch dữ liệu và xây dựng pipeline đang trở thành nền tảng của AI hiện đại.</p><p style="-webkit-text-stroke-width:0px;background-color:rgb(255, 255, 255);border-width:0px;color:rgba(0, 0, 0, 0.87);font-family:Arial, sans-serif;font-feature-settings:inherit;font-kerning:inherit;font-language-override:inherit;font-optical-sizing:inherit;font-size-adjust:inherit;font-size:16px;font-stretch:inherit;font-style:normal;font-variant-alternates:inherit;font-variant-caps:normal;font-variant-east-asian:inherit;font-variant-emoji:inherit;font-variant-ligatures:normal;font-variant-numeric:inherit;font-variant-position:inherit;font-variation-settings:inherit;font-weight:400;letter-spacing:normal;line-height:26px;margin:10px 0px;orphans:2;overflow-wrap:break-word;padding:0px;text-align:justify;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;white-space:normal;widows:2;word-spacing:0px;">Nếu bạn là data scientist, việc hiểu data 
engineering trong kỷ nguyên LLM sẽ giúp bạn không chỉ bắt kịp xu hướng mà còn xây dựng nền tảng cho tương lai.</p>
		</div>
				<div id="author">
						<p>
				<strong>Nguồn tin:</strong>
				Quantrimang.com
			</p>
		</div>
	</div>
	<div id="footer" class="clearfix">
		<div id="url">
			<strong>URL của bản tin này: </strong><a href="https://www.nguoicodonvn2008.info/vi/news/savefile/kien-thuc-may-tinh/data-engineering-trong-ky-nguyen-llm-nhung-dieu-can-biet-12590.html" title="Data Engineering trong kỷ nguyên LLM&#x3A; Những điều cần biết">https://www.nguoicodonvn2008.info/vi/news/savefile/kien-thuc-may-tinh/data-engineering-trong-ky-nguyen-llm-nhung-dieu-can-biet-12590.html</a>

		</div>
		<div class="clear"></div>
		<div class="copyright">
			&copy; .&#x3A; Nguoicodonvn2008.info - Cõi lòng người cô đơn &#x3A;.
		</div>
		<div id="contact">
			<a href="mailto:admin@nguoicodonvn2008.info">admin@nguoicodonvn2008.info</a>
		</div>
	</div>
</div>
        <div id="timeoutsess" class="chromeframe">
            Bạn đã không sử dụng Site, <a onclick="timeoutsesscancel();" href="https://www.nguoicodonvn2008.info/#">Bấm vào đây để duy trì trạng thái đăng nhập</a>. Thời gian chờ: <span id="secField"> 60 </span> giây
        </div>
        <div id="openidResult" class="nv-alert" style="display:none"></div>
        <div id="openidBt" data-result="" data-redirect=""></div>
		<div class="car-top">
  <span><img src="https://www.nguoicodonvn2008.info/themes/default/images/car.png" alt=""></span>
</div>
<div id="run_cronjobs" style="visibility:hidden;display:none;"><img alt="cron" src="/index.php?second=cronjobs&amp;p=q0y615x7" width="1" height="1" /></div>
<script src="https://www.nguoicodonvn2008.info/assets/js/jquery/jquery.min.js"></script>
<script>var nv_base_siteurl="/",nv_lang_data="vi",nv_lang_interface="vi",nv_name_variable="nv",nv_fc_variable="op",nv_lang_variable="language",nv_module_name="news",nv_func_name="savefile",nv_is_user=0, nv_my_ofs=-4,nv_my_abbr="EDT",nv_cookie_prefix="nv4c_e856T",nv_check_pass_mstime=1738000,nv_area_admin=0,nv_safemode=0,theme_responsive=0,nv_recaptcha_ver=2,nv_recaptcha_sitekey="",nv_recaptcha_type="image",XSSsanitize=1;</script>
<script src="https://www.nguoicodonvn2008.info/assets/js/language/vi.js"></script>
<script src="https://www.nguoicodonvn2008.info/assets/js/DOMPurify/purify3.js"></script>
<script src="https://www.nguoicodonvn2008.info/assets/js/global.js"></script>
<script src="https://www.nguoicodonvn2008.info/assets/js/site.js"></script>
<script src="https://www.nguoicodonvn2008.info/themes/default/js/news.js"></script>
<script src="https://www.nguoicodonvn2008.info/themes/default/js/main.js"></script>
<script src="https://www.nguoicodonvn2008.info/themes/default/js/custom.js"></script>
<script type="application/ld+json">
        {
            "@context": "https://schema.org",
            "@type": "Organization",
            "url": "https://www.nguoicodonvn2008.info",
            "logo": "https://www.nguoicodonvn2008.info/uploads/angel.gif"
        }
        </script>
<script src="https://www.nguoicodonvn2008.info/themes/default/js/bootstrap.min.js"></script>
<script type="text/javascript">
/*
 * "Back to top" control bound to every element with class "car-top".
 *
 * - Clicking the control animates the page scroll position back to 0 over
 *   800 ms and tags the clicked element with "car-run" for 1 second.
 * - While the window is scrolled 200px or more, the control carries the
 *   classes "show" and "car-down"; above that threshold "show" is removed
 *   immediately and "car-down" 300 ms later.
 *
 * NOTE(review): the visual effect of "show", "car-down" and "car-run" is
 * defined in the theme CSS, which is not visible here - confirm there.
 *
 * $scrolltop intentionally stays a global so any other script that refers
 * to it keeps working.
 */
var $scrolltop = $('.car-top');

(function () {
    // Handles of the pending delayed class removals (null when none is
    // pending). Tracking them lets a newer event cancel a stale timer
    // instead of stacking one timer per event.
    var runTimer = null;
    var hideTimer = null;

    $scrolltop.on('click', function () {
        $('html,body').animate({
            scrollTop: 0
        }, 800);
        $(this).addClass("car-run");

        // Restart the 1 s window on every click so that a timer left over
        // from an earlier click cannot strip "car-run" prematurely.
        clearTimeout(runTimer);
        runTimer = setTimeout(function () {
            runTimer = null;
            $scrolltop.removeClass('car-run');
        }, 1000);

        // Returning false from a jQuery handler prevents the default action
        // and stops propagation of the click.
        return false;
    });

    $(window).on('scroll', function () {
        if ($(window).scrollTop() >= 200) {
            // Back past the threshold: drop any pending "car-down" removal,
            // otherwise it would fire while the control is meant to be shown.
            clearTimeout(hideTimer);
            hideTimer = null;
            $scrolltop.addClass("show car-down");
        } else {
            $scrolltop.removeClass("show");

            // Scroll events fire in bursts; schedule the delayed removal only
            // once rather than queuing a new timer for every event.
            if (hideTimer === null) {
                hideTimer = setTimeout(function () {
                    hideTimer = null;
                    $scrolltop.removeClass('car-down');
                }, 300);
            }
        }
    });
})();
</script>
</body>
</html>