7:["$","div",null,{"className":"min-h-screen bg-page-bg relative","children":[["$","div",null,{"className":"scanlines fixed inset-0 pointer-events-none z-10 opacity-30","aria-hidden":"true"}],["$","article",null,{"className":"max-w-3xl mx-auto px-6 pt-20 pb-20 relative z-20","children":[["$","$L12",null,{"href":"/blog","className":"inline-flex items-center gap-3 border border-accent/20 rounded-full px-4 py-1.5 text-accent/70 hover:border-accent hover:text-accent hover:bg-accent/5 transition-all duration-300 font-mono text-xs mb-10 group","children":[["$","span",null,{"className":"inline-block transition-transform duration-300 group-hover:-translate-x-1","children":"<"}],["$","span",null,{"className":"tracking-wider","children":"cd .."}],["$","span",null,{"className":"text-gray-600","children":"/feed"}]]}],["$","header",null,{"className":"mb-10","children":["$","div",null,{"className":"border border-accent/10 rounded-lg overflow-hidden mb-6","children":[["$","div",null,{"className":"bg-page-surface/80 px-4 py-2 flex items-center gap-2 border-b border-accent/10","children":[["$","span",null,{"className":"w-2.5 h-2.5 rounded-full bg-red-500/60"}],["$","span",null,{"className":"w-2.5 h-2.5 rounded-full bg-yellow-500/60"}],["$","span",null,{"className":"w-2.5 h-2.5 rounded-full bg-green-500/60"}],["$","span",null,{"className":"ml-2 text-[10px] text-gray-600 font-mono","children":["a-long-peek-into-reinforcement-learning",".log"]}]]}],["$","div",null,{"className":"p-6","children":[["$","div",null,{"className":"flex items-center gap-3 text-xs font-mono text-gray-500 mb-4","children":[["$","time",null,{"dateTime":"2018-02-19T00:00:00.000Z","children":"Feb 18, 2018"}],["$","span",null,{"className":"text-accent/40","children":"|"}],["$","span",null,{"className":"uppercase tracking-wider text-[10px]","children":["src: ","lilianweng.github.io"]}]]}],["$","h1",null,{"className":"text-2xl md:text-3xl font-extrabold text-gray-100 font-mono leading-tight","children":"A (Long) Peek 
into Reinforcement Learning"}]]}]]}]}],["$","div",null,{"className":"prose prose-invert max-w-none prose-headings:text-gray-100 prose-headings:font-mono prose-p:text-gray-400 prose-p:leading-relaxed prose-a:text-accent hover:prose-a:text-accent-light prose-strong:text-gray-200 prose-code:text-accent-light prose-code:font-mono prose-pre:bg-page-surface prose-pre:border prose-pre:border-accent/10 prose-pre:rounded-lg prose-blockquote:border-accent/30 prose-blockquote:text-gray-500 prose-blockquote:font-mono prose-img:rounded-lg prose-img:border prose-img:border-accent/10","dangerouslySetInnerHTML":{"__html":"\n

[Updated on 2020-09-03: Updated the algorithm of SARSA and Q-learning so that the difference is more pronounced.]\n
\n[Updated on 2021-09-19: Thanks to 爱吃猫的鱼, we have this post in Chinese].

"}}],["$","div",null,{"className":"mt-16","children":[["$","div",null,{"className":"h-px w-full mb-8","style":{"background":"linear-gradient(90deg, transparent, var(--page-accent), transparent)"},"aria-hidden":"true"}],["$","div",null,{"className":"flex items-center justify-between","children":["$L13","$L14"]}]]}]]}]]}]