a:["$","div",null,{"children":[["$","div",null,{"className":"mb-6 font-jetbrains text-sm text-text-secondary","children":[["$","$L15",null,{"href":"/how-its-built","className":"hover:text-crt-accent transition-colors","children":"./how-its-built"}],["$","span",null,{"className":"mx-2 text-text-muted","children":"/"}],["$","span",null,{"className":"text-crt-accent","children":"inference-system"}]]}],["$","div",null,{"className":"mb-8","children":[["$","div",null,{"className":"mb-2 font-jetbrains text-sm text-crt-accent","children":["[","07","]"]}],["$","h1",null,{"className":"mb-4 font-inter text-3xl md:text-4xl text-text-primary font-semibold tracking-tight","children":"Inference System - Running Models Locally"}],["$","p",null,{"className":"font-inter text-text-secondary leading-relaxed","children":"Multi-backend architecture for running fine-tuned models entirely on your Mac."}]]}],["$","$L16",null,{"technicalContent":{"frontmatter":{"title":"Inference System - Running Models Locally","slug":"inference-system","order":7,"description":"Multi-backend architecture for running fine-tuned models entirely on your Mac."},"content":"\n\n\n\n","htmlContent":"\n","headings":[{"id":"inference-system","text":"Inference System","level":1},{"id":"multi-backend-architecture","text":"Multi-Backend Architecture","level":2},{"id":"mlx-python-backend","text":"MLX Python Backend","level":3},{"id":"llamacpp-alternative","text":"llama.cpp Alternative","level":3},{"id":"future-native-mlx-rust","text":"Future: Native MLX Rust","level":3},{"id":"model-loading","text":"Model Loading","level":2},{"id":"base-model--adapter","text":"Base Model + Adapter","level":3},{"id":"memory-management","text":"Memory Management","level":3},{"id":"lazy-loading-strategy","text":"Lazy Loading Strategy","level":3},{"id":"generation-configuration","text":"Generation Configuration","level":2},{"id":"temperature--sampling","text":"Temperature & Sampling","level":3},{"id":"stop-sequences","text":"Stop Sequences","level":3},{"id":"repetition-penalty","text":"Repetition Penalty","level":3},{"id":"streaming-responses","text":"Streaming Responses","level":2},{"id":"server-sent-events","text":"Server-Sent Events","level":3},{"id":"token-by-token-output","text":"Token-by-token Output","level":3}]},"simpleContent":{"frontmatter":{"title":"Inference System - Running Models Locally","slug":"inference-system","order":7,"description":"Multi-backend architecture for running fine-tuned models entirely on your Mac."},"content":"\n

🤖AI-translated - This is a simplified version of the technical article, translated by AI to be more accessible. Switch to Technical mode for the full version with code samples. While I reviewed this portion, I did not write it.

\n\n\n","htmlContent":"

\n","headings":[]},"articleTitle":"Inference System - Running Models Locally","isShared":false}],"$L17"]}]