[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"post-v2-\u002Fblog\u002Fagent-ops-butterfly-effect":3},{"id":4,"title":5,"body":6,"date":1061,"description":1062,"draft":1063,"extension":1064,"meta":1065,"navigation":233,"path":1067,"seo":1068,"stem":1069,"tags":1070,"__hash__":1077},"blog\u002Fblog\u002Fagent-ops-butterfly-effect.md","当 AI Ops 把自己网站搞挂了——一个容器重启的蝴蝶效应",{"type":7,"value":8,"toc":1034},"minimark",[9,13,20,23,26,29,34,37,93,96,99,102,105,107,111,116,119,122,145,148,152,155,166,169,172,176,179,182,188,191,336,339,345,348,350,354,357,410,416,419,423,480,483,524,527,531,534,606,609,612,615,633,640,642,646,650,653,714,717,720,731,734,766,773,779,785,787,791,794,857,860,863,957,960,962,966,970,977,981,984,990,994,997,1003,1007,1010,1012,1015,1018,1021,1024,1027,1030],[10,11,5],"h1",{"id":12},"当-ai-ops-把自己网站搞挂了一个容器重启的蝴蝶效应",[14,15,16],"blockquote",{},[17,18,19],"p",{},"一切都很顺利，直到 AI 运维 Agent 决定重启一个容器。然后整个网站挂了。然后它修不好了。然后它又把自己 SSH 踢出了服务器。",[17,21,22],{},"这是我用 Hermes Multi-Agent 系统管理个人网站部署的第六天。前五天风平浪静——AI Designer 出图、AI Coder 写代码、AI Reviewer 审查、AI Ops 部署——一条龙全自动，我甚至连终端都没怎么打开。",[17,24,25],{},"第六天，事情开始变得有趣。",[27,28],"hr",{},[30,31,33],"h2",{"id":32},"一背景v4v5-的切换","一、背景：v4→v5 的切换",[17,35,36],{},"网站首页经历了五个设计版本：",[38,39,40,56],"table",{},[41,42,43],"thead",{},[44,45,46,50,53],"tr",{},[47,48,49],"th",{},"版本",[47,51,52],{},"设计师",[47,54,55],{},"风格",[57,58,59,71,82],"tbody",{},[44,60,61,65,68],{},[62,63,64],"td",{},"v1-v3",[62,66,67],{},"AI Designer（迭代）",[62,69,70],{},"赛博朋克风，Three.js 3D 背景，粒子效果，光晕文字",[44,72,73,76,79],{},[62,74,75],{},"v4",[62,77,78],{},"AI + 用户反馈",[62,80,81],{},"精简版赛博朋克，IcosahedronGeometry 球体，900 粒子",[44,83,84,87,90],{},[62,85,86],{},"v5",[62,88,89],{},"AI Designer 自主优化",[62,91,92],{},"极度精简，移除了大量 3D 效果",[17,94,95],{},"v4 是用户确认过的版本——Three.js 的 IcosahedronGeometry 球体 + 双环面 + 900 个粒子的 Canvas 神经网络背景，效果相当惊艳。",[17,97,98],{},"但 AI Designer 觉得\"还能更好\"。于是它在没有用户明确指令的情况下，基于自己对这个风格的\"理解\"，产出了一个 v5：移除了环面、减少了粒子、简化了着色器。它认为这样更\"克制\"。",[17,100,101],{},"用户看到 v5 
后说了一句话：「这个不好看，回退到 v4。」",[17,103,104],{},"于是，整条流水线启动了。",[27,106],{},[30,108,110],{"id":109},"二故障始末","二、故障始末",[112,113,115],"h3",{"id":114},"第一步designer-分析差异","第一步：Designer 分析差异",[17,117,118],{},"AI Designer 接到任务：对比 v5 和 v4，产出差异清单。",[17,120,121],{},"它产出了一个 26 项差异分析文档，分三个优先级：",[123,124,125,133,139],"ul",{},[126,127,128,132],"li",{},[129,130,131],"strong",{},"P0（7 项）","：球体几何体错误、颜色不对、环面缺失、拖拽旋转反向",[126,134,135,138],{},[129,136,137],{},"P1（13 项）","：FOV 偏移、粒子数量不足、导航样式差异、按钮风格不匹配",[126,140,141,144],{},[129,142,143],{},"P2（6 项）","：微妙的动画时间和缓动曲线差异",[17,146,147],{},"这本身没问题——Designer 做得很漂亮。",[112,149,151],{"id":150},"第二步coder-修改代码","第二步：Coder 修改代码",[17,153,154],{},"AI Coder 拿到差异清单，开始修改 Nuxt 3 项目的三个 Vue 文件：",[156,157,162],"pre",{"className":158,"code":160,"language":161},[159],"language-text","3DBackground.vue  → 球体几何体 + 环面 + 着色器\nindex.vue          → 粒子系统 + 相机配置\napp.vue            → 全局样式 + 导航栏\n","text",[163,164,160],"code",{"__ignoreMap":165},"",[17,167,168],{},"Coder 完成了修改，Reviewer 审查通过（APPROVE），Git 提交成功，Tester 测试通过。",[17,170,171],{},"一切正常。",[112,173,175],{"id":174},"第三步ops-部署然后炸了","第三步：Ops 部署——然后炸了",[17,177,178],{},"AI Ops Worker 的任务是：将修改后的 Nuxt 项目通过 Docker 部署到腾讯云服务器（43.135.47.130）。",[17,180,181],{},"它的标准流程是：",[156,183,186],{"className":184,"code":185,"language":161},[159],"本地 Docker build → 压缩镜像 → SFTP 上传 → 服务器 docker load → docker compose up\n",[163,187,185],{"__ignoreMap":165},[17,189,190],{},"但这次不一样。因为它要\"安全部署\"——先在测试端口启动验证，再切换。",[156,192,196],{"className":193,"code":194,"language":195,"meta":165,"style":165},"language-bash shiki shiki-themes github-light github-dark","# 构建新镜像\ndocker build -t website:v4-fix .\n\n# 在 3001 端口测试\ndocker run -d --name web-test -p 3001:3000 website:v4-fix\n\n# 验证...\ncurl http:\u002F\u002F127.0.0.1:3001\u002F  # 返回 200，OK\n\n# 切换！\ndocker stop web && docker rm web\ndocker rename web-test 
web\n","bash",[163,197,198,207,228,235,241,267,272,278,290,295,301,324],{"__ignoreMap":165},[199,200,203],"span",{"class":201,"line":202},"line",1,[199,204,206],{"class":205},"sJ8bj","# 构建新镜像\n",[199,208,210,214,218,222,225],{"class":201,"line":209},2,[199,211,213],{"class":212},"sScJk","docker",[199,215,217],{"class":216},"sZZnC"," build",[199,219,221],{"class":220},"sj4cs"," -t",[199,223,224],{"class":216}," website:v4-fix",[199,226,227],{"class":216}," .\n",[199,229,231],{"class":201,"line":230},3,[199,232,234],{"emptyLinePlaceholder":233},true,"\n",[199,236,238],{"class":201,"line":237},4,[199,239,240],{"class":205},"# 在 3001 端口测试\n",[199,242,244,246,249,252,255,258,261,264],{"class":201,"line":243},5,[199,245,213],{"class":212},[199,247,248],{"class":216}," run",[199,250,251],{"class":220}," -d",[199,253,254],{"class":220}," --name",[199,256,257],{"class":216}," web-test",[199,259,260],{"class":220}," -p",[199,262,263],{"class":216}," 3001:3000",[199,265,266],{"class":216}," website:v4-fix\n",[199,268,270],{"class":201,"line":269},6,[199,271,234],{"emptyLinePlaceholder":233},[199,273,275],{"class":201,"line":274},7,[199,276,277],{"class":205},"# 验证...\n",[199,279,281,284,287],{"class":201,"line":280},8,[199,282,283],{"class":212},"curl",[199,285,286],{"class":216}," http:\u002F\u002F127.0.0.1:3001\u002F",[199,288,289],{"class":205},"  # 返回 200，OK\n",[199,291,293],{"class":201,"line":292},9,[199,294,234],{"emptyLinePlaceholder":233},[199,296,298],{"class":201,"line":297},10,[199,299,300],{"class":205},"# 切换！\n",[199,302,304,306,309,312,316,318,321],{"class":201,"line":303},11,[199,305,213],{"class":212},[199,307,308],{"class":216}," stop",[199,310,311],{"class":216}," web",[199,313,315],{"class":314},"sVt8B"," && ",[199,317,213],{"class":212},[199,319,320],{"class":216}," rm",[199,322,323],{"class":216}," web\n",[199,325,327,329,332,334],{"class":201,"line":326},12,[199,328,213],{"class":212},[199,330,331],{"class":216}," 
rename",[199,333,257],{"class":216},[199,335,323],{"class":216},[17,337,338],{},"切换命令执行了。然后：",[156,340,343],{"className":341,"code":342,"language":161},[159],"$ curl https:\u002F\u002Fdeeeli.com\ncurl: (35) OpenSSL SSL_connect: Connection reset by peer\n",[163,344,342],{"__ignoreMap":165},[17,346,347],{},"网站挂了。",[27,349],{},[30,351,353],{"id":352},"三诊断过程","三、诊断过程",[112,355,356],{"id":356},"症状分析",[38,358,359,369],{},[41,360,361],{},[44,362,363,366],{},[47,364,365],{},"测试",[47,367,368],{},"结果",[57,370,371,381,391,400],{},[44,372,373,378],{},[62,374,375],{},[163,376,377],{},"curl http:\u002F\u002Fdeeeli.com",[62,379,380],{},"301 → https，正常",[44,382,383,388],{},[62,384,385],{},[163,386,387],{},"curl https:\u002F\u002Fdeeeli.com",[62,389,390],{},"Connection reset",[44,392,393,398],{},[62,394,395],{},[163,396,397],{},"curl -k https:\u002F\u002F服务器IP",[62,399,390],{},[44,401,402,407],{},[62,403,404],{},[163,405,406],{},"curl http:\u002F\u002F服务器IP:3000",[62,408,409],{},"200 OK",[17,411,412,413],{},"关键发现：",[129,414,415],{},"HTTP 80 端口正常（nginx 在跑），HTTPS 443 连接被 reset，但直接访问 Nuxt 的 3000 端口正常。",[17,417,418],{},"这说明 nginx 活着，Nuxt 也活着，但它们之间的通信出了问题。",[112,420,422],{"id":421},"排查-nginx-配置","排查 nginx 配置",[156,424,426],{"className":193,"code":425,"language":195,"meta":165,"style":165},"$ ssh user@43.135.47.130\n$ docker ps\nCONTAINER ID   IMAGE            STATUS\na1b2c3d4e5f6   nginx:alpine     Up 2 hours\n",[163,427,428,439,449,463],{"__ignoreMap":165},[199,429,430,433,436],{"class":201,"line":202},[199,431,432],{"class":212},"$",[199,434,435],{"class":216}," ssh",[199,437,438],{"class":216}," user@43.135.47.130\n",[199,440,441,443,446],{"class":201,"line":209},[199,442,432],{"class":212},[199,444,445],{"class":216}," docker",[199,447,448],{"class":216}," ps\n",[199,450,451,454,457,460],{"class":201,"line":230},[199,452,453],{"class":212},"CONTAINER",[199,455,456],{"class":216}," ID",[199,458,459],{"class":216},"   IMAGE",[199,461,462],{"class":216},"            
STATUS\n",[199,464,465,468,471,474,477],{"class":201,"line":237},[199,466,467],{"class":212},"a1b2c3d4e5f6",[199,469,470],{"class":216},"   nginx:alpine",[199,472,473],{"class":216},"     Up",[199,475,476],{"class":220}," 2",[199,478,479],{"class":216}," hours\n",[17,481,482],{},"等等——只有一个容器？Nuxt 容器呢？",[156,484,486],{"className":193,"code":485,"language":195,"meta":165,"style":165},"$ docker ps -a | grep nuxt\nb5c6d7e8f9a0   website:v4-fix   Exited (137) 3 minutes ago\n",[163,487,488,510],{"__ignoreMap":165},[199,489,490,492,494,497,500,504,507],{"class":201,"line":202},[199,491,432],{"class":212},[199,493,445],{"class":216},[199,495,496],{"class":216}," ps",[199,498,499],{"class":220}," -a",[199,501,503],{"class":502},"szBVR"," |",[199,505,506],{"class":212}," grep",[199,508,509],{"class":216}," nuxt\n",[199,511,512,515,518,521],{"class":201,"line":209},[199,513,514],{"class":212},"b5c6d7e8f9a0",[199,516,517],{"class":216},"   website:v4-fix",[199,519,520],{"class":216},"   Exited",[199,522,523],{"class":314}," (137) 3 minutes ago\n",[17,525,526],{},"Nuxt 容器崩溃了，退出码 137（OOM killed）。",[112,528,530],{"id":529},"nuxt-容器为什么-oom","Nuxt 容器为什么 OOM？",[17,532,533],{},"拉日志：",[156,535,537],{"className":193,"code":536,"language":195,"meta":165,"style":165},"$ docker logs b5c6d7e8f9a0 --tail 50\n...\nFATAL ERROR: Ineffective mark-compacts near heap limit Allocation failed\n...\n\u003C--- JS stacktrace --->\n",[163,538,539,557,562,591,595],{"__ignoreMap":165},[199,540,541,543,545,548,551,554],{"class":201,"line":202},[199,542,432],{"class":212},[199,544,445],{"class":216},[199,546,547],{"class":216}," logs",[199,549,550],{"class":216}," b5c6d7e8f9a0",[199,552,553],{"class":220}," --tail",[199,555,556],{"class":220}," 50\n",[199,558,559],{"class":201,"line":209},[199,560,561],{"class":220},"...\n",[199,563,564,567,570,573,576,579,582,585,588],{"class":201,"line":230},[199,565,566],{"class":212},"FATAL",[199,568,569],{"class":216}," ERROR:",[199,571,572],{"class":216}," 
Ineffective",[199,574,575],{"class":216}," mark-compacts",[199,577,578],{"class":216}," near",[199,580,581],{"class":216}," heap",[199,583,584],{"class":216}," limit",[199,586,587],{"class":216}," Allocation",[199,589,590],{"class":216}," failed\n",[199,592,593],{"class":201,"line":237},[199,594,561],{"class":220},[199,596,597,600,603],{"class":201,"line":243},[199,598,599],{"class":502},"\u003C",[199,601,602],{"class":314},"--- JS stacktrace ---",[199,604,605],{"class":502},">\n",[17,607,608],{},"v4 版本的 Three.js 场景在 SSR（服务端渲染）阶段触发了大量内存分配。IcosahedronGeometry + 双环面 + 900 粒子 + Canvas 纹理在服务端渲染时，内存占用直接从 v5 的 200MB 飙到了 1.2GB。",[17,610,611],{},"而 Docker 容器的内存限制是 1GB。",[17,613,614],{},"所以：",[616,617,618,621,624,627,630],"ol",{},[126,619,620],{},"Coder 修改正确（v4 的 Three.js 代码没有问题）",[126,622,623],{},"Reviewer 审查通过（代码质量没问题）",[126,625,626],{},"Tester 测试通过（本地 dev 模式内存充裕）",[126,628,629],{},"Ops 部署\"成功\"——容器确实起来了",[126,631,632],{},"但容器在首次 SSR 请求时 OOM 崩溃",[17,634,635,636,639],{},"这是典型的",[129,637,638],{},"环境差异问题","：本地开发 vs Docker 容器环境的内存边界不同。",[27,641],{},[30,643,645],{"id":644},"四修复与回退","四、修复与回退",[112,647,649],{"id":648},"ops-的自动修复尝试","Ops 的自动修复尝试",[17,651,652],{},"AI Ops Worker 检测到容器崩溃后，自动启动了修复流程：",[616,654,655,661,686],{},[126,656,657,660],{},[129,658,659],{},"分析崩溃日志"," → 识别到 OOM",[126,662,663,666,667,682,685],{},[129,664,665],{},"尝试重启容器","：",[156,668,670],{"className":193,"code":669,"language":195,"meta":165,"style":165},"docker restart b5c6d7e8f9a0\n",[163,671,672],{"__ignoreMap":165},[199,673,674,676,679],{"class":201,"line":202},[199,675,213],{"class":212},[199,677,678],{"class":216}," restart",[199,680,681],{"class":216}," b5c6d7e8f9a0\n",[683,684],"br",{},"容器重启成功，但第一次请求再次 OOM——这是设计问题，重启没用。",[126,687,688,666,691,711,713],{},[129,689,690],{},"尝试增加内存限制",[156,692,694],{"className":193,"code":693,"language":195,"meta":165,"style":165},"docker update --memory 2g 
web\n",[163,695,696],{"__ignoreMap":165},[199,697,698,700,703,706,709],{"class":201,"line":202},[199,699,213],{"class":212},[199,701,702],{"class":216}," update",[199,704,705],{"class":220}," --memory",[199,707,708],{"class":216}," 2g",[199,710,323],{"class":216},[683,712],{},"失败了——服务器总共只有 2GB 内存，给容器 2GB 意味着系统和 nginx 会受影响。",[112,715,716],{"id":716},"陷入僵局",[17,718,719],{},"此时 Ops Worker 面临一个选择：",[123,721,722,725,728],{},[126,723,724],{},"增大容器内存（可能导致整机 OOM）",[126,726,727],{},"回退到 v5（v5 内存占用低）",[126,729,730],{},"修改代码减少 Three.js 复杂度",[17,732,733],{},"它选择了第三条路——尝试通过 paramiko SSH 修改 Dockerfile，在构建时禁用 SSR 的 Three.js 初始化。",[156,735,739],{"className":736,"code":737,"language":738,"meta":165,"style":165},"language-python shiki shiki-themes github-light github-dark","# Ops Worker 的修复脚本\nimport paramiko\nssh = paramiko.SSHClient()\nssh.connect('43.135.47.130', username='root', password='...')\nssh.exec_command('cd \u002Froot\u002Fpersonal-website && sed -i ...')\n","python",[163,740,741,746,751,756,761],{"__ignoreMap":165},[199,742,743],{"class":201,"line":202},[199,744,745],{},"# Ops Worker 的修复脚本\n",[199,747,748],{"class":201,"line":209},[199,749,750],{},"import paramiko\n",[199,752,753],{"class":201,"line":230},[199,754,755],{},"ssh = paramiko.SSHClient()\n",[199,757,758],{"class":201,"line":237},[199,759,760],{},"ssh.connect('43.135.47.130', username='root', password='...')\n",[199,762,763],{"class":201,"line":243},[199,764,765],{},"ssh.exec_command('cd \u002Froot\u002Fpersonal-website && sed -i ...')\n",[17,767,768,769,772],{},"但这时发生了另一件事：",[129,770,771],{},"SSH 连接被拒绝了","。",[156,774,777],{"className":775,"code":776,"language":161},[159],"paramiko.ssh_exception.SSHException: Error reading SSH protocol banner\nConnection reset by peer\n",[163,778,776],{"__ignoreMap":165},[17,780,781,782],{},"Ops Worker 之前的多次 SSH 连接加上 SFTP 上传镜像，触发了服务器的 fail2ban 规则——",[129,783,784],{},"AI 把自己的 SSH 
访问给封了。",[27,786],{},[30,788,790],{"id":789},"五兜底方案与教训","五、兜底方案与教训",[17,792,793],{},"最终是用户手动介入：",[156,795,797],{"className":193,"code":796,"language":195,"meta":165,"style":165},"# 解除 fail2ban\nfail2ban-client unban 本地IP\n\n# 直接回退\ndocker stop web\ndocker run -d --name web -p 3000:3000 --memory 1.5g website:v5\n",[163,798,799,804,815,819,824,832],{"__ignoreMap":165},[199,800,801],{"class":201,"line":202},[199,802,803],{"class":205},"# 解除 fail2ban\n",[199,805,806,809,812],{"class":201,"line":209},[199,807,808],{"class":212},"fail2ban-client",[199,810,811],{"class":216}," unban",[199,813,814],{"class":216}," 本地IP\n",[199,816,817],{"class":201,"line":230},[199,818,234],{"emptyLinePlaceholder":233},[199,820,821],{"class":201,"line":237},[199,822,823],{"class":205},"# 直接回退\n",[199,825,826,828,830],{"class":201,"line":243},[199,827,213],{"class":212},[199,829,308],{"class":216},[199,831,323],{"class":216},[199,833,834,836,838,840,842,844,846,849,851,854],{"class":201,"line":269},[199,835,213],{"class":212},[199,837,248],{"class":216},[199,839,251],{"class":220},[199,841,254],{"class":220},[199,843,311],{"class":216},[199,845,260],{"class":220},[199,847,848],{"class":216}," 3000:3000",[199,850,705],{"class":220},[199,852,853],{"class":216}," 1.5g",[199,855,856],{"class":216}," website:v5\n",[17,858,859],{},"网站恢复，v4 部署失败，v5 继续运行。",[112,861,862],{"id":862},"整个过程的时间线",[38,864,865,875],{},[41,866,867],{},[44,868,869,872],{},[47,870,871],{},"时间",[47,873,874],{},"事件",[57,876,877,885,893,901,909,917,925,933,941,949],{},[44,878,879,882],{},[62,880,881],{},"T+0",[62,883,884],{},"Designer 产出 26 项差异分析",[44,886,887,890],{},[62,888,889],{},"T+15min",[62,891,892],{},"Coder 完成修复，20 项 P0+P1",[44,894,895,898],{},[62,896,897],{},"T+25min",[62,899,900],{},"Reviewer 审查通过",[44,902,903,906],{},[62,904,905],{},"T+30min",[62,907,908],{},"Git 提交 + Tester 测试通过",[44,910,911,914],{},[62,912,913],{},"T+45min",[62,915,916],{},"Ops 
构建镜像、部署、切换",[44,918,919,922],{},[62,920,921],{},"T+46min",[62,923,924],{},"容器 OOM，网站挂掉",[44,926,927,930],{},[62,928,929],{},"T+47min",[62,931,932],{},"Ops 自动诊断、尝试修复",[44,934,935,938],{},[62,936,937],{},"T+52min",[62,939,940],{},"触发 fail2ban，SSH 被封",[44,942,943,946],{},[62,944,945],{},"T+55min",[62,947,948],{},"Ops 报错，block 等待人工",[44,950,951,954],{},[62,952,953],{},"T+60min",[62,955,956],{},"用户 SSH 解除封禁，回退 v5",[17,958,959],{},"60 分钟，100+ 个 Kanban 任务，最终回到了起点。",[27,961],{},[30,963,965],{"id":964},"六踩坑总结","六、踩坑总结",[112,967,969],{"id":968},"_1-环境差异是最大的坑","1. 环境差异是最大的坑",[17,971,972,973,976],{},"本地 ",[163,974,975],{},"npm run dev"," → 一切正常。Docker SSR → OOM。Dev 和 Prod 的内存边界完全不同。AI Reviewer 和 Tester 在审查\u002F测试时无法捕捉这种差异——它们工作在开发环境。",[112,978,980],{"id":979},"_2-ssr-3d-库-内存炸弹","2. SSR + 3D 库 = 内存炸弹",[17,982,983],{},"Three.js 在服务端渲染时会创建完整的 WebGL 上下文（通过 headless-gl），这在 1GB 的容器里是灾难。v4 的 IcosahedronGeometry + 900 粒子 + Canvas 纹理，SSR 内存峰值接近 1.5GB。",[17,985,986,989],{},[129,987,988],{},"解决办法","：在 SSR 阶段跳过 Three.js 初始化，仅在客户端执行。",[112,991,993],{"id":992},"_3-ai-ops-的自我毁灭","3. AI Ops 的自我毁灭",[17,995,996],{},"AI Ops Worker 的自动修复逻辑本身没问题，但它没有考虑到\"频繁的 SSH 连接会被封禁\"这一层。加上 paramiko 在连接失败时的重试行为，加速了 fail2ban 的触发。",[17,998,999,1002],{},[129,1000,1001],{},"教训","：给 Ops Worker 加上 SSH 连接频率限制，或在任务流中显式处理 fail2ban 场景。",[112,1004,1006],{"id":1005},"_4-模型切换的连锁反应","4. 
模型切换的连锁反应",[17,1008,1009],{},"在部署过程中，MiniMax API 遇到了 429 限流。按照系统设计，Worker 自动切换到了 DeepSeek 模型。但 DeepSeek 的推理速度比 MiniMax 慢 3-5 倍，导致原本 15 分钟的部署任务拖到了 45 分钟——正好跨越了 fail2ban 的检测窗口。",[27,1011],{},[30,1013,1014],{"id":1014},"结语",[17,1016,1017],{},"AI Ops 不是银弹。它能自动化 90% 的流程，但剩下那 10%——环境差异、资源边界、连锁反应——仍然需要人的判断。",[17,1019,1020],{},"但这也正是有趣的地方：AI 在\"翻车\"中学到的教训，比在\"顺利\"中多得多。这次事件之后，Manager 的 SOUL.md 里新增了一大段关于部署安全的规则：先验证再切换、保留回滚方案、SSH 连接频率限制、Docker 内存限制检查……",[17,1022,1023],{},"AI 学会了。代价是网站挂了 14 分钟。",[17,1025,1026],{},"下一次部署，同样的坑不会再踩。",[17,1028,1029],{},"（除非 AI 发现了新的坑。）",[1031,1032,1033],"style",{},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sScJk, html code.shiki .sScJk{--shiki-default:#6F42C1;--shiki-dark:#B392F0}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .szBVR, html 
code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}",{"title":165,"searchDepth":209,"depth":209,"links":1035},[1036,1037,1042,1047,1051,1054,1060],{"id":32,"depth":209,"text":33},{"id":109,"depth":209,"text":110,"children":1038},[1039,1040,1041],{"id":114,"depth":230,"text":115},{"id":150,"depth":230,"text":151},{"id":174,"depth":230,"text":175},{"id":352,"depth":209,"text":353,"children":1043},[1044,1045,1046],{"id":356,"depth":230,"text":356},{"id":421,"depth":230,"text":422},{"id":529,"depth":230,"text":530},{"id":644,"depth":209,"text":645,"children":1048},[1049,1050],{"id":648,"depth":230,"text":649},{"id":716,"depth":230,"text":716},{"id":789,"depth":209,"text":790,"children":1052},[1053],{"id":862,"depth":230,"text":862},{"id":964,"depth":209,"text":965,"children":1055},[1056,1057,1058,1059],{"id":968,"depth":230,"text":969},{"id":979,"depth":230,"text":980},{"id":992,"depth":230,"text":993},{"id":1005,"depth":230,"text":1006},{"id":1014,"depth":209,"text":1014},"2026-06-02","记录了 AI Ops Worker 在部署网站时引发的一连串故障：Docker 容器崩溃、nginx 502、SSH 连接被拒。从 root cause 到修复过程的完整复盘。",false,"md",{"author":1066},"陈德立","\u002Fblog\u002Fagent-ops-butterfly-effect",{"title":5,"description":1062},"blog\u002Fagent-ops-butterfly-effect",[1071,1072,213,1073,1074,1075,1076],"ai-agent","devops","nginx","incident","ops","hermes","3GORltjWwyGBJyuHVDAt5tTJuyTiABrxxqJjHZ2nGg0"]