Comment on Forgejo fills up hard drive with repo-archives

jeena@piefed.jeena.net ⁨5⁩ ⁨days⁩ ago

For now I asked ChatGPT to help me implement a simple "return 403" based on the bot's user agent. I looked through my logs and collected the bot names that I saw. I know it won't hold forever, but for now it's quite nice: I just added this file as /etc/nginx/conf.d/block_bots.conf, and it gets run before all the vhosts and rejects all bots. The rest just goes to the vhosts as normal. This way I don't need to implement it in each vhost separately.

➜ jeena@Abraham conf.d cat block_bots.conf

# /etc/nginx/conf.d/block_bots.conf

# Step 1: classify each request by its User-Agent header.
# $bad_bot is 1 when the header matches any pattern below ("~*" marks a
# case-insensitive regular expression) and 0 otherwise.
map $http_user_agent $bad_bot {
    # No pattern matched: the client is not flagged.
    default 0;

    # Crawler names collected from the access logs, listed alphabetically.
    # Every pattern yields the same value, so the order has no effect on
    # the outcome.
    ~*AhrefsBot 1;
    ~*Amazonbot 1;
    ~*BacklinksExtendedBot 1;
    ~*ClaudeBot 1;
    ~*DataForSeoBot 1;
    # "\ " keeps the spaces inside a single config token; in the regex it
    # matches a literal space.
    ~*Expanse,\ a\ Palo\ Alto\ Networks\ company 1;
    ~*GPTBot 1;
    ~*meta-externalagent 1;
    ~*OAI-SearchBot 1;
    ~*PetalBot 1;
    ~*SemrushBot 1;
    ~*VelenPublicWebCrawler 1;
    ~*YisouSpider 1;
}

# Step 2: catch-all server for requests that land on the default server.
# NOTE(review): nginx picks a default_server only when no other server block
# on the same address:port matches the requested host name, so this check
# presumably does not run for requests served by named vhosts. Confirm
# whether those vhosts need their own `if ($bad_bot)` guard.
server {
    # Plain HTTP, IPv4 and IPv6.
    listen 80 default_server;
    listen [::]:80 default_server;

    # HTTPS, IPv4 and IPv6.
    listen 443 ssl default_server;
    listen [::]:443 ssl default_server;

    # Placeholder (snakeoil) key and certificate for the HTTPS listeners.
    ssl_certificate_key /etc/ssl/private/ssl-cert-snakeoil.key;
    ssl_certificate     /etc/ssl/certs/ssl-cert-snakeoil.pem;

    # Flagged crawlers get an explicit 403 response.
    if ($bad_bot) { return 403; }

    # Anything else reaching this server: 444 is nginx's non-standard code
    # for closing the connection.
    return 444;
}

source
Sort:hotnewtop