```python
#!/usr/bin/python
import pycurl
import json
import StringIO
from datetime import datetime, timedelta

retentionDays = 30

c = pycurl.Curl()
b = StringIO.StringIO()

# Ask Elasticsearch for the status of every index
c.setopt(c.URL, 'http://127.0.0.1:9200/_status')
c.setopt(pycurl.WRITEFUNCTION, b.write)
c.perform()

blob = json.loads(b.getvalue())

for index in blob['indices']:
    if 'logstash' in index:
        old = datetime.now() - timedelta(days=retentionDays)
        # Daily logstash indices are named logstash-YYYY.MM.DD
        indexDate = datetime.strptime(index, "logstash-%Y.%m.%d")
        if old > indexDate:
            # Past the retention window: drop the index
            print "delete", index
            c.setopt(pycurl.CUSTOMREQUEST, "DELETE")
            c.setopt(c.URL, 'http://127.0.0.1:9200/%s' % index)
            c.perform()
        else:
            # Still within retention: optimize (merge segments) instead.
            # Reset the request method here, otherwise the DELETE set
            # above would stick for all subsequent requests.
            print "optimize", index
            c.setopt(pycurl.CUSTOMREQUEST, "POST")
            c.setopt(c.URL, 'http://127.0.0.1:9200/%s/_optimize' % index)
            c.perform()
```
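If pycurl isn't available, roughly the same housekeeping can be sketched in Python 3 with nothing but the standard library. This is a minimal sketch under the same assumptions as above, i.e. the old pre-2.x Elasticsearch HTTP API (later versions replaced `_status` with `_stats` and `_optimize` with `_forcemerge`):

```python
#!/usr/bin/env python3
# Minimal sketch of the same housekeeping with only the standard
# library, assuming the old pre-2.x Elasticsearch HTTP API.
import json
import urllib.request
from datetime import datetime, timedelta

RETENTION_DAYS = 30
ES = 'http://127.0.0.1:9200'

def request(url, method='GET'):
    # urllib.request lets us pick the HTTP verb per request, so there
    # is no sticky CUSTOMREQUEST state to reset between calls
    req = urllib.request.Request(url, method=method)
    with urllib.request.urlopen(req) as resp:
        return resp.read().decode('utf-8')

cutoff = datetime.now() - timedelta(days=RETENTION_DAYS)
status = json.loads(request(ES + '/_status'))

for index in status['indices']:
    if 'logstash' not in index:
        continue
    # Daily logstash indices are named logstash-YYYY.MM.DD
    index_date = datetime.strptime(index, 'logstash-%Y.%m.%d')
    if index_date < cutoff:
        print('delete', index)
        request('%s/%s' % (ES, index), method='DELETE')
    else:
        print('optimize', index)
        request('%s/%s/_optimize' % (ES, index), method='POST')
```

Either version can be dropped into cron to run nightly.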
It turns out there is a much better tool for all this Elasticsearch housekeeping called curator, but sometimes it's still nice to roll your own scripts :-)