#!/usr/bin/python
import pycurl
import json
import StringIO
from datetime import datetime, timedelta
retentionDays = 30
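
# ask the local Elasticsearch node for the status of every index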
c = pycurl.Curl()
b = StringIO.StringIO()
c.setopt(c.URL, 'http://127.0.0.1:9200/_status')
c.setopt(pycurl.WRITEFUNCTION, b.write)
c.perform()
blob = json.loads(b.getvalue())

# delete logstash indices older than retentionDays, optimize the ones we keep
cutoff = datetime.now() - timedelta(days=retentionDays)
for index in blob['indices']:
    if index.startswith('logstash-'):
        # the index name embeds its date, e.g. logstash-2014.01.31
        indexDate = datetime.strptime(index, "logstash-%Y.%m.%d")
        if indexDate < cutoff:
            print "delete", index
            c.setopt(pycurl.CUSTOMREQUEST, "DELETE")
            c.setopt(c.URL, 'http://127.0.0.1:9200/%s' % index)
            c.perform()
        else:
            print "optimize", index
            # reset the HTTP verb, otherwise a previous DELETE sticks to the handle
            c.setopt(pycurl.CUSTOMREQUEST, "POST")
            c.setopt(c.URL, 'http://127.0.0.1:9200/%s/_optimize' % index)
            c.perform()
c.close()
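
A script like this would typically be run once a night from cron. Here is a sketch of a crontab entry, assuming the script is saved as /usr/local/bin/es-housekeeping.py (both paths below are made up for illustration):

# nightly Elasticsearch housekeeping at 01:00
0 1 * * * /usr/local/bin/es-housekeeping.py >> /var/log/es-housekeeping.log 2>&1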
It turns out there is a much better tool for all Elasticsearch-related housekeeping called Curator, but sometimes it's still nice to write your own scripts :-)
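
For reference, the delete half of the script maps onto a Curator action file. A rough sketch, assuming the YAML action-file format of Curator 4 and later (field names written from memory, so check them against the Curator documentation for your version):

actions:
  1:
    action: delete_indices
    description: Drop logstash indices older than 30 days
    options:
      ignore_empty_list: True
    filters:
      - filtertype: pattern
        kind: prefix
        value: logstash-
      - filtertype: age
        source: name
        direction: older
        timestring: '%Y.%m.%d'
        unit: days
        unit_count: 30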
