{ "cells": [ { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 4, "hidden": false, "row": 0, "width": 4 }, "report_default": {} } } }, "hideCode": true, "hidePrompt": true }, "source": [ "# Import data from the new DIMM into ElasticSearch" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 4, "height": 4, "hidden": false, "row": 0, "width": 4 }, "report_default": {} } } }, "hideCode": false, "hidePrompt": false }, "source": [ "We load the modules and other variables. The data_slodar.txt has to be cleaned up since it contains towards the end some text which cannot be parsed." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2017-05-26T16:36:47.605236", "start_time": "2017-05-26T16:36:47.595025" }, "collapsed": true, "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "hidden": true }, "report_default": {} } } }, "hideCode": false, "hidePrompt": false }, "outputs": [], "source": [ "from elasticsearch import Elasticsearch\n", "import pandas as pd\n", "import os\n", "import numpy as np\n", "import subprocess\n", "path_data = '/data/datalake/asm'\n", "new_dimm_filename = 'data_new_dimm.csv'" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 8, "height": 4, "hidden": false, "row": 0, "width": 4 }, "report_default": {} } } }, "hideCode": false, "hidePrompt": false }, "source": [ "We init the elastic search. Instead of servername, just insert the name of your server." 
] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2017-05-26T16:36:48.584409", "start_time": "2017-05-26T16:36:48.578669" }, "collapsed": true, "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "hidden": true }, "report_default": {} } } }, "hideCode": false, "hidePrompt": false }, "outputs": [], "source": [ "es = Elasticsearch('http://servername:9200', timeout=20.0, bulk_size=100000)" ] }, { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2017-05-26T16:36:41.248938", "start_time": "2017-05-26T16:36:41.238927" } }, "source": [ "We perform the query" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2017-05-26T16:39:38.286125", "start_time": "2017-05-26T16:39:34.718856" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "wget -O /data/datalake/asm/data_new_dimm.txt http://archive.eso.org/wdb/wdb/asm/dimm_paranal/query?wdbo=csv&start_date=2017-04-28T00:00:00.00..2017-05-01T12:00:00.00&tab_fwhm=1&tab_rfl=0&tab_rfl_time=0&top=1000000\n", "--2017-05-26 16:39:34-- http://archive.eso.org/wdb/wdb/asm/dimm_paranal/query?wdbo=csv&start_date=2017-04-28T00:00:00.00..2017-05-01T12:00:00.00&tab_fwhm=1&tab_rfl=0&tab_rfl_time=0&top=1000000\n", "Resolving archive.eso.org (archive.eso.org)... 134.171.46.246\n", "Connecting to archive.eso.org (archive.eso.org)|134.171.46.246|:80... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: unspecified [text/plain]\n", "Saving to: ‘/data/datalake/asm/data_new_dimm.txt’\n", "\n", " 0K .......... .......... .......... .......... .......... 
19.0K\n", " 50K 42.4K=2.6s\n", "\n", "2017-05-26 16:39:38 (19.1 KB/s) - ‘/data/datalake/asm/data_new_dimm.txt’ saved [51693]\n", "\n", "\n" ] } ], "source": [ "# Query the ESO archive (dimm_paranal) for the requested date range and save the CSV reply with wget.\n", "start_date_asm_str='2017-04-28T00:00:00.00'\n", "end_date_asm_str ='2017-05-01T12:00:00.00'\n", "request_asm_str = ['wget','-O',os.path.join(path_data,new_dimm_filename),\\\n", " 'http://archive.eso.org/wdb/wdb/asm/dimm_paranal/query?wdbo=csv&start_date={0:s}..{1:s}&tab_fwhm=1&tab_rfl=0&tab_rfl_time=0&top=1000000'.format(\\\n", " start_date_asm_str,end_date_asm_str)]\n", "# stderr is merged into stdout, so `error` is always None and `output` holds the complete wget log.\n", "output,error = subprocess.Popen(request_asm_str,stdout=subprocess.PIPE,stderr=subprocess.STDOUT).communicate()\n", "print(' '.join(request_asm_str))\n", "print(output.decode('UTF8'))" ] }, { "cell_type": "markdown", "metadata": { "hideCode": false, "hidePrompt": false }, "source": [ "We read the CSV file into a pandas DataFrame." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2017-05-26T16:39:39.307410", "start_time": "2017-05-26T16:39:39.283089" }, "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "hidden": true }, "report_default": {} } } }, "hideCode": false, "hidePrompt": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1984\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n", " if __name__ == '__main__':\n" ] } ], "source": [ "# skipfooter is only supported by the 'python' parser engine; selecting it explicitly gives the\n", "# same result as the implicit fallback, without the ParserWarning.\n", "# NOTE(review): skiprows=1 / skipfooter=5 drop the first line and the last five lines of the\n", "# download, presumably non-CSV text added by the archive -- confirm against the file.\n", "new_dimm_df = pd.read_csv(os.path.join(path_data,new_dimm_filename),skiprows=1,skipfooter=5,engine='python')\n", "print(len(new_dimm_df))" ] }, { "cell_type": "markdown", "metadata": { "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 0, "height": 4, "hidden": false, "row": 4, "width": 4 }, "report_default": {} } } }, "hideCode": false,
"hidePrompt": false }, "source": [ "Let's see how it looks like" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2017-05-26T16:39:41.645258", "start_time": "2017-05-26T16:39:41.626649" }, "extensions": { "jupyter_dashboards": { "version": 1, "views": { "grid_default": { "col": 4, "height": 13, "hidden": false, "row": 4, "width": 4 }, "report_default": {} } } }, "hideCode": false, "hidePrompt": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | Date time | \n", "DIMM Seeing [\"] | \n", "
---|---|---|
0 | \n", "2017-04-28T00:01:01 | \n", "0.401 | \n", "
1 | \n", "2017-04-28T00:02:20 | \n", "0.412 | \n", "
2 | \n", "2017-04-28T00:03:40 | \n", "0.457 | \n", "
3 | \n", "2017-04-28T00:04:59 | \n", "0.496 | \n", "
4 | \n", "2017-04-28T00:06:18 | \n", "0.493 | \n", "