From 1b87af327be767f726a2c6909e161b482e2d71ea Mon Sep 17 00:00:00 2001
From: Roberto Rodriguez
Date: Sat, 4 Aug 2018 16:33:48 -0700
Subject: [PATCH] HELK v0.1.3-alpha08042018

Removed the Zeppelin folder and removed the wrong password string
displayed for JupyterHub after installing HELK.
---
 helk-zeppelin/Dockerfile               | 89 -------------------------
 helk-zeppelin/spark-defaults.conf      | 38 -----------
 helk-zeppelin/zeppelin-env.sh.template | 90 --------------------------
 3 files changed, 217 deletions(-)
 delete mode 100644 helk-zeppelin/Dockerfile
 delete mode 100644 helk-zeppelin/spark-defaults.conf
 delete mode 100644 helk-zeppelin/zeppelin-env.sh.template

diff --git a/helk-zeppelin/Dockerfile b/helk-zeppelin/Dockerfile
deleted file mode 100644
index 29ea1a3e..00000000
--- a/helk-zeppelin/Dockerfile
+++ /dev/null
@@ -1,89 +0,0 @@
-# HELK script: HELK Zeppelin Dockerfile
-# HELK build version: 0.9 (Alpha)
-# Author: Roberto Rodriguez (@Cyb3rWard0g)
-# License: BSD 3-Clause
-# Reference: https://github.com/apache/zeppelin/blob/master/scripts/docker/zeppelin/bin/Dockerfile
-
-FROM cyb3rward0g/helk-spark-base:2.3.0
-LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g"
-LABEL description="Dockerfile base for the HELK Zeppelin."
-
-ENV DEBIAN_FRONTEND noninteractive
-
-USER root
-# *********** Installing Prerequisites ***************
-ENV Z_VERSION="0.8."
-ENV LOG_TAG="[ZEPPELIN_${Z_VERSION}]:" \
-    Z_HOME="/zeppelin"
-
-ENV Z_GID=710
-ENV Z_UID=710
-ENV Z_USER=zelk
-
-ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
-RUN echo "$LOG_TAG Install java8" && \
-    apt-get -y update && \
-    apt-get install -y git openjdk-8-jdk \
-    libfontconfig git build-essential chrpath \
-    libssl-dev libxft-dev libfreetype6 libfreetype6-dev \
-    libfontconfig1 libfontconfig1-dev python3-pip && \
-    rm -rf /var/lib/apt/lists/*
-
-# *********** Upgrading PIP ***************
-RUN pip3 install --upgrade pip
-
-# *********** Create User *****************
-RUN groupadd -r zelk -g ${Z_GID} \
-    && useradd -m -s /bin/bash -N -u $Z_UID $Z_USER \
-    && chmod g+w /etc/passwd /etc/group \
-    && chown -R zelk:zelk /usr/local/ /tmp /usr/bin ${SPARK_HOME}
-
-RUN echo "$LOG_TAG Install python related packages" && \
-    apt-get -y update && \
-    apt-get install -y gfortran && \
-    # numerical/algebra packages
-    apt-get install -y libblas-dev libatlas-dev liblapack-dev && \
-    # font, image for matplotlib
-    apt-get install -y libpng-dev libfreetype6-dev libxft-dev && \
-    # for tkinter
-    apt-get install -y python-tk libxml2-dev libxslt-dev zlib1g-dev && \
-    pip3 install numpy && \
-    pip3 install matplotlib
-
-# ************** Install PhantomJS ****************
-USER $Z_UID
-
-# ************** Install Maven *********************
-ENV MAVEN_VERSION 3.5.3
-RUN wget -qO- http://www.us.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz | tar xvz -C /usr/local && \
-    ln -s /usr/local/apache-maven-${MAVEN_VERSION}/bin/mvn /usr/bin/mvn
-
-USER root
-# ************** Install Zeppelin *********************
-RUN echo "$LOG_TAG Download Zeppelin" && \
-    cd /tmp && git clone --branch v0.8.0-rc2 https://github.com/apache/zeppelin.git && \
-    mv /tmp/zeppelin ${Z_HOME}
-
-RUN chown -R zelk:zelk ${Z_HOME}
-
-USER $Z_UID
-
-RUN cd $Z_HOME && \
-    mvn clean package -DskipTests -X
-
-# *********** Install CAPS ***************
-RUN cd ${Z_HOME} && \
-    wget https://github.com/opencypher/cypher-for-apache-spark/releases/download/1.0.0-beta7/spark-cypher-1.0.0-beta7-cluster.jar
-
-ADD spark-defaults.conf ${SPARK_HOME}/conf/
-
-USER root
-
-RUN echo "$LOG_TAG Cleanup" && \
-    apt-get autoclean && \
-    apt-get clean
-
-EXPOSE $ZEPPELIN_PORT
-
-WORKDIR ${Z_HOME}
-CMD ["bin/zeppelin.sh"]
\ No newline at end of file
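For anyone wanting to reproduce the removed image from this commit's parent, a minimal sketch of the build and run steps, assuming helk-zeppelin/ still exists in the checkout (the image tag and host port below are illustrative, not taken from the HELK compose files; Zeppelin listens on 8080 by default):

    docker build -t helk-zeppelin:local helk-zeppelin/
    docker run -d --name helk-zeppelin -p 8080:8080 helk-zeppelin:local

Note that $ZEPPELIN_PORT is never defined in this Dockerfile, so the EXPOSE instruction may fail or expose nothing; adding ENV ZEPPELIN_PORT=8080 before the EXPOSE line would likely be needed for a clean build.
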
-RUN echo "$LOG_TAG Cleanup" && \ - apt-get autoclean && \ - apt-get clean - -EXPOSE $ZEPPELIN_PORT - -WORKDIR ${Z_HOME} -CMD ["bin/zeppelin.sh"] \ No newline at end of file diff --git a/helk-zeppelin/spark-defaults.conf b/helk-zeppelin/spark-defaults.conf deleted file mode 100644 index 4e9e6d60..00000000 --- a/helk-zeppelin/spark-defaults.conf +++ /dev/null @@ -1,38 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Default system properties included when running spark-submit. -# This is useful for setting default environmental settings. - -# Example: -# spark.master spark://master:7077 -# spark.eventLog.enabled true -# spark.eventLog.dir hdfs://namenode:8021/directory -# spark.serializer org.apache.spark.serializer.KryoSerializer -# spark.driver.memory 5g -# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" - -# HELK References: -# https://spark.apache.org/docs/latest/configuration.html -# https://graphframes.github.io/quick-start.html -# https://spark-packages.org/package/graphframes/graphframes -# https://spark.apache.org/docs/latest/sql-programming-guide.html#pyspark-usage-guide-for-pandas-with-apache-arrow - -#spark.master spark://helk-spark-master:7077 -spark.jars /zeppelin/spark-cypher-1.0.0-beta7-cluster.jar -spark.jars.packages graphframes:graphframes:0.5.0-spark2.1-s_2.11,org.apache.spark:spark-sql-kafka-0-10_2.11:2.3.0,databricks:spark-sklearn:0.2.3 -spark.sql.execution.arrow.enabled true \ No newline at end of file diff --git a/helk-zeppelin/zeppelin-env.sh.template b/helk-zeppelin/zeppelin-env.sh.template deleted file mode 100644 index 7c4c3583..00000000 --- a/helk-zeppelin/zeppelin-env.sh.template +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# export JAVA_HOME= -# export MASTER= # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode. -# export ZEPPELIN_JAVA_OPTS # Additional jvm options. 
diff --git a/helk-zeppelin/zeppelin-env.sh.template b/helk-zeppelin/zeppelin-env.sh.template
deleted file mode 100644
index 7c4c3583..00000000
--- a/helk-zeppelin/zeppelin-env.sh.template
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# export JAVA_HOME=
-# export MASTER=                 # Spark master url. eg. spark://master_addr:7077. Leave empty if you want to use local mode.
-# export ZEPPELIN_JAVA_OPTS      # Additional jvm options. for example, export ZEPPELIN_JAVA_OPTS="-Dspark.executor.memory=8g -Dspark.cores.max=16"
-# export ZEPPELIN_MEM            # Zeppelin jvm mem options Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
-# export ZEPPELIN_INTP_MEM       # zeppelin interpreter process jvm mem options. Default -Xms1024m -Xmx1024m -XX:MaxPermSize=512m
-# export ZEPPELIN_INTP_JAVA_OPTS # zeppelin interpreter process jvm options.
-# export ZEPPELIN_SSL_PORT       # ssl port (used when ssl environment variable is set to true)
-
-# export ZEPPELIN_LOG_DIR        # Where log files are stored. PWD by default.
-# export ZEPPELIN_PID_DIR        # The pid files are stored. ${ZEPPELIN_HOME}/run by default.
-# export ZEPPELIN_WAR_TEMPDIR    # The location of jetty temporary directory.
-# export ZEPPELIN_NOTEBOOK_DIR   # Where notebook saved
-# export ZEPPELIN_NOTEBOOK_HOMESCREEN # Id of notebook to be displayed in homescreen. ex) 2A94M5J1Z
-# export ZEPPELIN_NOTEBOOK_HOMESCREEN_HIDE # hide homescreen notebook from list when this value set to "true". default "false"
-# export ZEPPELIN_NOTEBOOK_S3_BUCKET # Bucket where notebook saved
-# export ZEPPELIN_NOTEBOOK_S3_ENDPOINT # Endpoint of the bucket
-# export ZEPPELIN_NOTEBOOK_S3_USER # User in bucket where notebook saved. For example bucket/user/notebook/2A94M5J1Z/note.json
-# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_ID # AWS KMS key ID
-# export ZEPPELIN_NOTEBOOK_S3_KMS_KEY_REGION # AWS KMS key region
-# export ZEPPELIN_IDENT_STRING   # A string representing this instance of zeppelin. $USER by default.
-# export ZEPPELIN_NICENESS       # The scheduling priority for daemons. Defaults to 0.
-# export ZEPPELIN_INTERPRETER_LOCALREPO # Local repository for interpreter's additional dependency loading
-# export ZEPPELIN_INTERPRETER_DEP_MVNREPO # Remote principal repository for interpreter's additional dependency loading
-# export ZEPPELIN_HELIUM_NPM_REGISTRY # Remote Npm registry for Helium dependency loader
-# export ZEPPELIN_NOTEBOOK_STORAGE # Refers to pluggable notebook storage class, can have two classes simultaneously with a sync between them (e.g. local and remote).
-# export ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC # If there are multiple notebook storages, should we treat the first one as the only source of truth?
-# export ZEPPELIN_NOTEBOOK_PUBLIC # Make notebook public by default when created, private otherwise
-
-#### Spark interpreter configuration ####
-
-## Use provided spark installation ##
-## defining SPARK_HOME makes Zeppelin run spark interpreter process using spark-submit
-##
-# export SPARK_HOME              # (required) When it is defined, load it instead of Zeppelin embedded Spark libraries
-# export SPARK_SUBMIT_OPTIONS    # (optional) extra options to pass to spark submit. eg) "--driver-memory 512M --executor-memory 1G".
-# export SPARK_APP_NAME          # (optional) The name of spark application.
-
-## Use embedded spark binaries ##
-## without SPARK_HOME defined, Zeppelin still able to run spark interpreter process using embedded spark binaries.
-## however, it is not encouraged when you can define SPARK_HOME
-##
-# Options read in YARN client mode
-# export HADOOP_CONF_DIR         # yarn-site.xml is located in configuration directory in HADOOP_CONF_DIR.
-# Pyspark (supported with Spark 1.2.1 and above)
-# To configure pyspark, you need to set spark distribution's path to 'spark.home' property in Interpreter setting screen in Zeppelin GUI
-# export PYSPARK_PYTHON          # path to the python command. must be the same path on the driver(Zeppelin) and all workers.
-# export PYTHONPATH
-
-## Spark interpreter options ##
-##
-# export ZEPPELIN_SPARK_USEHIVECONTEXT # Use HiveContext instead of SQLContext if set true. true by default.
-# export ZEPPELIN_SPARK_CONCURRENTSQL  # Execute multiple SQL concurrently if set true. false by default.
-# export ZEPPELIN_SPARK_IMPORTIMPLICIT # Import implicits, UDF collection, and sql if set true. true by default.
-# export ZEPPELIN_SPARK_MAXRESULT      # Max number of Spark SQL result to display. 1000 by default.
-# export ZEPPELIN_WEBSOCKET_MAX_TEXT_MESSAGE_SIZE # Size in characters of the maximum text message to be received by websocket. Defaults to 1024000
-
-
-#### HBase interpreter configuration ####
-
-## To connect to HBase running on a cluster, either HBASE_HOME or HBASE_CONF_DIR must be set
-
-# export HBASE_HOME=             # (require) Under which HBase scripts and configuration should be
-# export HBASE_CONF_DIR=         # (optional) Alternatively, configuration directory can be set to point to the directory that has hbase-site.xml
-
-#### ZeppelinHub connection configuration ####
-# export ZEPPELINHUB_API_ADDRESS # Refers to the address of the ZeppelinHub service in use
-# export ZEPPELINHUB_API_TOKEN   # Refers to the Zeppelin instance token of the user
-# export ZEPPELINHUB_USER_KEY    # Optional, when using Zeppelin with authentication.
-
-#### Zeppelin impersonation configuration
-# export ZEPPELIN_IMPERSONATE_CMD # Optional, when user want to run interpreter as end web user. eg) 'sudo -H -u ${ZEPPELIN_IMPERSONATE_USER} bash -c '
-# export ZEPPELIN_IMPERSONATE_SPARK_PROXY_USER # Optional, by default is true; can be set to false if you don't want to use --proxy-user option with Spark interpreter when impersonation enabled
\ No newline at end of file
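
The template above was carried over unmodified from upstream Zeppelin and contained nothing HELK-specific, which is why it could be dropped wholesale. For anyone wiring Zeppelin to the HELK Spark cluster by hand, only a few of these variables normally need values; a minimal sketch of a filled-in zeppelin-env.sh, assuming SPARK_HOME is inherited from the helk-spark-base image (the exact path below is an assumption, not stated in this patch) and Python 3 exists at the same path on all workers:

    #!/bin/bash
    export MASTER=spark://helk-spark-master:7077   # match spark-defaults.conf above
    export SPARK_HOME=/opt/helk/spark              # assumed install path, not confirmed by this patch
    export ZEPPELIN_MEM="-Xms1024m -Xmx1024m"      # upstream default, stated explicitly
    export PYSPARK_PYTHON=/usr/bin/python3         # must be identical on driver and workers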