Commit bb702c1a authored by Shao-Ching Huang

first commit

*~
*.swp
This is one line.
This is another line.
This is the last line.
This 3
another 1
is 3
last 1
line. 3
one 1
the 1
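
The seven lines above are the stock Hadoop wordcount output for the three-line input file: whitespace tokens counted and listed in sorted order (uppercase "This" sorts before the lowercase words). A minimal local sketch of the same count-and-sort logic, in plain Python rather than MapReduce, using the sample lines above:

#!/usr/bin/python
# Local wordcount sketch: tokenize on whitespace, count, print sorted by key.
from collections import Counter

lines = ["This is one line.", "This is another line.", "This is the last line."]
counts = Counter(word for line in lines for word in line.split())
for word in sorted(counts):
    print word, counts[word]
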
#!/bin/sh
#$ -cwd
#$ -pe shared 8
#$ -l h_rt=8:00:00,hadoop
# set up the hadoop cluster
source /u/local/bin/hdfsstart.sh
# your map-reduce work is here
hadoop fs -copyFromLocal $SCRATCH/in /in
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /in /out
hadoop fs -cat /out/*
# clean up the tmp files and Hadoop services
source /u/local/bin/hdfsstop.sh
# end of script
#!/usr/bin/python
# * Copyright 2016. The Regents of the University of California.
# * All Rights Reserved. Use is subject to license terms.
# * @author Prakashan Korambath (Jan, 2010)
# * SGE prolog script for Hadoop
# * UPDATED by Qiyang Hu (June, 2016)
# * SGE prolog script updated for Hadoop 2.x
import os, sys, re, time
def getmaster(file, masters, slaves):
    try:
        input_file = open(file, 'r')
    except IOError:
        print "Could not open " + file
        sys.exit(1)
    try:
        masters_file = open(masters, 'w')
    except IOError:
        print "Could not open " + masters
        sys.exit(1)
    try:
        slaves_file = open(slaves, 'w')
    except IOError:
        print "Could not open " + slaves
        sys.exit(1)
    # First line of PE_HOSTFILE: the master host name and its slot count.
    line = input_file.readline()
    master = line.split()
    nslots = master[1]
    master = master[0]
    masters_file.write(master + '\n')
    slaves_file.write(master + '\n')
    # Every host, including the master, also acts as a slave.
    while line:
        line = input_file.readline()
        slave = line.split()
        if line != "":
            slave = slave[0]
            slaves_file.write(slave + '\n')
    input_file.close()
    masters_file.close()
    slaves_file.close()
    return master, nslots
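
# For illustration only (hypothetical hosts, not part of the prolog): given a
# PE_HOSTFILE whose lines look like
#   n1 8 pool1 <NULL>
#   n2 8 pool1 <NULL>
# getmaster() writes n1 to the masters file, n1 and n2 to the slaves file,
# and returns ('n1', '8').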
def updateconfig(file, newconfig):
    try:
        infile = open(file)
        s = infile.read()
        infile.close()
        # Replace the whole <configuration> body with the new properties.
        pattern = r"<configuration>([\s\S]*)</configuration>\s*$"
        if re.search(pattern, s):
            s1 = re.sub(pattern, '<configuration>\n' + newconfig + '</configuration>', s)
            ofile = file.replace(".template", '')
            outfile = open(ofile, "w")
            outfile.write(s1)
            outfile.close()
    except Exception as e:
        print "Error in updateconfig for file " + file
        print e
        sys.exit(1)
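
# Sketch of what updateconfig() does (assumed minimal template): the entire
# body of the <configuration>...</configuration> block is replaced wholesale
# with the properties passed in, and a file named *.template is written back
# without the suffix, e.g. mapred-site.xml.template -> mapred-site.xml.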
def updateenvsh(file):
    try:
        infile = open(file)
        s = infile.read()
        infile.close()
        envlist = ['HADOOP_HOME', 'JAVA_HOME', 'HADOOP_CONF_DIR', 'HADOOP_LOG_DIR', 'YARN_CONF_DIR', 'YARN_LOG_DIR']
        for env in envlist:
            pattern = r"#*\s?export " + env + "=.*"
            if re.search(pattern, s):
                # Overwrite an existing (possibly commented-out) export line.
                s = re.sub(pattern, "\n\n" + 'export ' + env + '=' + os.environ.get(env), s)
            else:
                # No matching line: append the export after the license footer.
                endoflic = "# limitations under the License."
                pattern = endoflic
                s = re.sub(pattern, endoflic + "\n\n" + 'export ' + env + '=' + os.environ.get(env), s)
        ofile = file.replace(".template", '')
        outfile = open(ofile, "w")
        outfile.write(s)
        outfile.close()
    except Exception as e:
        print "Error in updateenvsh for file " + file
        print e
        sys.exit(1)
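
# Example of the rewrite (hypothetical value): a template line such as
#   # export JAVA_HOME=
# becomes
#   export JAVA_HOME=/u/local/apps/java/current
# while variables with no matching export line are appended just after the
# Apache license footer.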
def createhdfssite(nodes, master, mapport, tmpdir, file):
    s = "<property>" + '\n'
    s += " <name>dfs.replication</name>" + '\n'
    s += " <value>" + nodes + "</value>" + '\n'
    s += "</property>" + '\n'
    s += "<property>" + '\n'
    s += " <name>dfs.secondary.http.address</name>" + '\n'
    s += " <value>hdfs://" + master + ":" + mapport + "</value>" + '\n'
    s += "</property>" + '\n'
    s += "<property>" + '\n'
    s += " <name>dfs.name.dir</name>" + '\n'
    s += " <value>" + tmpdir + "/namenode</value>" + '\n'
    s += "</property>" + '\n'
    s += "<property>" + '\n'
    s += " <name>dfs.data.dir</name>" + '\n'
    s += " <value>" + tmpdir + "/datanode</value>" + '\n'
    s += "</property>" + '\n'
    updateconfig(file, s)
def createmapredsite(file):
    s = "<property>" + '\n'
    s += " <name>mapreduce.framework.name</name>" + '\n'
    s += " <value>yarn</value>" + '\n'
    s += "</property>" + '\n'
    updateconfig(file, s)
def createcoresite(master, file, coreport, tmpdir):
    s = "<property>" + '\n'
    s += " <name>hadoop.tmp.dir</name>" + '\n'
    s += " <value>" + tmpdir + "/hadoop-${user.name}</value>" + '\n'
    s += "</property>" + '\n'
    s += "<property>" + '\n'
    s += " <name>fs.default.name</name>" + '\n'
    s += " <value>hdfs://" + master + ":" + coreport + "</value>" + '\n'
    s += "</property>" + '\n'
    updateconfig(file, s)
def createyarnsite(file):
    s = "<property>" + '\n'
    s += " <name>yarn.nodemanager.aux-services</name>" + '\n'
    s += " <value>mapreduce_shuffle</value>" + '\n'
    s += "</property>" + '\n'
    updateconfig(file, s)
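
# Putting the generators together (hypothetical values): with master n1,
# coreport 9000 and mapport 9002, core-site.xml points fs.default.name at
# hdfs://n1:9000, hdfs-site.xml points the secondary namenode at
# hdfs://n1:9002 and keeps the HDFS name/data directories under the job's
# TMPDIR, mapred-site.xml selects the yarn framework, and yarn-site.xml
# enables the mapreduce_shuffle auxiliary service.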
def main():
    if len(sys.argv) != 7:
        print "Usage: " + sys.argv[0] + " PE_HOSTFILE TMPDIR NHOSTS HADOOP_CONF_DIR coreport mapport"
        sys.exit(1)
    else:
        pehostfile = sys.argv[1]
        tmpdir = sys.argv[2]
        nodes = sys.argv[3]
        HADOOP_CONF_DIR = sys.argv[4]
        coreport = sys.argv[5]
        mapport = sys.argv[6]
    print "Ports are " + coreport + " " + mapport
    home = os.environ["HOME"]
    mapredfile = HADOOP_CONF_DIR + "/mapred-site.xml.template"
    hdfssitefile = HADOOP_CONF_DIR + "/hdfs-site.xml"
    yarnsitefile = HADOOP_CONF_DIR + "/yarn-site.xml"
    mastersfile = HADOOP_CONF_DIR + "/masters"
    slavesfile = HADOOP_CONF_DIR + "/slaves"
    coresitefile = HADOOP_CONF_DIR + "/core-site.xml"
    hadoopenvfile = HADOOP_CONF_DIR + "/hadoop-env.sh"
    yarnenvfile = HADOOP_CONF_DIR + "/yarn-env.sh"
    time.sleep(10)
    try:
        master, nslots = getmaster(pehostfile, mastersfile, slavesfile)
        createcoresite(master, coresitefile, coreport, tmpdir)
        createmapredsite(mapredfile)
        createhdfssite(nodes, master, mapport, tmpdir, hdfssitefile)
        createyarnsite(yarnsitefile)
        updateenvsh(hadoopenvfile)
        updateenvsh(yarnenvfile)
    except Exception as e:
        print "Error in python execution"
        print e
        sys.exit(1)

if __name__ == "__main__":
    main()
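
The prolog is driven by the start-up script below with six positional arguments (PE_HOSTFILE, TMPDIR, NHOSTS, HADOOP_CONF_DIR, coreport, mapport). A stand-alone smoke test of getmaster() against a fabricated hostfile (hypothetical hosts; on the cluster SGE supplies the real PE_HOSTFILE), assuming hdfsprolog.py is importable from the current directory:

#!/usr/bin/python
# Exercise getmaster() against a throwaway PE_HOSTFILE.
import os, tempfile
from hdfsprolog import getmaster

d = tempfile.mkdtemp()
pe = os.path.join(d, 'pe_hostfile')
f = open(pe, 'w')
f.write('n1 8 pool1 <NULL>\nn2 8 pool1 <NULL>\n')
f.close()
master, nslots = getmaster(pe, os.path.join(d, 'masters'), os.path.join(d, 'slaves'))
print master, nslots   # expected: n1 8
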
#!/bin/sh
source /u/local/Modules/default/init/modules.sh
export MODULEPATH=$MODULEPATH:/u/local/Modules/modulefiles
module load java
module load hadoop
source /u/local/bin/set_qrsh_env.sh
export HADOOP_CONF_BASE=$HOME/.hadoop
mkdir -p $HADOOP_CONF_BASE
rm -rf $TMPDIR/hadoop-*
echo "NUMBER of HOSTS = $NHOSTS"
echo "TMPDIR = $TMPDIR"
##==export HADOOP_CONF_DIR=${SGE_O_WORKDIR}/conf.${JOB_ID}
dt=$(date '+%Y-%m-%d-%H-%M-%S');
export HADOOP_CONF_DIR=$HADOOP_CONF_BASE/conf.${JOB_ID}
export HADOOP_LOG_DIR=$HADOOP_CONF_BASE/logs.${JOB_ID}
export YARN_CONF_DIR=$HADOOP_CONF_BASE/conf.${JOB_ID}
export YARN_LOG_DIR=$HADOOP_CONF_BASE/logs.${JOB_ID}
echo "Configuration directory is ${HADOOP_CONF_DIR}"
echo "HADOOP LOG directory is ${HADOOP_LOG_DIR}"
echo "YARN LOG directory is ${YARN_LOG_DIR}"
/bin/cp -r ${HADOOP_HOME}/etc/hadoop/ ${HADOOP_CONF_DIR}
#sleep 10
port=9000
count=0
master=`/bin/cat $PE_HOSTFILE | /bin/sed -n 1p | /bin/cut -f1 -d" " `
####master=`hostname`
/u/local/etc/sge.cfg/portcheck.pl $master $port
while [[ $? -eq 0 && ${count} -lt 20 ]]
do
  port=`/usr/bin/expr ${port} + 2`
  count=`/usr/bin/expr ${count} + 1`
  echo "${master} ${port} $count"
  /u/local/etc/sge.cfg/portcheck.pl $master $port
done
coreport=$port
echo "Core Port: $coreport"
port=`/usr/bin/expr ${port} + 1`
count=0
/u/local/etc/sge.cfg/portcheck.pl $master $port
while [[ $? -eq 0 && ${count} -lt 20 ]]
do
  port=`/usr/bin/expr ${port} + 2`
  count=`/usr/bin/expr ${count} + 1`
  echo "${master} ${port} $count"
  /u/local/etc/sge.cfg/portcheck.pl $master $port
done
#echo "${master} ${port} $count"
mapport=$port
echo "Map Port: $mapport"
python /u/systems/SGE/hoffman2/scripts/hadoop/2.x/hdfsprolog.py $PE_HOSTFILE $TMPDIR $NHOSTS $HADOOP_CONF_DIR $coreport $mapport
hdfs namenode -format -force
start-dfs.sh
start-yarn.sh
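
Both scans above call portcheck.pl and keep stepping while it exits 0, so the convention appears to be that status 0 means the port is already in use on the master. That script is not part of this commit; a minimal Python sketch of such a probe, under that assumption:

#!/usr/bin/python
# Hypothetical stand-in for portcheck.pl: exit 0 if host:port accepts a TCP
# connection (port taken), 1 otherwise. Usage: portcheck.py HOST PORT
import socket, sys

host, port = sys.argv[1], int(sys.argv[2])
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(2)
try:
    s.connect((host, port))
    sys.exit(0)   # something is listening; caller should try the next port
except socket.error:
    sys.exit(1)   # connection failed; the port looks free
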
#!/bin/sh
source set_qrsh_env.sh
export HADOOP_CONF_BASE=$HOME/.hadoop
source /u/local/Modules/default/init/modules.sh
module load java
module load hadoop
#if [ -z "${HADOOP_HOME}" ]
#then
# HADOOP_HOME="$HOME/hadoop"; export HADOOP_HOME
#else
# HADOOP_HOME=$HADOOP_HOME; export HADOOP_HOME
#fi
#
echo "HADOOP_HOME = $HADOOP_HOME"
#cd $HADOOP_HOME
#$HADOOP_HOME/bin/stop-all.sh
#export HADOOP_CONF_DIR=${SGE_O_WORKDIR}/conf.${JOB_ID}
export HADOOP_CONF_DIR=${HADOOP_CONF_BASE}/conf.${JOB_ID}
export HADOOP_LOG_DIR=${HADOOP_CONF_BASE}/logs.${JOB_ID}
echo "Configuration directory is ${HADOOP_CONF_DIR}"
echo "LOG directory is ${HADOOP_CONF_DIR}"
echo "STOP HADOOP SERVICES"
stop-dfs.sh
stop-yarn.sh
echo "Removing TMPDIR"
/bin/rm -rf $TMPDIR
if [ -e "${HADOOP_CONF_DIR}" ]
then
  echo "Removing temporary configuration directory ${HADOOP_CONF_DIR}"
  /bin/rm -rf ${HADOOP_CONF_DIR}
fi
if [ -e "${HADOOP_LOG_DIR}" ]
then
  echo "Removing temporary log directory ${HADOOP_LOG_DIR}"
  /bin/rm -rf ${HADOOP_LOG_DIR}
fi