#!/bin/bash
###########################################################################
# Simple script that tries to extract hosts, subdomains, IPs and mail addresses
# from a Google search against a specific domain (Google scraping, if you prefer).
# License: GPLv3
# Name: goohost
# Author: watakushi
# Special thanks to: kartik & kamal
###########################################################################
###########################################################################
# General stuff - usage - errors - parameters definition
#
# Shared state and defaults; the option parser may override METHOD, PAGES and VERBOSITY.
I=0             # index of the current Google query, advanced by the main loop
METHOD="host"   # extraction mode: host unless -m selects ip or mail
PAGES=5         # maximum number of Google pages to download
VERBOSITY=0     # 0 = quiet, 1 = print every query and its result count
TMPRND=$RANDOM  # random number appended to the temporary file names
# Extended regexp locating Google's "Results X - Y of about Z" counter in a page
REGEXPRESULT='Results <b>[0-9,]*</b> - <b>[0-9,]*</b> of[" about "]+<b>[0-9,]*</b>'
#Print the help banner on stdout and terminate the script with exit status 1.
#Every banner line is handed to printf as an argument of a fixed '%s\n'
#format, so a script path ($0) containing '%' or '\' is printed verbatim
#instead of being interpreted as a format directive.
printhelpanddie () {
  printf '%s\n' \
    "" \
    "[*] goohost v.0.0.1 Beta " \
    "[*] Simple script that extracts hosts/subdomains, ip or emails for a specific domain with Google search " \
    "[*] Author: watakushi " \
    "[*] Thanks to: Johnny Long and GHDB for inspiration stuff " \
    "[*] Special thanks to: Danya & Roberto " \
    " " \
    "[*] Usage: $0 -t domain.tld [-m <host|ip|mail> -p <1-20> -v] " \
    " " \
    "[*] -t: target domain. Ex: backtrack.linux.org " \
    "[*] -m: method: <ip|host|mail>. Default value is set to host " \
    "[*] host: raw google hosts and subdomains search " \
    "[*] ip: raw google hosts and subdomains search and performs a reverse DNS resolution " \
    "[*] mail:raw google email search " \
    "[*] -p: pages [1-20]. Max number of pages to download from Google. Default 5 " \
    "[*] -v: verbosity. Default is set to off " \
    "[*] Example: $0 -t backtrack-linux.com -m ip -p 10 -v " \
    " "
  exit 1
}
#Extract the number of results Google reports for the current query.
#Globals: reads REGEXPRESULT, I and TMPRND (they select the downloaded page
#         /tmp/goohost$I-$TMPRND.log); writes RESULT.
#Output:  RESULT holds the total with the thousands separators removed, or 0
#         when the page carries no recognisable counter.
#Returns: always 0. The count is delivered through RESULT only, because an
#         exit status cannot hold values above 255.
getresult () {
  local page="/tmp/goohost$I-$TMPRND.log"
  #Only the first counter on the page is used, so RESULT is a single number
  RESULT=$(grep -Eio "$REGEXPRESULT" "$page" | head -n 1 | cut -d"<" -f 6 | cut -d">" -f 2 | tr -d ",")
  #Anything that is not a plain number (including no match) counts as zero results
  if ! [[ $RESULT =~ ^[0-9]+$ ]]; then
    RESULT=0
  fi
  return 0
}
#Parse the command line options.
#Arguments: the script's positional parameters.
#Globals:   writes DOMAIN (-t), METHOD (-m), PAGES (-p) and VERBOSITY (-v).
#The leading ':' in the option string puts getopts in silent mode: a missing
#option argument is then reported through the ':' branch and an unknown
#option through the '?' branch, each with its own message.
parseoptions () {
  local OPTIND=1 optname
  while getopts ":t:m:p:v" optname; do
    case "$optname" in
      t)
        DOMAIN=$OPTARG
        ;;
      m)
        METHOD=$OPTARG
        ;;
      p)
        #Accept plain digits only and read them as base 10 (so "08" is 8);
        #the value is never evaluated as an arithmetic expression. Anything
        #else becomes 0, which is outside the accepted 1-20 range.
        if [[ $OPTARG =~ ^[0-9]{1,9}$ ]]; then
          PAGES=$((10#$OPTARG))
        else
          PAGES=0
        fi
        ;;
      v)
        VERBOSITY=1
        ;;
      \?)
        echo "[!] Error: Unknown option!"
        printhelpanddie
        ;;
      :)
        echo "[!] Error: Argument needed!"
        printhelpanddie
        ;;
      *)
        echo "[!] Error: Unknown error!!!"
        printhelpanddie
        ;;
    esac
  done
}
parseoptions "$@"
#Check for the tools used in the script and for write permissions.
#The tools are looked up through PATH, which is also how the script invokes
#them, so an installation outside /usr/bin or /bin is accepted.
for tool in wget awk sed; do
  if ! command -v "$tool" > /dev/null 2>&1; then
    echo "[!] Error: $tool not found on this system!" 1>&2
    exit 1
  fi
done
#Downloaded pages go to /tmp; the result and report files go to the current directory
if [ ! -w /tmp ]; then
  echo "[!] Error: Can't write in /tmp ! - Permission denied" 1>&2
  exit 1
fi
if [ ! -w ./ ]; then
  echo "[!] Error: Can't write in ./ ! - Permission denied" 1>&2
  exit 1
fi
#Show the usage banner unless a target domain was given and the method is
#one of host, ip or mail
case "$METHOD" in
  host | ip | mail)
    [[ -n $DOMAIN ]] || printhelpanddie
    ;;
  *)
    printhelpanddie
    ;;
esac
#Use a regular expression based on the method option.
#Globals: reads METHOD and DOMAIN; writes REGEXPQUERY and, for the mail
#         method, QEMAIL (the "+domain" term appended to the Google query).
#The dots of the domain are escaped so that they only match a literal dot:
#with example.com the expression no longer accepts "exampleXcom".
setqueryregexp () {
  local domain
  domain=${DOMAIN//./\\.}
  case "$METHOD" in
    host | ip)
      #name.domain
      REGEXPQUERY='[a-zA-Z0-9\._-]+\.'$domain
      ;;
    mail)
      #user@<em>domain</em>, the domain being wrapped in <em> tags
      REGEXPQUERY="[a-zA-Z0-9._-]+@<em>$domain</em>"
      QEMAIL="+$DOMAIN"
      ;;
  esac
}
setqueryregexp
#Number of queries to run: keep PAGES only when it lies within 1-20,
#otherwise warn and go back to the default of 5.
if ! [[ $PAGES -ge 1 && $PAGES -le 20 ]]; then
  PAGES=5
  echo "[-] Warning: Pages value not in the range 1-20. Default value used!" 1>&2
  printf "\n"
fi
#Check for DNS wildcards: resolve a made-up name and warn when the answer
#still contains an address
if host "idontexist.xxxxx$TMPRND.com" | grep -q address; then
  printf "\n"
  echo "[-] Warning: DNS wildcard detected! With IP method you should have some false positive results." 1>&2
  printf "\n"
fi
###########################################################################
# QUERY:0 Download the first google page with the site: parameter
#
#Google Query
case "$METHOD" in
  host | ip)
    #site:example.tld
    GOOGLEQUERY0="http://www.google.com/search?num=100&q=site%3A$DOMAIN"
    ;;
  mail)
    #site:example.tld example.tld
    GOOGLEQUERY0="http://www.google.com/search?num=100&q=site%3A$DOMAIN$QEMAIL"
    ;;
esac
#Download with wget the page
wget -U "" "$GOOGLEQUERY0" -O "/tmp/goohost$I-$TMPRND.log" -q
#Extract the hosts/emails and start the result file with them.
#The regexp is quoted: it contains a bracket expression that the shell
#would otherwise try to expand as a file name pattern.
grep -Eio "$REGEXPQUERY" "/tmp/goohost$I-$TMPRND.log" > "result-$TMPRND.log"
#Extract the number of results google gives from the query
getresult
#Verbosity: values are printed through %s so they are never read as a format
if [ "$VERBOSITY" = "1" ]; then
  printf '\n'
  printf 'Google Query n.%s \n' "$I"
  printf '%s\n' "$GOOGLEQUERY0"
  printf '\n'
  printf 'Results for query: %s \n' "$RESULT"
  printf '\n'
fi
###########################################################################
# Start the loop, download the pages generated with different types of query
#
# Query plan, selected by the loop counter I:
#   1      site:DOMAIN -inurl:www.DOMAIN     (mail: site:DOMAIN DOMAIN mail)
#   2      *.site:DOMAIN -inurl:www.DOMAIN   (mail: query 1 with start=200)
#   3      site:DOMAIN minus the six most frequent names found so far
#   4-12   query 3 again with start=100, 200, ...
#   13-19  site:DOMAIN minus six names picked at random from the results
# The loop ends when a query reports fewer than 100 results or when the page
# limit is reached.
#

#Print the URL of query 1 or 2 (the query number is $1).
#Globals: reads METHOD, DOMAIN and QEMAIL.
basicquery () {
  local url="http://www.google.com/search?num=100&q="
  if [[ $METHOD == mail ]]; then
    #site:example.tld example.tld mail
    url+="site%3A$DOMAIN$QEMAIL+mail"
    if [[ $1 -eq 2 ]]; then
      url+="&start=200"
    fi
  else
    #site:example.tld -inurl:www.example.tld, prefixed with "*." for query 2
    if [[ $1 -eq 2 ]]; then
      url+="*."
    fi
    url+="site%3A$DOMAIN+-inurl%3Awww.$DOMAIN"
  fi
  printf '%s\n' "$url"
}

#Print the URL of a query that excludes the given terms.
#Arguments: $1 value of the start parameter ("" leaves it out), then the
#           terms to exclude; empty terms are kept as empty operators.
#Globals:   reads METHOD, DOMAIN and QEMAIL.
#host/ip exclude with -inurl:, mail excludes with -intext: and also searches
#for the domain itself (site:example.tld example.tld -intext:...).
excludequery () {
  local start=$1 url="http://www.google.com/search?num=100" operator term
  shift
  if [[ -n $start ]]; then
    url+="&start=$start"
  fi
  if [[ $METHOD == mail ]]; then
    url+="&q=site%3A$DOMAIN$QEMAIL"
    operator="-intext%3A"
  else
    url+="&q=site%3A$DOMAIN"
    operator="-inurl%3A"
  fi
  for term in "$@"; do
    url+="+$operator$term"
  done
  printf '%s\n' "$url"
}

#Filter: reduce every result on stdin to the part used as exclusion term,
#i.e. the mailbox name for mail and the name without the domain otherwise.
stripdomain () {
  if [[ $METHOD == mail ]]; then
    cut -d"@" -f1
  else
    sed -e "s/.$DOMAIN//g"
  fi
}

#Download the page for URL $1, append the hosts/emails found in it to the
#result file and refresh RESULT through getresult.
#Globals: reads I, TMPRND and REGEXPQUERY.
runquery () {
  local page="/tmp/goohost$I-$TMPRND.log"
  wget -U "" "$1" -O "$page" -q
  grep -Eio "$REGEXPQUERY" "$page" >> "result-$TMPRND.log"
  getresult
}

#Verbose report of the query just run; prints nothing unless VERBOSITY is 1.
#Arguments: $1 URL, $2 first word of the result count line, remaining
#           arguments are printed one per line after it.
showquery () {
  [ "$VERBOSITY" = "1" ] || return 0
  local url=$1 label=$2
  shift 2
  printf '\n'
  printf 'Google Query n.%s \n' "$I"
  printf '%s\n' "$url"
  printf '\n'
  printf '%s for query: %s \n' "$label" "$RESULT"
  printf '%s\n' "$@"
}

#Fill the RANDOMTERMS array with six lines picked at random (repetitions
#possible) from /tmp/random-$TMPRND.log; highest is the line count of that
#file and must be at least 1.
pickrandomterms () {
  local pick line
  RANDOMTERMS=()
  for pick in 1 2 3 4 5 6; do
    line=$(( RANDOM % highest + 1 ))
    RANDOMTERMS+=("$(awk -v n="$line" 'NR == n' "/tmp/random-$TMPRND.log")")
  done
}

while [[ "$RESULT" -ge "100" && "$I" -lt $PAGES-1 ]]; do
  I=$((I + 1))
  case "$I" in
    1 | 2)
      GOOGLEQUERY=$(basicquery "$I")
      runquery "$GOOGLEQUERY"
      showquery "$GOOGLEQUERY" "Results" ""
      if [[ $I -eq 2 ]]; then
        #Generate the TOP6 file: the results ordered by number of
        #occurrences, most frequent first; query 3 reads its first six lines
        grep -Eio "$REGEXPQUERY" "result-$TMPRND.log" | sort | uniq -i -c | sort -n -r \
          | grep -Eio "$REGEXPQUERY" | stripdomain > "/tmp/top6-$TMPRND.log"
      fi
      ;;
    3)
      #Always six entries; missing lines of the TOP6 file give empty terms
      TOP6=()
      for rank in 1 2 3 4 5 6; do
        TOP6+=("$(awk -v n="$rank" 'NR == n' "/tmp/top6-$TMPRND.log")")
      done
      GOOGLEQUERY=$(excludequery "" "${TOP6[@]}")
      runquery "$GOOGLEQUERY"
      showquery "$GOOGLEQUERY" "Result" "The TOP6 are: " "${TOP6[*]}"
      ;;
    4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12)
      #Same query as number 3, only the start parameter changes
      START=$(( (I - 3) * 100 ))
      GOOGLEQUERY=$(excludequery "$START" "${TOP6[@]}")
      runquery "$GOOGLEQUERY"
      #Number of pages this query can deliver; once they are all downloaded
      #jump to the end of this group so the next round starts the random queries
      END=$(( RESULT / 100 ))
      if (( I >= END + 3 )); then
        I=12
      fi
      #Note: after the jump above the query number shown here is 12
      showquery "$GOOGLEQUERY" "Result" "The TOP6 are: " "${TOP6[*]}"
      ;;
    13 | 14 | 15 | 16 | 17 | 18 | 19)
      if [[ $I -eq 13 ]]; then
        #Generate the temporary file the random terms are drawn from
        sort -u "result-$TMPRND.log" | stripdomain > "/tmp/random-$TMPRND.log"
        #Number of entries available for the random queries
        highest=$(wc -l < "/tmp/random-$TMPRND.log")
      fi
      if [[ $highest -ge 1 ]]; then
        pickrandomterms
        GOOGLEQUERY=$(excludequery "" "${RANDOMTERMS[@]}")
        runquery "$GOOGLEQUERY"
        showquery "$GOOGLEQUERY" "Result" "Random hosts: ${RANDOMTERMS[*]} " ""
      else
        #Nothing to pick from: push I past every page limit to end the loop
        I=20
      fi
      ;;
  esac
done
###########################################################################
# Generate output and report file
#
#Generate different report for different methods.
#Globals: reads METHOD, DOMAIN and TMPRND; reads result-$TMPRND.log and
#         writes report-$TMPRND-$DOMAIN.txt in the current directory.
#   host: the unique names found
#   ip:   "name address" pairs for every unique name that resolves
#   mail: the unique addresses with the HTML tags removed
writereport () {
  local results="result-$TMPRND.log"
  local report="report-$TMPRND-$DOMAIN.txt"
  local name
  case "$METHOD" in
    host)
      printf '\n'
      sort -u "$results" > "$report"
      ;;
    ip)
      printf '\n'
      #Make sure the report exists even when no name resolves
      : >> "$report"
      #The lookups run in parallel, one background job per name
      while IFS= read -r name; do
        host "$name" | grep "has address" | cut -d" " -f1,4 >> "$report" &
      done < <(sort -u "$results")
      #Let every lookup finish before the report is announced and counted
      wait
      ;;
    mail)
      printf '\n'
      #Strip the tags first so equal addresses collapse into one line
      sed -e "s/<[^>]*>//g" "$results" | sort -u > "$report"
      ;;
    *)
      return 0
      ;;
  esac
  printf 'Results saved in file %s \n' "$report"
  printf '%s results found! \n' "$(( $(wc -l < "$report") ))"
}
writereport
###########################################################################
# Delete temporary files
#
#The raw result list in the current directory, then the downloaded pages
#and the two helper lists in /tmp; missing files are ignored silently.
for scratch in result-$TMPRND.log /tmp/goohost*-$TMPRND.log /tmp/random-$TMPRND.log /tmp/top6-$TMPRND.log; do
  rm -f $scratch 2> /dev/null
done
# Save this script as a .sh file; that's it.