Google Suggest
#######################################
# Query the Google Suggest toolbar XML endpoint (google.co.jp) for one keyword.
# Arguments: $1 - keyword; percent-encoded with urlencode_utf8 before use
# Outputs:   one "suggestion,num_queries" line per suggestion on stdout
# Returns:   always 0
#######################################
google_suggest_search() {
  local encoded=$(urlencode_utf8 "$1")
  local endpoint='http://www.google.co.jp/complete/search'
  local request="${endpoint}?output=toolbar&q=${encoded}"

  # The response is re-encoded to UTF-8 (nkf -w), then a newline is inserted
  # after every </CompleteSuggestion> so the line-oriented filters below see
  # one suggestion per line. Only lines carrying both attributes are kept and
  # each is reduced to "data,int".
  curl -s "$request" \
    | nkf -w \
    | perl -pe 's/<\/CompleteSuggestion>/<\/CompleteSuggestion>\n/g' \
    | grep 'suggestion data' \
    | grep 'num_queries int' \
    | sed 's|.*<suggestion data="\([^"]*\)"/><num_queries int="\([^"]*\)"/>.*|\1,\2|g'

  # Pause for a random 3-7 seconds before returning to the caller.
  sleep "$(( RANDOM % 5 + 3 ))"
  return 0
}
#######################################
# Collect Google Suggest results for a keyword combined with every letter
# a-z, digit 0-9 and a fixed list of hiragana, both as "<keyword> <char>" and
# "<char> <keyword>". Results are streamed to stdout and accumulated in
# "<keyword>.csv" (spaces and '|' in the keyword become '_'), which is
# finally sorted and de-duplicated.
# Arguments: $1 - keyword; empty or "-h" prints the usage text
# Outputs:   suggestion lines (or the usage text) on stdout
# Returns:   1 when only the usage text was printed, 0 otherwise
#######################################
google_suggest() {
  local usage='Usage: google_suggest [keyword]'
  if [[ -z "${1-}" || "$1" == "-h" ]]; then
    # Quoted on purpose: "[keyword]" is a glob pattern and, unquoted, would be
    # replaced by any matching one-letter file name in the current directory.
    printf '%s\n' "$usage"
    return 1
  fi

  local query=$1
  local out_file
  out_file="$(printf '%s\n' "$query" | sed 's/[ | ]/_/g').csv"

  # Start from an empty result file.
  : >"$out_file"

  local ch  # kept local so the loop does not clobber a caller's variable
  for ch in {a..z} {0..9} \
    あ い う え お か き く け こ さ し す せ そ た ち つ て と \
    な に ぬ ね の は ひ ふ へ ほ ま み む め も や ゆ よ \
    ら り る れ ろ わ を ん; do
    google_suggest_search "${query} ${ch}" | tee -a -- "$out_file"
    google_suggest_search "${ch} ${query}" | tee -a -- "$out_file"
  done

  # sort reads all of its input before opening the -o target, so sorting the
  # file onto itself is safe and avoids a predictable "<name>.$$" temp file.
  sort -u -o "$out_file" -- "$out_file"
  return 0
}
google.co.jp
#######################################
# Fetch Google (google.co.jp) result pages for a query at start=0, 10 and 20
# (curl expands the "{0,10,20}" URL glob into three requests), reduce each
# hit to "title,cite", and report whether a target string appears among them.
# Side effects: overwrites ./cache.html with the raw pages (as UTF-8) and
#               writes the reduced hits to "./<query>.csv".
# Arguments: $1 - search query
#            $2 - optional string (e.g. a URL) to look for in the hits;
#                 defaults to $1, which is what the function always used
#                 before this parameter existed
# Outputs:   the "title,cite" lines, followed by "<query>,NA" when the
#            target string does not occur in any of them
# Returns:   0
#######################################
google_rank_search() {
  local query=$1
  local target=${2:-$1}
  local -r agent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
  local results="${query}.csv"

  # Percent-encode the query like google_result_stats does; a raw space or
  # {}/[] would otherwise break the URL or be taken as a curl glob. If the
  # encoder yields nothing, fall back to the raw query.
  local encoded
  encoded=$(urlencode_utf8 "$query")
  [[ -n "$encoded" ]] || encoded=$query

  curl -s -A "$agent" \
    "https://www.google.co.jp/search?q=${encoded}&start={0,10,20}" \
    | nkf -w >cache.html

  # Put every <h3 class="r"> on its own line, drop anchor/emphasis/breadcrumb
  # markup, then keep only the heading text and the <cite> text.
  perl -pe 's/<h3 class="r">/\n<h3 class="r">/g' cache.html \
    | grep '<h3 class="r">' \
    | sed -e 's|<a [^>]*>||g' -e 's|</a>||g' -e 's|<em>||g' -e 's|</em>||g' -e 's|<b>||g' -e 's|</b>||g' \
    | sed -e 's|<span class=bc>||g' -e 's| ›[^>]*</span>||g' \
    | sed 's|.*<h3 class="r">\([^>]*\)</h3>.*<cite>\([^>]*\)</cite>.*|\1,\2|g' \
    | tee -- "$results"

  # Literal (-F) match: the target is a plain string such as a URL, whose
  # dots and question marks must not be interpreted as regex operators.
  if ! grep -qF -- "$target" "$results"; then
    printf '%s,NA\n' "$query"
  fi
  return 0
}
#######################################
# Print the result-count text that Google (google.co.jp) shows for a query.
# Side effects: overwrites ./cache.html with the fetched page (as UTF-8).
# Arguments: $1 - search query; percent-encoded with urlencode_utf8
# Outputs:   the text between "約 " and " 件" on the resultStats line
#######################################
google_result_stats() {
  local encoded=$(urlencode_utf8 "$1")
  local agent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'
  local url="https://www.google.co.jp/search?q=${encoded}"

  curl -s -A "$agent" "$url" | nkf -w >cache.html

  # Strip everything up to and including "約 ", everything from " 件" on, and
  # any remaining "<div id=resultStats>" prefix.
  # Earlier single-expression variant, kept for reference:
  #   sed "s|.*<div id=resultStats>約 \([^ ]*\) 件<nobr> (0.12 秒) </nobr></div>.*|\1|g"
  grep '<div id=resultStats>' cache.html \
    | sed -e 's|.*約 ||g' -e 's| 件.*||g' -e 's|.*<div id=resultStats>||g'
}
utf-8でエンコードするPerlのワンライナー
#######################################
# Percent-encode a string (treated as UTF-8) for use in a URL.
# Requires perl with the Encode and URI::Escape modules.
# Arguments: $1 - string to encode
# Outputs:   the encoded string on stdout, without a trailing newline
#######################################
urlencode_utf8() {
  # The Perl program is single-quoted so the shell neither expands nor globs
  # any part of it (an unquoted "[0]" is a glob pattern and makes the whole
  # program vanish under nullglob). "--" ends perl's own switch parsing so an
  # input such as "-v" is passed through as data.
  perl -MEncode -MURI::Escape \
    -e 'print uri_escape(encode("utf8", decode("utf8", $ARGV[0])));' \
    -- "$1"
}
コメント