#!/bin/sh
#
# Program identity and fixed paths used throughout the script.
#

# Program name, one-line description and version (as reported by nsversion).
PGNAME=nspollrawsdata
PGDESC='NovaStar station polling for CDEC/NOAA/MESOWEST RAWS data'
PGVERS="$(nsversion)"

# Database client command line; expanded unquoted so it splits into words.
PSQL='psql novastar novastar'

# Per-process scratch files: downloaded web page and buffered data reports.
OFILE="/tmp/${PGNAME}.$$.1.tmp"
TMPFILE="/tmp/${PGNAME}.$$.2.tmp"

# Log file receiving a copy of the program output, and the lock file.
LOGFILE="/usr/ns/log/${PGNAME}.log"
LOCKFILE="/usr/ns/locks/${PGNAME}"

# Defaults: polling type, connection timeout, and the poll time span both as
# text and as its length in seconds.
DEF_POLLTYPE='3'
DEF_TIMEOUT='5'
DEF_TIMESPAN='2hours'
DEF_TIMESPANSECS='7200'

# Print the program banner followed by the command line help text.
# Reads PGNAME, PGVERS and PGDESC; writes one line per entry to stdout.
usage () {
  printf '%s\n' \
    "$PGNAME [$PGVERS]: $PGDESC" \
    "Usage: $PGNAME [options...] -s#[,#,...]" \
    "where:" \
    " -s #,...              Station IDs to poll." \
    "and [options...] are:" \
    " -d #                  Debug display level." \
    " -f                    Do NOT file interrogation results in database." \
    " -force                Force polling to override time limits." \
    " -help|--help          Display this help." \
    " -q|-quiet|--quiet     Do not display activity messages." \
    " -t #                  Station polling type:" \
    "                        0=current reading " \
    "                        1=last sampled" \
    "                        2=last logged" \
    "                        3=logged data since last poll" \
    "                        4=logged data between start and stop times" \
    " -ts MM/DD/YY-HH:MM:SS Request data starting at this time." \
    " -te MM/DD/YY-HH:MM:SS Request data ending at this time." \
    " -version|--version    Display program version."
}

# Display program version.
# Writes the value of PGVERS followed by a newline to stdout.
version () {
  # printf instead of echo so a value that looks like an echo option
  # (for example "-n") is still printed literally.
  printf '%s\n' "$PGVERS"
}

# Answer a help or version query given as the first argument, then stop.
# Any other first argument falls through to normal processing.
case "$1" in
  -help|--help)
    usage
    exit 0
    ;;
  -version|--version)
    version
    exit 0
    ;;
esac

# Get command arguments.
#
# Every option that takes a value accepts it either attached ("-s12") or as
# the following argument ("-s 12"). Unknown arguments are reported and
# otherwise ignored.
COMMANDLINE="$0 $*"
debug="0"
polltype=$DEF_POLLTYPE
nofile="false"
force="false"
quiet="false"
MISSING="--"
STAIDS="0"
TIMEEND=""
TIMESTART=""
TIMESPAN=""
while [ $# -gt 0 ]; do
  arg="$1"
  case "$arg" in
    -s|-te|-ts|-d|-t)
      # Value expected in the next argument. Only consume it when it exists:
      # shifting with nothing left is an error that terminates some shells
      # (shift is a special built-in), and an empty value would replace the
      # default and break later numeric tests.
      if [ $# -gt 1 ]; then
        shift
        case "$arg" in
          -s)  STAIDS="$1" ;;
          -te) TIMEEND="$1" ;;
          -ts) TIMESTART="$1" ;;
          -d)  debug="$1" ;;
          -t)  polltype="$1" ;;
        esac
      else
        echo "Missing value for argument: $arg"
      fi
      ;;
    -f) nofile="true" ;;
    -force|--force) force="true" ;;
    -q|-quiet|--quiet) quiet="true" ;;
    # Attached values. -te/-ts must be tried before the shorter -t prefix.
    -s*)  STAIDS="${arg#-s}" ;;
    -te*) TIMEEND="${arg#-te}" ;;
    -ts*) TIMESTART="${arg#-ts}" ;;
    -d*)  debug="${arg#-d}" ;;
    -t*)  polltype="${arg#-t}" ;;
    *) echo "Unknown argument: $arg" ;;
  esac
  shift
done

# TODO: time zones are not dealt with yet.

# Poll ending time: default to the time reported by dispdate when none was
# requested, then recode it and derive the forms used later on
# (tend from "dispdate -t", timestamp for the database update).
if [ -z "$TIMEEND" ]; then
  TIMEEND=$(dispdate "+%m-%d-%Y+%H:%M:%S")
fi
TIMEEND=$(dispdate "$TIMEEND" "+%m-%d-%Y+%H:%M:%S")
tend=$(dispdate "$TIMEEND" -t)
timestamp=$(dispdate "$TIMEEND" "+%Y-%m-%d %H:%M:%S")

if [ "$polltype" -le 2 ]; then
  # Poll types 0-2 always look back the default time span.
  TIMESPAN="$DEF_TIMESPAN"
  tstrt=$((tend-$DEF_TIMESPANSECS))
  TIMESTART=$(dispdate "+%Y-%m-%d+%H:%M:%S" -t "$tstrt")
elif [ -n "$TIMESTART" ]; then
  # A start time was given: the span is the distance from start to end,
  # rounded up to whole hours. A non-positive distance uses the default span.
  tstrt=$(dispdate "$TIMESTART" -t)
  TIMESPAN=$((tend - $tstrt))
  if [ "$TIMESPAN" -le 0 ]; then
    TIMESPAN="$DEF_TIMESPAN"
  else
    TIMESPAN=$(echo "($TIMESPAN + 3599) / 3600" | bc)"hours"
  fi
fi

# Remember the start time resulting from the arguments; when it is empty the
# station loop uses each station's last polled time instead.
TIMESTARTARG=$TIMESTART

# Poll ending time elements, substituted into station URLs.
YEAR=$(dispdate "$TIMEEND" "+%Y")
MONTH=$(dispdate "$TIMEEND" "+%m")
DAY=$(dispdate "$TIMEEND" "+%d")
HOUR=$(dispdate "$TIMEEND" "+%H")

# Translate a 16-point compass heading into a direction in degrees.
#
# Arguments: $1 - heading (N, NNE, NE, ... NNW) or any other value
# Output:    degrees for a known heading; any other value is written back
#            unchanged, so numeric readings pass straight through.
# Globals:   sets heading and wd, as the function always has.
translate_wind_direction_heading() {
  heading=$1
  case "$heading" in
    N)   wd="0" ;;
    NNE) wd="22.5" ;;
    NE)  wd="45" ;;
    ENE) wd="67.5" ;;
    E)   wd="90" ;;
    ESE) wd="112.5" ;;
    SE)  wd="135" ;;
    SSE) wd="157.5" ;;
    S)   wd="180" ;;
    SSW) wd="202.5" ;;
    SW)  wd="225" ;;
    WSW) wd="247.5" ;;
    W)   wd="270" ;;
    WNW) wd="292.5" ;;
    NW)  wd="315" ;;
    NNW) wd="337.5" ;;
    *)   wd="$heading" ;;
  esac
  # Quoted printf: an unquoted echo would glob-expand a value such as "*"
  # and swallow values that look like echo options.
  printf '%s\n' "$wd"
}

# Parse the NOAA RAWS web page held in $OFILE and file its data reports.
#
# Arguments:
#   $1 - station numid
#   $2 - poll start time; reports older than this are not filed
#        (empty means no lower limit)
#   $3 - poll end time; data rows carry a time of day only, so their date is
#        taken from this value
# Globals read: OFILE TMPFILE REMOTETAG FILEDATACMD polltype nofile quiet debug
# Globals set:  STAID TIMESTART TIMEEND tstart tend tlast date
# Each data row is written to $TMPFILE as "<remote tag> <field#> <value>
# <date>-<time>" lines and handed to $FILEDATACMD unless filing is disabled.
process_noaa_raws_data () {
  STAID=$1
  TIMESTART="$2"
  # No start time means no lower limit. (This used to assign a misspelled
  # variable, leaving tstart unset or stale for the comparisons below.)
  if [ -z "$TIMESTART" ]; then
    tstart=0
  else
    tstart=$(dispdate "$TIMESTART" -t)
  fi
  TIMEEND="$3"
  date=$(dispdate "$TIMEEND" "+%m/%d/%Y")
  tend=$(dispdate "$TIMEEND" -t)
  tlast=$tend
  founddatatable=""
  processdataline="false"

  # Put a space after every <td> so table cells stay separated.
  sed -e 's/<td>/<td> /g' "$OFILE" |
  # Remove all HTML tags.
  sed -n '/^$/!{s/<[^>]*>//g;p;}' |
  # TABs and &nbsp to spaces, &#176; to deg, squeeze runs of spaces,
  # strip leading spaces, drop blank lines.
  sed -e 's/\t/ /g' \
      -e 's/&nbsp;/ /g' \
      -e 's/&nbsp/ /g' \
      -e 's/&#176;/deg/g' \
      -e 's/&#176/deg/g' \
      -e 's/  */ /g' \
      -e 's/^  *//' \
      -e 's/^  *$//g' \
      -e '/^$/d' |
  while read -r line; do
    # Ignore everything before the header containing 'Tabular Listing:'.
    if [ -z "$founddatatable" ]; then
      case "$line" in
        *"Tabular Listing:"*) founddatatable="$line" ;;
      esac
    fi
    [ -n "$founddatatable" ] || continue

    # Data rows start with the time as 'HH:MM '.
    if printf '%s\n' "$line" | grep -q '^[0-9]*:[0-9][0-9] '; then
      processdataline="true"
      if [ "$quiet" = "false" ] && [ "$debug" -gt 0 ]; then echo "$line"; fi

      # Encode the row: field 0 is the time, the rest are values.
      cnt=0
      rm -f "$TMPFILE"
      # Split the row on spaces; globbing is off so a field such as "*"
      # stays literal.
      set -f
      for data in $line; do
        if [ "$cnt" -eq 0 ]; then
          time="$data:00"
          datetime="$date-$time"
          trpt=$(dispdate "$datetime" -t)
          # A time later than the previous row (or the poll end) belongs to
          # the day before.
          if [ "$trpt" -gt "$tlast" ]; then
            trpt=$((trpt-86400))
            date=$(dispdate "+%m/%d/%Y" -t "$trpt")
            datetime="$date-$time"
          fi
          tlast=$trpt
          # Do not encode a report older than the start time.
          if [ "$trpt" -lt "$tstart" ]; then break; fi
        else
          # Quality flag OK becomes 1; wind headings become degrees.
          if [ "$data" = "OK" ]; then data="1"; fi
          data=$(translate_wind_direction_heading "$data")
          printf '%s %s %s %s\n' "$REMOTETAG" "$cnt" "$data" "$datetime" >>"$TMPFILE"
        fi
        cnt=$((cnt+1))
      done
      set +f

      # File the buffered row unless filing is disabled.
      if [ -s "$TMPFILE" ] && [ "$nofile" = "false" ]; then
        $FILEDATACMD <"$TMPFILE"
      fi
      # Poll types 0 and 1 file the latest report only.
      if [ "$polltype" -lt 2 ]; then break; fi
      # Stop at the first report older than the start time.
      if [ "$trpt" -lt "$tstart" ]; then break; fi
    elif [ "$processdataline" = "false" ]; then
      # Header text ahead of the first data row is shown in debug mode.
      if [ "$quiet" = "false" ] && [ "$debug" -gt 0 ]; then echo "$line"; fi
    fi
  done
}

# Parse the CDEC RAWS web page held in $OFILE and file its data reports.
#
# Arguments:
#   $1 - station numid
#   $2 - poll start time; reports older than this are not filed
#        (empty means no lower limit)
#   $3 - poll end time
# Globals read: OFILE TMPFILE REMOTETAG FILEDATACMD MISSING nofile quiet debug
# Globals set:  STAID TIMESTART TIMEEND tstart tend tlast date
# Data rows start with 'MM/DD/YYYY HH:MM' followed by the values. Values equal
# to $MISSING are skipped; the rest are buffered in $TMPFILE as
# "<remote tag> <field#> <value> <date>-<time>" and filed in one go at the end.
process_cdec_raws_data () {
  STAID=$1
  TIMESTART="$2"
  # No start time means no lower limit. (This used to assign a misspelled
  # variable, leaving tstart unset or stale for the comparison below.)
  if [ -z "$TIMESTART" ]; then
    tstart=0
  else
    tstart=$(dispdate "$TIMESTART" -t)
  fi
  TIMEEND="$3"
  date=$(dispdate "$TIMEEND" "+%m/%d/%Y")
  tend=$(dispdate "$TIMEEND" -t)
  tlast=$tend
  # Parser trace messages follow the same rule as the other diagnostics:
  # shown only when not quiet and the debug level is above 0.
  cdec_trace="false"
  if [ "$quiet" = "false" ] && [ "$debug" -gt 0 ]; then cdec_trace="true"; fi
  founddatatable=""
  processdataline="false"
  rm -f "$TMPFILE"

  # Put a space after every <td> so table cells stay separated.
  sed -e 's/<td>/<td> /g' "$OFILE" |
  # Remove all HTML tags.
  sed -n '/^$/!{s/<[^>]*>//g;p;}' |
  # TABs and &nbsp to spaces, &#176; to deg, squeeze runs of spaces,
  # strip leading spaces, drop blank lines.
  sed -e 's/\t/ /g' \
      -e 's/&nbsp;/ /g' \
      -e 's/&nbsp/ /g' \
      -e 's/&#176;/deg/g' \
      -e 's/&#176/deg/g' \
      -e 's/  */ /g' \
      -e 's/^  *//' \
      -e 's/^  *$//g' \
      -e '/^$/d' |
  while read -r line; do
    if [ "$cdec_trace" = "true" ]; then echo "line: $line"; fi
    # Ignore everything before the header containing 'Date / Time'.
    if [ -z "$founddatatable" ]; then
      case "$line" in
        *"Date / Time"*) founddatatable="$line" ;;
      esac
    fi
    [ -n "$founddatatable" ] || continue
    if [ "$cdec_trace" = "true" ]; then echo "found data table"; fi

    # Data rows start with the date as 'MM/DD/YYYY '.
    if printf '%s\n' "$line" | grep -q '^[0-9][0-9]/[0-9][0-9]/[0-9][0-9][0-9][0-9] '; then
      processdataline="true"
      if [ "$cdec_trace" = "true" ]; then
        echo "$line"
        echo "line before datalist: $line"
      fi
      # Field -1 is the date, field 0 the time, fields 1.. are values.
      cnt=-1
      # Split the row on spaces; globbing is off so a field such as "*"
      # stays literal.
      set -f
      for data in $line; do
        if [ "$cdec_trace" = "true" ]; then echo "cnt= $cnt"; fi
        if [ "$cnt" -eq -1 ]; then
          date="$data"
        elif [ "$cnt" -eq 0 ]; then
          time="$data"
          datetime="$date-$time"
          trpt=$(dispdate "$datetime" -t)
          if [ "$cdec_trace" = "true" ]; then
            echo "trpt= $trpt"
            echo "tstart= $tstart"
          fi
          # Skip the rest of a row older than the start time.
          if [ "$trpt" -lt "$tstart" ]; then break; fi
        elif [ "$data" != "$MISSING" ]; then
          printf '%s %s %s %s\n' "$REMOTETAG" "$cnt" "$data" "$datetime" >>"$TMPFILE"
          if [ "$cdec_trace" = "true" ]; then
            echo "Submit value: $REMOTETAG $cnt $data $datetime"
          fi
        fi
        cnt=$((cnt+1))
      done
      set +f
    elif [ "$processdataline" = "false" ]; then
      # Header text ahead of the first data row is shown in debug mode.
      if [ "$cdec_trace" = "true" ]; then echo "$line"; fi
    fi
  done

  # File the buffered reports unless filing is disabled.
  if [ -s "$TMPFILE" ] && [ "$nofile" = "false" ]; then
    $FILEDATACMD <"$TMPFILE"
  fi
}

process_mesowest_raws_data () {
	# Station numid passed as first argument.
	STAID=$1
	# Poll start time passed as second argument.
	# Do not file data reports not newer than this time.
	TIMESTART="$2"
	echo "TIMESTART: $TIMESTART"
	if [ -z "$TIMESTART" ]; then tstrt=0;
	else tstart=$(dispdate "$TIMESTART" -t); fi
	# Poll end time passed as third argument.
	# Data lines contain time only so prefix time with poll ending time.
	# Data line time is greater than poll ending time then roll back 1 day.
	TIMEEND="$3"
	date=$(dispdate "$TIMEEND" "+%m/%d/%Y")
	tend=$(dispdate  "$TIMEEND" -t)
	tlast=$tend
	# Get the current time for later comparison
	currenttime=`date '+%H%M'`
	previousdate="false"
	# Translate file returned stripping out HTML, trim spaces, remove blank lines.
	founddatatable=""
	founddataline=""
	processdataline="false"
	rm -f $TMPFILE
	echo "OFILE= $OFILE"
	# Replace blank table cells with a placeholder, retain tags for now.
	# This is to accommodate blank cells in the input table where there is missing data.
	# could not find a way to do this with sed...
	perl -i -p0e 's/<td>\s*?<\/td>/<td>-999<\/td>/g' $OFILE
	cat $OFILE | \
	# Put a space in data table following <td> for data parsing.
	sed -e 's/<td>/<td> /g' | \
	# Change </table> to ENDTABLE for data parsing
	# sed -e 's/</table>/ENDTABLE/g' | \
	# Remove all HTML tags.
	sed -n '/^$/!{s/<[^>]*>//g;p;}' | \
	# Remove the lines associated with <a href="/html/help/qc.html"
	# and target="_blank"
	# In mesowest, these are on separates lines and are 
	# overlooked by the previous sed
	sed -e 's/<a href[^>]*//g' | \
	sed -e 's/target="_blank">//g' | \
	# Translate TABs and $nbsp to spaces.
	# Translate &#176; to deg.
	# Compress multiple spaces to one space.
	# Remove spaces at start of lines.
	# Empty lines filled with spaces only.
	# Remove blank lines.
	sed -e 's/\t/ /g' \
			-e 's/&nbsp;/ /g' \
			-e 's/&nbsp/ /g' \
			-e 's/&#176;/deg/g' \
			-e 's/&#176/deg/g' \
			-e 's/  */ /g' \
			-e 's/^  *//' \
			-e 's/^  *$//g' \
			-e '/^$/d' | \
	# Process trimmed output looking for data lines.
	while read line; do
	# Data header starts with string: 'Time'
		if [ -z "$founddatatable" ]; then
			founddatatable=$(echo $line | grep "Time")
		fi
		if [ -n "$founddatatable" ]; then
			# check for ENDTABLE and end loop if so
			foundendtable=$(echo $line | grep "ENDTABLE")
			if [ "$foundendtable" = "true" ]; then
				break;
			fi	

			# Data readout lines start with the time as 'hh:mm'
			foundtime=$(echo $line | grep "^[0-9]*:[0-9][0-9]")
			data=$(echo "$line" | sed -e 's/  */\n/')
			if [ -n "$foundtime" ]; then
				processdataline="true"
				if [ "$quiet" = "false" ]; then
					if [ "$debug" -gt 0 ]; then echo "$line"; fi
				fi
				# Translate data field space separators into newlines for parsing.
				# Encode data reports for filing.
				# First field is time.
				cnt=0

				# need to get date correct
				date=`date '+%m/%d/%Y'`
				time=$data
				datatime=$data
				datatime=`echo $datatime | sed 's/://g'`

				if [ "$previousdate" = "false" ]; then
					if [ "$datatime" -gt "$currenttime" ]; then
						previousdate="true"
						echo "Setting date to previous date!!!!! (datatime: $datatime, currenttime: $currenttime)"
					fi
				fi
				if [ "$previousdate" = "true" ]; then
					date=`date '+%m/%d/%Y' --date='yesterday'`
				fi

				datetime=$date"-"$time
				trpt=$(dispdate $datetime -t)
				echo "Comparing trpt: $trpt to tstart: $tstart"
				if [ "$trpt" -lt "$tstart" ]; then 
					break; 
				fi
			elif [ "$processdataline" = "true" ] && [ $data != "$MISSING" ]; then
				cnt=$((cnt+1))

                                alphanumeric=$(echo $data | grep "[a-zA-Z]*")
                                if [ "$alphanumeric" ]; then
                                        # Translate quality flag.
                                        if [ "$data" = "OK" ]; then 
						data="1"
                                        else
                                                # Translate wind heading to direction in degrees.
                                                data=$(translate_wind_direction_heading $data)
                                        fi
                                fi

				echo $REMOTETAG $cnt $data $datetime >>$TMPFILE
				echo "Sending to file: $REMOTETAG $cnt $data $datetime"
			else
				if [ "$processdataline" = "false" ]; then
					if [ "$quiet" = "false" ]; then
						if [ "$debug" -gt 0 ]; then echo "$line"; fi
					fi
				fi
			fi
		fi
	done
	# File data buffered in temporary file
	if [ -s "$TMPFILE" ]; then
		# File data reports unless disabled.
		if [ "$nofile" = "false" ]; then
			$FILEDATACMD <$TMPFILE
			echo "FILEDATACMD: $FILEDATACMD"
		fi
	fi
}

# Run the poll. Everything printed inside the subshell is displayed and
# appended to the log file by the tee at the end.
(
if [ "$quiet" = "false" ]; then
  echo "$(nstime) $PGNAME: $PGDESC started..."
  if [ "$debug" -gt 0 ]; then echo "$COMMANDLINE"; fi
fi

# An existing lock file blocks the poll unless -force was given.
locked="false"
if [ "$force" = "false" ] && [ -f "$LOCKFILE" ]; then
  echo "$(nstime) $PGNAME: $PGDESC locked!"
  locked="true"
fi

# Execute command if not locked.
if [ "$locked" = "false" ]; then
  echo "$(nstime) $COMMANDLINE" >"$LOCKFILE"

  # Poll all stations in the comma separated list, one per line.
  printf '%s\n' "$STAIDS" | tr ',' '\n' |
  while read -r STAID; do

    # The station ID is placed directly into SQL statements below, so only
    # plain numbers are accepted.
    case "$STAID" in
      ''|*[!0-9]*)
        if [ "$quiet" = "false" ]; then
          echo "Poll Station ID: $STAID - ERROR: Invalid station ID."
        fi
        continue
        ;;
    esac

    # Get station name, last polled time, timeout, remote tag, skip if not defined.
    STANAME=$(echo "select name from station where numid=$STAID;" | $PSQL -AtF,)
    STALASTTIMEPOLLED=$(echo "select last_time_polled from station where numid=$STAID;" | $PSQL -AtF,)
    TIMEOUT=$(echo "select connection_timeout from station where numid=$STAID;" | $PSQL -AtF,)
    REMOTETAG=$(echo "select remote_tag from station where numid=$STAID;" | $PSQL -AtF,)
    if [ "$quiet" = "false" ]; then
      echo "Poll Station ID: $STAID $STANAME"
    fi
    if [ -z "$REMOTETAG" ]; then
      if [ "$quiet" = "false" ]; then
        echo "Poll Station ID: $STAID $STANAME - ERROR: No remote station tag defined."
      fi
      continue
    fi

    # Use the default timeout when the station has none (empty or 0).
    TIMEOUT=${TIMEOUT:-$DEF_TIMEOUT}
    if [ "$TIMEOUT" = "0" ]; then TIMEOUT=$DEF_TIMEOUT; fi

    # Without a start time from the arguments, start at the station's last
    # polled time and size the time span to reach back that far.
    if [ -z "$TIMESTARTARG" ]; then
      TIMESTART="$STALASTTIMEPOLLED"
      if [ -n "$TIMESTART" ]; then
        tstrt=$(dispdate "$TIMESTART" -t)
        TIMESPAN=$((tend - $tstrt))
        if [ "$TIMESPAN" -le 0 ]; then
          TIMESPAN="$DEF_TIMESPAN"
        else
          TIMESPAN=$(echo "($TIMESPAN + 3599) / 3600" | bc)"hours"
        fi
      else
        # Station never polled: there is no time to measure from.
        TIMESPAN="$DEF_TIMESPAN"
      fi
    fi

    # Get poll line to use as data filing source, default to 1.
    SOURCE=$(echo "select line from station where numid=$STAID;" | $PSQL -AtF,)
    SOURCE=${SOURCE:-1}

    # Get web page URL for station, skip if not defined.
    # The URL template placeholders are replaced with the remote tag and the
    # poll time elements. The tag comes from the database, so characters
    # special to the sed replacement are escaped first.
    tagesc=$(printf '%s\n' "$REMOTETAG" | sed -e 's|[\\/&]|\\&|g')
    URL=$(echo "select comm_path from station where numid=$STAID;" | $PSQL -AtF, |
      sed \
        -e "s/_REMOTETAG_/$tagesc/" \
        -e "s/_TIMEEND_/$TIMEEND/" \
        -e "s/_DAY_/$DAY/" \
        -e "s/_MONTH_/$MONTH/" \
        -e "s/_YEAR_/$YEAR/" \
        -e "s/_HOUR_/$HOUR/" \
        -e "s/_TIMESPAN_/$TIMESPAN/")
    if [ -z "$URL" ]; then
      if [ "$quiet" = "false" ]; then
        echo "Poll Station ID: $STAID $STANAME - ERROR: No URL defined."
      fi
      continue
    fi

    # The data source is recognized by its name appearing in the URL.
    PROCESSCDECRAWSDATA="false"
    PROCESSNOAARAWSDATA="false"
    PROCESSMESOWESTRAWSDATA="false"
    case "$URL" in *cdec*) PROCESSCDECRAWSDATA="true" ;; esac
    case "$URL" in *noaa*) PROCESSNOAARAWSDATA="true" ;; esac
    case "$URL" in *mesowest*) PROCESSMESOWESTRAWSDATA="true" ;; esac

    # Make data filing command (expanded unquoted by the process functions).
    # Display data filed if not quiet.
    FILEDATACMD="starpt -l2 -i -S$SOURCE"
    if [ "$quiet" = "false" ]; then
      FILEDATACMD=$FILEDATACMD" -d$debug"
    else
      FILEDATACMD=$FILEDATACMD" -q"
    fi

    # Get web page from URL for processing.
    if [ "$quiet" = "false" ] && [ "$debug" -gt 0 ]; then
      echo "wget --no-check-certificate -q -O $OFILE $URL"
    fi
    rm -f "$OFILE"
    # URL is quoted: query strings contain characters such as ? that the
    # shell would otherwise try to expand as a file name pattern.
    timeout "$TIMEOUT" wget --no-check-certificate -q -O "$OFILE" "$URL"

    # Process web page buffer if not empty.
    if [ -s "$OFILE" ]; then
      if [ "$PROCESSCDECRAWSDATA" = "true" ]; then process_cdec_raws_data "$STAID" "$TIMESTART" "$TIMEEND"; fi
      if [ "$PROCESSNOAARAWSDATA" = "true" ]; then process_noaa_raws_data "$STAID" "$TIMESTART" "$TIMEEND"; fi
      if [ "$PROCESSMESOWESTRAWSDATA" = "true" ]; then process_mesowest_raws_data "$STAID" "$TIMESTART" "$TIMEEND"; fi

      # Update last time polled for THIS station only. Updating the whole
      # station list here would stamp stations not polled yet (so their poll
      # would start at the stamp just written and find nothing) as well as
      # stations whose download failed.
      args="-qAtF,"
      if [ "$debug" -gt 0 ]; then args="-aAtF,"; fi
      $PSQL $args << _EOF_
UPDATE station SET last_time_polled='$timestamp' WHERE numid = $STAID AND
(last_time_polled IS null OR last_time_polled<'$timestamp');
_EOF_
    elif [ "$quiet" = "false" ]; then
      echo "Poll Station ID: $STAID $STANAME - ERROR: No data received."
    fi
  done

  # Clean up
  rm -f "$OFILE" "$TMPFILE"

  # Unlock
  rm -f "$LOCKFILE"
fi

if [ "$quiet" = "false" ]; then
  echo "$(nstime) $PGNAME: $PGDESC finished."
fi
) | tee -a "$LOGFILE"
