#!/usr/bin/perl

use CGI qw(:standard);
use LWP::UserAgent;
use URI::Escape;
use HTTP::Request;
use HTTP::Response;
use HTTP::Cookies;

# Request parameters, read through CGI.pm's param():
#   link      - optional path/query fragment of the upstream page to show
#   phpsessid - session id that is forwarded upstream for "similar" lookups
my $pagequery = param('link');
my $sessionid = param('phpsessid');

# Upstream page that is fetched and re-served by default.
# Earlier alternative: "http://www.pangaea.de/PangaVista?query=censor"
$url_to_fetch = "http://www.pangaea.de/search";

# Links whose 'link' parameter mentions "similar" are served by the
# PangaVista endpoint instead and carry the session id along.
if (defined $pagequery && $pagequery =~ /similar/) {
	# The session id comes straight from the request, so it is escaped to
	# keep characters such as '&' or '#' from altering the upstream URL.
	my $escaped_session = defined $sessionid ? uri_escape($sessionid) : '';
	$url_to_fetch = "http://www.pangaea.de/PangaVista".$pagequery."&phpsessid=".$escaped_session;
}

# Address of this proxy script; fix_links() points rewritten search links here.
$new_url_on_your_site   = "http://www.censor.name/pagev2/cgi-bin/showdata.pl";

# Markers that delimit the part of the upstream page that is kept by fetch_it().
# Earlier start marker: 'function showMap() {'
$first_text_delimmiter = '<!-- ************************************* Content ********************************************************* -->';

# Earlier end marker: '<!-- WFS INFO:'
$last_text_delimiter = '<!-- ************************************* Footer ********************************************************** -->';
$last_text_delimiter_2  = ''; # Another option in case some pages are different

# Search => replacement pairs applied to the fetched content by replacements().
# The search side is interpolated into s/// as a regular expression (matched
# case-insensitively), so '.' and '?' in it are metacharacters: the trailing
# '?' of the preparemap entry makes the preceding 'p' optional and a literal
# '?' that follows the match is left in place.
# A trailing comma after the last pair would be harmless in Perl.
%REPLACEMENTS = (
	"/PICS/PanLogo.gif" => "../../../uploads/pics/Logo_web_01.jpg",
	"/pangaea.css" => "pangea.css",
	"<img src=\"/PICS/PanText.gif\" width=107 height=26 align=absmiddle>" => "",
	"/pvmap/" => "http://www.pangaea.de/pvmap/",
	"/help/help.php/PangaVista/index.html" => "http://www.pangaea.de/help/help.php/PangaVista/index.html",
	"/javaclient/windrose.gif" => "http://www.pangaea.de/javaclient/windrose.gif",
	"form action=\"/search\"" => "form action=\"http://www.pangaea.de/search\"",
	"http://doi.pangaea.de/" => "showdatadetail.pl?dataurl=",
	"/shared/map/preparemap.php?" => "http://www.pangaea.de/shared/map/preparemap.php"
);


# Emit the CGI response header; the second "\n" is the blank line that ends
# the header section.
print "Content-type: text/html\n\n";               # HTTP header: the body is HTML

# Static page prologue: doctype, <head> (stylesheet link, charset, title and
# helper JavaScript) and the top of <body>. Everything between the quotes is
# sent to the browser verbatim, so whitespace inside the string is output too.
# The </body> and </html> tags are printed later, after the fetched content.
# NOTE(review): the "Mark datapoints on map" link calls showMap(), which is not
# defined in this prologue - presumably it arrives with the fetched content;
# confirm.
print "

<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">

<html>

	<head>
		<link REL=\"stylesheet\" HREF=\"pangea.css\" TYPE=\"text/css\">
		<meta http-equiv=\"content-type\" content=\"text/html;charset=iso-8859-1\">
		<title>Censor data search</title>
		
			<script type=\"text/javascript\"><!--
	
	  		var mapWindow=null;
	  		window.name=\"PVMainWnd_1b8654f7cc97f4c2396e20da772df1c1\";
	
			function clearArea() {
			        with (document.forms.PangaVistaForm) {
				        minlat.value=\"\"; maxlat.value=\"\"; minlon.value=\"\"; maxlon.value=\"\";
				        }
			}
			
			function setArea(pminlat,pminlon,pmaxlat,pmaxlon) {
			        with (document.forms.PangaVistaForm) {
				        maxlat.value=pmaxlat;
				        minlat.value=pminlat;
				        minlon.value=pminlon;
				        maxlon.value=pmaxlon;
				        }
			}
	
	  		function openHelp() {
	        	window.open('/help/help.php/PangaVista/index.html','pangaea_help','status=1,resizable=1,scrollbars=1,width=640,height=480');
	  		}
function resize()
   {
            dx = Math.min( 800, screen.availWidth  ); 
                 dy = Math.min(800, screen.availHeight );
                      x = Math.min( ( screen.availWidth - dx ) / 2, 40 );
                           y = Math.min( ( screen.availHeight - dy ) / 2, 40 );
                                
                                     window.moveTo( x, y );
                                          window.resizeTo( dx, dy );
                                             }

                                            // resize();
	  		//--></script>
	  		
    	</head>
  		<body>
  			<div align=\"center\">
  				<img src=\"http://www.censor.name/page/fileadmin/pics/CensorLogoGif.gif\" alt=\"\" border=\"0\" >
 			</div>
  			<a href=\"javascript:showMap()\">Mark datapoints on map</a><br><br>
  			<small><a href=\"http://www.pangaea.de\">Please visit the official Pangaea site for advanced search!</a></small>
  ";



# Main pipeline: decode the request, build the upstream URL, fetch the page,
# rewrite its links and asset paths, then print the result and close the
# document that the prologue opened.
# The subs are called with explicit empty argument lists; the bare "&name;"
# form would silently hand the caller's current @_ to each of them.
parse_form();
process_the_url();
fetch_it();
fix_links();
replacements();

print $content;
print "  
	</body>
  </html>";
  
 exit;
#-------------------------
# Fill the global %FROMWEB with the decoded request parameters.
#
# Up to CONTENT_LENGTH bytes of the request body are read from STDIN into the
# global $buffer, then the body and the QUERY_STRING environment variable are
# both parsed as urlencoded "name=value&name=value" data. The query string is
# processed last, so its values win over body values of the same name.
# Takes no arguments; the return value is not meaningful.
sub parse_form{
 $buffer = '';
 my $body_length = $ENV{'CONTENT_LENGTH'};
 # Only read when the server announced a usable, positive body length.
 if (defined $body_length && $body_length =~ /^\d+$/ && $body_length > 0) {
     read(STDIN, $buffer, $body_length);
 }
 $buffer = '' unless defined $buffer;

 _store_form_pairs($buffer);
 _store_form_pairs(defined $ENV{'QUERY_STRING'} ? $ENV{'QUERY_STRING'} : '');
}

# Decode one urlencoded string and store each name/value pair in %FROMWEB.
sub _store_form_pairs{
 my ($encoded) = @_;
 foreach my $chunk (split(/&/, $encoded)) {
     next if $chunk eq '';
     # The limit of 2 keeps any further '=' characters inside the value.
     my ($name, $value) = split(/=/, $chunk, 2);
     $value = '' unless defined $value;
     foreach my $part ($name, $value) {
         # '+' must become a space BEFORE percent-decoding; the other way
         # round an encoded literal plus (%2B) would end up as a space too.
         $part =~ tr/+/ /;
         $part =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
     }
     $FROMWEB{$name} = $value;
 }
}
#-------------------------
# Derive the final upstream URL from the decoded request.
#
# Reads  : %FROMWEB ('link' entry), $ENV{'QUERY_STRING'}, $url_to_fetch
# Writes : $processed_fetch_url always; $url_to_fetch only when a 'link'
#          parameter was supplied.
# With a link the result is "<url_to_fetch><link>?<query string>"; without one
# only $processed_fetch_url receives "<url_to_fetch>?<query string>" and
# $url_to_fetch itself is left untouched.
sub process_the_url{
 my $link  = $FROMWEB{'link'};
 my $query = $ENV{'QUERY_STRING'};
 if (!$link){
     $processed_fetch_url = join("?", $url_to_fetch, $query);
 }
 else{
     $processed_fetch_url = $url_to_fetch.$link."?".$query;
     $url_to_fetch = $processed_fetch_url;
 }
}
#-------------------------
# Return one User-Agent string picked at random from a fixed list.
# Takes no arguments; returns the chosen string unchanged.
sub choose_random_agent{
 my @agent_options = (
	"Mozilla/4.76 [en] (Windows NT 5.0; U)",
	"Mozilla/4.0 (compatible; MSIE 5.12; Mac_PowerPC)"
 );
 # rand(N) yields a value in [0, N); truncating it gives an index 0..N-1.
 # Using the element count (not the last index) makes every entry reachable.
 my $random_index = int(rand(scalar @agent_options));
 return $agent_options[$random_index];
}
#-------------------------
# Fetch $url_to_fetch and keep only the article part of the page.
#
# Reads  : $url_to_fetch, $first_text_delimmiter, $last_text_delimiter,
#          $last_text_delimiter_2
# Writes : $response (the HTTP response object) and $content (the extracted
#          HTML, flattened to a single line; empty when the request failed).
# Cookies are kept in 'cookies.txt' in the current working directory.
sub fetch_it{
 my $ua = LWP::UserAgent->new;
 $ua->cookie_jar(HTTP::Cookies->new(file=>'cookies.txt',autosave=>1,ignore_discard=>1));
 # Call the sub; a quoted "&choose_random_agent" would be sent literally.
 $ua->agent(choose_random_agent());
 my $req = HTTP::Request->new(GET => $url_to_fetch);

 $response = $ua->request($req);
 unless ($response->is_success) {
     # Do not slice delimiters out of an error body; log and show nothing.
     warn "fetch_it: request for $url_to_fetch failed: ".$response->status_line."\n";
     $content = '';
     return;
 }
 $content = $response->content();
 $content = '' unless defined $content;

 # Flatten the page to one line and squeeze pairs of spaces.
 $content = join(" ", split(/\n/, $content));
 $content =~ s/  / /sg;

 # Start at the first delimiter, or at the very beginning when it is absent.
 my $start_location = index($content, $first_text_delimmiter);
 $start_location = 0 if $start_location < 0;

 # Stop at the first end delimiter found after the start; empty or undefined
 # delimiters are skipped because index() "finds" them at the search origin.
 my $end_location = -1;
 foreach my $marker ($last_text_delimiter, $last_text_delimiter_2) {
     next unless defined $marker && length $marker;
     $end_location = index($content, $marker, $start_location);
     last if $end_location >= 0;
 }
 # No end delimiter: keep everything up to the end of the page.
 $end_location = length($content) if $end_location < 0;

 $content = substr($content, $start_location, $end_location - $start_location);
}
#-------------------------
# Point the upstream search links in $content at this proxy script.
#
# Every href="/search... (matched case-insensitively) is rewritten to
# href="<$new_url_on_your_site>?link=... so that following the link comes
# back through the proxy.
# NOTE(review): the '?' in the pattern is unescaped, so it is a quantifier on
# the 'h' of "search" and a literal '?' after the match is not consumed; the
# rewritten link therefore reads "...?link=?rest". Left as is on purpose -
# process_the_url() appends the 'link' value directly to the upstream URL.
sub fix_links{
  my $rewritten_prefix = "href=\"".$new_url_on_your_site."?link=";
  $content =~ s/href=\"\/search?/$rewritten_prefix/sgi;
}
#-------------------------
# Apply every entry of the global %REPLACEMENTS table to $content.
#
# Each hash key is interpolated into s/// as a regular expression (so regex
# metacharacters in the key are active) and matched case-insensitively; all
# occurrences are replaced by the corresponding hash value. Entries are
# applied in the hash's own key order.
sub replacements{
 my @patterns = keys(%REPLACEMENTS);
 for my $pattern (@patterns) {
     my $substitute = $REPLACEMENTS{$pattern};
     $content =~ s/$pattern/$substitute/sgi;
 }
}
