/*
csv2wp is a conversion tool that translates data in CSV format
into a table in the MediaWiki table format or into an HTML table.
Copyright (C) 2004, Daniel Kinzler
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
The GNU GPL is available online at https://www.gnu.org/licenses/gpl.html
*/
# Version string of the csv2wp tool.
define('VERSION', '0.1.1');
# Fetches one request parameter from $_REQUEST.
#   $key     - name of the parameter to look up
#   $def     - value returned when the parameter is absent or NULL
#   $slashes - handed on to adjust_slashes(): True asks for a
#              backslash-escaped value, False for an unescaped one
# Returns $def when the parameter is missing, otherwise the parameter
# value after it has been passed through adjust_slashes().
function get_post($key,$def=NULL,$slashes=False) {
	# Test the array element itself. Copying a missing element into a
	# local variable first raises an "undefined index/array key"
	# warning on every absent parameter.
	if (!isset($_REQUEST[$key])) return $def;
	return adjust_slashes($_REQUEST[$key],$slashes);
}
# Shorthand for adjust_slashes() with the "slashes" flag switched off,
# i.e. asks for the value without magic-quote escaping.
function unslashify($value) {
	$want_slashes= False;
	return adjust_slashes($value,$want_slashes);
}
# Brings a request value into the wanted escaping state.
#   $v       - the value as received from the request
#   $slashes - True: the result should be backslash-escaped
#              False: the result should be free of magic-quote escaping
# Returns $v with addslashes() applied when escaping is wanted but magic
# quotes are off, with stripslashes() applied when escaping is not wanted
# but magic quotes are on, and unchanged otherwise.
function adjust_slashes($v,$slashes) {
	# Magic quotes were removed in PHP 5.4 and get_magic_quotes_gpc()
	# itself was removed in PHP 8.0, so calling it unconditionally is a
	# fatal error there. Only ask on versions that can still have the
	# feature switched on; everywhere else it is off. The short-circuit
	# keeps the call from ever being evaluated on newer versions.
	$magic= (version_compare(PHP_VERSION,"5.4.0","<") && get_magic_quotes_gpc());

	if ($slashes and !$magic) $v= addslashes($v);
	else if (!$slashes and $magic) $v= stripslashes($v);
	return $v;
}
#------------------------------------------------------------------------
# Line terminator appended to every generated output line (CR LF).
define('EOL', "\r\n");
# Builds the opening line of the output table.
#   $attr - optional attribute text; only used for the wiki format
# Reads the global $format. For "html" the result is the literal below
# followed by EOL; for anything else it is "{|", optionally followed by
# " $attr |", then EOL.
# NOTE(review): the HTML literal contains nothing but a line break, so
# the opening table markup may have been lost from this copy of the
# file - confirm against the upstream source.
function table_head($attr=NULL) {
	global $format;

	if ($format=="html") {
		return "
".EOL;
	}

	$head= "{|";
	if ($attr) $head= $head." $attr |";
	return $head.EOL;
}
# Builds the closing line of the output table.
#   $attr - accepted for symmetry with table_head(); not used here
# Reads the global $format: for "html" the literal below plus EOL is
# returned, for every other format the wiki table terminator "|}" plus EOL.
# NOTE(review): the HTML literal is only a line break; closing markup may
# be missing from this copy of the file - confirm.
function table_foot($attr=NULL) {
	global $format;

	if ($format!="html") return "|}".EOL;
	return "
".EOL;
}
# Renders one table cell.
#   $text  - raw cell content
#   $break - replacement for every line break inside the cell
#   $attr  - optional cell attribute text; only used for the wiki format
# Reads the globals $format (output format) and $convert (how special
# characters are treated) and returns the cell as a string ending in EOL.
function compose_cell($text,$break,$attr=NULL) {
	global $format;
	global $convert;

	# Entity-escape according to the selected conversion mode.
	if ($convert=="html" or $convert=="wp") $text= htmlspecialchars($text,ENT_NOQUOTES);
	if ($convert=="xhtml") $text= htmlspecialchars($text,ENT_QUOTES);

	if ($convert=="wp" and $format=="wp") {
		# Wiki markup that is matched one pattern after the other, in
		# this order. The replacement '\0' puts each match back as it is.
		# NOTE(review): as written these substitutions change nothing;
		# wrapping markup around \0 may be missing from this copy - confirm.
		$wiki_markup= array(
			'(\[\[.*?\]\])',
			'(\{\{\{.*?\}\}\})',
			'(\{\{.*?\}\})',
			'(^\*)',
			'(^#)',
			'(^\{\|)',
			'($\|\})',
			"('''+)",
			"('')",
			'(~~~~+)',
			'(~~~)',
		);
		foreach ($wiki_markup as $pattern) {
			$text= preg_replace($pattern,'\0',$text);
		}
	}

	# Collapse every kind of line break into $break; two-character
	# sequences are handled before the single characters.
	$text= str_replace(array("\r\n","\n\r","\n","\r"),$break,$text);

	if ($format=="html") {
		return "\t\t
".htmlspecialchars($text)."
".EOL;
	}

	$cell= "|";
	if ($attr) $cell.= " $attr |".EOL;
	return $cell.$text.EOL;
}
# Renders one table row.
#   $row   - array of raw cell contents
#   $break - replacement for line breaks inside cells (see compose_cell)
#   $attr  - optional attribute text handed to every cell
# Reads the global $format. Returns the rendered cells, framed by the
# HTML row literals for "html" or followed by the wiki row separator
# "|----" for every other format.
# NOTE(review): the HTML literals hold only a tab and a line break; row
# markup may be missing from this copy of the file - confirm.
function compose_row($row,$break,$attr=NULL) {
	global $format;

	# Start from an empty string: appending with ".=" to a variable that
	# was never assigned raises an "undefined variable" warning.
	$s= "";

	if ($format=="html") $s.= "\t
".EOL;

	foreach ($row as $cell) {
		$s.= compose_cell($cell,$break,$attr);
	}

	if ($format=="html") $s.= "\t
".EOL;
	else $s.= "|----".EOL;

	return $s;
}
# Converts CSV text into a table.
#   $csv        - the CSV data as one string
#   $table_attr - optional attribute text handed to table_head()
#   $cell_attr  - optional attribute text handed to every cell
# Parser settings are read from globals: "separator" (field separator),
# "break" (replacement for line breaks inside cells), "quotes" (string of
# characters that may open a quoted field), "escape" (escape character or
# NULL for none) and $no_quote_escape (when set, a doubled quote inside a
# quoted field is NOT read as a literal quote).
# Returns table_head() + one compose_row() per non-empty input line +
# table_foot().
function csv2wp($csv,$table_attr=NULL,$cell_attr=NULL) {
	global $no_quote_escape;

	$sep= $GLOBALS["separator"];
	$break= $GLOBALS["break"];
	$q_char= (string)$GLOBALS["quotes"];
	$escape= $GLOBALS["escape"];

	$csv= (string)$csv; # a missing "csv" parameter arrives as NULL

	$nl_char= "\r\n";
	$ws_char= $nl_char."\t "; # line breaks plus tab and space

	$wp= "";
	$wp.= table_head($table_attr);

	$buffer= "";  # text of the cell being read
	$quote= "";   # quote character that opened the current quoted field
	$row= array();
	$cell= 0;     # index of the cell being read
	$esc= False;  # True when the previous character was the escape char

	$state= 0; #states: 0=break; 1=text; 2=quote; 3=quote-test;

	$i= 0;
	$len= strlen($csv);

	# The loop runs one step past the last character so that every state
	# gets to see EOF (represented by $ch===NULL) exactly once.
	while ($i<=$len) {
		if ($i>=$len) $ch= NULL; #EOF
		else $ch= substr($csv,$i,1);
		$i+= 1;

		if ($esc) {
			$esc= False;
			if ($ch!==NULL) {
				# escaped character: taken literally, whatever it is
				$buffer.= $ch;
				continue;
			}
			# The escape character was the last character of the input.
			# Fall through so the state machine still sees EOF and
			# flushes the pending row instead of dropping it.
		}

		switch ($state) {
			case 0: #break - skip line breaks and leading whitespace
				if ($ch!==NULL and strpos($ws_char,$ch)===False) {
					$row= array();
					$i-= 1; #pushback!
					$state= 1; #text
				}
				break;

			case 1: #text - unquoted cell content
				if ($ch===NULL or strpos($nl_char,$ch)!==False) {
					$row[$cell]= $buffer;
					$cell= 0;
					$buffer= "";
					$state= 0; #break
				}
				else if ($escape!==NULL and $ch===$escape) {
					$esc= True;
				}
				else if ($ch===$sep) {
					$row[$cell]= $buffer;
					$cell+= 1;
					$buffer= "";
					#next text
				}
				else if (strpos($q_char,$ch)!==False) {
					$quote= $ch;
					$state= 2; #quote
				}
				else $buffer.= $ch;
				break;

			case 2: #quote - inside a quoted field
				# EOF is tested first and all comparisons are strict.
				# With no escape character configured $escape is NULL,
				# and a loose NULL==NULL would mistake EOF for an escape
				# and silently lose the last row.
				if ($ch===NULL) { #Unexpected EOF inside Quote.
					$row[$cell]= $buffer;
					$cell= 0;
					$buffer= "";
					$state= 0; #break
				}
				else if ($ch===$quote) {
					if ($no_quote_escape) $state= 1; #text
					else $state= 3; #quote-test
				}
				else if ($escape!==NULL and $ch===$escape) {
					$esc= True;
				}
				else $buffer.= $ch;
				break;

			case 3: #quote-test - just saw a quote inside a quoted field
				if ($ch===NULL) { #EOF after closing Quote.
					$row[$cell]= $buffer;
					$cell= 0;
					$buffer= "";
					$state= 0; #break
				}
				else if ($ch===$quote) { #double-quote (literal quote)
					$buffer.= $quote;
					$state= 2; #quote
				}
				else { #the quote closed the field; re-read $ch as text
					$i-= 1; #pushback
					$state= 1; #text
				}
				break;

			default:
				$state= 0;
				$i-= 1;
		}

		# A completed row is emitted as soon as the parser is back in
		# the break state.
		if ($state==0 and $row) {
			$wp.= compose_row($row,$break,$cell_attr);
			$row= array();
		}
	}

	$wp.= table_foot();
	return $wp;
}
#------------------------------------------------------------------------
# Work out which conversion, if any, was requested.
$run= False;
# Make sure $format exists even when no conversion button was pressed.
# It is read below and inside the table functions, and reading it while
# undefined raises a warning. An already existing value is left alone.
if (!isset($format)) $format= NULL;
if (isset($_REQUEST["to_wp"])) {
	$format= "wp";
	$run= True;
}
else if (isset($_REQUEST["to_html"])) {
	$format= "html";
	$run= True;
}

# "download": send back the text supplied in the "wp" parameter and stop.
if (isset($_REQUEST["download"])) {
	# NOTE(review): the request data is echoed verbatim, and as text/html
	# when an HTML format is selected - this can reflect attacker-supplied
	# markup; confirm that this is acceptable for the deployment.
	if ($format=="html" or $format=="xhtml") header("Content-Type: text/html");
	else header("Content-Type: text/plain");
	print get_post("wp");
	exit;
}
# CSV input: taken from the "csv" request field, or replaced by the
# contents of an uploaded file when "upload" is set.
$csv= get_post("csv");

# $error is only assigned on failure below but is read later on, so give
# it a defined "no error" value up front.
if (!isset($error)) $error= NULL;

if (isset($_REQUEST["upload"])) {
	if (!isset($_FILES["file"])) $error= "No File to upload!";
	else {
		$f= $_FILES["file"];
		$enc= get_post("encoding");
		if ($f['error']==UPLOAD_ERR_OK) {
			$tmp= $f['tmp_name'];
			$csv= file_get_contents($tmp);
			if ($csv===False) $error= "Failed to load data from $tmp!";
			else {
				# Convert the upload to UTF-8 unless it is declared
				# to be UTF-8 already (or no encoding was given).
				if ($enc and $enc!="UTF-8") {
					$txt= iconv($enc,"UTF-8",$csv);
					if ($txt===False) $error= "Conversion from $enc failed!";
					else $csv= $txt;
				}
			}
		}
		else $error= "Upload failed!";
	}
}
#------------------------------------------------------------------------
# Parser and output options from the request, with their defaults.
$separator= get_post("separator");
$break= get_post("break");
$convert= get_post("convert");
$escape= get_post("escape");
$quotes= get_post("quotes");
$no_quote_escape= isset($_REQUEST["no-quote-escape"]);

if (!isset($separator)) $separator= ",";
if (!isset($break)) $break= "SPACE";
if (!isset($convert)) $convert= "html";
if (!isset($escape)) $escape= "NONE";
if (!isset($quotes)) $quotes= "\"";

# Remember the symbolic selections (before they are translated into real
# characters below), keyed as "<option>-<value>".
$checked= array(
	"separator-$separator" => "checked='checked'",
	"break-$break" => "checked='checked'",
	"convert-$convert" => "checked='checked'",
	"escape-$escape" => "checked='checked'",
	"quotes-$quotes" => "checked='checked'",
);
if ($no_quote_escape) $checked["no-quote-escape"]= "checked='checked'";

# Translate the symbolic values into the characters the parser works with;
# "OTHER" takes the value from the matching free-text field.
if ($separator == "TAB") $separator= "\t";
else if ($separator == "OTHER") $separator= get_post("other-separator");

if ($break == "SPACE") $break= " ";
else if ($break == "OTHER") $break= get_post("other-break");

if ($quotes == "NONE") $quotes= "";
# The custom quote characters belong in $quotes. Assigning them to $break
# left $quotes as the literal text "OTHER" and clobbered the line-break
# replacement.
else if ($quotes == "OTHER") $quotes= get_post("other-quotes");

if ($escape == "NONE") $escape= NULL;
else if ($escape == "OTHER") $escape= get_post("other-escape");

# Final fallbacks for empty or missing free-text values.
if (!isset($separator) or $separator=="") $separator= ",";
if (!isset($break) or $break=="") $break= " ";
if (!isset($escape) or $escape=="") $escape= NULL;
if (!isset($quotes)) $quotes= "\"";

$table_attr= get_post("table-attr");
$cell_attr= get_post("cell-attr");

$out_enc= get_post("output_encoding");
if (!$out_enc) $out_enc= "UTF-8";
# Run the conversion and, on success, send the result and stop.
if ($run) {
	$wp= csv2wp($csv,$table_attr,$cell_attr);

	if ($out_enc and $out_enc!="UTF-8") {
		$t= iconv("UTF-8",$out_enc."//TRANSLIT",$wp);
		# Check the iconv() result ($t), not the input: testing $wp
		# never detected a failure and replaced the output with False.
		if ($t===False) $error= "Conversion to $out_enc failed!";
		else $wp= $t;
	}

	# empty() also covers the case that $error was never assigned.
	if (empty($error)) {
		if (!empty($_REQUEST["binary"])) {
			header("Content-Type: application/octet-stream");
			$disp= "attachment";
		}
		else {
			if (!empty($_REQUEST["preview"]) and ($format=="html" or $format=="xhtml")) header("Content-Type: text/html; ".($out_enc?"charset=$out_enc":""));
			else header("Content-Type: text/plain; ".($out_enc?"charset=$out_enc":""));
			$disp= "";
		}

		if ($format=="html" or $format=="xhtml") $ext= ".html";
		else $ext= ".txt";

		$name= "csv2wp-".time().".$out_enc".$ext;

		header("Content-Description: $name");
		if ($disp) header("Content-Disposition: $disp; filename=\"$name\"");

		print $wp;
		# flush is a function; written without parentheses it is a bare
		# constant reference, which PHP 8 rejects with an Error.
		flush();
		exit;
	}
}

# Only reached when no result was sent: either nothing was converted or the
# conversion failed. In both cases the page below is shown, so declare its
# encoding in both cases.
header("Content-Type: text/html; charset=UTF-8");
#------------------------------------------------------------------------
?>
CSV Converter
CSV Converter
This page allows you to enter a table in CSV format and convert it to HTML or to the MediaWiki table format.
For more information, please see the csv2wp page at my wikipedia account.