File: jarg.php

File: jarg.php
Role: Application script
Content type: text/plain
Description: Program to search/display word(s) from the Jargon File (v 1.2.5)
Class: Jargon File Processor
Just a script to extract definitions from the Jargon File.
Author: tobozo
Last change: - Code optimizations and cleanup
- New jargon format compliance
Date: 18 years ago
Size: 18,882 bytes


<?php
/*
 * Program to search/display word(s) from the Jargon File (v 1.2.5)
 * still doesn't make coffee...
 *
 * Copyleft (c+) 2007 tobozo <[email protected]>
 *
 * You may copy and use this program freely as long as this
 * notice is left intact. This program is provided "AS IS"
 * without warranty of any kind. The copyright owner may not
 * be held liable for any damages, direct or consequential,
 * which may result from the the use of this program.
 */

/**
 * Searches a flat-text Jargon File and renders matching definitions as HTML.
 *
 * The constructor reads the request ($_GET['op'], $_GET['l']), verifies the
 * data files and loads the keyword index; out() then prints either the search
 * form, the list of keywords for one letter, or the matching definition(s).
 */
class jargon
{
    public $VER       = "1.2.5";       // version of this script
    public $JAR       = "4.4.7";       // version of the jargon text file (retrieved from $this->JARG_SRC)
    public $JARG_FILE = "jarg447.txt"; // jargon file (you know where to get it eh?)
    public $JARG_IDX  = "jarg447.idx"; // the index file (must be writable when building)
    public $JARG_SRC  = "";
    public $lock      = "yes";         // RebuildIndex only runs while this is "no"; set it back to "yes" once rebuilt
    public $showrand  = true;          // show random word in search Form
    public $showindex = true;          // show list of letters in search Form
    public $showlinks = true;          // show internal hyperlinks for {definitions}
    public $showExtra = false;         // display [?] link for every keyword (similar term search)
    public $self      = "";            // file name of this script, used to build links
    public $index     = false;         // false until GetIndex() loads the keyword list
    public $jargon    = false;         // false until LoadJargon() loads the text lines
    public $footer    = "";
    public $output    = "";
    public $result    = array();       // lower-cased keywords matched by ExtractWord()
    public $op        = "";            // search term (or action name) taken from the request
    public $l         = "";            // single letter requested by op=List
    public $mode      = "";            // 'search', 'list' or '' when no op was given
    public $strict    = "exact";       // "exact" or "no" (similar-term search)
    public $definition = "";           // text accumulated by ExtractWord(), {keywords} turned into links
    public $word      = array();
    // array(
    //   $keyword =>
    //     "Keyword" => $keyword,  // the keyword itself
    //     "Def"     => $text,     // the full text definition
    //     "Html"    => $html,     // the full html definition
    // )

    /**
     * Reads the request, checks the data files and loads the index.
     *
     * Terminates the script (die/exit) when a data file is missing, when
     * op=show_source is requested, or after an index rebuild.
     *
     * @param mixed $strict "no" or any falsy value selects similar-term
     *                      search; anything else selects exact matching.
     */
    public function __construct($strict = '')
    {
        // name of this document
        if ($this->self == "") {
            $this->self = basename(__FILE__);
        }

        // Read the request BEFORE checking the index file: the check below
        // must be able to see op=RebuildIndex, otherwise a missing index
        // could never be rebuilt.
        $op = $this->requestParam('op');
        $l  = $this->requestParam('l');
        if ($op != '') {
            $this->mode = 'search';
            $this->op   = trim($op);
        }

        // check for Jargon file
        if (!file_exists($this->JARG_FILE)) {
            die(" File $this->JARG_FILE does not exist...\n\n".
                " Try to edit the file '$this->self' (this file) and\n".
                " modify the value of '\$this->JARG_FILE' to match\n".
                " the filename of your Jargon File.\n\n".
                " Aborting ...\n\n<br>");
        }

        // check for Index file
        if (!file_exists($this->JARG_IDX) && ($this->op != "RebuildIndex")) {
            die(" Index File $this->JARG_IDX does not exist...\n\n".
                " Try to create a file to store the index and\n".
                " modify the value of '\$this->JARG_IDX' to match\n".
                " the name of the created file.\n\n".
                " Also you may try to edit this script and set the \n".
                " value of \$lock to 'no', then <a href='$this->self?op=RebuildIndex'>Rebuild the index</a> \n\n".
                " Aborting ...\n\n<br>");
        }

        // search method
        $this->strict = (!$strict || $strict == "no") ? "no" : "exact";

        // footer and link to search page
        if ($this->showlinks) {
            $this->footer = "\n\n<a href='$this->self' title=\"Back to search screen\">Search</a>";
        }

        // $op = "List": are we listing keywords for a specific letter ?
        if ($op == "List" && $l != "") {
            $this->mode = 'list';
            $this->l    = substr(trim(strtolower($l)), 0, 1);
        }

        if ($op == "show_source") {
            show_source($this->self);
            exit(0);
        }

        // $op = rebuild index ? (only allowed while the lock is open)
        if ($op == "RebuildIndex" && $this->lock == "no") {
            $this->RebuildIndex();
        }

        $this->getIndex();
    }

    /**
     * Returns a GET parameter as a string, or '' when absent or not a string.
     *
     * Slashes are stripped only on PHP versions that still have magic quotes;
     * on newer versions stripping would corrupt terms containing a backslash.
     */
    private function requestParam($name)
    {
        if (!isset($_GET[$name]) || !is_string($_GET[$name])) {
            return '';
        }
        $value = $_GET[$name];
        if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc()) {
            $value = stripslashes($value);
        }
        return $value;
    }

    /**
     * Case-insensitive literal substring test used for "similar terms".
     *
     * Replaces the removed eregi(); the search term is treated as plain text,
     * not as a regular expression.
     */
    private function containsTerm($needle, $haystack)
    {
        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }
        return stripos((string) $haystack, $needle) !== false;
    }

    /**
     * Builds the HTML link(s) that replace one {keyword} reference.
     */
    private function buildTermLink($term)
    {
        $link = sprintf('<a href="%s?op=%s&strict=exact" title="%s">%s</a>',
            $this->self,
            urlencode($term),
            "Exact Term Search",
            htmlentities($term)
        );
        if ($this->showExtra) {
            $link .= sprintf('<a href="%s?op=%s&strict=no" title="%s"><font size="-1"><u><sup>[?]</sup></u></font></a>',
                $this->self,
                urlencode($term),
                "Similar Terms"
            );
        }
        return $link;
    }

    /**
     * Extracts the definition(s) matching $this->op from the jargon text.
     *
     * Scans from the line "The Jargon Lexicon" up to the line "Appendices".
     * Matching lines are appended to $this->word[<keyword>] ('Def' and 'Html')
     * and to $this->definition; matched keywords are recorded in $this->result.
     *
     * @return string|false the accumulated $this->definition, or false when
     *                      $this->op is empty
     */
    public function ExtractWord()
    {
        if ((string) $this->op === '') {
            return false;
        }
        $this->LoadJargon();
        $this->getIndex();

        $needle          = strtolower($this->op);
        $keyword         = '';
        $match           = false; // carried over to the next line for the index-junk check
        $inDefinition    = false; // true once a ":keyword:" line has been seen
        $skippedIntro    = false;
        $skippedAppendix = false;
        $subwords        = array(); // '{term}' as found in the raw text
        $subwordsHtml    = array(); // '{term}' as found in the escaped text
        $sublinks        = array(); // replacement '{<a ...>term</a>}'
        $total           = count($this->jargon);

        for ($lineNum = 0; $lineNum < $total; $lineNum++) {
            $bfr = $this->jargon[$lineNum];

            // jargon definitions start at the lexicon heading
            if (!$skippedIntro) {
                if (trim($bfr) != "The Jargon Lexicon") {
                    continue;
                }
                $skippedIntro = true;
            }

            if ($skippedAppendix || $bfr === "Appendices") {
                // definitions end here
                $skippedAppendix = true;
                if ($this->strict == 'exact') {
                    return $this->definition;
                }
                continue;
            }

            // since Jarg4.4.5, index is also inside the jargon.txt
            // (signature is <letter><lf><lf><keyword>)
            if ($inDefinition && $match
                && strlen($bfr) == 1 && trim($bfr) != ''
                && isset($this->jargon[$lineNum + 2])
                && $this->IsInTheIndex($this->jargon[$lineNum + 2])
            ) {
                $start = $lineNum + 3;
                // skip index junk after that, without running past the file
                while ($start < $total && $this->IsInTheIndex($this->jargon[$start])) {
                    $start++;
                }
                if ($this->strict == 'exact') {
                    return $this->definition;
                }
                $lineNum = $start;
                continue;
            }

            // maybe keyword line ?
            if (substr($bfr, 0, 1) === ':' && preg_match("/:([^:]+):/", $bfr, $matches)) {
                $keyword      = $matches[1]; // extract keyword name
                $inDefinition = true;        // parser in acquire mode
            }

            // does the current keyword match the search term ?
            if ($this->strict == 'exact') {
                $match = ($needle === strtolower($keyword));
            } else {
                $match = $this->containsTerm($needle, $keyword);
            }
            if (!$match || !$inDefinition) {
                continue;
            }

            $key = strtolower($keyword);
            $this->result[$key] = $key;

            // collect links for the {linked keywords} on this line, if any
            if (preg_match_all("/{([^}]+)}/", $bfr, $found, PREG_SET_ORDER)) {
                foreach ($found as $ar) {
                    $term  = $ar[1];
                    $token = '{'.$term.'}';
                    if (trim($term) == ''
                        || in_array($token, $subwords, true)
                        || !in_array($term, $this->index, true)
                    ) {
                        continue;
                    }
                    $subwords[]     = $token;
                    $sublinks[]     = '{'.$this->buildTermLink($term).'}';
                    $subwordsHtml[] = '{'.htmlentities($term).'}';
                }
            }

            if (!isset($this->word[$key])) {
                $this->word[$key] = array('Keyword' => $keyword, 'Def' => '', 'Html' => '');
            }
            $bfr = $bfr."\n";
            $this->word[$key]['Keyword'] = $keyword;
            $this->word[$key]['Def']    .= $bfr;
            $this->word[$key]['Html']   .= str_replace($subwordsHtml, $sublinks, htmlentities($bfr));
            $this->definition           .= str_replace($subwords, $sublinks, $bfr);
        }

        return $this->definition;
    }

    /**
     * Appends the search form, the letter index and a random term to
     * $this->output.
     *
     * Showing a random term overwrites $this->op and forces exact matching.
     *
     * @return string the accumulated output
     */
    public function getForm()
    {
        $this->output .= "<form action='$this->self' method='get'>".
            "<div align='center'><pre>".
            "/----------------------------------------------------------------------------\\"."\n".
            "| Flat Text File Version $this->JAR |\n".
            "| Jargon File search tool $this->VER by tobozo |\n".
            "| copyleft (c+) 07-feb-2007 <a href='$this->self?op=show_source'>View source</a> |\n".
            "\----------------------------------------------------------------------------/\n\n".
            "</pre></div>".
            "<table border='1' cellspacing='1' cellpadding='5' align='center'><tr>".
            "<Td align='center'> Search : <input type='text' name='op' size=5>".
            "<input type='submit' value='go!'> \n<br /> Exact Term : ".
            "<input type='checkbox' name='strict' CHECKED value='exact'>".
            "</td></tr><tr><Td align=center>";

        /* display list of existing letters in index */
        if ($this->showindex) {
            $this->ShowLettersFromIndex();
        }

        $this->output .= "</font></td></tr></table>".
            "</form>";

        /* display random term */
        if ($this->showrand) {
            $this->getIndex();
            $max = count($this->index);
            $this->output .= "<table border='0' cellspacing='0' cellpadding='0' align='center'>".
                "<tr><td><pre><br />\nFound $max Terms \n\nRandom Term : \n\n";
            $this->strict = "exact";
            if ($max > 0) {
                // valid offsets are 0 .. $max-1
                $this->op      = $this->index[mt_rand(0, $max - 1)];
                $this->output .= $this->ExtractWord();
            }
            $this->output .= "</pre></td></tr></table>";
        }

        return $this->output;
    }

    /**
     * Appends a link for every indexed word starting with the given letter.
     *
     * @param string $l only the first character (after trimming) is used
     * @return string|false the accumulated output, or false when $l is empty
     */
    public function GetWordsFrom($l)
    {
        $l = substr(trim((string) $l), 0, 1);
        if ($l == "") {
            return false;
        }
        $this->getIndex();

        $letter = strtolower($l);
        $hits   = 0;
        foreach ($this->index as $word) {
            if ($letter === strtolower(substr($word, 0, 1))) {
                $this->output .= sprintf('<a href="%s?op=%s&strict=exact">%s</a>'."\n",
                    $this->self,
                    urlencode($word),
                    htmlentities($word)
                );
                $hits++;
            }
        }

        if ($hits == 0) {
            $this->output .= sprintf("Sorry, None of the words in the file %s starts with the character '<b>%s</b>'",
                htmlentities($this->JARG_IDX),
                htmlentities($l)
            );
        }

        return $this->output;
    }

    /**
     * Appends one link per distinct first letter found in the index.
     *
     * @return string the accumulated output
     */
    public function ShowLettersFromIndex()
    {
        $this->getIndex();

        $links = array();
        foreach ($this->index as $keyword) {
            $letter = strtolower(substr($keyword, 0, 1));
            if ($letter == '') {
                continue; // blank index entry, nothing to link to
            }
            $links[$letter] = sprintf('<a class=letter href="%s?op=List&l=%s">%s</a>',
                $this->self,
                urlencode($letter),
                htmlentities($letter)
            );
        }
        sort($links);
        $this->output .= implode("", $links);

        return $this->output;
    }

    /**
     * Tells whether $op is an indexed keyword, ignoring case and
     * surrounding whitespace of $op.
     *
     * @return bool
     */
    public function IsInTheIndex($op)
    {
        $op = trim(strtolower((string) $op));
        $this->getIndex();

        // try the easy way
        if (in_array($op, $this->index, true)) {
            return true;
        }
        // try the insensitive way
        foreach ($this->index as $word) {
            if ($op === strtolower($word)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether any indexed keyword contains $op (case-insensitive).
     *
     * @return bool
     */
    public function MatchesSimilarTerms($op)
    {
        $op = trim(strtolower((string) $op));
        $this->getIndex();

        foreach ($this->index as $keyword) {
            if ($this->containsTerm($op, $keyword)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Loads the index file into $this->index (one keyword per line) unless it
     * is already loaded. Terminates the script when the file is unreadable or
     * empty.
     */
    public function GetIndex()
    {
        if (is_array($this->index)) {
            return;
        }
        $j = file_get_contents($this->JARG_IDX);
        if ($j === false || $j === '') {
            die("Unable to open $this->JARG_IDX file\n".
                "You may try edit this script and \n".
                "set the value of \$lock to 'no', \n".
                "then <a href='$this->self?op=RebuildIndex'>Rebuild the index</a>");
        }
        // tolerate CRLF files and drop blank lines (e.g. a trailing newline)
        $entries     = explode("\n", str_replace("\r\n", "\n", $j));
        $this->index = array_values(array_filter($entries, 'strlen'));
    }

    /**
     * Rebuilds the index file from the ":keyword:" lines of the jargon file,
     * reports progress on stdout and terminates the script.
     */
    public function RebuildIndex()
    {
        echo "Rebuilding Index ...<br>";
        $j = file_get_contents($this->JARG_FILE);
        if ($j === false || $j === '') {
            die("Unable to open file $this->JARG_FILE");
        }
        echo "Reading $this->JARG_FILE content....<br>";

        // always start from an empty list so a previously loaded index is
        // not appended to
        $words = array();
        preg_match_all("/\n:([^\n|:]+):/", $j, $matches, PREG_SET_ORDER);
        foreach ($matches as $word) {
            if ($word[1] == trim($word[1])) {
                $words[] = $word[1];
                echo "Inserting word <b>$word[1]</b><br>";
            }
        }

        if (count($words) == 0) {
            die("I was unable to create an index from your file.\n".
                "Please use any text version from your favourite source or from ".$this->JARG_SRC.".\n");
        }

        $this->index = $words;
        $listing     = implode("\n", $words);
        $failure     = "Unable to create $this->JARG_IDX file\n".
            "You may try to chmod the file as r/w and ".
            "<a href='$this->self?op=RebuildIndex'>try again</a><br>".
            "Alternatively you can create manually the file and paste".
            "the followind index : <br><br>".$listing;

        $q = @fopen($this->JARG_IDX, "w");
        if ($q === false) {
            die($failure);
        }
        $written = fwrite($q, $listing);
        fclose($q);
        // do not report success when the index was not fully written
        if ($written === false || $written < strlen($listing)) {
            die($failure);
        }

        echo "Index file rebuilt as $this->JARG_IDX";
        exit(0);
    }

    /**
     * Loads the jargon file into $this->jargon as an array of lines unless it
     * is already loaded. Terminates the script when the file is unreadable or
     * empty.
     */
    public function LoadJargon()
    {
        if (is_array($this->jargon)) {
            return;
        }
        $j = file_get_contents($this->JARG_FILE);
        if ($j === false || $j === '') {
            die("Unable to open file $this->JARG_FILE");
        }
        // normalise CRLF so exact line comparisons also work on such files
        $this->jargon = explode("\n", str_replace("\r\n", "\n", $j));
    }

    /**
     * Builds the page body for the current request and writes it to stdout:
     * the letter listing in 'list' mode, the search result when a term was
     * given, the search form otherwise.
     */
    public function out()
    {
        if ($this->mode == "list") {
            if ((string) $this->l !== '') {
                echo "Alphabetical Listing\n\n";
                $this->output = $this->GetWordsFrom($this->l);
            }
        } elseif ((string) $this->op !== '') {
            $this->renderSearch();
        } else {
            // $this->op is empty -> display form
            $this->getForm();
        }

        echo $this->output;
    }

    /**
     * Appends the result of searching for $this->op to $this->output.
     */
    private function renderSearch()
    {
        if (!$this->IsInTheIndex($this->op)) {
            if (!$this->MatchesSimilarTerms($this->op)) {
                // no similar term found, do not prompt user
                $this->output .= "\n ... Term'<b>".htmlentities($this->op)."</b>' not found.\n\n";
                $this->output .= $this->footer;
                return;
            }
            if ($this->strict == "exact") {
                // found something but do not search yet, prompt user instead;
                // the term is user input, so it is URL-encoded inside the link
                $this->output .= "\n Term'<b>".htmlentities($this->op)."</b>' not found.\n";
                $this->output .= " <a href='$this->self?op=".urlencode($this->op)."&strict=no'>Search similar terms</a>.\n\n";
                $this->output .= $this->footer;
                return;
            }
            // found something, similar match asked, process
            $this->output .= "\n ... Fetching similar terms ..\n";
            $this->strict  = "no";
        }

        $this->ExtractWord();

        if (count($this->result) > 1) {
            $this->output .= " ... Found ".count($this->result)." match(es) ...\n\n";
        }
        foreach ($this->result as $keyword) {
            if (!isset($this->word[$keyword])) {
                continue;
            }
            if ($this->showlinks) {
                $this->output .= '<br>'.$this->word[$keyword]['Html'];
            } else {
                $this->output .= "\n".$this->word[$keyword]['Def'];
            }
        }
    }
} // end class
?><html>
<head>
<title>Jargon File Processor ... Copyleft(c+) 2001 [email protected]</title>
</head>
<style>
A { text-decoration:none; }
A:link { text-decoration:none; }
A:visited { text-decoration:none; }
A:active { text-decoration:none; }
A:hover { text-decoration:underline; background:black; color:white;}
A.letter { text-decoration:none; font-size:12px; }
A.letter:link { text-decoration:none; }
A.letter:visited { text-decoration:none; }
A.letter:active { text-decoration:none; }
A.letter:hover { text-decoration:underline; background:black; color:white; }
</style>
<body>
<pre>
<?php
// register_globals no longer exists: take the search mode from the query string
$strict = (isset($_GET['strict']) && is_string($_GET['strict'])) ? $_GET['strict'] : '';
$j = new jargon($strict);
$j->out();
?>
</pre>
</body>
</html>