<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-gb">
	<link rel="self" type="application/atom+xml" href="https://forum.eggheads.org/app.php/feed/topic/21288" />

	<title>egghelp/eggheads community</title>
	<subtitle>Discussion of eggdrop bots, shell accounts and tcl scripts.</subtitle>
	<link href="https://forum.eggheads.org/index.php" />
	<updated>2023-07-16T20:28:04-04:00</updated>

	<author><name><![CDATA[egghelp/eggheads community]]></name></author>
	<id>https://forum.eggheads.org/app.php/feed/topic/21288</id>

	<entry>
		<author><name><![CDATA[Arnold_X-P]]></name></author>
		<updated>2023-07-16T20:28:04-04:00</updated>

		<published>2023-07-16T20:28:04-04:00</published>
		<id>https://forum.eggheads.org/viewtopic.php?p=112056#p112056</id>
		<link href="https://forum.eggheads.org/viewtopic.php?p=112056#p112056"/>
		<title type="html"><![CDATA[Wiki Script Slow To Output To Channel]]></title>

		
		<content type="html" xml:base="https://forum.eggheads.org/viewtopic.php?p=112056#p112056"><![CDATA[
try this<div class="codebox"><p>Code: </p><pre><code># Requires Tcl 8.5+ and tcllib
# To enable you must .chanset #channel +wiki

package require http
package require htmlparse
package require tls

# Register the tls package as the socket command for https URLs on port 443.
# NOTE(review): -tls1 1 asks for TLS 1.0; confirm the server still accepts it.
http::register https 443 [list tls::socket -tls1 1]

namespace eval wiki {
   # Reply limits: lines sent per query and characters per line.
   variable max_lines 1
   variable max_chars 400
   variable url "https://en.wikipedia.org/wiki/"

   # Public triggers; all four run the same handler.
   bind pub -|- "!w" wiki::search
   bind pub -|- "!wiki" wiki::search
   bind pub -|- ".w" wiki::search
   bind pub -|- ".wiki" wiki::search

   #variable parse_regexp {(&lt;table class.*?&lt;p&gt;.*?&lt;/p&gt;.*?&lt;/table&gt;)??.*?&lt;p&gt;(.*?)&lt;/p&gt;\n&lt;table id="toc"}
   #variable parse_regexp {(?:&lt;/table&gt;)?.*?&lt;p&gt;(.*)((&lt;/ul&gt;)|(&lt;/p&gt;)).*?((&lt;table id="toc")|(&lt;h2&gt;)|(&lt;table id="disambigbox"))}
   variable parse_regexp {&lt;\/table&gt;.*?&lt;p&gt;(.*?)&lt;\/p&gt;}

   setudef flag wiki
}

# Fetch the page for term, or url when one is given, and return the list
# "url FETCHED_URL result TEXT". Raises an error on a non-200 status or when
# parse_regexp does not match the page.
proc wiki::fetch {term {url {}}} {
   if {$url != ""} {
      set token [http::geturl $url -timeout 10000]
   } else {
      # Whitespace in the term becomes underscores in the article path.
      set query [regsub -all -- {\s} $term "_"]
      set token [http::geturl ${wiki::url}${query} -timeout 10000]
   }
   set data [http::data $token]
   set ncode [http::ncode $token]
   set meta [http::meta $token]
   upvar #0 $token state
   set fetched_url $state(url)
   http::cleanup $token

   # debug
   putlog "Fetch! term: $term url: $url fetched: $fetched_url"
   set fid [open "w-debug.txt" w]
   puts $fid $data
   close $fid

   # Follow redirects
   if {[regexp -- {^3\d{2}$} $ncode]} {
      return [wiki::fetch $term [dict get $meta Location]]
   }
   if {$ncode != 200} {
      error "HTTP query failed ($ncode): $data: $meta"
   }

   # If page returns list of results, choose the first one and fetch that
   #if {[regexp -- {&lt;p&gt;.*?((may refer to:)|(in one of the following senses:))&lt;/p&gt;} $data]} {
   #   regexp -- {&lt;ul&gt;.*?&lt;li&gt;.*? title="(.*?)"&gt;.*?&lt;/li&gt;} $data -&gt; new_query
   #   return [wiki::fetch $new_query]
   #}
   if {![regexp -- $wiki::parse_regexp $data -&gt; out]} {
      error "Parse error"
   }
   return [list url $fetched_url result [wiki::sanitise $out]]
}

# Reduce an HTML fragment to one line of text: map HTML escapes, drop tags
# and bracketed text, and replace newlines with spaces.
proc wiki::sanitise {raw} {
   set raw [::htmlparse::mapEscapes $raw]
   # Remove some help links
   set raw [regsub -- {&lt;small class="metadata"&gt;.*?&lt;/small&gt;} $raw ""]
   set raw [regsub -all -- {&lt;(.*?)&gt;} $raw ""]
   set raw [regsub -all -- {\[.*?\]} $raw ""]
   set raw [regsub -all -- {\n} $raw " "]
   return $raw
}

# Handler for the public triggers: looks up argv and sends the result to chan.
# Returns without output unless the channel has the wiki flag set.
proc wiki::search {nick uhost hand chan argv} {
   if {![channel get $chan wiki]} { return }
   if {[string length $argv] == 0} {
      puthelp "PRIVMSG $chan :Please provide a term."
      return
   }
   set argv [string trim $argv]
   # Upper case first character
   set argv [string toupper [string index $argv 0]][string range $argv 1 end]
   if {[catch {wiki::fetch $argv} data]} {
      puthelp "PRIVMSG $chan :Error: $data"
      return
   }
   foreach line [wiki::split_line $wiki::max_chars [dict get $data result]] {
      # Stop after max_lines lines and point at the fetched URL instead.
      if {[incr count] &gt; $wiki::max_lines} {
         puthelp "PRIVMSG $chan :Output truncated. [dict get $data url]"
         break
      }
      putserv [encoding convertfrom utf-8 "PRIVMSG $chan :$line"]
   }
}

# by fedex
# Split str into a list of trimmed pieces, cutting at the last space found at
# or before each boundary; boundaries start at max characters.
proc wiki::split_line {max str} {
   set last [expr {[string length $str] -1}]
   set start 0
   set end [expr {$max -1}]
   set lines []
   while {$start &lt;= $last} {
      if {$last &gt;= $end} {
         set end [string last { } $str $end]
      }
      lappend lines [string trim [string range $str $start $end]]
      set start $end
      set end [expr {$start + $max}]
   }
   return $lines
}

putlog "wiki.tcl loaded"</code></pre></div><p>Statistics: Posted by <a href="https://forum.eggheads.org/memberlist.php?mode=viewprofile&amp;u=8327">Arnold_X-P</a> — Sun Jul 16, 2023 8:28 pm</p><hr />
]]></content>
	</entry>
	<entry>
		<author><name><![CDATA[Goga]]></name></author>
		<updated>2023-07-06T03:34:50-04:00</updated>

		<published>2023-07-06T03:34:50-04:00</published>
		<id>https://forum.eggheads.org/viewtopic.php?p=112043#p112043</id>
		<link href="https://forum.eggheads.org/viewtopic.php?p=112043#p112043"/>
		<title type="html"><![CDATA[Wiki Script Slow To Output To Channel]]></title>

		
		<content type="html" xml:base="https://forum.eggheads.org/viewtopic.php?p=112043#p112043"><![CDATA[
Hello Masters.<br>I tried the given wiki script and got the following error as a result.<blockquote class="uncited"><div>Error: error flushing "sock7": software caused connection abort</div></blockquote><p>Statistics: Posted by <a href="https://forum.eggheads.org/memberlist.php?mode=viewprofile&amp;u=12883">Goga</a> — Thu Jul 06, 2023 3:34 am</p><hr />
]]></content>
	</entry>
	<entry>
		<author><name><![CDATA[MMX]]></name></author>
		<updated>2023-06-10T15:30:04-04:00</updated>

		<published>2023-06-10T15:30:04-04:00</published>
		<id>https://forum.eggheads.org/viewtopic.php?p=111998#p111998</id>
		<link href="https://forum.eggheads.org/viewtopic.php?p=111998#p111998"/>
		<title type="html"><![CDATA[Wiki Script Slow To Output To Channel]]></title>

		
		<content type="html" xml:base="https://forum.eggheads.org/viewtopic.php?p=111998#p111998"><![CDATA[
There are some issues.<br><br>1. <strong class="text-strong">Wrong and unnecessary use of http::formatQuery</strong> - it is used to format request parameters and it takes an even number of arguments - a list of key-value pairs (you're giving it just 1 argument). Because of this, it raises an error (Incorrect number of arguments, must be an even number.) and cannot make an HTTP request at all. It is not needed in this case.<br>Find this line:<div class="codebox"><p>Code: </p><pre><code>set query [http::formatQuery [regsub -all -- {\s} $term "_"]]</code></pre></div>and make it look like this: <div class="codebox"><p>Code: </p><pre><code>set query [regsub -all -- {\s} $term "_"]</code></pre></div>2. <strong class="text-strong">REGEXP is slow.</strong> And your pattern is not optimized.<br>With your current regexp pattern it took ~<span style="text-decoration:underline">85 million steps</span> to find what it looks for (and it fails in some cases). During the search, 1 CPU core is at 100% load for about 5 seconds (in my case; it depends on the CPU). It may be better (and faster) to use [string first...] several times to find stuff from position to position, extract the info with [string range...] and then use simple regexp patterns to filter the content.<br><br>You can also try to change your regexp pattern to something simpler.<br>For example, changing it to the one below makes it respond in less than a second, but it may need further refining as it may not work on 100% of the pages on Wikipedia.<div class="codebox"><p>Code: </p><pre><code>variable parse_regexp {&lt;\/table&gt;.*?&lt;p&gt;(.*?)&lt;\/p&gt;}</code></pre></div><p>Statistics: Posted by <a href="https://forum.eggheads.org/memberlist.php?mode=viewprofile&amp;u=12967">MMX</a> — Sat Jun 10, 2023 3:30 pm</p><hr />
]]></content>
	</entry>
	<entry>
		<author><name><![CDATA[Dominatez]]></name></author>
		<updated>2023-06-09T22:35:44-04:00</updated>

		<published>2023-06-09T22:35:44-04:00</published>
		<id>https://forum.eggheads.org/viewtopic.php?p=111995#p111995</id>
		<link href="https://forum.eggheads.org/viewtopic.php?p=111995#p111995"/>
		<title type="html"><![CDATA[Wiki Script Slow To Output To Channel]]></title>

		
		<content type="html" xml:base="https://forum.eggheads.org/viewtopic.php?p=111995#p111995"><![CDATA[
Hi Guys,<br><br>Wiki script is taking 2 - 3 minutes to output to the screen after i input anything, and i am really at a loss as to why it is.<br><br>Any help would be greatly appreciated.<br><div class="codebox"><p>Code: </p><pre><code># Requires Tcl 8.5+ and tcllib
# To enable you must .chanset #channel +wiki

package require http
package require htmlparse
package require tls

# Register the tls package as the socket command for https URLs on port 443.
::http::register https 443 ::tls::socket

namespace eval wiki {
   # Reply limits: lines sent per query and characters per line.
   variable max_lines 1
   variable max_chars 400
   # Command that wiki::search invokes to send every reply.
   variable output_cmd "putserv"
   variable url "https://en.wikipedia.org/wiki/"

   # Public triggers; both run the same handler.
   bind pub -|- "!w" wiki::search
   bind pub -|- "!wiki" wiki::search

   #variable parse_regexp {(&lt;table class.*?&lt;p&gt;.*?&lt;/p&gt;.*?&lt;/table&gt;)??.*?&lt;p&gt;(.*?)&lt;/p&gt;\n&lt;table id="toc"}
   variable parse_regexp {(?:&lt;/table&gt;)?.*?&lt;p&gt;(.*)((&lt;/ul&gt;)|(&lt;/p&gt;)).*?((&lt;table id="toc")|(&lt;h2&gt;)|(&lt;table id="disambigbox"))}

   setudef flag wiki
}

# Fetch the page for term, or url when one is given, and return the list
# "url FETCHED_URL result TEXT". Raises an error on a non-200 status or when
# parse_regexp does not match the page.
proc wiki::fetch {term {url {}}} {
   if {$url != ""} {
      set token [http::geturl $url -timeout 10000]
   } else {
      set query [http::formatQuery [regsub -all -- {\s} $term "_"]]
      set token [http::geturl ${wiki::url}${query} -timeout 10000]
   }
   set data [http::data $token]
   set ncode [http::ncode $token]
   set meta [http::meta $token]
   upvar #0 $token state
   set fetched_url $state(url)
   http::cleanup $token

   # debug
   putlog "Fetch! term: $term url: $url fetched: $fetched_url"
   set fid [open "w-debug.txt" w]
   puts $fid $data
   close $fid

   # Follow redirects
   if {[regexp -- {^3\d{2}$} $ncode]} {
      return [wiki::fetch $term [dict get $meta Location]]
   }
   if {$ncode != 200} {
      error "HTTP query failed ($ncode): $data: $meta"
   }

   # If page returns list of results, choose the first one and fetch that
   #if {[regexp -- {&lt;p&gt;.*?((may refer to:)|(in one of the following senses:))&lt;/p&gt;} $data]} {
   #regexp -- {&lt;ul&gt;.*?&lt;li&gt;.*? title="(.*?)"&gt;.*?&lt;/li&gt;} $data -&gt; new_query
   #return [wiki::fetch $new_query]
   #}
   if {![regexp -- $wiki::parse_regexp $data -&gt; out]} {
      error "Parse error"
   }
   return [list url $fetched_url result [wiki::sanitise $out]]
}

# Reduce an HTML fragment to one line of text: map HTML escapes, drop tags
# and bracketed text, and replace newlines with spaces.
proc wiki::sanitise {raw} {
   set raw [::htmlparse::mapEscapes $raw]
   # Remove some help links
   set raw [regsub -- {&lt;small class="metadata"&gt;.*?&lt;/small&gt;} $raw ""]
   set raw [regsub -all -- {&lt;(.*?)&gt;} $raw ""]
   set raw [regsub -all -- {\[.*?\]} $raw ""]
   set raw [regsub -all -- {\n} $raw " "]
   return $raw
}

# Handler for the public triggers: looks up argv and sends the result to chan.
# Returns without output unless the channel has the wiki flag set.
proc wiki::search {nick uhost hand chan argv} {
   if {![channel get $chan wiki]} { return }
   if {[string length $argv] == 0} {
      $wiki::output_cmd "PRIVMSG $chan :Please provide a term."
      return
   }
   set argv [string trim $argv]
   # Upper case first character
   set argv [string toupper [string index $argv 0]][string range $argv 1 end]
   if {[catch {wiki::fetch $argv} data]} {
      $wiki::output_cmd "PRIVMSG $chan :Error: $data"
      return
   }
   foreach line [wiki::split_line $wiki::max_chars [dict get $data result]] {
      # Stop after max_lines lines and point at the fetched URL instead.
      if {[incr count] &gt; $wiki::max_lines} {
         $wiki::output_cmd "PRIVMSG $chan :Output truncated. [dict get $data url]"
         break
      }
      $wiki::output_cmd "PRIVMSG $chan :$line"
   }
}

# by fedex
# Split str into a list of trimmed pieces, cutting at the last space found at
# or before each boundary; boundaries start at max characters.
proc wiki::split_line {max str} {
   set last [expr {[string length $str] -1}]
   set start 0
   set end [expr {$max -1}]
   set lines []
   while {$start &lt;= $last} {
      if {$last &gt;= $end} {
         set end [string last { } $str $end]
      }
      lappend lines [string trim [string range $str $start $end]]
      set start $end
      set end [expr {$start + $max}]
   }
   return $lines
}

putlog "wiki.tcl loaded"</code></pre></div><p>Statistics: Posted by <a href="https://forum.eggheads.org/memberlist.php?mode=viewprofile&amp;u=12783">Dominatez</a> — Fri Jun 09, 2023 10:35 pm</p><hr />
]]></content>
	</entry>
</feed>
