# # Rubyのインストール場所がC:\RUBYならばC:\RUBY\lib\ruby\site_ruby # # サンプルプログラム # require 'jcode' # require 'html' # $KCODE = 's' # # html = HTMLtokens.new # Dir.glob("*.htm*").each { |name| # html.load(name) # print "タイトル =\n" # p html.between("title") # print "コメント =\n" # p html.find("!--") # print "壁紙 =\n" # p html.findattr("body", "background") # print "画像 =\n" # p html.findattr("img", "src") # print "リンク =\n" # p html.findattr("a", "href") # } require 'jcode' $KCODE = 's' if not $KCODE class HTMLtokens < Struct.new(:tokens, :map) def initialize self.tokens = nil self.map = nil end def fetch(s) self.tokens = Array.new(0) stt = 0 txt = "" tag = "" cmt = "" (0..(s.size - 1)).each { |i| c = s.slice(i, 1) # printf("%s", c) # DEBUG if stt == 0 then if c == "<" then stt = 1 self.tokens.push txt if txt.size > 0 tag = "" else txt = txt + c end elsif stt == 1 then if (tag == "") && (c == "!") then stt = 3 elsif c == ">" then stt = 0 self.tokens.push "<" + tag.strip + ">" txt = "" elsif c == '"' stt = 2 tag = tag + '"' elsif c =~ /[ \t\n ]/ if tag == "/" # nop elsif (tag.size > 0) && (tag.slice(tag.size - 1, 1) != " ") then tag = tag + " " end else tag = tag + c.downcase end elsif stt == 2 then if c == '"' then stt = 1 tag = tag + '"' else tag = tag + c end elsif stt == 3 then # ', etc. stt = 8 tag = c end elsif stt == 4 then # '" then stt = 0 self.tokens.push "" else # comment-body stt = 5 cmt = cmt + "--" + c end elsif stt == 8 then # '" then stt = 0 self.tokens.push "" elsif c == '"' stt = 9 tag = tag + '"' else tag = tag + c end elsif stt == 9 then # ' 0 return true end def parse self.map = Hash.new(nil) stt = 0 (0..(self.tokens.size - 1)).each { |i| if self.tokens[i] =~ /^