(.+?)}m) { |placeholder| placeholder = placeholder.gsub(/\n/, '###BR###') } string.gsub!(%r{
(.+?)
}m) { |placeholder|
placeholder = placeholder.gsub(/\n/, '###BR###')
}
# insert spaces on [A-z]\n[A-z]
string.gsub!(/([A-z])[[:space:]]([A-z])/m, '\1 \2')
# remove all new lines
string.gsub!(/(\n\r|\r\r\n|\r\n|\n)/, '')
# blockquote handling
string.gsub!(%r{]*)>(.+?)}m) { "\n" + $2.html2text(true).gsub(/^(.*)$/, '> \1') + "\n" } # pre/code handling 2/2 string.gsub!(/###BR###/, "\n") # add counting string.gsub!(/
}im, "\n")
string.gsub!(%r{<(div|p|pre|br|table|tr|h)(|/| [^>]*)>}i, "\n")
string.gsub!(%r{(p|br|div)(|[[:space:]].+?)>}i, "\n")
string.gsub!(%r{}i, ' ')
# strip all other tags
string.gsub!(/\<.+?\>/, '')
# replace multiple spaces with one
string.gsub!(/ /, ' ')
# add hyperlinks
if strict
string.gsub!(%r{([[:space:]])((http|https|ftp|tel)://.+?|(www..+?))([[:space:]]|\.[[:space:]]|,[[:space:]])}mxi) { |_placeholder|
pre = $1
content = $2
post = $5
if content =~ /^www/i
content = "http://#{content}"
end
placeholder = if content =~ /^(http|https|ftp|tel)/i
"#{pre}######LINKRAW:#{content}#######{post}"
else
"#{pre}#{content}#{post}"
end
}
end
# try HTMLEntities first; if it fails on invalid characters, fall back to manual decoding
begin
coder = HTMLEntities.new
string = coder.decode(string)
rescue
# strip all & < > "
string.gsub!('&', '&')
string.gsub!('<', '<')
string.gsub!('>', '>')
string.gsub!('"', '"')
string.gsub!(' ', ' ')
# decode decimal numeric html entities like "&#8211;"
string.gsub!(/(&\#(\d+);?)/x) {
$2.chr
}
# decode hexadecimal numeric html entities like "&#x3d;"
string.gsub!(/(&\#[xX]([0-9a-fA-F]+);?)/x) {
chr_orig = $1
hex = $2.hex
if hex
chr = hex.chr
if chr
chr_orig = chr
else
chr_orig
end
else
chr_orig
end
# check valid encoding
begin
if !chr_orig.encode('UTF-8').valid_encoding?
chr_orig = '?'
end
rescue
chr_orig = '?'
end
chr_orig
}
end
# remove trailing blanks at the end of each line
string.gsub!(/[[:blank:]]+$/, '')
# remove double multiple empty lines
string.gsub!(/\n\n\n+/, "\n\n")
# add extracted links
if link_list != ''
string += "\n\n\n" + link_list
end
# remove double multiple empty lines
string.gsub!(/\n\n\n+/, "\n\n")
string.strip
end
=begin
html = text_string.text2html
=end
# Converts this plain-text string to HTML.
#
# HTML special characters are escaped via CGI.escapeHTML and every newline
# is turned into a <br> tag, so line breaks survive when the result is
# rendered as HTML. The receiver itself is not modified.
#
# Returns a new String.
def text2html
  text = CGI.escapeHTML(self)
  # The replacement must be an HTML line break; replacing "\n" with "\n"
  # would leave the text unchanged.
  text.gsub!(/\n/, '<br>')
  # chomp still drops a lone trailing "\r", as before.
  text.chomp
end
=begin
html = html_string.html2html_strict
=end
# Produces a reduced HTML rendering of this string.
#
# Steps, in order (the order matters because the generic placeholder rule
# at the end would otherwise swallow the specific ones):
#   1. convert self with html2text(true, true)
#   2. let signature_identify tag that text in place (force is passed through)
#   3. convert the text with text2html
#   4. resolve the "######...######" placeholders:
#        LINKEXT:<url>/TEXT:<text>  -> <text>
#        LINKRAW:<url>              -> <url>
#        first SIGNATURE_MARKER     -> marker_template
#        remaining SIGNATURE_MARKER -> removed
#        any other ######x######    -> <x>
#
# force - forwarded unchanged to signature_identify (default: false).
#
# Returns the resulting String with a trailing line break removed.
def html2html_strict(force = false)
  plain = html2text(true, true)
  plain.signature_identify(force)

  signature_marker = /######SIGNATURE_MARKER######/
  # NOTE(review): the template is empty here, so the first marker is simply
  # dropped like all the others - confirm this is intended.
  marker_template = ''

  plain.text2html
       .gsub(%r{######LINKEXT:(.+?)/TEXT:(.+?)######}, '\2')
       .gsub(/######LINKRAW:(.+?)######/, '\1')
       .sub(signature_marker, marker_template)
       .gsub(signature_marker, '')
       .gsub(/######(.+?)######/, '<\1>')
       .chomp
end
def signature_identify(force = false)
string = self
# skip signature detection if the text has fewer than 10 lines and fewer than 300 characters
if !force
lines = string.split("\n")
return if lines.count < 10 && string.length < 300
end
marker = '######SIGNATURE_MARKER######'
# search for signature separator "--\n"
string.sub!(/^\s{0,2}--\s{0,2}$/) { |placeholder|
placeholder = "#{marker}#{placeholder}"
}
map = {}
# Apple Mail
# On 01/04/15 10:55, Bob Smith wrote:
map['apple-en'] = '^(On)[[:space:]].{6,20}[[:space:]].{3,10}[[:space:]].{1,250}[[:space:]](wrote):'
# Am 03.04.2015 um 20:58 schrieb Martin Edenhofer