linkchecker/linkcheck/strformat.py
2004-08-16 19:20:53 +00:00

151 lines
4.1 KiB
Python

# -*- coding: iso-8859-1 -*-
"""Various string utility functions. Note that these functions are not
necessarily optimised for large strings, so use with care."""
# Copyright (C) 2000-2004 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import re
import textwrap
import sys
import os
import time
from linkcheck.i18n import _
def unquote (s):
"""if string s is not empty, strip quotes from s"""
if not s:
return ""
if len(s) < 2:
return s
if s[0] in ("\"'"):
s = s[1:]
if s[-1] in ("\"'"):
s = s[:-1]
return s
_para_ro = re.compile(r"(?:\r\n|\r|\n)(?:(?:\r\n|\r|\n)\s*)+")
def get_paragraphs (text):
"""A new paragraph is considered to start at a line which follows
one or more blank lines (lines containing nothing or just spaces).
The first line of the text also starts a paragraph.
"""
if not text:
return []
return _para_ro.split(text)
def wrap (text, width, **kwargs):
"""Adjust lines of text to be not longer than width. The text will be
returned unmodified if width <= 0.
See textwrap.wrap() for a list of supported kwargs.
Returns text with lines no longer than given width.
"""
if width <= 0:
return text
if not text:
return ""
ret = []
for para in get_paragraphs(text):
ret.extend(textwrap.wrap(para.strip(), width, **kwargs))
return os.linesep.join(ret)
def get_line_number (s, index):
"""Return the line number of s[index]. Lines are assumed to be separated
by the ASCII character '\\n'"""
i = 0
if index < 0:
index = 0
line = 1
while i < index:
if s[i] == '\n':
line += 1
i += 1
return line
def paginate (text, lines=22):
"""print text in pages of lines"""
curline = 1
for line in text.splitlines():
print line
curline += 1
isatty = hasattr(sys.stdin, "isatty") and sys.stdin.isatty()
if curline >= lines and isatty:
curline = 1
print "press return to continue..."
sys.stdin.read(1)
_markup_re = re.compile("<.*?>", re.DOTALL)
def remove_markup (s):
"""remove all <*> html markup tags from s"""
mo = _markup_re.search(s)
while mo:
s = s[0:mo.start()] + s[mo.end():]
mo = _markup_re.search(s)
return s
def strsize (b):
"""Return human representation of bytes b. A negative number of bytes
raises a value error.
"""
if b < 0:
raise ValueError("Invalid negative byte number")
if b == 1:
return "%d Byte" % b
if b < 1024:
return "%d Bytes" % b
b /= 1024.0
if b < 1024:
return "%.2f kB" % b
b /= 1024.0
if b < 1024:
return "%.2f MB" % b
b /= 1024.0
return "%.2f GB"
def strtime (t):
"""return ISO 8601 formatted time"""
return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(t)) + \
strtimezone()
def strduration (duration):
"""return translated and formatted time duration"""
name = _("seconds")
if duration > 60:
duration = duration / 60
name = _("minutes")
if duration > 60:
duration = duration / 60
name = _("hours")
return " %.3f %s" % (duration, name)
def strtimezone ():
"""return timezone info, %z on some platforms, but not supported on all"""
if time.daylight:
zone = time.altzone
else:
zone = time.timezone
return "%+04d" % int(-zone/3600)