mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Parse Safari bookmark files.
This commit is contained in:
parent
925a7166b6
commit
3d9958dfbb
14 changed files with 336 additions and 24 deletions
|
|
@ -19,6 +19,7 @@ Features:
|
|||
the configuration file.
|
||||
- gui: Add configuration for ignore URL patterns.
|
||||
Closes: SF bug #3311262
|
||||
- checking: Support parsing of Safari Bookmark files.
|
||||
|
||||
|
||||
7.2 "Driver" (released 20.10.2011)
|
||||
|
|
|
|||
109
linkcheck/bookmarks/safari.py
Normal file
109
linkcheck/bookmarks/safari.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2011 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import plistlib
|
||||
try:
|
||||
import biplist
|
||||
has_biplist = True
|
||||
except ImportError:
|
||||
has_biplist = False
|
||||
|
||||
|
||||
def get_profile_dir ():
    """Return the directory where Safari stores data of the current user.

    The result is ``<home>/Library/Safari``. The home directory is
    determined with os.path.expanduser(), so an unset HOME environment
    variable does not raise a KeyError. The directory is not checked
    for existence.
    """
    basedir = os.path.expanduser("~")
    if isinstance(basedir, bytes):
        # Python 2 returns a byte string here. Decode it with the
        # filesystem encoding rather than the implicit ASCII codec, which
        # fails for home directories containing non-ASCII characters.
        basedir = basedir.decode(sys.getfilesystemencoding() or "utf-8")
    return os.path.join(basedir, u"Library", u"Safari")
|
||||
|
||||
|
||||
def find_bookmark_file ():
    """Locate the Safari bookmark file of the current user.

    Only Mac OS X (sys.platform 'darwin') is searched. The result is the
    name of the Bookmarks.plist file inside the Safari profile directory,
    or an empty string if the platform does not match or the file does
    not exist.
    """
    if sys.platform == 'darwin':
        profile_dir = get_profile_dir()
        candidate = os.path.join(profile_dir, u"Bookmarks.plist")
        if os.path.isdir(profile_dir) and os.path.isfile(candidate):
            return candidate
    return u""
|
||||
|
||||
|
||||
def parse_bookmark_file (filename):
    """Iterate over the bookmarks stored in the given plist file.

    Each item is a tuple (url, name). The items are not sorted.
    """
    plist = get_plist_data_from_file(filename)
    return parse_plist(plist)
|
||||
|
||||
|
||||
def parse_bookmark_data (data):
    """Iterate over the bookmarks stored in the given plist string.

    Each item is a tuple (url, name). The items are not sorted.
    """
    plist = get_plist_data_from_string(data)
    return parse_plist(plist)
|
||||
|
||||
|
||||
def get_plist_data_from_file (filename):
    """Parse the plist file with the given name and return its data.

    The biplist module is used when it could be imported, else the
    standard plistlib module. With either parser, content that cannot be
    parsed results in an empty dictionary instead of a parser-specific
    exception.
    """
    if has_biplist:
        try:
            return biplist.readPlist(filename)
        except (biplist.InvalidPlistException,
                biplist.NotBinaryPlistException):
            # not parseable (eg. not well-formed); behave like the
            # plistlib fallback below
            return {}
    # fall back to normal plistlib
    try:
        return plistlib.readPlist(filename)
    except Exception:
        # not parseable (eg. not well-formed, or binary)
        return {}
|
||||
|
||||
|
||||
def get_plist_data_from_string (data):
    """Parse the given plist string and return its data.

    The biplist module is used when it could be imported, else the
    standard plistlib module. With either parser, content that cannot be
    parsed results in an empty dictionary instead of a parser-specific
    exception.
    """
    if has_biplist:
        try:
            return biplist.readPlistFromString(data)
        except (biplist.InvalidPlistException,
                biplist.NotBinaryPlistException):
            # not parseable (eg. not well-formed); behave like the
            # plistlib fallback below
            return {}
    # fall back to normal plistlib
    try:
        return plistlib.readPlistFromString(data)
    except Exception:
        # not parseable (eg. not well-formed, or binary)
        return {}
|
||||
|
||||
|
||||
# Keys looked up in the dictionaries of a parsed Safari bookmark plist.
KEY_URLSTRING = 'URLString'
KEY_URIDICTIONARY = 'URIDictionary'
KEY_CHILDREN = 'Children'
KEY_WEBBOOKMARKTYPE = 'WebBookmarkType'


def parse_plist(entry):
    """Yield a (url, title) tuple for every bookmark below a plist entry.

    A leaf entry yields its URL together with the title stored in its
    URI dictionary; the URL itself is used as title if none is stored.
    A list entry is searched recursively through its children. Entries
    of any other type yield nothing.

    Incomplete entries are tolerated: a leaf without URL is skipped, and
    a list without children (eg. an empty folder) yields nothing.
    """
    if is_leaf(entry):
        url = entry.get(KEY_URLSTRING)
        if url:
            # the URI dictionary may be missing altogether
            uridict = entry.get(KEY_URIDICTIONARY) or {}
            yield (url, uridict.get('title', url))
    elif has_children(entry):
        # the children key may be missing for empty lists
        for child in entry.get(KEY_CHILDREN) or ():
            for item in parse_plist(child):
                yield item


def is_leaf (entry):
    """Return true if plist entry is a URL entry."""
    return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeLeaf'


def has_children (entry):
    """Return true if plist entry is a list of other entries."""
    return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeList'
|
||||
|
|
@ -99,6 +99,7 @@ class UrlBase (object):
|
|||
"text/plain+linkchecker": "text",
|
||||
"text/plain+opera": "opera",
|
||||
"text/plain+chromium": "chromium",
|
||||
"application/x-plist+safari": "safari",
|
||||
}
|
||||
|
||||
# Set maximum file size for downloaded files in bytes.
|
||||
|
|
@ -981,6 +982,15 @@ class UrlBase (object):
|
|||
self.aggregate, parent_url=self.url, name=name)
|
||||
self.aggregate.urlqueue.put(url_data)
|
||||
|
||||
def parse_safari (self):
    """Read the content as Safari bookmark data and queue each bookmark
    as a new URL one recursion level below this one."""
    log.debug(LOG_CHECK, "Parsing Safari bookmarks %s", self)
    from ..bookmarks.safari import parse_bookmark_data
    bookmarks = parse_bookmark_data(self.get_content())
    for url, name in bookmarks:
        self.aggregate.urlqueue.put(
            get_url_from(url, self.recursion_level+1, self.aggregate,
                         parent_url=self.url, name=name))
|
||||
|
||||
def parse_text (self):
|
||||
"""Parse a text file with one url per line; comment and blank
|
||||
lines are ignored."""
|
||||
|
|
|
|||
|
|
@ -200,8 +200,12 @@ def guess_mimetype (filename, read=None):
|
|||
"""Return MIME type of file, or 'application/octet-stream' if it could
|
||||
not be determined."""
|
||||
mime, encoding = mimedb.guess_type(filename, strict=False)
|
||||
basename = os.path.basename(filename)
|
||||
# Special case for Safari Bookmark files
|
||||
if not mime and basename == 'Bookmarks.plist':
|
||||
return 'application/x-plist+safari'
|
||||
# Special case for Google Chrome Bookmark files.
|
||||
if not mime and os.path.basename(filename) == 'Bookmarks':
|
||||
if not mime and basename == 'Bookmarks':
|
||||
mime = 'text/plain'
|
||||
# Mime type text/plain can be differentiated further with content reading.
|
||||
if mime == "text/plain" and read is not None:
|
||||
|
|
|
|||
|
|
@ -185,6 +185,9 @@ class LineEdit (QtGui.QLineEdit):
|
|||
if find_opera():
|
||||
action = menu.addAction(name % {"browser": u"Opera"})
|
||||
action.triggered.connect(lambda: self.setText(find_opera()))
|
||||
if find_safari():
|
||||
action = menu.addAction(name % {"browser": u"Safari"})
|
||||
action.triggered.connect(lambda: self.setText(find_safari()))
|
||||
|
||||
def contextMenuEvent (self, event):
|
||||
"""Handle context menu event."""
|
||||
|
|
@ -215,3 +218,9 @@ def find_opera ():
|
|||
"""Return Opera bookmark filename or empty string if not found."""
|
||||
from ..bookmarks.opera import find_bookmark_file
|
||||
return find_bookmark_file()
|
||||
|
||||
|
||||
def find_safari ():
    """Look up the Safari bookmark file; the result is its filename, or
    an empty string if there is none."""
    from ..bookmarks.safari import find_bookmark_file as find_safari_file
    return find_safari_file()
|
||||
|
|
|
|||
|
|
@ -156,6 +156,18 @@ def has_pyftpdlib ():
|
|||
need_pyftpdlib = _need_func(has_pyftpdlib, "pyftpdlib")
|
||||
|
||||
|
||||
@memoized
def has_biplist ():
    """Test if the biplist module is available."""
    try:
        # The module is named biplist; the former "import biplib" could
        # never succeed, so this check always reported False.
        import biplist
        return True
    except ImportError:
        return False

need_biplist = _need_func(has_biplist, "biplist")

# Keep the original (misspelled) names working for existing users.
has_biplib = has_biplist
need_biplib = need_biplist
|
||||
|
||||
|
||||
@memoized
|
||||
def has_newsserver (server):
|
||||
import nntplib
|
||||
|
|
@ -233,5 +245,14 @@ def limit_time (seconds, skip=False):
|
|||
return run_limited
|
||||
|
||||
|
||||
def get_file (filename=None):
    """
    Build the unicode name of the test 'data' directory, or of the given
    file inside that directory.
    """
    parts = ["tests", "checker", "data"]
    if filename:
        parts.append(filename)
    return unicode(os.path.join(*parts))
|
||||
|
||||
if __name__ == '__main__':
|
||||
print has_clamav(), has_network(), has_msgfmt(), has_posix(), has_proxy()
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ import linkcheck.configuration
|
|||
import linkcheck.director
|
||||
import linkcheck.logger
|
||||
import linkcheck.i18n
|
||||
from .. import get_file
|
||||
|
||||
# helper alias
|
||||
get_url_from = linkcheck.checker.get_url_from
|
||||
|
|
@ -98,16 +99,6 @@ class TestLogger (linkcheck.logger.Logger):
|
|||
self.diff.append(line)
|
||||
|
||||
|
||||
def get_file (filename=None):
|
||||
"""
|
||||
Get file name located within 'data' directory.
|
||||
"""
|
||||
directory = os.path.join("tests", "checker", "data")
|
||||
if filename:
|
||||
return unicode(os.path.join(directory, filename))
|
||||
return unicode(directory)
|
||||
|
||||
|
||||
def get_file_url (filename):
    """Convert a filename to the path notation of the expected test
    output: backslashes become slashes, and a leading drive letter
    "X:" becomes "/X|"."""
    slashed = filename.replace("\\", "/")
    return re.sub("^([a-zA-Z]):", r"/\1|", slashed)
|
||||
|
||||
|
|
|
|||
BIN
tests/checker/data/plist_binary/Bookmarks.plist
Normal file
BIN
tests/checker/data/plist_binary/Bookmarks.plist
Normal file
Binary file not shown.
19
tests/checker/data/plist_binary/Bookmarks.plist.result
Normal file
19
tests/checker/data/plist_binary/Bookmarks.plist.result
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
url file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist
|
||||
cache key file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist
|
||||
real url file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist
|
||||
name %(datadir)s/plist_binary/Bookmarks.plist
|
||||
valid
|
||||
|
||||
url http://www.example.com/
|
||||
cache key http://www.example.com/
|
||||
real url http://www.iana.org/domains/example/
|
||||
name Imadoofus
|
||||
info Redirected to `http://www.iana.org/domains/example/'.
|
||||
valid
|
||||
|
||||
url http://www.example.net/ (cached)
|
||||
cache key http://www.example.net/
|
||||
real url http://www.iana.org/domains/example/
|
||||
name Imanotherdoofus
|
||||
info Redirected to `http://www.iana.org/domains/example/'.
|
||||
valid
|
||||
73
tests/checker/data/plist_xml/Bookmarks.plist
Normal file
73
tests/checker/data/plist_xml/Bookmarks.plist
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>Children</key>
|
||||
<array>
|
||||
<dict>
|
||||
<key>Title</key>
|
||||
<string>History</string>
|
||||
<key>WebBookmarkIdentifier</key>
|
||||
<string>History</string>
|
||||
<key>WebBookmarkType</key>
|
||||
<string>WebBookmarkTypeProxy</string>
|
||||
<key>WebBookmarkUUID</key>
|
||||
<string>68DBD24E-CF6B-4D88-9553-ECEC327A619E</string>
|
||||
</dict>
|
||||
<dict>
|
||||
<key>Children</key>
|
||||
<array>
|
||||
<dict>
|
||||
<key>URIDictionary</key>
|
||||
<dict>
|
||||
<key>title</key>
|
||||
<string>Imadoofus</string>
|
||||
</dict>
|
||||
<key>URLString</key>
|
||||
<string>http://www.example.com/</string>
|
||||
<key>WebBookmarkType</key>
|
||||
<string>WebBookmarkTypeLeaf</string>
|
||||
<key>WebBookmarkUUID</key>
|
||||
<string>A4790F77-B13E-4BE5-8C9C-87D8C86B8B05</string>
|
||||
</dict>
|
||||
<dict>
|
||||
<key>Children</key>
|
||||
<array>
|
||||
<dict>
|
||||
<key>URIDictionary</key>
|
||||
<dict>
|
||||
<key>title</key>
|
||||
<string>Imanotherdoofus</string>
|
||||
</dict>
|
||||
<key>URLString</key>
|
||||
<string>http://www.example.net/</string>
|
||||
<key>WebBookmarkType</key>
|
||||
<string>WebBookmarkTypeLeaf</string>
|
||||
<key>WebBookmarkUUID</key>
|
||||
<string>C42EBD75-23D8-4C89-AAB3-409E68E3A519</string>
|
||||
</dict>
|
||||
</array>
|
||||
<key>Title</key>
|
||||
<string>News</string>
|
||||
<key>WebBookmarkType</key>
|
||||
<string>WebBookmarkTypeList</string>
|
||||
<key>WebBookmarkUUID</key>
|
||||
<string>124497B1-3953-4AF4-9F80-925D33BA02F5</string>
|
||||
</dict>
|
||||
</array>
|
||||
<key>Title</key>
|
||||
<string>BookmarksBar</string>
|
||||
<key>WebBookmarkType</key>
|
||||
<string>WebBookmarkTypeList</string>
|
||||
<key>WebBookmarkUUID</key>
|
||||
<string>E4DBB92F-4E11-48C5-BCFD-DF6EDFACD825</string>
|
||||
</dict>
|
||||
</array>
|
||||
<key>WebBookmarkFileVersion</key>
|
||||
<integer>1</integer>
|
||||
<key>WebBookmarkType</key>
|
||||
<string>WebBookmarkTypeList</string>
|
||||
<key>WebBookmarkUUID</key>
|
||||
<string>818DDA78-A975-4E0A-97D7-9055915D4A5E</string>
|
||||
</dict>
|
||||
</plist>
|
||||
21
tests/checker/data/plist_xml/Bookmarks.plist.result
Normal file
21
tests/checker/data/plist_xml/Bookmarks.plist.result
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
# To convert from XML to binary format:
|
||||
# plutil -convert binary1 -o - Bookmarks.plist
|
||||
url file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist
|
||||
cache key file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist
|
||||
real url file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist
|
||||
name %(datadir)s/plist_xml/Bookmarks.plist
|
||||
valid
|
||||
|
||||
url http://www.example.com/
|
||||
cache key http://www.example.com/
|
||||
real url http://www.iana.org/domains/example/
|
||||
name Imadoofus
|
||||
info Redirected to `http://www.iana.org/domains/example/'.
|
||||
valid
|
||||
|
||||
url http://www.example.net/ (cached)
|
||||
cache key http://www.example.net/
|
||||
real url http://www.iana.org/domains/example/
|
||||
name Imanotherdoofus
|
||||
info Redirected to `http://www.iana.org/domains/example/'.
|
||||
valid
|
||||
54
tests/checker/test_bookmarks.py
Normal file
54
tests/checker/test_bookmarks.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2011 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Test bookmark file parsing.
|
||||
"""
|
||||
from . import LinkCheckTest
|
||||
from .. import need_network
|
||||
#, need_biplist
|
||||
import os
|
||||
|
||||
|
||||
class TestBookmarks (LinkCheckTest):
    """
    Check browser bookmark files and the links parsed from them.
    """

    @need_network
    def _test_firefox_bookmarks (self):
        # bookmark file as written by firefox 3
        bookmark_file = "places.sqlite"
        self.file_test(bookmark_file)

    @need_network
    def _test_opera_bookmarks (self):
        # bookmark file as written by Opera
        bookmark_file = "opera6.adr"
        self.file_test(bookmark_file)

    @need_network
    def _test_chromium_bookmarks (self):
        # bookmark file as written by Chromium and Google Chrome
        bookmark_file = "Bookmarks"
        self.file_test(bookmark_file)

    @need_network
    def test_safari_bookmarks_xml (self):
        # Safari bookmark file in the plaintext (XML) plist format
        bookmark_file = os.path.join("plist_xml", "Bookmarks.plist")
        self.file_test(bookmark_file)

    @need_network
    def test_safari_bookmarks_binary (self):
        # Safari bookmark file in the binary plist format
        bookmark_file = os.path.join("plist_binary", "Bookmarks.plist")
        self.file_test(bookmark_file)
|
||||
|
|
@ -61,18 +61,6 @@ class TestFile (LinkCheckTest):
|
|||
def test_urllist (self):
    # run the file check on the URL list data file
    url_list = "urllist.txt"
    self.file_test(url_list)
|
||||
|
||||
def test_firefox_bookmarks (self):
|
||||
# firefox 3 bookmark file parsing
|
||||
self.file_test("places.sqlite")
|
||||
|
||||
def test_opera_bookmarks (self):
|
||||
# Opera bookmark file parsing
|
||||
self.file_test("opera6.adr")
|
||||
|
||||
def test_chromium_bookmarks (self):
|
||||
# Google Chrome bookmark file parsing
|
||||
self.file_test("Bookmarks")
|
||||
|
||||
def test_directory_listing (self):
|
||||
# unpack non-unicode filename which cannot be stored
|
||||
# in the SF subversion repository
|
||||
|
|
|
|||
|
|
@ -19,11 +19,14 @@ Test file utility functions.
|
|||
"""
|
||||
|
||||
import unittest
|
||||
import os
|
||||
from . import get_file
|
||||
import linkcheck.fileutil
|
||||
|
||||
file_existing = __file__
|
||||
file_non_existing = "XXX.i_dont_exist"
|
||||
|
||||
|
||||
class TestFileutil (unittest.TestCase):
|
||||
"""Test file utility functions."""
|
||||
|
||||
|
|
@ -31,8 +34,17 @@ class TestFileutil (unittest.TestCase):
|
|||
self.assertTrue(linkcheck.fileutil.get_size(file_existing) > 0)
|
||||
self.assertEqual(linkcheck.fileutil.get_size(file_non_existing), -1)
|
||||
|
||||
|
||||
def test_mtime (self):
    # An existing file must report a positive modification time,
    # a missing file is expected to report zero.
    self.assertTrue(linkcheck.fileutil.get_mtime(file_existing) > 0)
    self.assertEqual(linkcheck.fileutil.get_mtime(file_non_existing), 0)
|
||||
|
||||
def mime_test (self, filename, mime_expected):
    # Helper: guess the MIME type of a file from the test data
    # directory and compare it with the expected one.
    path = get_file(filename)
    self.assertEqual(linkcheck.fileutil.guess_mimetype(path), mime_expected)
|
||||
|
||||
def test_mime (self):
    # Both the binary and the XML Safari bookmark file must be
    # detected by their filename.
    for subdir in ("plist_binary", "plist_xml"):
        bookmark_file = os.path.join(subdir, "Bookmarks.plist")
        self.mime_test(bookmark_file, "application/x-plist+safari")
|
||||
|
|
|
|||
Loading…
Reference in a new issue