Parse Safari bookmark files.

This commit is contained in:
Bastian Kleineidam 2011-12-17 16:38:25 +01:00
parent 925a7166b6
commit 3d9958dfbb
14 changed files with 336 additions and 24 deletions

View file

@ -19,6 +19,7 @@ Features:
the configuration file.
- gui: Add configuration for ignore URL patterns.
Closes: SF bug #3311262
- checking: Support parsing of Safari Bookmark files.
7.2 "Driver" (released 20.10.2011)

View file

@ -0,0 +1,109 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2011 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import os
import sys
import plistlib
try:
import biplist
has_biplist = True
except ImportError:
has_biplist = False
def get_profile_dir ():
    """Return path where all profiles of current user are stored.

    The home directory is read from the HOME environment variable; when
    HOME is unset or empty, os.path.expanduser("~") is used instead of
    raising KeyError. A byte string home directory (Python 2) is decoded
    with the filesystem encoding rather than the implicit ASCII codec, so
    home directories with non-ASCII characters are handled.

    Returns the path of the "Library/Safari" directory below the home
    directory.
    """
    basedir = os.environ.get("HOME") or os.path.expanduser("~")
    if isinstance(basedir, bytes):
        # An implicit ASCII decode would raise UnicodeDecodeError for
        # non-ASCII path names; decode explicitly instead.
        encoding = sys.getfilesystemencoding() or "utf-8"
        basedir = basedir.decode(encoding)
    return os.path.join(basedir, u"Library", u"Safari")
def find_bookmark_file ():
    """Locate the Safari bookmark file of the current user.

    Returns the filename of Bookmarks.plist inside the profile directory
    when running on darwin and both the directory and the file exist;
    in every other case an empty string is returned.
    """
    if sys.platform == 'darwin':
        profile_dir = get_profile_dir()
        candidate = os.path.join(profile_dir, u"Bookmarks.plist")
        if os.path.isdir(profile_dir) and os.path.isfile(candidate):
            return candidate
    return u""
def parse_bookmark_file (filename):
    """Iterate over the bookmarks stored in the given plist file.

    Each item is a tuple (url, name). The bookmarks are yielded in the
    order they are found, without sorting.
    """
    plist_root = get_plist_data_from_file(filename)
    return parse_plist(plist_root)
def parse_bookmark_data (data):
    """Iterate over the bookmarks stored in the given plist content.

    Each item is a tuple (url, name). The bookmarks are yielded in the
    order they are found, without sorting.
    """
    plist_root = get_plist_data_from_string(data)
    return parse_plist(plist_root)
def get_plist_data_from_file (filename):
    """Parse a plist file and return its root object.

    The biplist module is used when it could be imported, otherwise the
    standard plistlib module is the fallback. Content that cannot be
    parsed (eg. not well-formed, or binary without biplist) results in an
    empty dictionary for both parsers, so callers see the same behaviour
    regardless of which module is installed.
    """
    # pick the reader lazily; biplist is only referenced when available
    reader = biplist.readPlist if has_biplist else plistlib.readPlist
    try:
        return reader(filename)
    except Exception:
        # not parseable (eg. not well-formed, or binary)
        return {}
def get_plist_data_from_string (data):
    """Parse plist content given as string and return its root object.

    The biplist module is used when it could be imported, otherwise the
    standard plistlib module is the fallback. Content that cannot be
    parsed (eg. not well-formed, or binary without biplist) results in an
    empty dictionary for both parsers, so callers see the same behaviour
    regardless of which module is installed.
    """
    # pick the reader lazily; biplist is only referenced when available
    if has_biplist:
        reader = biplist.readPlistFromString
    else:
        reader = plistlib.readPlistFromString
    try:
        return reader(data)
    except Exception:
        # not parseable (eg. not well-formed, or binary)
        return {}
# Dictionary keys used by Safari bookmark plist entries.
KEY_URLSTRING = 'URLString'
KEY_URIDICTIONARY = 'URIDictionary'
KEY_CHILDREN = 'Children'
KEY_WEBBOOKMARKTYPE = 'WebBookmarkType'


def parse_plist(entry):
    """Recursively yield (url, title) tuples found in a plist entry.

    Leaf entries yield one tuple; the title falls back to the URL when the
    entry has no usable title dictionary. List entries are searched
    recursively through their children. Every other entry type, as well as
    malformed entries (not a dictionary, leaf without URL, list without
    children), yields nothing instead of raising an exception.
    """
    if not isinstance(entry, dict):
        # eg. a plist whose root or child is an array or a string
        return
    if is_leaf(entry):
        url = entry.get(KEY_URLSTRING)
        if url is not None:
            details = entry.get(KEY_URIDICTIONARY)
            if isinstance(details, dict):
                title = details.get('title', url)
            else:
                title = url
            yield (url, title)
    elif has_children(entry):
        for child in entry.get(KEY_CHILDREN) or ():
            for item in parse_plist(child):
                yield item


def is_leaf (entry):
    """Return true if plist entry is a URL entry."""
    return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeLeaf'


def has_children (entry):
    """Return true if plist entry is a list entry holding child entries."""
    return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeList'

View file

@ -99,6 +99,7 @@ class UrlBase (object):
"text/plain+linkchecker": "text",
"text/plain+opera": "opera",
"text/plain+chromium": "chromium",
"application/x-plist+safari": "safari",
}
# Set maximum file size for downloaded files in bytes.
@ -981,6 +982,15 @@ class UrlBase (object):
self.aggregate, parent_url=self.url, name=name)
self.aggregate.urlqueue.put(url_data)
def parse_safari (self):
    """Read the content as Safari bookmark data and queue each
    bookmark URL for checking, one recursion level below this URL."""
    log.debug(LOG_CHECK, "Parsing Safari bookmarks %s", self)
    from ..bookmarks.safari import parse_bookmark_data
    bookmarks = parse_bookmark_data(self.get_content())
    for bookmark_url, bookmark_name in bookmarks:
        queued = get_url_from(bookmark_url, self.recursion_level+1,
            self.aggregate, parent_url=self.url, name=bookmark_name)
        self.aggregate.urlqueue.put(queued)
def parse_text (self):
"""Parse a text file with one url per line; comment and blank
lines are ignored."""

View file

@ -200,8 +200,12 @@ def guess_mimetype (filename, read=None):
"""Return MIME type of file, or 'application/octet-stream' if it could
not be determined."""
mime, encoding = mimedb.guess_type(filename, strict=False)
basename = os.path.basename(filename)
# Special case for Safari Bookmark files
if not mime and basename == 'Bookmarks.plist':
return 'application/x-plist+safari'
# Special case for Google Chrome Bookmark files.
if not mime and os.path.basename(filename) == 'Bookmarks':
if not mime and basename == 'Bookmarks':
mime = 'text/plain'
# Mime type text/plain can be differentiated further with content reading.
if mime == "text/plain" and read is not None:

View file

@ -185,6 +185,9 @@ class LineEdit (QtGui.QLineEdit):
if find_opera():
action = menu.addAction(name % {"browser": u"Opera"})
action.triggered.connect(lambda: self.setText(find_opera()))
if find_safari():
action = menu.addAction(name % {"browser": u"Safari"})
action.triggered.connect(lambda: self.setText(find_safari()))
def contextMenuEvent (self, event):
"""Handle context menu event."""
@ -215,3 +218,9 @@ def find_opera ():
"""Return Opera bookmark filename or empty string if not found."""
from ..bookmarks.opera import find_bookmark_file
return find_bookmark_file()
def find_safari ():
    """Look up the Safari bookmark file; the result is its filename,
    or an empty string if there is none."""
    # imported lazily, like the other browser lookups in this module
    from ..bookmarks.safari import find_bookmark_file as locate
    return locate()

View file

@ -156,6 +156,18 @@ def has_pyftpdlib ():
need_pyftpdlib = _need_func(has_pyftpdlib, "pyftpdlib")
@memoized
def has_biplib ():
    """Test if the biplist module is available.

    The function keeps its original name for backward compatibility, but
    it probes for ``biplist``, which is the module the Safari bookmark
    parser imports. Probing for ``biplib`` could never succeed.
    """
    try:
        import biplist
        return True
    except ImportError:
        return False


need_biplib = _need_func(has_biplib, "biplist")
# Aliases spelled like the real module name.
has_biplist = has_biplib
need_biplist = need_biplib
@memoized
def has_newsserver (server):
import nntplib
@ -233,5 +245,14 @@ def limit_time (seconds, skip=False):
return run_limited
def get_file (filename=None):
    """
    Build a unicode path below the 'tests/checker/data' directory.

    Without a filename (or with an empty one) the directory itself
    is returned.
    """
    path = os.path.join("tests", "checker", "data")
    if filename:
        path = os.path.join(path, filename)
    return unicode(path)
# When run as a script, print the results of several of the has_*()
# availability checks on one line.
if __name__ == '__main__':
    print has_clamav(), has_network(), has_msgfmt(), has_posix(), has_proxy()

View file

@ -27,6 +27,7 @@ import linkcheck.configuration
import linkcheck.director
import linkcheck.logger
import linkcheck.i18n
from .. import get_file
# helper alias
get_url_from = linkcheck.checker.get_url_from
@ -98,16 +99,6 @@ class TestLogger (linkcheck.logger.Logger):
self.diff.append(line)
def get_file (filename=None):
    """
    Return the unicode name of a file inside the 'data' directory, or
    of the directory itself if no filename is given.
    """
    parts = ["tests", "checker", "data"]
    if filename:
        parts.append(filename)
    return unicode(os.path.join(*parts))
def get_file_url (filename):
    """Convert a filename for use in a file URL: backslashes become
    slashes and a leading drive letter "X:" becomes "/X|"."""
    with_slashes = filename.replace("\\", "/")
    return re.sub("^([a-zA-Z]):", r"/\1|", with_slashes)

Binary file not shown.

View file

@ -0,0 +1,19 @@
url file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist
cache key file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist
real url file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist
name %(datadir)s/plist_binary/Bookmarks.plist
valid
url http://www.example.com/
cache key http://www.example.com/
real url http://www.iana.org/domains/example/
name Imadoofus
info Redirected to `http://www.iana.org/domains/example/'.
valid
url http://www.example.net/ (cached)
cache key http://www.example.net/
real url http://www.iana.org/domains/example/
name Imanotherdoofus
info Redirected to `http://www.iana.org/domains/example/'.
valid

View file

@ -0,0 +1,73 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Children</key>
<array>
<dict>
<key>Title</key>
<string>History</string>
<key>WebBookmarkIdentifier</key>
<string>History</string>
<key>WebBookmarkType</key>
<string>WebBookmarkTypeProxy</string>
<key>WebBookmarkUUID</key>
<string>68DBD24E-CF6B-4D88-9553-ECEC327A619E</string>
</dict>
<dict>
<key>Children</key>
<array>
<dict>
<key>URIDictionary</key>
<dict>
<key>title</key>
<string>Imadoofus</string>
</dict>
<key>URLString</key>
<string>http://www.example.com/</string>
<key>WebBookmarkType</key>
<string>WebBookmarkTypeLeaf</string>
<key>WebBookmarkUUID</key>
<string>A4790F77-B13E-4BE5-8C9C-87D8C86B8B05</string>
</dict>
<dict>
<key>Children</key>
<array>
<dict>
<key>URIDictionary</key>
<dict>
<key>title</key>
<string>Imanotherdoofus</string>
</dict>
<key>URLString</key>
<string>http://www.example.net/</string>
<key>WebBookmarkType</key>
<string>WebBookmarkTypeLeaf</string>
<key>WebBookmarkUUID</key>
<string>C42EBD75-23D8-4C89-AAB3-409E68E3A519</string>
</dict>
</array>
<key>Title</key>
<string>News</string>
<key>WebBookmarkType</key>
<string>WebBookmarkTypeList</string>
<key>WebBookmarkUUID</key>
<string>124497B1-3953-4AF4-9F80-925D33BA02F5</string>
</dict>
</array>
<key>Title</key>
<string>BookmarksBar</string>
<key>WebBookmarkType</key>
<string>WebBookmarkTypeList</string>
<key>WebBookmarkUUID</key>
<string>E4DBB92F-4E11-48C5-BCFD-DF6EDFACD825</string>
</dict>
</array>
<key>WebBookmarkFileVersion</key>
<integer>1</integer>
<key>WebBookmarkType</key>
<string>WebBookmarkTypeList</string>
<key>WebBookmarkUUID</key>
<string>818DDA78-A975-4E0A-97D7-9055915D4A5E</string>
</dict>
</plist>

View file

@ -0,0 +1,21 @@
# To convert from XML to binary format:
# plutil -convert binary1 -o - Bookmarks.plist
url file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist
cache key file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist
real url file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist
name %(datadir)s/plist_xml/Bookmarks.plist
valid
url http://www.example.com/
cache key http://www.example.com/
real url http://www.iana.org/domains/example/
name Imadoofus
info Redirected to `http://www.iana.org/domains/example/'.
valid
url http://www.example.net/ (cached)
cache key http://www.example.net/
real url http://www.iana.org/domains/example/
name Imanotherdoofus
info Redirected to `http://www.iana.org/domains/example/'.
valid

View file

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2011 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Test file parsing.
"""
from . import LinkCheckTest
from .. import need_network
#, need_biplist
import os
class TestBookmarks (LinkCheckTest):
    """
    Check bookmark files of several browsers: link checking and
    content parsing.
    """

    # NOTE: methods starting with an underscore are not picked up as
    # tests by the usual test name matching.

    @need_network
    def _test_firefox_bookmarks (self):
        # places.sqlite is a Firefox 3 bookmark file
        bookmark_file = "places.sqlite"
        self.file_test(bookmark_file)

    @need_network
    def _test_opera_bookmarks (self):
        # opera6.adr is an Opera bookmark file
        bookmark_file = "opera6.adr"
        self.file_test(bookmark_file)

    @need_network
    def _test_chromium_bookmarks (self):
        # Bookmarks is a Chromium / Google Chrome bookmark file
        bookmark_file = "Bookmarks"
        self.file_test(bookmark_file)

    @need_network
    def test_safari_bookmarks_xml (self):
        # Safari bookmarks stored as plaintext (XML) plist
        bookmark_file = os.path.join("plist_xml", "Bookmarks.plist")
        self.file_test(bookmark_file)

    @need_network
    def test_safari_bookmarks_binary (self):
        # Safari bookmarks stored as binary plist
        # NOTE(review): presumably needs the biplist module to parse the
        # binary format - confirm whether a skip decorator is wanted here.
        bookmark_file = os.path.join("plist_binary", "Bookmarks.plist")
        self.file_test(bookmark_file)

View file

@ -61,18 +61,6 @@ class TestFile (LinkCheckTest):
def test_urllist (self):
    # text file with a list of URLs
    url_list_file = "urllist.txt"
    self.file_test(url_list_file)
def test_firefox_bookmarks (self):
    # places.sqlite is a Firefox 3 bookmark file
    bookmark_file = "places.sqlite"
    self.file_test(bookmark_file)
def test_opera_bookmarks (self):
    # opera6.adr is an Opera bookmark file
    bookmark_file = "opera6.adr"
    self.file_test(bookmark_file)
def test_chromium_bookmarks (self):
    # Bookmarks is a Google Chrome bookmark file
    bookmark_file = "Bookmarks"
    self.file_test(bookmark_file)
def test_directory_listing (self):
# unpack non-unicode filename which cannot be stored
# in the SF subversion repository

View file

@ -19,11 +19,14 @@ Test file utility functions.
"""
import unittest
import os
from . import get_file
import linkcheck.fileutil
# A file that exists: this test module itself.
file_existing = __file__
# A file name used by the tests as a non-existing file.
file_non_existing = "XXX.i_dont_exist"
class TestFileutil (unittest.TestCase):
"""Test file utility functions."""
@ -31,8 +34,17 @@ class TestFileutil (unittest.TestCase):
self.assertTrue(linkcheck.fileutil.get_size(file_existing) > 0)
self.assertEqual(linkcheck.fileutil.get_size(file_non_existing), -1)
def test_mtime (self):
    """Check get_mtime(): the result is positive for an existing file
    and zero for a non-existing one."""
    # the module level file names are used; no local file name is needed
    self.assertTrue(linkcheck.fileutil.get_mtime(file_existing) > 0)
    self.assertEqual(linkcheck.fileutil.get_mtime(file_non_existing), 0)
def mime_test (self, filename, mime_expected):
    """Assert that the MIME type guessed for a test data file equals
    the expected one."""
    path = get_file(filename)
    guessed = linkcheck.fileutil.guess_mimetype(path)
    self.assertEqual(guessed, mime_expected)
def test_mime (self):
    """Check MIME type guessing of the binary and XML Safari bookmark
    test files."""
    expected = "application/x-plist+safari"
    for dirname in ("plist_binary", "plist_xml"):
        self.mime_test(os.path.join(dirname, "Bookmarks.plist"), expected)